4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
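# Illustrative sketch (not part of this module): an LU's Exec can hand
# follow-up work back to mcpu by returning an instance of this class.  The
# opcode used below is only a placeholder.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1)],   # first job, one opcode
#             [opcodes.OpTestDelay(duration=2)]]   # second, independent job
#     return ResultWithJobs(jobs, result="submitted")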
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left purely as a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods need no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
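# Illustrative sketch (hypothetical LU, not part of this module): a minimal
# ExpandNames that expands an instance name and declares the corresponding
# lock, following the rules documented above.
#
#   def ExpandNames(self):
#     self.op.instance_name = _ExpandInstanceName(self.cfg,
#                                                 self.op.instance_name)
#     self.needed_locks = {
#       locking.LEVEL_INSTANCE: [self.op.instance_name],
#       }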
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. No nodes should be returned as an
318 empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the "unused argument" and "could
345 # be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
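# Illustrative usage (hypothetical LU): most instance LUs can implement
# ExpandNames simply as
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()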
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primary or secondary nodes, if needed.
383 It should be called from DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check whether we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
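# Illustrative sketch of the pattern described above (hypothetical LU):
# ExpandNames leaves the node level empty and requests a recalculation,
# DeclareLocks then fills it in once the instance lock is held.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()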
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @type use_default: boolean
755 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @type use_none: boolean
758 @param use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
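# Illustrative examples of the semantics described above (made-up values):
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   -> {"b": 2, "c": 3}      # "a" is reset (removed), "c" is added
#
#   _GetUpdatedParams({"a": 1}, {"a": None}, use_none=True)
#   -> {}                    # None deletes the key when use_none is set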
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of a instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster'" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
835 new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846 """Combines the hv state from an opcode with the one of the object
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868 """Combines the disk state from an opcode with the one of the object
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905 should_release = names.__contains__
907 should_release = lambda name: name not in keep
909 should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
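# Illustrative usage (hypothetical LU): after narrowing down which nodes are
# really needed, drop every node lock except the ones to keep, or release a
# whole level at once.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.target_node])
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)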
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
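# Illustrative result shape (made-up names): an instance "inst1" with a
# logical volume "xenvg/lv1" on "node1" yields
#   {("node1", "xenvg/lv1"): "inst1"}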
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 # pylint: disable=W0702
962 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node does not support the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
1120 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1121 """Computes if value is in the desired range.
1123 @param name: name of the parameter for which we perform the check
1124 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1126 @param ipolicy: dictionary containing min, max and std values
1127 @param value: actual value that we want to use
1128 @return: None or element not meeting the criteria
1132 if value in [None, constants.VALUE_AUTO]:
1134 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1135 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1136 if value > max_v or min_v > value:
1138 fqn = "%s/%s" % (name, qualifier)
1141 return ("%s value %s is not in range [%s, %s]" %
1142 (fqn, value, min_v, max_v))
1146 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1147 nic_count, disk_sizes, spindle_use,
1148 _compute_fn=_ComputeMinMaxSpec):
1149 """Verifies ipolicy against provided specs.
1152 @param ipolicy: The ipolicy
1154 @param mem_size: The memory size
1155 @type cpu_count: int
1156 @param cpu_count: Used cpu cores
1157 @type disk_count: int
1158 @param disk_count: Number of disks used
1159 @type nic_count: int
1160 @param nic_count: Number of nics used
1161 @type disk_sizes: list of ints
1162 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1163 @type spindle_use: int
1164 @param spindle_use: The number of spindles this instance uses
1165 @param _compute_fn: The compute function (unittest only)
1166 @return: A list of violations, or an empty list if no violations are found
1169 assert disk_count == len(disk_sizes)
1172 (constants.ISPEC_MEM_SIZE, "", mem_size),
1173 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1174 (constants.ISPEC_DISK_COUNT, "", disk_count),
1175 (constants.ISPEC_NIC_COUNT, "", nic_count),
1176 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1177 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1178 for idx, d in enumerate(disk_sizes)]
1181 (_compute_fn(name, qualifier, ipolicy, value)
1182 for (name, qualifier, value) in test_settings))
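# Illustrative usage (made-up values): check a 2-disk, 1-NIC spec against a
# policy; an empty result means no violations.
#
#   violations = _ComputeIPolicySpecViolation(ipolicy, 1024, 2, 2, 1,
#                                             [10240, 20480], 1)
#   if violations:
#     raise errors.OpPrereqError(utils.CommaJoin(violations),
#                                errors.ECODE_INVAL)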
1185 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1186 _compute_fn=_ComputeIPolicySpecViolation):
1187 """Compute if instance meets the specs of ipolicy.
1190 @param ipolicy: The ipolicy to verify against
1191 @type instance: L{objects.Instance}
1192 @param instance: The instance to verify
1193 @param _compute_fn: The function to verify ipolicy (unittest only)
1194 @see: L{_ComputeIPolicySpecViolation}
1197 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1198 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1199 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1200 disk_count = len(instance.disks)
1201 disk_sizes = [disk.size for disk in instance.disks]
1202 nic_count = len(instance.nics)
1204 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1205 disk_sizes, spindle_use)
1208 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1209 _compute_fn=_ComputeIPolicySpecViolation):
1210 """Compute if instance specs meets the specs of ipolicy.
1213 @param ipolicy: The ipolicy to verify against
1214 @type instance_spec: dict
1215 @param instance_spec: The instance spec to verify
1216 @param _compute_fn: The function to verify ipolicy (unittest only)
1217 @see: L{_ComputeIPolicySpecViolation}
1220 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1221 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1222 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1223 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1224 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1225 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1227 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1228 disk_sizes, spindle_use)
1231 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1233 _compute_fn=_ComputeIPolicyInstanceViolation):
1234 """Compute if instance meets the specs of the new target group.
1236 @param ipolicy: The ipolicy to verify
1237 @param instance: The instance object to verify
1238 @param current_group: The current group of the instance
1239 @param target_group: The new group of the instance
1240 @param _compute_fn: The function to verify ipolicy (unittest only)
1241 @see: L{_ComputeIPolicySpecViolation}
1244 if current_group == target_group:
1247 return _compute_fn(ipolicy, instance)
1250 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1251 _compute_fn=_ComputeIPolicyNodeViolation):
1252 """Checks that the target node is correct in terms of instance policy.
1254 @param ipolicy: The ipolicy to verify
1255 @param instance: The instance object to verify
1256 @param node: The new node to relocate
1257 @param ignore: Ignore violations of the ipolicy
1258 @param _compute_fn: The function to verify ipolicy (unittest only)
1259 @see: L{_ComputeIPolicySpecViolation}
1262 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1263 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1266 msg = ("Instance does not meet target node group's (%s) instance"
1267 " policy: %s") % (node.group, utils.CommaJoin(res))
1271 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1274 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1275 """Computes a set of any instances that would violate the new ipolicy.
1277 @param old_ipolicy: The current (still in-place) ipolicy
1278 @param new_ipolicy: The new (to become) ipolicy
1279 @param instances: List of instances to verify
1280 @return: A list of instances which violate the new ipolicy but
1284 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1285 _ComputeViolatingInstances(old_ipolicy, instances))
1288 def _ExpandItemName(fn, name, kind):
1289 """Expand an item name.
1291 @param fn: the function to use for expansion
1292 @param name: requested item name
1293 @param kind: text description ('Node' or 'Instance')
1294 @return: the resolved (full) name
1295 @raise errors.OpPrereqError: if the item is not found
1298 full_name = fn(name)
1299 if full_name is None:
1300 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1305 def _ExpandNodeName(cfg, name):
1306 """Wrapper over L{_ExpandItemName} for nodes."""
1307 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1310 def _ExpandInstanceName(cfg, name):
1311 """Wrapper over L{_ExpandItemName} for instance."""
1312 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1315 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1316 minmem, maxmem, vcpus, nics, disk_template, disks,
1317 bep, hvp, hypervisor_name, tags):
1318 """Builds instance related env variables for hooks
1320 This builds the hook environment from individual variables.
1323 @param name: the name of the instance
1324 @type primary_node: string
1325 @param primary_node: the name of the instance's primary node
1326 @type secondary_nodes: list
1327 @param secondary_nodes: list of secondary nodes as strings
1328 @type os_type: string
1329 @param os_type: the name of the instance's OS
1330 @type status: string
1331 @param status: the desired status of the instance
1332 @type minmem: string
1333 @param minmem: the minimum memory size of the instance
1334 @type maxmem: string
1335 @param maxmem: the maximum memory size of the instance
1337 @param vcpus: the count of VCPUs the instance has
1339 @param nics: list of tuples (ip, mac, mode, link) representing
1340 the NICs the instance has
1341 @type disk_template: string
1342 @param disk_template: the disk template of the instance
1344 @param disks: the list of (size, mode) pairs
1346 @param bep: the backend parameters for the instance
1348 @param hvp: the hypervisor parameters for the instance
1349 @type hypervisor_name: string
1350 @param hypervisor_name: the hypervisor for the instance
1352 @param tags: list of instance tags as strings
1354 @return: the hook environment for this instance
1359 "INSTANCE_NAME": name,
1360 "INSTANCE_PRIMARY": primary_node,
1361 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1362 "INSTANCE_OS_TYPE": os_type,
1363 "INSTANCE_STATUS": status,
1364 "INSTANCE_MINMEM": minmem,
1365 "INSTANCE_MAXMEM": maxmem,
1366 # TODO(2.7) remove deprecated "memory" value
1367 "INSTANCE_MEMORY": maxmem,
1368 "INSTANCE_VCPUS": vcpus,
1369 "INSTANCE_DISK_TEMPLATE": disk_template,
1370 "INSTANCE_HYPERVISOR": hypervisor_name,
1373 nic_count = len(nics)
1374 for idx, (ip, mac, mode, link) in enumerate(nics):
1377 env["INSTANCE_NIC%d_IP" % idx] = ip
1378 env["INSTANCE_NIC%d_MAC" % idx] = mac
1379 env["INSTANCE_NIC%d_MODE" % idx] = mode
1380 env["INSTANCE_NIC%d_LINK" % idx] = link
1381 if mode == constants.NIC_MODE_BRIDGED:
1382 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1386 env["INSTANCE_NIC_COUNT"] = nic_count
1389 disk_count = len(disks)
1390 for idx, (size, mode) in enumerate(disks):
1391 env["INSTANCE_DISK%d_SIZE" % idx] = size
1392 env["INSTANCE_DISK%d_MODE" % idx] = mode
1396 env["INSTANCE_DISK_COUNT"] = disk_count
1401 env["INSTANCE_TAGS"] = " ".join(tags)
1403 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1404 for key, value in source.items():
1405 env["INSTANCE_%s_%s" % (kind, key)] = value
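# Illustrative excerpt of the resulting environment (made-up values); the
# hooks runner later prefixes every key with "GANETI_":
#
#   INSTANCE_NAME=inst1.example.com
#   INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_NIC_COUNT=1
#   INSTANCE_NIC0_MODE=bridged
#   INSTANCE_DISK0_SIZE=10240
#   INSTANCE_BE_maxmem=1024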
1410 def _NICListToTuple(lu, nics):
1411 """Build a list of nic information tuples.
1413 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1414 value in LUInstanceQueryData.
1416 @type lu: L{LogicalUnit}
1417 @param lu: the logical unit on whose behalf we execute
1418 @type nics: list of L{objects.NIC}
1419 @param nics: list of nics to convert to hooks tuples
1423 cluster = lu.cfg.GetClusterInfo()
1427 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1428 mode = filled_params[constants.NIC_MODE]
1429 link = filled_params[constants.NIC_LINK]
1430 hooks_nics.append((ip, mac, mode, link))
1434 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1435 """Builds instance related env variables for hooks from an object.
1437 @type lu: L{LogicalUnit}
1438 @param lu: the logical unit on whose behalf we execute
1439 @type instance: L{objects.Instance}
1440 @param instance: the instance for which we should build the
1442 @type override: dict
1443 @param override: dictionary with key/values that will override
1446 @return: the hook environment dictionary
1449 cluster = lu.cfg.GetClusterInfo()
1450 bep = cluster.FillBE(instance)
1451 hvp = cluster.FillHV(instance)
1453 "name": instance.name,
1454 "primary_node": instance.primary_node,
1455 "secondary_nodes": instance.secondary_nodes,
1456 "os_type": instance.os,
1457 "status": instance.admin_state,
1458 "maxmem": bep[constants.BE_MAXMEM],
1459 "minmem": bep[constants.BE_MINMEM],
1460 "vcpus": bep[constants.BE_VCPUS],
1461 "nics": _NICListToTuple(lu, instance.nics),
1462 "disk_template": instance.disk_template,
1463 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1466 "hypervisor_name": instance.hypervisor,
1467 "tags": instance.tags,
1470 args.update(override)
1471 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1474 def _AdjustCandidatePool(lu, exceptions):
1475 """Adjust the candidate pool after node operations.
1478 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1480 lu.LogInfo("Promoted nodes to master candidate role: %s",
1481 utils.CommaJoin(node.name for node in mod_list))
1482 for name in mod_list:
1483 lu.context.ReaddNode(name)
1484 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1486 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490 def _DecideSelfPromotion(lu, exceptions=None):
1491 """Decide whether I should promote myself as a master candidate.
1494 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1495 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496 # the new node will increase mc_max with one, so:
1497 mc_should = min(mc_should + 1, cp_size)
1498 return mc_now < mc_should
1501 def _CalculateGroupIPolicy(cluster, group):
1502 """Calculate instance policy for group.
1505 return cluster.SimpleFillIPolicy(group.ipolicy)
1508 def _ComputeViolatingInstances(ipolicy, instances):
1509 """Computes a set of instances who violates given ipolicy.
1511 @param ipolicy: The ipolicy to verify
1512 @type instances: object.Instance
1513 @param instances: List of instances to verify
1514 @return: A frozenset of instance names violating the ipolicy
1517 return frozenset([inst.name for inst in instances
1518 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1521 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1522 """Check that the brigdes needed by a list of nics exist.
1525 cluster = lu.cfg.GetClusterInfo()
1526 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1527 brlist = [params[constants.NIC_LINK] for params in paramslist
1528 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1530 result = lu.rpc.call_bridges_exist(target_node, brlist)
1531 result.Raise("Error checking bridges on destination node '%s'" %
1532 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1535 def _CheckInstanceBridgesExist(lu, instance, node=None):
1536 """Check that the brigdes needed by an instance exist.
1540 node = instance.primary_node
1541 _CheckNicsBridgesExist(lu, instance.nics, node)
1544 def _CheckOSVariant(os_obj, name):
1545 """Check whether an OS name conforms to the os variants specification.
1547 @type os_obj: L{objects.OS}
1548 @param os_obj: OS object to check
1550 @param name: OS name passed by the user, to check for validity
1553 variant = objects.OS.GetVariant(name)
1554 if not os_obj.supported_variants:
1556 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1557 " passed)" % (os_obj.name, variant),
1561 raise errors.OpPrereqError("OS name must include a variant",
1564 if variant not in os_obj.supported_variants:
1565 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1568 def _GetNodeInstancesInner(cfg, fn):
1569 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1572 def _GetNodeInstances(cfg, node_name):
1573 """Returns a list of all primary and secondary instances on a node.
1577 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1580 def _GetNodePrimaryInstances(cfg, node_name):
1581 """Returns primary instances on a node.
1584 return _GetNodeInstancesInner(cfg,
1585 lambda inst: node_name == inst.primary_node)
1588 def _GetNodeSecondaryInstances(cfg, node_name):
1589 """Returns secondary instances on a node.
1592 return _GetNodeInstancesInner(cfg,
1593 lambda inst: node_name in inst.secondary_nodes)
1596 def _GetStorageTypeArgs(cfg, storage_type):
1597 """Returns the arguments for a storage type.
1600 # Special case for file storage
1601 if storage_type == constants.ST_FILE:
1602 # storage.FileStorage wants a list of storage directories
1603 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1608 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1611 for dev in instance.disks:
1612 cfg.SetDiskID(dev, node_name)
1614 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1616 result.Raise("Failed to get disk status from node %s" % node_name,
1617 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1619 for idx, bdev_status in enumerate(result.payload):
1620 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1626 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1627 """Check the sanity of iallocator and node arguments and use the
1628 cluster-wide iallocator if appropriate.
1630 Check that at most one of (iallocator, node) is specified. If none is
1631 specified, then the LU's opcode's iallocator slot is filled with the
1632 cluster-wide default iallocator.
1634 @type iallocator_slot: string
1635 @param iallocator_slot: the name of the opcode iallocator slot
1636 @type node_slot: string
1637 @param node_slot: the name of the opcode target node slot
1640 node = getattr(lu.op, node_slot, None)
1641 iallocator = getattr(lu.op, iallocator_slot, None)
1643 if node is not None and iallocator is not None:
1644 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1646 elif node is None and iallocator is None:
1647 default_iallocator = lu.cfg.GetDefaultIAllocator()
1648 if default_iallocator:
1649 setattr(lu.op, iallocator_slot, default_iallocator)
1651 raise errors.OpPrereqError("No iallocator or node given and no"
1652 " cluster-wide default iallocator found;"
1653 " please specify either an iallocator or a"
1654 " node, or set a cluster-wide default"
1658 def _GetDefaultIAllocator(cfg, iallocator):
1659 """Decides on which iallocator to use.
1661 @type cfg: L{config.ConfigWriter}
1662 @param cfg: Cluster configuration object
1663 @type iallocator: string or None
1664 @param iallocator: Iallocator specified in opcode
1666 @return: Iallocator name
1670 # Use default iallocator
1671 iallocator = cfg.GetDefaultIAllocator()
1674 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1675 " opcode nor as a cluster-wide default",
1681 class LUClusterPostInit(LogicalUnit):
1682 """Logical unit for running hooks after cluster initialization.
1685 HPATH = "cluster-init"
1686 HTYPE = constants.HTYPE_CLUSTER
1688 def BuildHooksEnv(self):
1693 "OP_TARGET": self.cfg.GetClusterName(),
1696 def BuildHooksNodes(self):
1697 """Build hooks nodes.
1700 return ([], [self.cfg.GetMasterNode()])
1702 def Exec(self, feedback_fn):
1709 class LUClusterDestroy(LogicalUnit):
1710 """Logical unit for destroying the cluster.
1713 HPATH = "cluster-destroy"
1714 HTYPE = constants.HTYPE_CLUSTER
1716 def BuildHooksEnv(self):
1721 "OP_TARGET": self.cfg.GetClusterName(),
1724 def BuildHooksNodes(self):
1725 """Build hooks nodes.
1730 def CheckPrereq(self):
1731 """Check prerequisites.
1733 This checks whether the cluster is empty.
1735 Any errors are signaled by raising errors.OpPrereqError.
1738 master = self.cfg.GetMasterNode()
1740 nodelist = self.cfg.GetNodeList()
1741 if len(nodelist) != 1 or nodelist[0] != master:
1742 raise errors.OpPrereqError("There are still %d node(s) in"
1743 " this cluster." % (len(nodelist) - 1),
1745 instancelist = self.cfg.GetInstanceList()
1747 raise errors.OpPrereqError("There are still %d instance(s) in"
1748 " this cluster." % len(instancelist),
1751 def Exec(self, feedback_fn):
1752 """Destroys the cluster.
1755 master_params = self.cfg.GetMasterNetworkParameters()
1757 # Run post hooks on master node before it's removed
1758 _RunPostHook(self, master_params.name)
1760 ems = self.cfg.GetUseExternalMipScript()
1761 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1764 self.LogWarning("Error disabling the master IP address: %s",
1767 return master_params.name
1770 def _VerifyCertificate(filename):
1771 """Verifies a certificate for L{LUClusterVerifyConfig}.
1773 @type filename: string
1774 @param filename: Path to PEM file
1778 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1779 utils.ReadFile(filename))
1780 except Exception, err: # pylint: disable=W0703
1781 return (LUClusterVerifyConfig.ETYPE_ERROR,
1782 "Failed to load X509 certificate %s: %s" % (filename, err))
1785 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1786 constants.SSL_CERT_EXPIRATION_ERROR)
1789 fnamemsg = "While verifying %s: %s" % (filename, msg)
1794 return (None, fnamemsg)
1795 elif errcode == utils.CERT_WARNING:
1796 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1797 elif errcode == utils.CERT_ERROR:
1798 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1800 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1803 def _GetAllHypervisorParameters(cluster, instances):
1804 """Compute the set of all hypervisor parameters.
1806 @type cluster: L{objects.Cluster}
1807 @param cluster: the cluster object
1808 @param instances: list of L{objects.Instance}
1809 @param instances: additional instances from which to obtain parameters
1810 @rtype: list of (origin, hypervisor, parameters)
1811 @return: a list with all parameters found, indicating the hypervisor they
1812 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1817 for hv_name in cluster.enabled_hypervisors:
1818 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1820 for os_name, os_hvp in cluster.os_hvp.items():
1821 for hv_name, hv_params in os_hvp.items():
1823 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1824 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1826 # TODO: collapse identical parameter values in a single one
1827 for instance in instances:
1828 if instance.hvparams:
1829 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1830 cluster.FillHV(instance)))
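# Illustrative return value (made-up names):
#   [("cluster", "kvm", {...cluster-level defaults...}),
#    ("os debian-image", "kvm", {...defaults plus per-OS overrides...}),
#    ("instance inst1.example.com", "kvm", {...fully filled parameters...})]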
1835 class _VerifyErrors(object):
1836 """Mix-in for cluster/group verify LUs.
1838 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1839 self.op and self._feedback_fn to be available.)
1843 ETYPE_FIELD = "code"
1844 ETYPE_ERROR = "ERROR"
1845 ETYPE_WARNING = "WARNING"
1847 def _Error(self, ecode, item, msg, *args, **kwargs):
1848 """Format an error message.
1850 Based on the opcode's error_codes parameter, either format a
1851 parseable error code, or a simpler error string.
1853 This must be called only from Exec and functions called from Exec.
1856 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1857 itype, etxt, _ = ecode
1858 # first complete the msg
1861 # then format the whole message
1862 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1863 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1869 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1870 # and finally report it via the feedback_fn
1871 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1873 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1874 """Log an error message if the passed condition is True.
1878 or self.op.debug_simulate_errors) # pylint: disable=E1101
1880 # If the error code is in the list of ignored errors, demote the error to a
1882 (_, etxt, _) = ecode
1883 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1884 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1887 self._Error(ecode, *args, **kwargs)
1889 # do not mark the operation as failed for WARN cases only
1890 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1891 self.bad = self.bad or cond
1894 class LUClusterVerify(NoHooksLU):
1895 """Submits all jobs necessary to verify the cluster.
1900 def ExpandNames(self):
1901 self.needed_locks = {}
1903 def Exec(self, feedback_fn):
1906 if self.op.group_name:
1907 groups = [self.op.group_name]
1908 depends_fn = lambda: None
1910 groups = self.cfg.GetNodeGroupList()
1912 # Verify global configuration
1914 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1917 # Always depend on global verification
1918 depends_fn = lambda: [(-len(jobs), [])]
1920 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1921 ignore_errors=self.op.ignore_errors,
1922 depends=depends_fn())]
1923 for group in groups)
1925 # Fix up all parameters
1926 for op in itertools.chain(*jobs): # pylint: disable=W0142
1927 op.debug_simulate_errors = self.op.debug_simulate_errors
1928 op.verbose = self.op.verbose
1929 op.error_codes = self.op.error_codes
1931 op.skip_checks = self.op.skip_checks
1932 except AttributeError:
1933 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1935 return ResultWithJobs(jobs)
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939 """Verifies the cluster config.
1944 def _VerifyHVP(self, hvp_data):
1945 """Verifies locally the syntax of the hypervisor parameters.
1948 for item, hv_name, hv_params in hvp_data:
1949 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1952 hv_class = hypervisor.GetHypervisor(hv_name)
1953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954 hv_class.CheckParameterSyntax(hv_params)
1955 except errors.GenericError, err:
1956 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958 def ExpandNames(self):
1959 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960 self.share_locks = _ShareAll()
1962 def CheckPrereq(self):
1963 """Check prerequisites.
1966 # Retrieve all information
1967 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968 self.all_node_info = self.cfg.GetAllNodesInfo()
1969 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971 def Exec(self, feedback_fn):
1972     """Verify integrity of cluster, performing various tests on nodes.
1976 self._feedback_fn = feedback_fn
1978 feedback_fn("* Verifying cluster config")
1980 for msg in self.cfg.VerifyConfig():
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983 feedback_fn("* Verifying cluster certificate files")
1985 for cert_filename in constants.ALL_CERT_FILES:
1986 (errcode, msg) = _VerifyCertificate(cert_filename)
1987 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989 feedback_fn("* Verifying hypervisor parameters")
1991 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992 self.all_inst_info.values()))
1994 feedback_fn("* Verifying all nodes belong to an existing group")
1996 # We do this verification here because, should this bogus circumstance
1997 # occur, it would never be caught by VerifyGroup, which only acts on
1998 # nodes/instances reachable from existing node groups.
2000 dangling_nodes = set(node.name for node in self.all_node_info.values()
2001 if node.group not in self.all_group_info)
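    # "Dangling" nodes reference a group UUID that no longer exists in the
    # configuration; the instances primarily hosted on them are collected
    # below so both can be reported together.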
2003 dangling_instances = {}
2004 no_node_instances = []
2006 for inst in self.all_inst_info.values():
2007 if inst.primary_node in dangling_nodes:
2008 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009 elif inst.primary_node not in self.all_node_info:
2010 no_node_instances.append(inst.name)
2015 utils.CommaJoin(dangling_instances.get(node.name,
2017 for node in dangling_nodes]
2019 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2021 "the following nodes (and their instances) belong to a non"
2022 " existing group: %s", utils.CommaJoin(pretty_dangling))
2024 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2026 "the following instances have a non-existing primary-node:"
2027 " %s", utils.CommaJoin(no_node_instances))
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033 """Verifies the status of a node group.
2036 HPATH = "cluster-verify"
2037 HTYPE = constants.HTYPE_CLUSTER
2040 _HOOKS_INDENT_RE = re.compile("^", re.M)
2042 class NodeImage(object):
2043 """A class representing the logical and physical status of a node.
2046 @ivar name: the node name to which this object refers
2047 @ivar volumes: a structure as returned from
2048 L{ganeti.backend.GetVolumeList} (runtime)
2049 @ivar instances: a list of running instances (runtime)
2050 @ivar pinst: list of configured primary instances (config)
2051 @ivar sinst: list of configured secondary instances (config)
2052 @ivar sbp: dictionary of {primary-node: list of instances} for all
2053 instances for which this node is secondary (config)
2054 @ivar mfree: free memory, as reported by hypervisor (runtime)
2055 @ivar dfree: free disk, as reported by the node (runtime)
2056 @ivar offline: the offline status (config)
2057 @type rpc_fail: boolean
2058     @ivar rpc_fail: whether the RPC verify call was successful (overall,
2059 not whether the individual keys were correct) (runtime)
2060 @type lvm_fail: boolean
2061 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062 @type hyp_fail: boolean
2063 @ivar hyp_fail: whether the RPC call didn't return the instance list
2064 @type ghost: boolean
2065     @ivar ghost: whether this node is unknown to the configuration (config)
2066 @type os_fail: boolean
2067 @ivar os_fail: whether the RPC call didn't return valid OS data
2069 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070 @type vm_capable: boolean
2071 @ivar vm_capable: whether the node can host instances
2074 def __init__(self, offline=False, name=None, vm_capable=True):
2083 self.offline = offline
2084 self.vm_capable = vm_capable
2085 self.rpc_fail = False
2086 self.lvm_fail = False
2087 self.hyp_fail = False
2089 self.os_fail = False
2092 def ExpandNames(self):
2093 # This raises errors.OpPrereqError on its own:
2094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2096 # Get instances in node group; this is unsafe and needs verification later
2098 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2100 self.needed_locks = {
2101 locking.LEVEL_INSTANCE: inst_names,
2102 locking.LEVEL_NODEGROUP: [self.group_uuid],
2103 locking.LEVEL_NODE: [],
2106 self.share_locks = _ShareAll()
2108 def DeclareLocks(self, level):
2109 if level == locking.LEVEL_NODE:
2110 # Get members of node group; this is unsafe and needs verification later
2111 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2113 all_inst_info = self.cfg.GetAllInstancesInfo()
2115 # In Exec(), we warn about mirrored instances that have primary and
2116 # secondary living in separate node groups. To fully verify that
2117 # volumes for these instances are healthy, we will need to do an
2118       # extra call to their secondaries. We ensure here those nodes will be locked.
2120 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121 # Important: access only the instances whose lock is owned
2122 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123 nodes.update(all_inst_info[inst].secondary_nodes)
2125 self.needed_locks[locking.LEVEL_NODE] = nodes
2127 def CheckPrereq(self):
2128 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2129 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2131 group_nodes = set(self.group_info.members)
2133 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2136 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2138 unlocked_instances = \
2139 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2142 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2143 utils.CommaJoin(unlocked_nodes),
2146 if unlocked_instances:
2147 raise errors.OpPrereqError("Missing lock for instances: %s" %
2148 utils.CommaJoin(unlocked_instances),
2151 self.all_node_info = self.cfg.GetAllNodesInfo()
2152 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2154 self.my_node_names = utils.NiceSort(group_nodes)
2155 self.my_inst_names = utils.NiceSort(group_instances)
2157 self.my_node_info = dict((name, self.all_node_info[name])
2158 for name in self.my_node_names)
2160 self.my_inst_info = dict((name, self.all_inst_info[name])
2161 for name in self.my_inst_names)
2163 # We detect here the nodes that will need the extra RPC calls for verifying
2164 # split LV volumes; they should be locked.
2165 extra_lv_nodes = set()
2167 for inst in self.my_inst_info.values():
2168 if inst.disk_template in constants.DTS_INT_MIRROR:
2169 for nname in inst.all_nodes:
2170 if self.all_node_info[nname].group != self.group_uuid:
2171 extra_lv_nodes.add(nname)
2173 unlocked_lv_nodes = \
2174 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2176 if unlocked_lv_nodes:
2177 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2178 utils.CommaJoin(unlocked_lv_nodes),
2180 self.extra_lv_nodes = list(extra_lv_nodes)
2182 def _VerifyNode(self, ninfo, nresult):
2183 """Perform some basic validation on data returned from a node.
2185     - check the result data structure is well formed and has all the expected fields
2187 - check ganeti version
2189 @type ninfo: L{objects.Node}
2190 @param ninfo: the node to check
2191 @param nresult: the results from the node
2193 @return: whether overall this call was successful (and we can expect
2194         reasonable values in the response)
2198 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2200 # main result, nresult should be a non-empty dict
2201 test = not nresult or not isinstance(nresult, dict)
2202 _ErrorIf(test, constants.CV_ENODERPC, node,
2203 "unable to verify node: no data returned")
2207 # compares ganeti version
2208 local_version = constants.PROTOCOL_VERSION
2209 remote_version = nresult.get("version", None)
2210 test = not (remote_version and
2211 isinstance(remote_version, (list, tuple)) and
2212 len(remote_version) == 2)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "connection to node returned invalid data")
2218 test = local_version != remote_version[0]
2219 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2220 "incompatible protocol versions: master %s,"
2221 " node %s", local_version, remote_version[0])
2225 # node seems compatible, we can actually try to look into its results
2227 # full package version
2228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2229 constants.CV_ENODEVERSION, node,
2230 "software version mismatch: master %s, node %s",
2231 constants.RELEASE_VERSION, remote_version[1],
2232 code=self.ETYPE_WARNING)
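    # Note: a protocol version mismatch above is a hard error, while a
    # differing RELEASE_VERSION is only reported as a warning, since the nodes
    # still speak the same RPC protocol.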
2234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2235 if ninfo.vm_capable and isinstance(hyp_result, dict):
2236 for hv_name, hv_result in hyp_result.iteritems():
2237 test = hv_result is not None
2238 _ErrorIf(test, constants.CV_ENODEHV, node,
2239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2241 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2242 if ninfo.vm_capable and isinstance(hvp_result, list):
2243 for item, hv_name, hv_result in hvp_result:
2244 _ErrorIf(True, constants.CV_ENODEHV, node,
2245 "hypervisor %s parameter verify failure (source %s): %s",
2246 hv_name, item, hv_result)
2248 test = nresult.get(constants.NV_NODESETUP,
2249 ["Missing NODESETUP results"])
2250 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2255 def _VerifyNodeTime(self, ninfo, nresult,
2256 nvinfo_starttime, nvinfo_endtime):
2257 """Check the node time.
2259 @type ninfo: L{objects.Node}
2260 @param ninfo: the node to check
2261 @param nresult: the remote results for the node
2262 @param nvinfo_starttime: the start time of the RPC call
2263 @param nvinfo_endtime: the end time of the RPC call
2267 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2269 ntime = nresult.get(constants.NV_TIME, None)
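    # The node's reported time (a (seconds, microseconds) pair merged back
    # into a float below) must fall within constants.NODE_MAX_CLOCK_SKEW of
    # the [start, end] window of the verify RPC, so a slow RPC round-trip is
    # not mistaken for clock skew.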
2271 ntime_merged = utils.MergeTime(ntime)
2272 except (ValueError, TypeError):
2273 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2276 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2278 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2279 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2283 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2284 "Node time diverges by at least %s from master node time",
2287 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2288 """Check the node LVM results.
2290 @type ninfo: L{objects.Node}
2291 @param ninfo: the node to check
2292 @param nresult: the remote results for the node
2293 @param vg_name: the configured VG name
2300 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2302 # checks vg existence and size > 20G
2303 vglist = nresult.get(constants.NV_VGLIST, None)
2305 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2307 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2308 constants.MIN_VG_SIZE)
2309 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2312 pvlist = nresult.get(constants.NV_PVLIST, None)
2313 test = pvlist is None
2314 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2316 # check that ':' is not present in PV names, since it's a
2317 # special character for lvcreate (denotes the range of PEs to
2319 for _, pvname, owner_vg in pvlist:
2320 test = ":" in pvname
2321 _ErrorIf(test, constants.CV_ENODELVM, node,
2322 "Invalid character ':' in PV '%s' of VG '%s'",
2325 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2326 """Check the node bridges.
2328 @type ninfo: L{objects.Node}
2329 @param ninfo: the node to check
2330 @param nresult: the remote results for the node
2331 @param bridges: the expected list of bridges
2338 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 missing = nresult.get(constants.NV_BRIDGES, None)
2341 test = not isinstance(missing, list)
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "did not return valid bridge information")
2345 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2348 def _VerifyNodeUserScripts(self, ninfo, nresult):
2349     """Check the results of user script presence and executability on the node
2351 @type ninfo: L{objects.Node}
2352 @param ninfo: the node to check
2353 @param nresult: the remote results for the node
2358     test = constants.NV_USERSCRIPTS not in nresult
2359 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2360 "did not return user scripts information")
2362 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2364 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2365 "user scripts not present or not executable: %s" %
2366 utils.CommaJoin(sorted(broken_scripts)))
2368 def _VerifyNodeNetwork(self, ninfo, nresult):
2369 """Check the node network connectivity results.
2371 @type ninfo: L{objects.Node}
2372 @param ninfo: the node to check
2373 @param nresult: the remote results for the node
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 test = constants.NV_NODELIST not in nresult
2380 _ErrorIf(test, constants.CV_ENODESSH, node,
2381 "node hasn't returned node ssh connectivity data")
2383 if nresult[constants.NV_NODELIST]:
2384 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2385 _ErrorIf(True, constants.CV_ENODESSH, node,
2386 "ssh communication with node '%s': %s", a_node, a_msg)
2388 test = constants.NV_NODENETTEST not in nresult
2389 _ErrorIf(test, constants.CV_ENODENET, node,
2390 "node hasn't returned node tcp connectivity data")
2392 if nresult[constants.NV_NODENETTEST]:
2393 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2395 _ErrorIf(True, constants.CV_ENODENET, node,
2396 "tcp communication with node '%s': %s",
2397 anode, nresult[constants.NV_NODENETTEST][anode])
2399 test = constants.NV_MASTERIP not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node master IP reachability data")
2403 if not nresult[constants.NV_MASTERIP]:
2404 if node == self.master_node:
2405 msg = "the master node cannot reach the master IP (not configured?)"
2407 msg = "cannot reach the master IP"
2408 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2410 def _VerifyInstance(self, instance, instanceconfig, node_image,
2412 """Verify an instance.
2414 This function checks to see if the required block devices are
2415 available on the instance's node.
2418 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2419 node_current = instanceconfig.primary_node
2421 node_vol_should = {}
2422 instanceconfig.MapLVsByNode(node_vol_should)
2424 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2425 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2426 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2428 for node in node_vol_should:
2429 n_img = node_image[node]
2430 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2431 # ignore missing volumes on offline or broken nodes
2433 for volume in node_vol_should[node]:
2434 test = volume not in n_img.volumes
2435 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2436 "volume %s missing on node %s", volume, node)
2438 if instanceconfig.admin_state == constants.ADMINST_UP:
2439 pri_img = node_image[node_current]
2440 test = instance not in pri_img.instances and not pri_img.offline
2441 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2442 "instance not running on its primary node %s",
2445 diskdata = [(nname, success, status, idx)
2446 for (nname, disks) in diskstatus.items()
2447 for idx, (success, status) in enumerate(disks)]
2449 for nname, success, bdev_status, idx in diskdata:
2450 # the 'ghost node' construction in Exec() ensures that we have a
2452 snode = node_image[nname]
2453 bad_snode = snode.ghost or snode.offline
2454 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2455 not success and not bad_snode,
2456 constants.CV_EINSTANCEFAULTYDISK, instance,
2457 "couldn't retrieve status for disk/%s on %s: %s",
2458 idx, nname, bdev_status)
2459 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2460 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2461 constants.CV_EINSTANCEFAULTYDISK, instance,
2462 "disk/%s on %s is faulty", idx, nname)
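    # In short: a missing disk status is only an error while the instance is
    # marked up and the node is neither offline nor a ghost; a successfully
    # retrieved status of LDS_FAULTY is always flagged for a running instance.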
2464 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2465 """Verify if there are any unknown volumes in the cluster.
2467 The .os, .swap and backup volumes are ignored. All other volumes are
2468 reported as unknown.
2470 @type reserved: L{ganeti.utils.FieldSet}
2471 @param reserved: a FieldSet of reserved volume names
2474 for node, n_img in node_image.items():
2475 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2476 self.all_node_info[node].group != self.group_uuid):
2477 # skip non-healthy nodes
2479 for volume in n_img.volumes:
2480 test = ((node not in node_vol_should or
2481 volume not in node_vol_should[node]) and
2482 not reserved.Matches(volume))
2483 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2484 "volume %s is unknown", volume)
2486 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2487 """Verify N+1 Memory Resilience.
2489 Check that if one single node dies we can still start all the
2490 instances it was primary for.
2493 cluster_info = self.cfg.GetClusterInfo()
2494 for node, n_img in node_image.items():
2495 # This code checks that every node which is now listed as
2496 # secondary has enough memory to host all instances it is
2497 # supposed to should a single other node in the cluster fail.
2498 # FIXME: not ready for failover to an arbitrary node
2499 # FIXME: does not support file-backed instances
2500 # WARNING: we currently take into account down instances as well
2501 # as up ones, considering that even if they're down someone
2502 # might want to start them even in the event of a node failure.
2503 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2504 # we're skipping nodes marked offline and nodes in other groups from
2505 # the N+1 warning, since most likely we don't have good memory
2506         # information from them; we already list instances living on such
2507 # nodes, and that's enough warning
2509 #TODO(dynmem): also consider ballooning out other instances
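      # For each primary node "prinode" that uses this node as a secondary,
      # the minimum memory of its auto-balanced instances is summed up and
      # compared against this node's reported free memory.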
2510 for prinode, instances in n_img.sbp.items():
2512 for instance in instances:
2513 bep = cluster_info.FillBE(instance_cfg[instance])
2514 if bep[constants.BE_AUTO_BALANCE]:
2515 needed_mem += bep[constants.BE_MINMEM]
2516 test = n_img.mfree < needed_mem
2517 self._ErrorIf(test, constants.CV_ENODEN1, node,
2518                       "not enough memory to accommodate instance failovers"
2519 " should node %s fail (%dMiB needed, %dMiB available)",
2520 prinode, needed_mem, n_img.mfree)
2523 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2524 (files_all, files_opt, files_mc, files_vm)):
2525 """Verifies file checksums collected from all nodes.
2527 @param errorif: Callback for reporting errors
2528 @param nodeinfo: List of L{objects.Node} objects
2529 @param master_node: Name of master node
2530 @param all_nvinfo: RPC results
2533 # Define functions determining which nodes to consider for a file
2536 (files_mc, lambda node: (node.master_candidate or
2537 node.name == master_node)),
2538 (files_vm, lambda node: node.vm_capable),
2541 # Build mapping from filename to list of nodes which should have the file
2543 for (files, fn) in files2nodefn:
2545 filenodes = nodeinfo
2547 filenodes = filter(fn, nodeinfo)
2548 nodefiles.update((filename,
2549 frozenset(map(operator.attrgetter("name"), filenodes)))
2550 for filename in files)
2552 assert set(nodefiles) == (files_all | files_mc | files_vm)
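    # files_all must exist on every node, files_mc only on master candidates
    # (and the master itself), files_vm only on vm_capable nodes; files_opt
    # marks files that may legitimately be missing.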
2554 fileinfo = dict((filename, {}) for filename in nodefiles)
2555 ignore_nodes = set()
2557 for node in nodeinfo:
2559 ignore_nodes.add(node.name)
2562 nresult = all_nvinfo[node.name]
2564 if nresult.fail_msg or not nresult.payload:
2567 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2569 test = not (node_files and isinstance(node_files, dict))
2570 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2571 "Node did not return file checksum data")
2573 ignore_nodes.add(node.name)
2576 # Build per-checksum mapping from filename to nodes having it
2577 for (filename, checksum) in node_files.items():
2578 assert filename in nodefiles
2579 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2581 for (filename, checksums) in fileinfo.items():
2582 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2584 # Nodes having the file
2585 with_file = frozenset(node_name
2586 for nodes in fileinfo[filename].values()
2587 for node_name in nodes) - ignore_nodes
2589 expected_nodes = nodefiles[filename] - ignore_nodes
2591 # Nodes missing file
2592 missing_file = expected_nodes - with_file
2594 if filename in files_opt:
2596 errorif(missing_file and missing_file != expected_nodes,
2597 constants.CV_ECLUSTERFILECHECK, None,
2598 "File %s is optional, but it must exist on all or no"
2599 " nodes (not found on %s)",
2600 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2602 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2603 "File %s is missing from node(s) %s", filename,
2604 utils.CommaJoin(utils.NiceSort(missing_file)))
2606 # Warn if a node has a file it shouldn't
2607 unexpected = with_file - expected_nodes
2609 constants.CV_ECLUSTERFILECHECK, None,
2610 "File %s should not exist on node(s) %s",
2611 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2613 # See if there are multiple versions of the file
2614 test = len(checksums) > 1
2616 variants = ["variant %s on %s" %
2617 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2618 for (idx, (checksum, nodes)) in
2619 enumerate(sorted(checksums.items()))]
2623 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s found with %s different checksums (%s)",
2625 filename, len(checksums), "; ".join(variants))
2627 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2629     """Verifies the node DRBD status.
2631 @type ninfo: L{objects.Node}
2632 @param ninfo: the node to check
2633 @param nresult: the remote results for the node
2634 @param instanceinfo: the dict of instances
2635 @param drbd_helper: the configured DRBD usermode helper
2636 @param drbd_map: the DRBD map as returned by
2637 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2641 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2645       test = (helper_result is None)
2646 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2647 "no drbd usermode helper returned")
2649 status, payload = helper_result
2651 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2652 "drbd usermode helper check unsuccessful: %s", payload)
2653 test = status and (payload != drbd_helper)
2654 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655 "wrong drbd usermode helper: %s", payload)
2657 # compute the DRBD minors
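    # node_drbd maps every minor the configuration assigns to this node to a
    # (instance name, should_be_running) pair, which is then compared against
    # the minors the node actually reports as in use.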
2659 for minor, instance in drbd_map[node].items():
2660 test = instance not in instanceinfo
2661 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2662 "ghost instance '%s' in temporary DRBD map", instance)
2663 # ghost instance should not be running, but otherwise we
2664 # don't give double warnings (both ghost instance and
2665 # unallocated minor in use)
2667 node_drbd[minor] = (instance, False)
2669 instance = instanceinfo[instance]
2670 node_drbd[minor] = (instance.name,
2671 instance.admin_state == constants.ADMINST_UP)
2673 # and now check them
2674 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2675 test = not isinstance(used_minors, (tuple, list))
2676 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2677 "cannot parse drbd status file: %s", str(used_minors))
2679 # we cannot check drbd status
2682 for minor, (iname, must_exist) in node_drbd.items():
2683 test = minor not in used_minors and must_exist
2684 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2685 "drbd minor %d of instance %s is not active", minor, iname)
2686 for minor in used_minors:
2687 test = minor not in node_drbd
2688 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2689 "unallocated drbd minor %d is in use", minor)
2691 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2692 """Builds the node OS structures.
2694 @type ninfo: L{objects.Node}
2695 @param ninfo: the node to check
2696 @param nresult: the remote results for the node
2697 @param nimg: the node image object
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 remote_os = nresult.get(constants.NV_OSLIST, None)
2704 test = (not isinstance(remote_os, list) or
2705 not compat.all(isinstance(v, list) and len(v) == 7
2706 for v in remote_os))
2708 _ErrorIf(test, constants.CV_ENODEOS, node,
2709 "node hasn't returned valid OS data")
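    # Each NV_OSLIST entry is a 7-element list: (name, path, status, diagnose
    # message, variants, parameters, api_versions). Below they are regrouped
    # into a dict keyed by OS name so duplicate appearances of one OS can be
    # detected.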
2718 for (name, os_path, status, diagnose,
2719 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2721 if name not in os_dict:
2724 # parameters is a list of lists instead of list of tuples due to
2725 # JSON lacking a real tuple type, fix it:
2726 parameters = [tuple(v) for v in parameters]
2727 os_dict[name].append((os_path, status, diagnose,
2728 set(variants), set(parameters), set(api_ver)))
2730 nimg.oslist = os_dict
2732 def _VerifyNodeOS(self, ninfo, nimg, base):
2733 """Verifies the node OS list.
2735 @type ninfo: L{objects.Node}
2736 @param ninfo: the node to check
2737 @param nimg: the node image object
2738 @param base: the 'template' node we match against (e.g. from the master)
2742 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2744 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2746 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2747 for os_name, os_data in nimg.oslist.items():
2748 assert os_data, "Empty OS status for OS %s?!" % os_name
2749 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2750 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2751 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2752 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2753 "OS '%s' has multiple entries (first one shadows the rest): %s",
2754 os_name, utils.CommaJoin([v[0] for v in os_data]))
2755 # comparisons with the 'base' image
2756 test = os_name not in base.oslist
2757 _ErrorIf(test, constants.CV_ENODEOS, node,
2758 "Extra OS %s not present on reference node (%s)",
2762 assert base.oslist[os_name], "Base node has empty OS status?"
2763 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2765 # base OS is invalid, skipping
2767 for kind, a, b in [("API version", f_api, b_api),
2768 ("variants list", f_var, b_var),
2769 ("parameters", beautify_params(f_param),
2770 beautify_params(b_param))]:
2771 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2772 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2773 kind, os_name, base.name,
2774 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2776 # check any missing OSes
2777 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2778 _ErrorIf(missing, constants.CV_ENODEOS, node,
2779 "OSes present on reference node %s but missing on this node: %s",
2780 base.name, utils.CommaJoin(missing))
2782 def _VerifyOob(self, ninfo, nresult):
2783 """Verifies out of band functionality of a node.
2785 @type ninfo: L{objects.Node}
2786 @param ninfo: the node to check
2787 @param nresult: the remote results for the node
2791 # We just have to verify the paths on master and/or master candidates
2792 # as the oob helper is invoked on the master
2793 if ((ninfo.master_candidate or ninfo.master_capable) and
2794 constants.NV_OOB_PATHS in nresult):
2795 for path_result in nresult[constants.NV_OOB_PATHS]:
2796 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2798 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2799 """Verifies and updates the node volume data.
2801 This function will update a L{NodeImage}'s internal structures
2802 with data from the remote call.
2804 @type ninfo: L{objects.Node}
2805 @param ninfo: the node to check
2806 @param nresult: the remote results for the node
2807 @param nimg: the node image object
2808 @param vg_name: the configured VG name
2812 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2814 nimg.lvm_fail = True
2815 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2818 elif isinstance(lvdata, basestring):
2819 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2820 utils.SafeEncode(lvdata))
2821 elif not isinstance(lvdata, dict):
2822 _ErrorIf(True, constants.CV_ENODELVM, node,
2823 "rpc call to node failed (lvlist)")
2825 nimg.volumes = lvdata
2826 nimg.lvm_fail = False
2828 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2829 """Verifies and updates the node instance list.
2831 If the listing was successful, then updates this node's instance
2832     list. Otherwise, it marks the RPC call as failed for the instance list key.
2835 @type ninfo: L{objects.Node}
2836 @param ninfo: the node to check
2837 @param nresult: the remote results for the node
2838 @param nimg: the node image object
2841 idata = nresult.get(constants.NV_INSTANCELIST, None)
2842 test = not isinstance(idata, list)
2843 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2844 "rpc call to node failed (instancelist): %s",
2845 utils.SafeEncode(str(idata)))
2847 nimg.hyp_fail = True
2849 nimg.instances = idata
2851 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2852 """Verifies and computes a node information map
2854 @type ninfo: L{objects.Node}
2855 @param ninfo: the node to check
2856 @param nresult: the remote results for the node
2857 @param nimg: the node image object
2858 @param vg_name: the configured VG name
2862 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2864 # try to read free memory (from the hypervisor)
2865 hv_info = nresult.get(constants.NV_HVINFO, None)
2866 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2867 _ErrorIf(test, constants.CV_ENODEHV, node,
2868 "rpc call to node failed (hvinfo)")
2871 nimg.mfree = int(hv_info["memory_free"])
2872 except (ValueError, TypeError):
2873 _ErrorIf(True, constants.CV_ENODERPC, node,
2874 "node returned invalid nodeinfo, check hypervisor")
2876 # FIXME: devise a free space model for file based instances as well
2877 if vg_name is not None:
2878 test = (constants.NV_VGLIST not in nresult or
2879 vg_name not in nresult[constants.NV_VGLIST])
2880 _ErrorIf(test, constants.CV_ENODELVM, node,
2881 "node didn't return data for the volume group '%s'"
2882 " - it is either missing or broken", vg_name)
2885 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2886 except (ValueError, TypeError):
2887 _ErrorIf(True, constants.CV_ENODERPC, node,
2888 "node returned invalid LVM info, check LVM status")
2890 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2891 """Gets per-disk status information for all instances.
2893 @type nodelist: list of strings
2894 @param nodelist: Node names
2895 @type node_image: dict of (name, L{objects.Node})
2896 @param node_image: Node objects
2897 @type instanceinfo: dict of (name, L{objects.Instance})
2898 @param instanceinfo: Instance objects
2899     @rtype: {instance: {node: [(success, payload)]}}
2900 @return: a dictionary of per-instance dictionaries with nodes as
2901 keys and disk information as values; the disk information is a
2902 list of tuples (success, payload)
2905 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2908 node_disks_devonly = {}
2909 diskless_instances = set()
2910 diskless = constants.DT_DISKLESS
2912 for nname in nodelist:
2913 node_instances = list(itertools.chain(node_image[nname].pinst,
2914 node_image[nname].sinst))
2915 diskless_instances.update(inst for inst in node_instances
2916 if instanceinfo[inst].disk_template == diskless)
2917 disks = [(inst, disk)
2918 for inst in node_instances
2919 for disk in instanceinfo[inst].disks]
2922 # No need to collect data
2925 node_disks[nname] = disks
2927       # _AnnotateDiskParams already makes copies of the disks
2929 for (inst, dev) in disks:
2930 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2931 self.cfg.SetDiskID(anno_disk, nname)
2932 devonly.append(anno_disk)
2934 node_disks_devonly[nname] = devonly
2936 assert len(node_disks) == len(node_disks_devonly)
2938 # Collect data from all nodes with disks
2939 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2942 assert len(result) == len(node_disks)
2946 for (nname, nres) in result.items():
2947 disks = node_disks[nname]
2950 # No data from this node
2951 data = len(disks) * [(False, "node offline")]
2954 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2955 "while getting disk information: %s", msg)
2957 # No data from this node
2958 data = len(disks) * [(False, msg)]
2961 for idx, i in enumerate(nres.payload):
2962 if isinstance(i, (tuple, list)) and len(i) == 2:
2965 logging.warning("Invalid result from node %s, entry %d: %s",
2967 data.append((False, "Invalid result from the remote node"))
2969 for ((inst, _), status) in zip(disks, data):
2970 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2972 # Add empty entries for diskless instances.
2973 for inst in diskless_instances:
2974 assert inst not in instdisk
2977 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2978 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2979 compat.all(isinstance(s, (tuple, list)) and
2980 len(s) == 2 for s in statuses)
2981 for inst, nnames in instdisk.items()
2982 for nname, statuses in nnames.items())
2983 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2988 def _SshNodeSelector(group_uuid, all_nodes):
2989 """Create endless iterators for all potential SSH check hosts.
2992 nodes = [node for node in all_nodes
2993 if (node.group != group_uuid and
2995 keyfunc = operator.attrgetter("group")
2997 return map(itertools.cycle,
2998 [sorted(map(operator.attrgetter("name"), names))
2999 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3003 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3004 """Choose which nodes should talk to which other nodes.
3006     We will make nodes contact all nodes in their group, and one node from every other group.
3009 @warning: This algorithm has a known issue if one node group is much
3010 smaller than others (e.g. just one node). In such a case all other
3011 nodes will talk to the single node.
3014 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3015 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3017 return (online_nodes,
3018 dict((name, sorted([i.next() for i in sel]))
3019 for name in online_nodes))
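  # _SshNodeSelector returns one endless (cycling) iterator per foreign group;
  # taking one element from every iterator for each online node spreads the
  # cross-group SSH checks round-robin over the other groups' members.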
3021 def BuildHooksEnv(self):
3024     Cluster-Verify hooks run only in the post phase; their failure makes
3025     their output be logged in the verify output and the verification fail.
3029 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3032 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3033 for node in self.my_node_info.values())
3037 def BuildHooksNodes(self):
3038 """Build hooks nodes.
3041 return ([], self.my_node_names)
3043 def Exec(self, feedback_fn):
3044     """Verify integrity of the node group, performing various tests on nodes.
3047 # This method has too many local variables. pylint: disable=R0914
3048 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3050 if not self.my_node_names:
3052 feedback_fn("* Empty node group, skipping verification")
3056 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3057 verbose = self.op.verbose
3058 self._feedback_fn = feedback_fn
3060 vg_name = self.cfg.GetVGName()
3061 drbd_helper = self.cfg.GetDRBDHelper()
3062 cluster = self.cfg.GetClusterInfo()
3063 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3064 hypervisors = cluster.enabled_hypervisors
3065 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3067 i_non_redundant = [] # Non redundant instances
3068 i_non_a_balanced = [] # Non auto-balanced instances
3069 i_offline = 0 # Count of offline instances
3070 n_offline = 0 # Count of offline nodes
3071 n_drained = 0 # Count of nodes being drained
3072 node_vol_should = {}
3074 # FIXME: verify OS list
3077 filemap = _ComputeAncillaryFiles(cluster, False)
3079 # do local checksums
3080 master_node = self.master_node = self.cfg.GetMasterNode()
3081 master_ip = self.cfg.GetMasterIP()
3083 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3086 if self.cfg.GetUseExternalMipScript():
3087 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3089 node_verify_param = {
3090 constants.NV_FILELIST:
3091 utils.UniqueSequence(filename
3092 for files in filemap
3093 for filename in files),
3094 constants.NV_NODELIST:
3095 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3096 self.all_node_info.values()),
3097 constants.NV_HYPERVISOR: hypervisors,
3098 constants.NV_HVPARAMS:
3099 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3100 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3101 for node in node_data_list
3102 if not node.offline],
3103 constants.NV_INSTANCELIST: hypervisors,
3104 constants.NV_VERSION: None,
3105 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3106 constants.NV_NODESETUP: None,
3107 constants.NV_TIME: None,
3108 constants.NV_MASTERIP: (master_node, master_ip),
3109 constants.NV_OSLIST: None,
3110 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3111 constants.NV_USERSCRIPTS: user_scripts,
3114 if vg_name is not None:
3115 node_verify_param[constants.NV_VGLIST] = None
3116 node_verify_param[constants.NV_LVLIST] = vg_name
3117 node_verify_param[constants.NV_PVLIST] = [vg_name]
3118 node_verify_param[constants.NV_DRBDLIST] = None
3121 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3124 # FIXME: this needs to be changed per node-group, not cluster-wide
3126 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3127 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3128 bridges.add(default_nicpp[constants.NIC_LINK])
3129 for instance in self.my_inst_info.values():
3130 for nic in instance.nics:
3131 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3132 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3133 bridges.add(full_nic[constants.NIC_LINK])
3136 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3138 # Build our expected cluster state
3139 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3141 vm_capable=node.vm_capable))
3142 for node in node_data_list)
3146 for node in self.all_node_info.values():
3147 path = _SupportsOob(self.cfg, node)
3148 if path and path not in oob_paths:
3149 oob_paths.append(path)
3152 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3154 for instance in self.my_inst_names:
3155 inst_config = self.my_inst_info[instance]
3156 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3159 for nname in inst_config.all_nodes:
3160 if nname not in node_image:
3161 gnode = self.NodeImage(name=nname)
3162 gnode.ghost = (nname not in self.all_node_info)
3163 node_image[nname] = gnode
3165 inst_config.MapLVsByNode(node_vol_should)
3167 pnode = inst_config.primary_node
3168 node_image[pnode].pinst.append(instance)
3170 for snode in inst_config.secondary_nodes:
3171 nimg = node_image[snode]
3172 nimg.sinst.append(instance)
3173 if pnode not in nimg.sbp:
3174 nimg.sbp[pnode] = []
3175 nimg.sbp[pnode].append(instance)
3177 # At this point, we have the in-memory data structures complete,
3178 # except for the runtime information, which we'll gather next
3180 # Due to the way our RPC system works, exact response times cannot be
3181 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3182     # time before and after executing the request, we can at least have a time window.
3184 nvinfo_starttime = time.time()
3185 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3187 self.cfg.GetClusterName())
3188 nvinfo_endtime = time.time()
3190 if self.extra_lv_nodes and vg_name is not None:
3192 self.rpc.call_node_verify(self.extra_lv_nodes,
3193 {constants.NV_LVLIST: vg_name},
3194 self.cfg.GetClusterName())
3196 extra_lv_nvinfo = {}
3198 all_drbd_map = self.cfg.ComputeDRBDMap()
3200 feedback_fn("* Gathering disk information (%s nodes)" %
3201 len(self.my_node_names))
3202 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3205 feedback_fn("* Verifying configuration file consistency")
3207 # If not all nodes are being checked, we need to make sure the master node
3208 # and a non-checked vm_capable node are in the list.
3209 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3211 vf_nvinfo = all_nvinfo.copy()
3212 vf_node_info = list(self.my_node_info.values())
3213 additional_nodes = []
3214 if master_node not in self.my_node_info:
3215 additional_nodes.append(master_node)
3216 vf_node_info.append(self.all_node_info[master_node])
3217 # Add the first vm_capable node we find which is not included,
3218 # excluding the master node (which we already have)
3219 for node in absent_nodes:
3220 nodeinfo = self.all_node_info[node]
3221 if (nodeinfo.vm_capable and not nodeinfo.offline and
3222 node != master_node):
3223 additional_nodes.append(node)
3224 vf_node_info.append(self.all_node_info[node])
3226 key = constants.NV_FILELIST
3227 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3228 {key: node_verify_param[key]},
3229 self.cfg.GetClusterName()))
3231 vf_nvinfo = all_nvinfo
3232 vf_node_info = self.my_node_info.values()
3234 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3236 feedback_fn("* Verifying node status")
3240 for node_i in node_data_list:
3242 nimg = node_image[node]
3246 feedback_fn("* Skipping offline node %s" % (node,))
3250 if node == master_node:
3252 elif node_i.master_candidate:
3253 ntype = "master candidate"
3254 elif node_i.drained:
3260 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3262 msg = all_nvinfo[node].fail_msg
3263 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3266 nimg.rpc_fail = True
3269 nresult = all_nvinfo[node].payload
3271 nimg.call_ok = self._VerifyNode(node_i, nresult)
3272 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3273 self._VerifyNodeNetwork(node_i, nresult)
3274 self._VerifyNodeUserScripts(node_i, nresult)
3275 self._VerifyOob(node_i, nresult)
3278 self._VerifyNodeLVM(node_i, nresult, vg_name)
3279 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3282 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3283 self._UpdateNodeInstances(node_i, nresult, nimg)
3284 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3285 self._UpdateNodeOS(node_i, nresult, nimg)
3287 if not nimg.os_fail:
3288 if refos_img is None:
3290 self._VerifyNodeOS(node_i, nimg, refos_img)
3291 self._VerifyNodeBridges(node_i, nresult, bridges)
3293       # Check whether all running instances are primary for the node. (This
3294 # can no longer be done from _VerifyInstance below, since some of the
3295 # wrong instances could be from other node groups.)
3296 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3298 for inst in non_primary_inst:
3299 test = inst in self.all_inst_info
3300 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3301 "instance should not run on node %s", node_i.name)
3302 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3303 "node is running unknown instance %s", inst)
3305 for node, result in extra_lv_nvinfo.items():
3306 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3307 node_image[node], vg_name)
3309 feedback_fn("* Verifying instance status")
3310 for instance in self.my_inst_names:
3312 feedback_fn("* Verifying instance %s" % instance)
3313 inst_config = self.my_inst_info[instance]
3314 self._VerifyInstance(instance, inst_config, node_image,
3316 inst_nodes_offline = []
3318 pnode = inst_config.primary_node
3319 pnode_img = node_image[pnode]
3320 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3321 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3322 " primary node failed", instance)
3324 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3326 constants.CV_EINSTANCEBADNODE, instance,
3327 "instance is marked as running and lives on offline node %s",
3328 inst_config.primary_node)
3330 # If the instance is non-redundant we cannot survive losing its primary
3331 # node, so we are not N+1 compliant. On the other hand we have no disk
3332       # templates with more than one secondary so that situation is not well supported either.
3334 # FIXME: does not support file-backed instances
3335 if not inst_config.secondary_nodes:
3336 i_non_redundant.append(instance)
3338 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3339 constants.CV_EINSTANCELAYOUT,
3340 instance, "instance has multiple secondary nodes: %s",
3341 utils.CommaJoin(inst_config.secondary_nodes),
3342 code=self.ETYPE_WARNING)
3344 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3345 pnode = inst_config.primary_node
3346 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3347 instance_groups = {}
3349 for node in instance_nodes:
3350 instance_groups.setdefault(self.all_node_info[node].group,
3354 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3355 # Sort so that we always list the primary node first.
3356 for group, nodes in sorted(instance_groups.items(),
3357 key=lambda (_, nodes): pnode in nodes,
3360 self._ErrorIf(len(instance_groups) > 1,
3361 constants.CV_EINSTANCESPLITGROUPS,
3362 instance, "instance has primary and secondary nodes in"
3363 " different groups: %s", utils.CommaJoin(pretty_list),
3364 code=self.ETYPE_WARNING)
3366 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3367 i_non_a_balanced.append(instance)
3369 for snode in inst_config.secondary_nodes:
3370 s_img = node_image[snode]
3371 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3372 snode, "instance %s, connection to secondary node failed",
3376 inst_nodes_offline.append(snode)
3378 # warn that the instance lives on offline nodes
3379 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3380 "instance has offline secondary node(s) %s",
3381 utils.CommaJoin(inst_nodes_offline))
3382 # ... or ghost/non-vm_capable nodes
3383 for node in inst_config.all_nodes:
3384 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3385 instance, "instance lives on ghost node %s", node)
3386 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3387 instance, "instance lives on non-vm_capable node %s", node)
3389 feedback_fn("* Verifying orphan volumes")
3390 reserved = utils.FieldSet(*cluster.reserved_lvs)
3392 # We will get spurious "unknown volume" warnings if any node of this group
3393 # is secondary for an instance whose primary is in another group. To avoid
3394 # them, we find these instances and add their volumes to node_vol_should.
3395 for inst in self.all_inst_info.values():
3396 for secondary in inst.secondary_nodes:
3397 if (secondary in self.my_node_info
3398 and inst.name not in self.my_inst_info):
3399 inst.MapLVsByNode(node_vol_should)
3402 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3404 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3405 feedback_fn("* Verifying N+1 Memory redundancy")
3406 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3408 feedback_fn("* Other Notes")
3410 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3411 % len(i_non_redundant))
3413 if i_non_a_balanced:
3414 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3415 % len(i_non_a_balanced))
3418 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3421 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3424 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3428 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3429 """Analyze the post-hooks' result
3431 This method analyses the hook result, handles it, and sends some
3432 nicely-formatted feedback back to the user.
3434 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3435 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3436 @param hooks_results: the results of the multi-node hooks rpc call
3437     @param feedback_fn: function used to send feedback back to the caller
3438 @param lu_result: previous Exec result
3439 @return: the new Exec result, based on the previous result
3443 # We only really run POST phase hooks, only for non-empty groups,
3444 # and are only interested in their results
3445 if not self.my_node_names:
3448 elif phase == constants.HOOKS_PHASE_POST:
3449 # Used to change hooks' output to proper indentation
3450 feedback_fn("* Hooks Results")
3451 assert hooks_results, "invalid result from hooks"
3453 for node_name in hooks_results:
3454 res = hooks_results[node_name]
3456 test = msg and not res.offline
3457 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3458 "Communication failure in hooks execution: %s", msg)
3459 if res.offline or msg:
3460         # No need to investigate payload if node is offline or gave an error
3463 for script, hkr, output in res.payload:
3464 test = hkr == constants.HKR_FAIL
3465 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3466 "Script %s failed, output:", script)
3468 output = self._HOOKS_INDENT_RE.sub(" ", output)
3469 feedback_fn("%s" % output)
3475 class LUClusterVerifyDisks(NoHooksLU):
3476 """Verifies the cluster disks status.
3481 def ExpandNames(self):
3482 self.share_locks = _ShareAll()
3483 self.needed_locks = {
3484 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3487 def Exec(self, feedback_fn):
3488 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3490 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3491 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3492 for group in group_names])
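    # One OpGroupVerifyDisks job per node group keeps the locking of each
    # verification confined to a single group instead of the whole cluster.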
3495 class LUGroupVerifyDisks(NoHooksLU):
3496 """Verifies the status of all disks in a node group.
3501 def ExpandNames(self):
3502 # Raises errors.OpPrereqError on its own if group can't be found
3503 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3505 self.share_locks = _ShareAll()
3506 self.needed_locks = {
3507 locking.LEVEL_INSTANCE: [],
3508 locking.LEVEL_NODEGROUP: [],
3509 locking.LEVEL_NODE: [],
3512 def DeclareLocks(self, level):
3513 if level == locking.LEVEL_INSTANCE:
3514 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3516 # Lock instances optimistically, needs verification once node and group
3517 # locks have been acquired
3518 self.needed_locks[locking.LEVEL_INSTANCE] = \
3519 self.cfg.GetNodeGroupInstances(self.group_uuid)
3521 elif level == locking.LEVEL_NODEGROUP:
3522 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3524 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3525 set([self.group_uuid] +
3526 # Lock all groups used by instances optimistically; this requires
3527 # going via the node before it's locked, requiring verification
3530 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3531 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3533 elif level == locking.LEVEL_NODE:
3534       # This will only lock the nodes in the group to be verified which contain actual instances
3536 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3537 self._LockInstancesNodes()
3539 # Lock all nodes in group to be verified
3540 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3541 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3542 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3544 def CheckPrereq(self):
3545 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3546 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3547 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3549 assert self.group_uuid in owned_groups
3551 # Check if locked instances are still correct
3552 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3554 # Get instance information
3555 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3557 # Check if node groups for locked instances are still correct
3558 _CheckInstancesNodeGroups(self.cfg, self.instances,
3559 owned_groups, owned_nodes, self.group_uuid)
3561 def Exec(self, feedback_fn):
3562 """Verify integrity of cluster disks.
3564 @rtype: tuple of three items
3565 @return: a tuple of (dict of node-to-node_error, list of instances
3566         which need activate-disks, dict of instance: (node, volume) for missing volumes
3571 res_instances = set()
3574 nv_dict = _MapInstanceDisksToNodes([inst
3575 for inst in self.instances.values()
3576 if inst.admin_state == constants.ADMINST_UP])
3579 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3580 set(self.cfg.GetVmCapableNodeList()))
3582 node_lvs = self.rpc.call_lv_list(nodes, [])
3584 for (node, node_res) in node_lvs.items():
3585 if node_res.offline:
3588 msg = node_res.fail_msg
3590 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3591 res_nodes[node] = msg
3594 for lv_name, (_, _, lv_online) in node_res.payload.items():
3595 inst = nv_dict.pop((node, lv_name), None)
3596 if not (lv_online or inst is None):
3597 res_instances.add(inst)
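          # An LV that exists but is not online means the owning instance
          # needs its disks (re)activated on that node.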
3599 # any leftover items in nv_dict are missing LVs, let's arrange the data
3601 for key, inst in nv_dict.iteritems():
3602 res_missing.setdefault(inst, []).append(list(key))
3604 return (res_nodes, list(res_instances), res_missing)
3607 class LUClusterRepairDiskSizes(NoHooksLU):
3608   """Verifies the cluster disk sizes.
3613 def ExpandNames(self):
3614 if self.op.instances:
3615 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3616 self.needed_locks = {
3617 locking.LEVEL_NODE_RES: [],
3618 locking.LEVEL_INSTANCE: self.wanted_names,
3620 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3622 self.wanted_names = None
3623 self.needed_locks = {
3624 locking.LEVEL_NODE_RES: locking.ALL_SET,
3625 locking.LEVEL_INSTANCE: locking.ALL_SET,
3627 self.share_locks = {
3628 locking.LEVEL_NODE_RES: 1,
3629 locking.LEVEL_INSTANCE: 0,
3632 def DeclareLocks(self, level):
3633 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3634 self._LockInstancesNodes(primary_only=True, level=level)
3636 def CheckPrereq(self):
3637 """Check prerequisites.
3639 This only checks the optional instance list against the existing names.
3642 if self.wanted_names is None:
3643 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3645 self.wanted_instances = \
3646 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3648 def _EnsureChildSizes(self, disk):
3649 """Ensure children of the disk have the needed disk size.
3651 This is valid mainly for DRBD8 and fixes an issue where the
3652     children have a smaller disk size.
3654 @param disk: an L{ganeti.objects.Disk} object
3657 if disk.dev_type == constants.LD_DRBD8:
3658 assert disk.children, "Empty children for DRBD8?"
3659 fchild = disk.children[0]
3660 mismatch = fchild.size < disk.size
3662 self.LogInfo("Child disk has size %d, parent %d, fixing",
3663 fchild.size, disk.size)
3664 fchild.size = disk.size
3666 # and we recurse on this child only, not on the metadev
3667 return self._EnsureChildSizes(fchild) or mismatch
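  # The return value is True as soon as any (grand)child had to be grown, so
  # the caller in Exec knows the instance configuration must be written back.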
3671 def Exec(self, feedback_fn):
3672 """Verify the size of cluster disks.
3675 # TODO: check child disks too
3676 # TODO: check differences in size between primary/secondary nodes
3678 for instance in self.wanted_instances:
3679 pnode = instance.primary_node
3680 if pnode not in per_node_disks:
3681 per_node_disks[pnode] = []
3682 for idx, disk in enumerate(instance.disks):
3683 per_node_disks[pnode].append((instance, idx, disk))
3685 assert not (frozenset(per_node_disks.keys()) -
3686 self.owned_locks(locking.LEVEL_NODE_RES)), \
3687 "Not owning correct locks"
3688 assert not self.owned_locks(locking.LEVEL_NODE)
3691 for node, dskl in per_node_disks.items():
3692 newl = [v[2].Copy() for v in dskl]
3694 self.cfg.SetDiskID(dsk, node)
3695 result = self.rpc.call_blockdev_getsize(node, newl)
3697 self.LogWarning("Failure in blockdev_getsize call to node"
3698 " %s, ignoring", node)
3700 if len(result.payload) != len(dskl):
3701         logging.warning("Invalid result from node %s: len(dskl)=%d,"
3702 " result.payload=%s", node, len(dskl), result.payload)
3703 self.LogWarning("Invalid result from node %s, ignoring node results",
3706 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3708 self.LogWarning("Disk %d of instance %s did not return size"
3709 " information, ignoring", idx, instance.name)
3711 if not isinstance(size, (int, long)):
3712 self.LogWarning("Disk %d of instance %s did not return valid"
3713 " size information, ignoring", idx, instance.name)
3716 if size != disk.size:
3717 self.LogInfo("Disk %d of instance %s has mismatched size,"
3718 " correcting: recorded %d, actual %d", idx,
3719 instance.name, disk.size, size)
3721 self.cfg.Update(instance, feedback_fn)
3722 changed.append((instance.name, idx, size))
3723 if self._EnsureChildSizes(disk):
3724 self.cfg.Update(instance, feedback_fn)
3725 changed.append((instance.name, idx, disk.size))
3729 class LUClusterRename(LogicalUnit):
3730 """Rename the cluster.
3733 HPATH = "cluster-rename"
3734 HTYPE = constants.HTYPE_CLUSTER
3736 def BuildHooksEnv(self):
3741 "OP_TARGET": self.cfg.GetClusterName(),
3742 "NEW_NAME": self.op.name,
3745 def BuildHooksNodes(self):
3746 """Build hooks nodes.
3749 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3751 def CheckPrereq(self):
3752 """Verify that the passed name is a valid one.
3755 hostname = netutils.GetHostname(name=self.op.name,
3756 family=self.cfg.GetPrimaryIPFamily())
3758 new_name = hostname.name
3759 self.ip = new_ip = hostname.ip
3760 old_name = self.cfg.GetClusterName()
3761 old_ip = self.cfg.GetMasterIP()
3762 if new_name == old_name and new_ip == old_ip:
3763 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3764 " cluster has changed",
3766 if new_ip != old_ip:
3767 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3768 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3769 " reachable on the network" %
3770 new_ip, errors.ECODE_NOTUNIQUE)
3772 self.op.name = new_name
3774 def Exec(self, feedback_fn):
3775 """Rename the cluster.
3778 clustername = self.op.name
3781 # shutdown the master IP
3782 master_params = self.cfg.GetMasterNetworkParameters()
3783 ems = self.cfg.GetUseExternalMipScript()
3784 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3786 result.Raise("Could not disable the master role")
3789 cluster = self.cfg.GetClusterInfo()
3790 cluster.cluster_name = clustername
3791 cluster.master_ip = new_ip
3792 self.cfg.Update(cluster, feedback_fn)
3794 # update the known hosts file
3795 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3796 node_list = self.cfg.GetOnlineNodeList()
3798 node_list.remove(master_params.name)
3801 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3803 master_params.ip = new_ip
3804 result = self.rpc.call_node_activate_master_ip(master_params.name,
3806 msg = result.fail_msg
3808 self.LogWarning("Could not re-enable the master role on"
3809 " the master, please restart manually: %s", msg)
3814 def _ValidateNetmask(cfg, netmask):
3815 """Checks if a netmask is valid.
3817 @type cfg: L{config.ConfigWriter}
3818 @param cfg: The cluster configuration
3820 @param netmask: the netmask to be verified
3821 @raise errors.OpPrereqError: if the validation fails
3824 ip_family = cfg.GetPrimaryIPFamily()
3826 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3827 except errors.ProgrammerError:
3828 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3830 if not ipcls.ValidateNetmask(netmask):
3831 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3835 class LUClusterSetParams(LogicalUnit):
3836 """Change the parameters of the cluster.
3839 HPATH = "cluster-modify"
3840 HTYPE = constants.HTYPE_CLUSTER
3843 def CheckArguments(self):
3847 if self.op.uid_pool:
3848 uidpool.CheckUidPool(self.op.uid_pool)
3850 if self.op.add_uids:
3851 uidpool.CheckUidPool(self.op.add_uids)
3853 if self.op.remove_uids:
3854 uidpool.CheckUidPool(self.op.remove_uids)
3856 if self.op.master_netmask is not None:
3857 _ValidateNetmask(self.cfg, self.op.master_netmask)
3859 if self.op.diskparams:
3860 for dt_params in self.op.diskparams.values():
3861 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3863 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3864 except errors.OpPrereqError, err:
3865 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3868 def ExpandNames(self):
3869 # FIXME: in the future maybe other cluster params won't require checking on
3870 # all nodes to be modified.
3871 self.needed_locks = {
3872 locking.LEVEL_NODE: locking.ALL_SET,
3873 locking.LEVEL_INSTANCE: locking.ALL_SET,
3874 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3876 self.share_locks = {
3877 locking.LEVEL_NODE: 1,
3878 locking.LEVEL_INSTANCE: 1,
3879 locking.LEVEL_NODEGROUP: 1,
3882 def BuildHooksEnv(self):
3887 "OP_TARGET": self.cfg.GetClusterName(),
3888 "NEW_VG_NAME": self.op.vg_name,
3891 def BuildHooksNodes(self):
3892 """Build hooks nodes.
3895 mn = self.cfg.GetMasterNode()
3898 def CheckPrereq(self):
3899 """Check prerequisites.
3901 This checks that the given parameters don't conflict and
3902 that the given volume group is valid.
3905 if self.op.vg_name is not None and not self.op.vg_name:
3906 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3907 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3908 " instances exist", errors.ECODE_INVAL)
3910 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3911 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3912 raise errors.OpPrereqError("Cannot disable drbd helper while"
3913 " drbd-based instances exist",
3916 node_list = self.owned_locks(locking.LEVEL_NODE)
3918 # if vg_name is not None, check the given volume group on all nodes
3920 vglist = self.rpc.call_vg_list(node_list)
3921 for node in node_list:
3922 msg = vglist[node].fail_msg
3924 # ignoring down node
3925 self.LogWarning("Error while gathering data on node %s"
3926 " (ignoring node): %s", node, msg)
3928 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3930 constants.MIN_VG_SIZE)
3932 raise errors.OpPrereqError("Error on node '%s': %s" %
3933 (node, vgstatus), errors.ECODE_ENVIRON)
3935 if self.op.drbd_helper:
3936 # check the given drbd helper on all nodes
3937 helpers = self.rpc.call_drbd_helper(node_list)
3938 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3940 self.LogInfo("Not checking drbd helper on offline node %s", node)
3942 msg = helpers[node].fail_msg
3944 raise errors.OpPrereqError("Error checking drbd helper on node"
3945 " '%s': %s" % (node, msg),
3946 errors.ECODE_ENVIRON)
3947 node_helper = helpers[node].payload
3948 if node_helper != self.op.drbd_helper:
3949 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3950 (node, node_helper), errors.ECODE_ENVIRON)
3952 self.cluster = cluster = self.cfg.GetClusterInfo()
3953 # validate params changes
3954 if self.op.beparams:
3955 objects.UpgradeBeParams(self.op.beparams)
3956 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3957 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3959 if self.op.ndparams:
3960 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3961 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3963 # TODO: we need a more general way to handle resetting
3964 # cluster-level parameters to default values
3965 if self.new_ndparams["oob_program"] == "":
3966 self.new_ndparams["oob_program"] = \
3967 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3969 if self.op.hv_state:
3970 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3971 self.cluster.hv_state_static)
3972 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3973 for hv, values in new_hv_state.items())
3975 if self.op.disk_state:
3976 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3977 self.cluster.disk_state_static)
3978 self.new_disk_state = \
3979 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3980 for name, values in svalues.items()))
3981 for storage, svalues in new_disk_state.items())
3984 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3987 all_instances = self.cfg.GetAllInstancesInfo().values()
3989 for group in self.cfg.GetAllNodeGroupsInfo().values():
3990 instances = frozenset([inst for inst in all_instances
3991 if compat.any(node in group.members
3992 for node in inst.all_nodes)])
3993 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3994 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3996 new_ipolicy, instances)
3998 violations.update(new)
4001 self.LogWarning("After the ipolicy change the following instances"
4002 " violate them: %s",
4003 utils.CommaJoin(utils.NiceSort(violations)))
4005 if self.op.nicparams:
4006 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4007 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4008 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4011 # check all instances for consistency
4012 for instance in self.cfg.GetAllInstancesInfo().values():
4013 for nic_idx, nic in enumerate(instance.nics):
4014 params_copy = copy.deepcopy(nic.nicparams)
4015 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4017 # check parameter syntax
4019 objects.NIC.CheckParameterSyntax(params_filled)
4020 except errors.ConfigurationError, err:
4021 nic_errors.append("Instance %s, nic/%d: %s" %
4022 (instance.name, nic_idx, err))
4024 # if we're moving instances to routed, check that they have an ip
4025 target_mode = params_filled[constants.NIC_MODE]
4026 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4027 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4028 " address" % (instance.name, nic_idx))
4030 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4031 "\n".join(nic_errors))
4033 # hypervisor list/parameters
4034 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4035 if self.op.hvparams:
4036 for hv_name, hv_dict in self.op.hvparams.items():
4037 if hv_name not in self.new_hvparams:
4038 self.new_hvparams[hv_name] = hv_dict
4040 self.new_hvparams[hv_name].update(hv_dict)
4042 # disk template parameters
4043 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4044 if self.op.diskparams:
4045 for dt_name, dt_params in self.op.diskparams.items():
4046 if dt_name not in self.new_diskparams:
4047 self.new_diskparams[dt_name] = dt_params
4049 self.new_diskparams[dt_name].update(dt_params)
4051 # os hypervisor parameters
4052 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4054 for os_name, hvs in self.op.os_hvp.items():
4055 if os_name not in self.new_os_hvp:
4056 self.new_os_hvp[os_name] = hvs
4058 for hv_name, hv_dict in hvs.items():
4059 if hv_name not in self.new_os_hvp[os_name]:
4060 self.new_os_hvp[os_name][hv_name] = hv_dict
4062 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4065 self.new_osp = objects.FillDict(cluster.osparams, {})
4066 if self.op.osparams:
4067 for os_name, osp in self.op.osparams.items():
4068 if os_name not in self.new_osp:
4069 self.new_osp[os_name] = {}
4071 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4074 if not self.new_osp[os_name]:
4075 # we removed all parameters
4076 del self.new_osp[os_name]
4078 # check the parameter validity (remote check)
4079 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4080 os_name, self.new_osp[os_name])
4082 # changes to the hypervisor list
4083 if self.op.enabled_hypervisors is not None:
4084 self.hv_list = self.op.enabled_hypervisors
4085 for hv in self.hv_list:
4086 # if the hypervisor doesn't already exist in the cluster
4087 # hvparams, we initialize it to empty, and then (in both
4088 # cases) we make sure to fill the defaults, as we might not
4089 # have a complete defaults list if the hypervisor wasn't enabled before
4091 if hv not in new_hvp:
4093 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4094 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4096 self.hv_list = cluster.enabled_hypervisors
4098 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4099 # either the enabled list has changed, or the parameters have, validate
4100 for hv_name, hv_params in self.new_hvparams.items():
4101 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4102 (self.op.enabled_hypervisors and
4103 hv_name in self.op.enabled_hypervisors)):
4104 # either this is a new hypervisor, or its parameters have changed
4105 hv_class = hypervisor.GetHypervisor(hv_name)
4106 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4107 hv_class.CheckParameterSyntax(hv_params)
4108 _CheckHVParams(self, node_list, hv_name, hv_params)
4111 # no need to check any newly-enabled hypervisors, since the
4112 # defaults have already been checked in the above code-block
4113 for os_name, os_hvp in self.new_os_hvp.items():
4114 for hv_name, hv_params in os_hvp.items():
4115 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4116 # we need to fill in the new os_hvp on top of the actual hv_p
4117 cluster_defaults = self.new_hvparams.get(hv_name, {})
4118 new_osp = objects.FillDict(cluster_defaults, hv_params)
4119 hv_class = hypervisor.GetHypervisor(hv_name)
4120 hv_class.CheckParameterSyntax(new_osp)
4121 _CheckHVParams(self, node_list, hv_name, new_osp)
4123 if self.op.default_iallocator:
4124 alloc_script = utils.FindFile(self.op.default_iallocator,
4125 constants.IALLOCATOR_SEARCH_PATH,
4127 if alloc_script is None:
4128 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4129 " specified" % self.op.default_iallocator,
4132 def Exec(self, feedback_fn):
4133 """Change the parameters of the cluster.
4136 if self.op.vg_name is not None:
4137 new_volume = self.op.vg_name
4140 if new_volume != self.cfg.GetVGName():
4141 self.cfg.SetVGName(new_volume)
4143 feedback_fn("Cluster LVM configuration already in desired"
4144 " state, not changing")
4145 if self.op.drbd_helper is not None:
4146 new_helper = self.op.drbd_helper
4149 if new_helper != self.cfg.GetDRBDHelper():
4150 self.cfg.SetDRBDHelper(new_helper)
4152 feedback_fn("Cluster DRBD helper already in desired state,"
4154 if self.op.hvparams:
4155 self.cluster.hvparams = self.new_hvparams
4157 self.cluster.os_hvp = self.new_os_hvp
4158 if self.op.enabled_hypervisors is not None:
4159 self.cluster.hvparams = self.new_hvparams
4160 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4161 if self.op.beparams:
4162 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4163 if self.op.nicparams:
4164 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4166 self.cluster.ipolicy = self.new_ipolicy
4167 if self.op.osparams:
4168 self.cluster.osparams = self.new_osp
4169 if self.op.ndparams:
4170 self.cluster.ndparams = self.new_ndparams
4171 if self.op.diskparams:
4172 self.cluster.diskparams = self.new_diskparams
4173 if self.op.hv_state:
4174 self.cluster.hv_state_static = self.new_hv_state
4175 if self.op.disk_state:
4176 self.cluster.disk_state_static = self.new_disk_state
4178 if self.op.candidate_pool_size is not None:
4179 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4180 # we need to update the pool size here, otherwise the save will fail
4181 _AdjustCandidatePool(self, [])
4183 if self.op.maintain_node_health is not None:
4184 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4185 feedback_fn("Note: CONFD was disabled at build time, node health"
4186 " maintenance is not useful (still enabling it)")
4187 self.cluster.maintain_node_health = self.op.maintain_node_health
4189 if self.op.prealloc_wipe_disks is not None:
4190 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4192 if self.op.add_uids is not None:
4193 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4195 if self.op.remove_uids is not None:
4196 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4198 if self.op.uid_pool is not None:
4199 self.cluster.uid_pool = self.op.uid_pool
4201 if self.op.default_iallocator is not None:
4202 self.cluster.default_iallocator = self.op.default_iallocator
4204 if self.op.reserved_lvs is not None:
4205 self.cluster.reserved_lvs = self.op.reserved_lvs
4207 if self.op.use_external_mip_script is not None:
4208 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4210 def helper_os(aname, mods, desc):
4212 lst = getattr(self.cluster, aname)
4213 for key, val in mods:
4214 if key == constants.DDM_ADD:
4216 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4219 elif key == constants.DDM_REMOVE:
4223 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4225 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4227 if self.op.hidden_os:
4228 helper_os("hidden_os", self.op.hidden_os, "hidden")
4230 if self.op.blacklisted_os:
4231 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4233 if self.op.master_netdev:
4234 master_params = self.cfg.GetMasterNetworkParameters()
4235 ems = self.cfg.GetUseExternalMipScript()
4236 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4237 self.cluster.master_netdev)
4238 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4240 result.Raise("Could not disable the master ip")
4241 feedback_fn("Changing master_netdev from %s to %s" %
4242 (master_params.netdev, self.op.master_netdev))
4243 self.cluster.master_netdev = self.op.master_netdev
4245 if self.op.master_netmask:
4246 master_params = self.cfg.GetMasterNetworkParameters()
4247 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4248 result = self.rpc.call_node_change_master_netmask(master_params.name,
4249 master_params.netmask,
4250 self.op.master_netmask,
4252 master_params.netdev)
4254 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4257 self.cluster.master_netmask = self.op.master_netmask
4259 self.cfg.Update(self.cluster, feedback_fn)
4261 if self.op.master_netdev:
4262 master_params = self.cfg.GetMasterNetworkParameters()
4263 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4264 self.op.master_netdev)
4265 ems = self.cfg.GetUseExternalMipScript()
4266 result = self.rpc.call_node_activate_master_ip(master_params.name,
4269 self.LogWarning("Could not re-enable the master ip on"
4270 " the master, please restart manually: %s",
4274 def _UploadHelper(lu, nodes, fname):
4275 """Helper for uploading a file and showing warnings.
4278 if os.path.exists(fname):
4279 result = lu.rpc.call_upload_file(nodes, fname)
4280 for to_node, to_result in result.items():
4281 msg = to_result.fail_msg
4283 msg = ("Copy of file %s to node %s failed: %s" %
4284 (fname, to_node, msg))
4285 lu.proc.LogWarning(msg)
4288 def _ComputeAncillaryFiles(cluster, redist):
4289 """Compute files external to Ganeti which need to be consistent.
4291 @type redist: boolean
4292 @param redist: Whether to include files which need to be redistributed
4295 # Compute files for all nodes
4297 constants.SSH_KNOWN_HOSTS_FILE,
4298 constants.CONFD_HMAC_KEY,
4299 constants.CLUSTER_DOMAIN_SECRET_FILE,
4300 constants.SPICE_CERT_FILE,
4301 constants.SPICE_CACERT_FILE,
4302 constants.RAPI_USERS_FILE,
4306 files_all.update(constants.ALL_CERT_FILES)
4307 files_all.update(ssconf.SimpleStore().GetFileList())
4309 # we need to ship at least the RAPI certificate
4310 files_all.add(constants.RAPI_CERT_FILE)
4312 if cluster.modify_etc_hosts:
4313 files_all.add(constants.ETC_HOSTS)
4315 if cluster.use_external_mip_script:
4316 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4318 # Files which are optional; these must:
4319 # - be present in one other category as well
4320 # - either exist or not exist on all nodes of that category (mc, vm all)
4322 constants.RAPI_USERS_FILE,
4325 # Files which should only be on master candidates
4329 files_mc.add(constants.CLUSTER_CONF_FILE)
4331 # Files which should only be on VM-capable nodes
4332 files_vm = set(filename
4333 for hv_name in cluster.enabled_hypervisors
4334 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4336 files_opt |= set(filename
4337 for hv_name in cluster.enabled_hypervisors
4338 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4340 # Filenames in each category must be unique
4341 all_files_set = files_all | files_mc | files_vm
4342 assert (len(all_files_set) ==
4343 sum(map(len, [files_all, files_mc, files_vm]))), \
4344 "Found file listed in more than one file list"
4346 # Optional files must be present in one other category
4347 assert all_files_set.issuperset(files_opt), \
4348 "Optional file not in a different required list"
4350 return (files_all, files_opt, files_mc, files_vm)
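# Rough sketch of the returned categories (file names abbreviated; the exact
# contents depend on the cluster configuration and enabled hypervisors):
#
#   files_all - distributed to every node (known_hosts, HMAC key, SPICE and
#               RAPI certificates, optionally /etc/hosts, ...)
#   files_opt - may legitimately be missing on some nodes (e.g. RAPI users)
#   files_mc  - master candidates only (the cluster config, when not
#               redistributing)
#   files_vm  - VM-capable nodes only (per-hypervisor ancillary files)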
4353 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4354 """Distribute additional files which are part of the cluster configuration.
4356 ConfigWriter takes care of distributing the config and ssconf files, but
4357 there are more files which should be distributed to all nodes. This function
4358 makes sure those are copied.
4360 @param lu: calling logical unit
4361 @param additional_nodes: list of nodes not in the config to distribute to
4362 @type additional_vm: boolean
4363 @param additional_vm: whether the additional nodes are vm-capable or not
4366 # Gather target nodes
4367 cluster = lu.cfg.GetClusterInfo()
4368 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4370 online_nodes = lu.cfg.GetOnlineNodeList()
4371 online_set = frozenset(online_nodes)
4372 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4374 if additional_nodes is not None:
4375 online_nodes.extend(additional_nodes)
4377 vm_nodes.extend(additional_nodes)
4379 # Never distribute to master node
4380 for nodelist in [online_nodes, vm_nodes]:
4381 if master_info.name in nodelist:
4382 nodelist.remove(master_info.name)
4385 (files_all, _, files_mc, files_vm) = \
4386 _ComputeAncillaryFiles(cluster, True)
4388 # Never re-distribute configuration file from here
4389 assert not (constants.CLUSTER_CONF_FILE in files_all or
4390 constants.CLUSTER_CONF_FILE in files_vm)
4391 assert not files_mc, "Master candidates not handled in this function"
4394 (online_nodes, files_all),
4395 (vm_nodes, files_vm),
4399 for (node_list, files) in filemap:
4401 _UploadHelper(lu, node_list, fname)
4404 class LUClusterRedistConf(NoHooksLU):
4405 """Force the redistribution of cluster configuration.
4407 This is a very simple LU.
4412 def ExpandNames(self):
4413 self.needed_locks = {
4414 locking.LEVEL_NODE: locking.ALL_SET,
4416 self.share_locks[locking.LEVEL_NODE] = 1
4418 def Exec(self, feedback_fn):
4419 """Redistribute the configuration.
4422 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4423 _RedistributeAncillaryFiles(self)
4426 class LUClusterActivateMasterIp(NoHooksLU):
4427 """Activate the master IP on the master node.
4430 def Exec(self, feedback_fn):
4431 """Activate the master IP.
4434 master_params = self.cfg.GetMasterNetworkParameters()
4435 ems = self.cfg.GetUseExternalMipScript()
4436 result = self.rpc.call_node_activate_master_ip(master_params.name,
4438 result.Raise("Could not activate the master IP")
4441 class LUClusterDeactivateMasterIp(NoHooksLU):
4442 """Deactivate the master IP on the master node.
4445 def Exec(self, feedback_fn):
4446 """Deactivate the master IP.
4449 master_params = self.cfg.GetMasterNetworkParameters()
4450 ems = self.cfg.GetUseExternalMipScript()
4451 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4453 result.Raise("Could not deactivate the master IP")
4456 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4457 """Sleep and poll for an instance's disk to sync.
4460 if not instance.disks or disks is not None and not disks:
4463 disks = _ExpandCheckDisks(instance, disks)
4466 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4468 node = instance.primary_node
4471 lu.cfg.SetDiskID(dev, node)
4473 # TODO: Convert to utils.Retry
4476 degr_retries = 10 # in seconds, as we sleep 1 second each time
4480 cumul_degraded = False
4481 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4482 msg = rstats.fail_msg
4484 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4487 raise errors.RemoteError("Can't contact node %s for mirror data,"
4488 " aborting." % node)
4491 rstats = rstats.payload
4493 for i, mstat in enumerate(rstats):
4495 lu.LogWarning("Can't compute data for node %s/%s",
4496 node, disks[i].iv_name)
4499 cumul_degraded = (cumul_degraded or
4500 (mstat.is_degraded and mstat.sync_percent is None))
4501 if mstat.sync_percent is not None:
4503 if mstat.estimated_time is not None:
4504 rem_time = ("%s remaining (estimated)" %
4505 utils.FormatSeconds(mstat.estimated_time))
4506 max_time = mstat.estimated_time
4508 rem_time = "no time estimate"
4509 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4510 (disks[i].iv_name, mstat.sync_percent, rem_time))
4512 # if we're done but degraded, let's do a few small retries, to
4513 # make sure we see a stable and not transient situation; therefore
4514 # we force a restart of the loop
4515 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4516 logging.info("Degraded disks found, %d retries left", degr_retries)
4524 time.sleep(min(60, max_time))
4527 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4528 return not cumul_degraded
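# Behavioural sketch (timings approximate): the loop above polls the mirror
# status, sleeping up to min(60, estimated_time) seconds between polls; once
# all devices report done (or on a oneshot call) while some are still
# degraded, up to degr_retries one-second retries are performed to make sure
# the state is stable, and True is returned only if no device ended up
# degraded. Typical calls:
#
#   _WaitForSync(self, instance)                 # wait for all disks
#   _WaitForSync(self, instance, oneshot=True)   # single status report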
4531 def _BlockdevFind(lu, node, dev, instance):
4532 """Wrapper around call_blockdev_find to annotate diskparams.
4534 @param lu: A reference to the lu object
4535 @param node: The node to call out to
4536 @param dev: The device to find
4537 @param instance: The instance object the device belongs to
4538 @returns The result of the rpc call
4541 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4542 return lu.rpc.call_blockdev_find(node, disk)
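# Typical call pattern (mirrors how plain call_blockdev_find results are
# handled elsewhere in this module): check fail_msg first, then inspect the
# payload, which is empty when the device is not found.
#
#   rstats = _BlockdevFind(self, node, dev, instance)
#   if rstats.fail_msg:
#     self.LogWarning("Can't find disk on node %s: %s", node, rstats.fail_msg)
#   elif rstats.payload:
#     degraded = rstats.payload.is_degraded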
4545 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4546 """Wrapper around L{_CheckDiskConsistencyInner}.
4549 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4550 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4554 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4556 """Check that mirrors are not degraded.
4558 @attention: The device has to be annotated already.
4560 The ldisk parameter, if True, will change the test from the
4561 is_degraded attribute (which represents overall non-ok status for
4562 the device(s)) to the ldisk (representing the local storage status).
4565 lu.cfg.SetDiskID(dev, node)
4569 if on_primary or dev.AssembleOnSecondary():
4570 rstats = lu.rpc.call_blockdev_find(node, dev)
4571 msg = rstats.fail_msg
4573 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4575 elif not rstats.payload:
4576 lu.LogWarning("Can't find disk on node %s", node)
4580 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4582 result = result and not rstats.payload.is_degraded
4585 for child in dev.children:
4586 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4592 class LUOobCommand(NoHooksLU):
4593 """Logical unit for OOB handling.
4597 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4599 def ExpandNames(self):
4600 """Gather locks we need.
4603 if self.op.node_names:
4604 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4605 lock_names = self.op.node_names
4607 lock_names = locking.ALL_SET
4609 self.needed_locks = {
4610 locking.LEVEL_NODE: lock_names,
4613 def CheckPrereq(self):
4614 """Check prerequisites.
4617 - the node exists in the configuration
4620 Any errors are signaled by raising errors.OpPrereqError.
4624 self.master_node = self.cfg.GetMasterNode()
4626 assert self.op.power_delay >= 0.0
4628 if self.op.node_names:
4629 if (self.op.command in self._SKIP_MASTER and
4630 self.master_node in self.op.node_names):
4631 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4632 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4634 if master_oob_handler:
4635 additional_text = ("run '%s %s %s' if you want to operate on the"
4636 " master regardless") % (master_oob_handler,
4640 additional_text = "it does not support out-of-band operations"
4642 raise errors.OpPrereqError(("Operating on the master node %s is not"
4643 " allowed for %s; %s") %
4644 (self.master_node, self.op.command,
4645 additional_text), errors.ECODE_INVAL)
4647 self.op.node_names = self.cfg.GetNodeList()
4648 if self.op.command in self._SKIP_MASTER:
4649 self.op.node_names.remove(self.master_node)
4651 if self.op.command in self._SKIP_MASTER:
4652 assert self.master_node not in self.op.node_names
4654 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4656 raise errors.OpPrereqError("Node %s not found" % node_name,
4659 self.nodes.append(node)
4661 if (not self.op.ignore_status and
4662 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4663 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4664 " not marked offline") % node_name,
4667 def Exec(self, feedback_fn):
4668 """Execute OOB and return result if we expect any.
4671 master_node = self.master_node
4674 for idx, node in enumerate(utils.NiceSort(self.nodes,
4675 key=lambda node: node.name)):
4676 node_entry = [(constants.RS_NORMAL, node.name)]
4677 ret.append(node_entry)
4679 oob_program = _SupportsOob(self.cfg, node)
4682 node_entry.append((constants.RS_UNAVAIL, None))
4685 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4686 self.op.command, oob_program, node.name)
4687 result = self.rpc.call_run_oob(master_node, oob_program,
4688 self.op.command, node.name,
4692 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4693 node.name, result.fail_msg)
4694 node_entry.append((constants.RS_NODATA, None))
4697 self._CheckPayload(result)
4698 except errors.OpExecError, err:
4699 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4701 node_entry.append((constants.RS_NODATA, None))
4703 if self.op.command == constants.OOB_HEALTH:
4704 # For health we should log important events
4705 for item, status in result.payload:
4706 if status in [constants.OOB_STATUS_WARNING,
4707 constants.OOB_STATUS_CRITICAL]:
4708 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4709 item, node.name, status)
4711 if self.op.command == constants.OOB_POWER_ON:
4713 elif self.op.command == constants.OOB_POWER_OFF:
4714 node.powered = False
4715 elif self.op.command == constants.OOB_POWER_STATUS:
4716 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4717 if powered != node.powered:
4718 logging.warning(("Recorded power state (%s) of node '%s' does not"
4719 " match actual power state (%s)"), node.powered,
4722 # For configuration changing commands we should update the node
4723 if self.op.command in (constants.OOB_POWER_ON,
4724 constants.OOB_POWER_OFF):
4725 self.cfg.Update(node, feedback_fn)
4727 node_entry.append((constants.RS_NORMAL, result.payload))
4729 if (self.op.command == constants.OOB_POWER_ON and
4730 idx < len(self.nodes) - 1):
4731 time.sleep(self.op.power_delay)
4735 def _CheckPayload(self, result):
4736 """Checks if the payload is valid.
4738 @param result: RPC result
4739 @raises errors.OpExecError: If payload is not valid
4743 if self.op.command == constants.OOB_HEALTH:
4744 if not isinstance(result.payload, list):
4745 errs.append("command 'health' is expected to return a list but got %s" %
4746 type(result.payload))
4748 for item, status in result.payload:
4749 if status not in constants.OOB_STATUSES:
4750 errs.append("health item '%s' has invalid status '%s'" %
4753 if self.op.command == constants.OOB_POWER_STATUS:
4754 if not isinstance(result.payload, dict):
4755 errs.append("power-status is expected to return a dict but got %s" %
4756 type(result.payload))
4758 if self.op.command in [
4759 constants.OOB_POWER_ON,
4760 constants.OOB_POWER_OFF,
4761 constants.OOB_POWER_CYCLE,
4763 if result.payload is not None:
4764 errs.append("%s is expected to not return payload but got '%s'" %
4765 (self.op.command, result.payload))
4768 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4769 utils.CommaJoin(errs))
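# Summary of the payload shapes enforced above: "health" must return a list
# of (item, status) pairs with statuses from constants.OOB_STATUSES,
# "power-status" must return a dict (from which Exec reads
# constants.OOB_POWER_STATUS_POWERED), and the power-on/off/cycle commands
# must return no payload at all. Illustrative power-status payload:
#
#   {constants.OOB_POWER_STATUS_POWERED: True}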
4772 class _OsQuery(_QueryBase):
4773 FIELDS = query.OS_FIELDS
4775 def ExpandNames(self, lu):
4776 # Lock all nodes in shared mode
4777 # Temporary removal of locks, should be reverted later
4778 # TODO: reintroduce locks when they are lighter-weight
4779 lu.needed_locks = {}
4780 #self.share_locks[locking.LEVEL_NODE] = 1
4781 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4783 # The following variables interact with _QueryBase._GetNames
4785 self.wanted = self.names
4787 self.wanted = locking.ALL_SET
4789 self.do_locking = self.use_locking
4791 def DeclareLocks(self, lu, level):
4795 def _DiagnoseByOS(rlist):
4796 """Remaps a per-node return list into an a per-os per-node dictionary
4798 @param rlist: a map with node names as keys and OS objects as values
4801 @return: a dictionary with osnames as keys and as value another
4802 map, with nodes as keys and tuples of (path, status, diagnose,
4803 variants, parameters, api_versions) as values, eg::
4805 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4806 (/srv/..., False, "invalid api")],
4807 "node2": [(/srv/..., True, "", [], [])]}
4812 # we build here the list of nodes that didn't fail the RPC (at RPC
4813 # level), so that nodes with a non-responding node daemon don't
4814 # make all OSes invalid
4815 good_nodes = [node_name for node_name in rlist
4816 if not rlist[node_name].fail_msg]
4817 for node_name, nr in rlist.items():
4818 if nr.fail_msg or not nr.payload:
4820 for (name, path, status, diagnose, variants,
4821 params, api_versions) in nr.payload:
4822 if name not in all_os:
4823 # build a list of nodes for this os containing empty lists
4824 # for each node in node_list
4826 for nname in good_nodes:
4827 all_os[name][nname] = []
4828 # convert params from [name, help] to (name, help)
4829 params = [tuple(v) for v in params]
4830 all_os[name][node_name].append((path, status, diagnose,
4831 variants, params, api_versions))
4834 def _GetQueryData(self, lu):
4835 """Computes the list of nodes and their attributes.
4838 # Locking is not used
4839 assert not (compat.any(lu.glm.is_owned(level)
4840 for level in locking.LEVELS
4841 if level != locking.LEVEL_CLUSTER) or
4842 self.do_locking or self.use_locking)
4844 valid_nodes = [node.name
4845 for node in lu.cfg.GetAllNodesInfo().values()
4846 if not node.offline and node.vm_capable]
4847 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4848 cluster = lu.cfg.GetClusterInfo()
4852 for (os_name, os_data) in pol.items():
4853 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4854 hidden=(os_name in cluster.hidden_os),
4855 blacklisted=(os_name in cluster.blacklisted_os))
4859 api_versions = set()
4861 for idx, osl in enumerate(os_data.values()):
4862 info.valid = bool(info.valid and osl and osl[0][1])
4866 (node_variants, node_params, node_api) = osl[0][3:6]
4869 variants.update(node_variants)
4870 parameters.update(node_params)
4871 api_versions.update(node_api)
4873 # Filter out inconsistent values
4874 variants.intersection_update(node_variants)
4875 parameters.intersection_update(node_params)
4876 api_versions.intersection_update(node_api)
4878 info.variants = list(variants)
4879 info.parameters = list(parameters)
4880 info.api_versions = list(api_versions)
4882 data[os_name] = info
4884 # Prepare data in requested order
4885 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4889 class LUOsDiagnose(NoHooksLU):
4890 """Logical unit for OS diagnose/query.
4896 def _BuildFilter(fields, names):
4897 """Builds a filter for querying OSes.
4900 name_filter = qlang.MakeSimpleFilter("name", names)
4902 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4903 # respective field is not requested
4904 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4905 for fname in ["hidden", "blacklisted"]
4906 if fname not in fields]
4907 if "valid" not in fields:
4908 status_filter.append([qlang.OP_TRUE, "valid"])
4911 status_filter.insert(0, qlang.OP_AND)
4913 status_filter = None
4915 if name_filter and status_filter:
4916 return [qlang.OP_AND, name_filter, status_filter]
4920 return status_filter
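# Illustrative result (structure only; qlang operators shown by constant
# name): for fields=["name", "variants"] and names=["debian-8"] the built
# filter is roughly
#
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-8"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]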
4922 def CheckArguments(self):
4923 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4924 self.op.output_fields, False)
4926 def ExpandNames(self):
4927 self.oq.ExpandNames(self)
4929 def Exec(self, feedback_fn):
4930 return self.oq.OldStyleQuery(self)
4933 class LUNodeRemove(LogicalUnit):
4934 """Logical unit for removing a node.
4937 HPATH = "node-remove"
4938 HTYPE = constants.HTYPE_NODE
4940 def BuildHooksEnv(self):
4945 "OP_TARGET": self.op.node_name,
4946 "NODE_NAME": self.op.node_name,
4949 def BuildHooksNodes(self):
4950 """Build hooks nodes.
4952 This doesn't run on the target node in the pre phase as a failed
4953 node would then be impossible to remove.
4956 all_nodes = self.cfg.GetNodeList()
4958 all_nodes.remove(self.op.node_name)
4961 return (all_nodes, all_nodes)
4963 def CheckPrereq(self):
4964 """Check prerequisites.
4967 - the node exists in the configuration
4968 - it does not have primary or secondary instances
4969 - it's not the master
4971 Any errors are signaled by raising errors.OpPrereqError.
4974 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4975 node = self.cfg.GetNodeInfo(self.op.node_name)
4976 assert node is not None
4978 masternode = self.cfg.GetMasterNode()
4979 if node.name == masternode:
4980 raise errors.OpPrereqError("Node is the master node, failover to another"
4981 " node is required", errors.ECODE_INVAL)
4983 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4984 if node.name in instance.all_nodes:
4985 raise errors.OpPrereqError("Instance %s is still running on the node,"
4986 " please remove first" % instance_name,
4988 self.op.node_name = node.name
4991 def Exec(self, feedback_fn):
4992 """Removes the node from the cluster.
4996 logging.info("Stopping the node daemon and removing configs from node %s",
4999 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5001 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5004 # Promote nodes to master candidate as needed
5005 _AdjustCandidatePool(self, exceptions=[node.name])
5006 self.context.RemoveNode(node.name)
5008 # Run post hooks on the node before it's removed
5009 _RunPostHook(self, node.name)
5011 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5012 msg = result.fail_msg
5014 self.LogWarning("Errors encountered on the remote node while leaving"
5015 " the cluster: %s", msg)
5017 # Remove node from our /etc/hosts
5018 if self.cfg.GetClusterInfo().modify_etc_hosts:
5019 master_node = self.cfg.GetMasterNode()
5020 result = self.rpc.call_etc_hosts_modify(master_node,
5021 constants.ETC_HOSTS_REMOVE,
5023 result.Raise("Can't update hosts file with new host data")
5024 _RedistributeAncillaryFiles(self)
5027 class _NodeQuery(_QueryBase):
5028 FIELDS = query.NODE_FIELDS
5030 def ExpandNames(self, lu):
5031 lu.needed_locks = {}
5032 lu.share_locks = _ShareAll()
5035 self.wanted = _GetWantedNodes(lu, self.names)
5037 self.wanted = locking.ALL_SET
5039 self.do_locking = (self.use_locking and
5040 query.NQ_LIVE in self.requested_data)
5043 # If any non-static field is requested we need to lock the nodes
5044 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5046 def DeclareLocks(self, lu, level):
5049 def _GetQueryData(self, lu):
5050 """Computes the list of nodes and their attributes.
5053 all_info = lu.cfg.GetAllNodesInfo()
5055 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5057 # Gather data as requested
5058 if query.NQ_LIVE in self.requested_data:
5059 # filter out non-vm_capable nodes
5060 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5062 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5063 [lu.cfg.GetHypervisorType()])
5064 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5065 for (name, nresult) in node_data.items()
5066 if not nresult.fail_msg and nresult.payload)
5070 if query.NQ_INST in self.requested_data:
5071 node_to_primary = dict([(name, set()) for name in nodenames])
5072 node_to_secondary = dict([(name, set()) for name in nodenames])
5074 inst_data = lu.cfg.GetAllInstancesInfo()
5076 for inst in inst_data.values():
5077 if inst.primary_node in node_to_primary:
5078 node_to_primary[inst.primary_node].add(inst.name)
5079 for secnode in inst.secondary_nodes:
5080 if secnode in node_to_secondary:
5081 node_to_secondary[secnode].add(inst.name)
5083 node_to_primary = None
5084 node_to_secondary = None
5086 if query.NQ_OOB in self.requested_data:
5087 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5088 for name, node in all_info.iteritems())
5092 if query.NQ_GROUP in self.requested_data:
5093 groups = lu.cfg.GetAllNodeGroupsInfo()
5097 return query.NodeQueryData([all_info[name] for name in nodenames],
5098 live_data, lu.cfg.GetMasterNode(),
5099 node_to_primary, node_to_secondary, groups,
5100 oob_support, lu.cfg.GetClusterInfo())
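# Rough overview of what the requested_data flags above toggle: NQ_LIVE
# triggers a node_info RPC against vm_capable nodes (live_data), NQ_INST
# builds the node_to_primary/node_to_secondary maps from the instance list,
# NQ_OOB records per-node out-of-band support and NQ_GROUP pulls in the node
# group objects; everything else comes straight from the configuration.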
5103 class LUNodeQuery(NoHooksLU):
5104 """Logical unit for querying nodes.
5107 # pylint: disable=W0142
5110 def CheckArguments(self):
5111 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5112 self.op.output_fields, self.op.use_locking)
5114 def ExpandNames(self):
5115 self.nq.ExpandNames(self)
5117 def DeclareLocks(self, level):
5118 self.nq.DeclareLocks(self, level)
5120 def Exec(self, feedback_fn):
5121 return self.nq.OldStyleQuery(self)
5124 class LUNodeQueryvols(NoHooksLU):
5125 """Logical unit for getting volumes on node(s).
5129 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5130 _FIELDS_STATIC = utils.FieldSet("node")
5132 def CheckArguments(self):
5133 _CheckOutputFields(static=self._FIELDS_STATIC,
5134 dynamic=self._FIELDS_DYNAMIC,
5135 selected=self.op.output_fields)
5137 def ExpandNames(self):
5138 self.share_locks = _ShareAll()
5139 self.needed_locks = {}
5141 if not self.op.nodes:
5142 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5144 self.needed_locks[locking.LEVEL_NODE] = \
5145 _GetWantedNodes(self, self.op.nodes)
5147 def Exec(self, feedback_fn):
5148 """Computes the list of nodes and their attributes.
5151 nodenames = self.owned_locks(locking.LEVEL_NODE)
5152 volumes = self.rpc.call_node_volumes(nodenames)
5154 ilist = self.cfg.GetAllInstancesInfo()
5155 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5158 for node in nodenames:
5159 nresult = volumes[node]
5162 msg = nresult.fail_msg
5164 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5167 node_vols = sorted(nresult.payload,
5168 key=operator.itemgetter("dev"))
5170 for vol in node_vols:
5172 for field in self.op.output_fields:
5175 elif field == "phys":
5179 elif field == "name":
5181 elif field == "size":
5182 val = int(float(vol["size"]))
5183 elif field == "instance":
5184 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5186 raise errors.ParameterError(field)
5187 node_output.append(str(val))
5189 output.append(node_output)
5194 class LUNodeQueryStorage(NoHooksLU):
5195 """Logical unit for getting information on storage units on node(s).
5198 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5201 def CheckArguments(self):
5202 _CheckOutputFields(static=self._FIELDS_STATIC,
5203 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5204 selected=self.op.output_fields)
5206 def ExpandNames(self):
5207 self.share_locks = _ShareAll()
5208 self.needed_locks = {}
5211 self.needed_locks[locking.LEVEL_NODE] = \
5212 _GetWantedNodes(self, self.op.nodes)
5214 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5216 def Exec(self, feedback_fn):
5217 """Computes the list of nodes and their attributes.
5220 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5222 # Always get name to sort by
5223 if constants.SF_NAME in self.op.output_fields:
5224 fields = self.op.output_fields[:]
5226 fields = [constants.SF_NAME] + self.op.output_fields
5228 # Never ask for node or type as it's only known to the LU
5229 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5230 while extra in fields:
5231 fields.remove(extra)
5233 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5234 name_idx = field_idx[constants.SF_NAME]
5236 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5237 data = self.rpc.call_storage_list(self.nodes,
5238 self.op.storage_type, st_args,
5239 self.op.name, fields)
5243 for node in utils.NiceSort(self.nodes):
5244 nresult = data[node]
5248 msg = nresult.fail_msg
5250 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5253 rows = dict([(row[name_idx], row) for row in nresult.payload])
5255 for name in utils.NiceSort(rows.keys()):
5260 for field in self.op.output_fields:
5261 if field == constants.SF_NODE:
5263 elif field == constants.SF_TYPE:
5264 val = self.op.storage_type
5265 elif field in field_idx:
5266 val = row[field_idx[field]]
5268 raise errors.ParameterError(field)
5277 class _InstanceQuery(_QueryBase):
5278 FIELDS = query.INSTANCE_FIELDS
5280 def ExpandNames(self, lu):
5281 lu.needed_locks = {}
5282 lu.share_locks = _ShareAll()
5285 self.wanted = _GetWantedInstances(lu, self.names)
5287 self.wanted = locking.ALL_SET
5289 self.do_locking = (self.use_locking and
5290 query.IQ_LIVE in self.requested_data)
5292 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5293 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5294 lu.needed_locks[locking.LEVEL_NODE] = []
5295 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5297 self.do_grouplocks = (self.do_locking and
5298 query.IQ_NODES in self.requested_data)
5300 def DeclareLocks(self, lu, level):
5302 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5303 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5305 # Lock all groups used by instances optimistically; this requires going
5306 # via the node before it's locked, requiring verification later on
5307 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5309 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5310 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5311 elif level == locking.LEVEL_NODE:
5312 lu._LockInstancesNodes() # pylint: disable=W0212
5315 def _CheckGroupLocks(lu):
5316 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5317 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5319 # Check if node groups for locked instances are still correct
5320 for instance_name in owned_instances:
5321 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5323 def _GetQueryData(self, lu):
5324 """Computes the list of instances and their attributes.
5327 if self.do_grouplocks:
5328 self._CheckGroupLocks(lu)
5330 cluster = lu.cfg.GetClusterInfo()
5331 all_info = lu.cfg.GetAllInstancesInfo()
5333 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5335 instance_list = [all_info[name] for name in instance_names]
5336 nodes = frozenset(itertools.chain(*(inst.all_nodes
5337 for inst in instance_list)))
5338 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5341 wrongnode_inst = set()
5343 # Gather data as requested
5344 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5346 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5348 result = node_data[name]
5350 # offline nodes will be in both lists
5351 assert result.fail_msg
5352 offline_nodes.append(name)
5354 bad_nodes.append(name)
5355 elif result.payload:
5356 for inst in result.payload:
5357 if inst in all_info:
5358 if all_info[inst].primary_node == name:
5359 live_data.update(result.payload)
5361 wrongnode_inst.add(inst)
5363 # orphan instance; we don't list it here as we don't
5364 # handle this case yet in the output of instance listing
5365 logging.warning("Orphan instance '%s' found on node %s",
5367 # else no instance is alive
5371 if query.IQ_DISKUSAGE in self.requested_data:
5372 disk_usage = dict((inst.name,
5373 _ComputeDiskSize(inst.disk_template,
5374 [{constants.IDISK_SIZE: disk.size}
5375 for disk in inst.disks]))
5376 for inst in instance_list)
5380 if query.IQ_CONSOLE in self.requested_data:
5382 for inst in instance_list:
5383 if inst.name in live_data:
5384 # Instance is running
5385 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5387 consinfo[inst.name] = None
5388 assert set(consinfo.keys()) == set(instance_names)
5392 if query.IQ_NODES in self.requested_data:
5393 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5395 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5396 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5397 for uuid in set(map(operator.attrgetter("group"),
5403 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5404 disk_usage, offline_nodes, bad_nodes,
5405 live_data, wrongnode_inst, consinfo,
5409 class LUQuery(NoHooksLU):
5410 """Query for resources/items of a certain kind.
5413 # pylint: disable=W0142
5416 def CheckArguments(self):
5417 qcls = _GetQueryImplementation(self.op.what)
5419 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5421 def ExpandNames(self):
5422 self.impl.ExpandNames(self)
5424 def DeclareLocks(self, level):
5425 self.impl.DeclareLocks(self, level)
5427 def Exec(self, feedback_fn):
5428 return self.impl.NewStyleQuery(self)
5431 class LUQueryFields(NoHooksLU):
5432 """Query for resources/items of a certain kind.
5435 # pylint: disable=W0142
5438 def CheckArguments(self):
5439 self.qcls = _GetQueryImplementation(self.op.what)
5441 def ExpandNames(self):
5442 self.needed_locks = {}
5444 def Exec(self, feedback_fn):
5445 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5448 class LUNodeModifyStorage(NoHooksLU):
5449 """Logical unit for modifying a storage volume on a node.
5454 def CheckArguments(self):
5455 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5457 storage_type = self.op.storage_type
5460 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5462 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5463 " modified" % storage_type,
5466 diff = set(self.op.changes.keys()) - modifiable
5468 raise errors.OpPrereqError("The following fields can not be modified for"
5469 " storage units of type '%s': %r" %
5470 (storage_type, list(diff)),
5473 def ExpandNames(self):
5474 self.needed_locks = {
5475 locking.LEVEL_NODE: self.op.node_name,
5478 def Exec(self, feedback_fn):
5479 """Computes the list of nodes and their attributes.
5482 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5483 result = self.rpc.call_storage_modify(self.op.node_name,
5484 self.op.storage_type, st_args,
5485 self.op.name, self.op.changes)
5486 result.Raise("Failed to modify storage unit '%s' on %s" %
5487 (self.op.name, self.op.node_name))
5490 class LUNodeAdd(LogicalUnit):
5491 """Logical unit for adding node to the cluster.
5495 HTYPE = constants.HTYPE_NODE
5496 _NFLAGS = ["master_capable", "vm_capable"]
5498 def CheckArguments(self):
5499 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5500 # validate/normalize the node name
5501 self.hostname = netutils.GetHostname(name=self.op.node_name,
5502 family=self.primary_ip_family)
5503 self.op.node_name = self.hostname.name
5505 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5506 raise errors.OpPrereqError("Cannot readd the master node",
5509 if self.op.readd and self.op.group:
5510 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5511 " being readded", errors.ECODE_INVAL)
5513 def BuildHooksEnv(self):
5516 This will run on all nodes before, and on all nodes + the new node after.
5520 "OP_TARGET": self.op.node_name,
5521 "NODE_NAME": self.op.node_name,
5522 "NODE_PIP": self.op.primary_ip,
5523 "NODE_SIP": self.op.secondary_ip,
5524 "MASTER_CAPABLE": str(self.op.master_capable),
5525 "VM_CAPABLE": str(self.op.vm_capable),
5528 def BuildHooksNodes(self):
5529 """Build hooks nodes.
5532 # Exclude added node
5533 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5534 post_nodes = pre_nodes + [self.op.node_name, ]
5536 return (pre_nodes, post_nodes)
5538 def CheckPrereq(self):
5539 """Check prerequisites.
5542 - the new node is not already in the config
5544 - its parameters (single/dual homed) match the cluster
5546 Any errors are signaled by raising errors.OpPrereqError.
5550 hostname = self.hostname
5551 node = hostname.name
5552 primary_ip = self.op.primary_ip = hostname.ip
5553 if self.op.secondary_ip is None:
5554 if self.primary_ip_family == netutils.IP6Address.family:
5555 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5556 " IPv4 address must be given as secondary",
5558 self.op.secondary_ip = primary_ip
5560 secondary_ip = self.op.secondary_ip
5561 if not netutils.IP4Address.IsValid(secondary_ip):
5562 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5563 " address" % secondary_ip, errors.ECODE_INVAL)
5565 node_list = cfg.GetNodeList()
5566 if not self.op.readd and node in node_list:
5567 raise errors.OpPrereqError("Node %s is already in the configuration" %
5568 node, errors.ECODE_EXISTS)
5569 elif self.op.readd and node not in node_list:
5570 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5573 self.changed_primary_ip = False
5575 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5576 if self.op.readd and node == existing_node_name:
5577 if existing_node.secondary_ip != secondary_ip:
5578 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5579 " address configuration as before",
5581 if existing_node.primary_ip != primary_ip:
5582 self.changed_primary_ip = True
5586 if (existing_node.primary_ip == primary_ip or
5587 existing_node.secondary_ip == primary_ip or
5588 existing_node.primary_ip == secondary_ip or
5589 existing_node.secondary_ip == secondary_ip):
5590 raise errors.OpPrereqError("New node ip address(es) conflict with"
5591 " existing node %s" % existing_node.name,
5592 errors.ECODE_NOTUNIQUE)
5594 # After this 'if' block, None is no longer a valid value for the
5595 # _capable op attributes
5597 old_node = self.cfg.GetNodeInfo(node)
5598 assert old_node is not None, "Can't retrieve locked node %s" % node
5599 for attr in self._NFLAGS:
5600 if getattr(self.op, attr) is None:
5601 setattr(self.op, attr, getattr(old_node, attr))
5603 for attr in self._NFLAGS:
5604 if getattr(self.op, attr) is None:
5605 setattr(self.op, attr, True)
5607 if self.op.readd and not self.op.vm_capable:
5608 pri, sec = cfg.GetNodeInstances(node)
5610 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5611 " flag set to false, but it already holds"
5612 " instances" % node,
5615 # check that the type of the node (single versus dual homed) is the
5616 # same as for the master
5617 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5618 master_singlehomed = myself.secondary_ip == myself.primary_ip
5619 newbie_singlehomed = secondary_ip == primary_ip
5620 if master_singlehomed != newbie_singlehomed:
5621 if master_singlehomed:
5622 raise errors.OpPrereqError("The master has no secondary ip but the"
5623 " new node has one",
5626 raise errors.OpPrereqError("The master has a secondary ip but the"
5627 " new node doesn't have one",
5630 # checks reachability
5631 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5632 raise errors.OpPrereqError("Node not reachable by ping",
5633 errors.ECODE_ENVIRON)
5635 if not newbie_singlehomed:
5636 # check reachability from my secondary ip to newbie's secondary ip
5637 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5638 source=myself.secondary_ip):
5639 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5640 " based ping to node daemon port",
5641 errors.ECODE_ENVIRON)
5648 if self.op.master_capable:
5649 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5651 self.master_candidate = False
5654 self.new_node = old_node
5656 node_group = cfg.LookupNodeGroup(self.op.group)
5657 self.new_node = objects.Node(name=node,
5658 primary_ip=primary_ip,
5659 secondary_ip=secondary_ip,
5660 master_candidate=self.master_candidate,
5661 offline=False, drained=False,
5664 if self.op.ndparams:
5665 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5667 if self.op.hv_state:
5668 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5670 if self.op.disk_state:
5671 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5673 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5674 # it a property on the base class.
5675 result = rpc.DnsOnlyRunner().call_version([node])[node]
5676 result.Raise("Can't get version information from node %s" % node)
5677 if constants.PROTOCOL_VERSION == result.payload:
5678 logging.info("Communication to node %s fine, sw version %s match",
5679 node, result.payload)
5680     else:
5681       raise errors.OpPrereqError("Version mismatch master version %s,"
5682 " node version %s" %
5683 (constants.PROTOCOL_VERSION, result.payload),
5684 errors.ECODE_ENVIRON)
5686 def Exec(self, feedback_fn):
5687 """Adds the new node to the cluster.
5690 new_node = self.new_node
5691 node = new_node.name
5693 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5696     # We are adding a new node, so we assume it's powered
5697 new_node.powered = True
5699 # for re-adds, reset the offline/drained/master-candidate flags;
5700 # we need to reset here, otherwise offline would prevent RPC calls
5701 # later in the procedure; this also means that if the re-add
5702 # fails, we are left with a non-offlined, broken node
5703     if self.op.readd:
5704       new_node.drained = new_node.offline = False  # pylint: disable=W0201
5705 self.LogInfo("Readding a node, the offline/drained flags were reset")
5706 # if we demote the node, we do cleanup later in the procedure
5707 new_node.master_candidate = self.master_candidate
5708 if self.changed_primary_ip:
5709 new_node.primary_ip = self.op.primary_ip
5711 # copy the master/vm_capable flags
5712 for attr in self._NFLAGS:
5713 setattr(new_node, attr, getattr(self.op, attr))
5715 # notify the user about any possible mc promotion
5716 if new_node.master_candidate:
5717 self.LogInfo("Node will be a master candidate")
5719 if self.op.ndparams:
5720 new_node.ndparams = self.op.ndparams
5721     else:
5722       new_node.ndparams = {}
5724 if self.op.hv_state:
5725 new_node.hv_state_static = self.new_hv_state
5727 if self.op.disk_state:
5728 new_node.disk_state_static = self.new_disk_state
5730 # Add node to our /etc/hosts, and add key to known_hosts
5731 if self.cfg.GetClusterInfo().modify_etc_hosts:
5732 master_node = self.cfg.GetMasterNode()
5733 result = self.rpc.call_etc_hosts_modify(master_node,
5734 constants.ETC_HOSTS_ADD,
5737 result.Raise("Can't update hosts file with new host data")
5739 if new_node.secondary_ip != new_node.primary_ip:
5740 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5743 node_verify_list = [self.cfg.GetMasterNode()]
5744 node_verify_param = {
5745 constants.NV_NODELIST: ([node], {}),
5746       # TODO: do a node-net-test as well?
5747       }
5749 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5750 self.cfg.GetClusterName())
5751 for verifier in node_verify_list:
5752 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5753 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5754       if nl_payload:
5755         for failed in nl_payload:
5756 feedback_fn("ssh/hostname verification failed"
5757 " (checking from %s): %s" %
5758 (verifier, nl_payload[failed]))
5759 raise errors.OpExecError("ssh/hostname verification failed")
5761     if self.op.readd:
5762       _RedistributeAncillaryFiles(self)
5763 self.context.ReaddNode(new_node)
5764 # make sure we redistribute the config
5765 self.cfg.Update(new_node, feedback_fn)
5766 # and make sure the new node will not have old files around
5767 if not new_node.master_candidate:
5768 result = self.rpc.call_node_demote_from_mc(new_node.name)
5769 msg = result.fail_msg
5770         if msg:
5771           self.LogWarning("Node failed to demote itself from master"
5772                           " candidate status: %s" % msg)
5773     else:
5774       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5775 additional_vm=self.op.vm_capable)
5776 self.context.AddNode(new_node, self.proc.GetECId())
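# Illustrative sketch (assumed, not taken from this module): node addition
# and re-addition are normally driven through an opcode such as
# opcodes.OpNodeAdd; the field names below are assumptions for illustration.
#
#   op = opcodes.OpNodeAdd(node_name="node4.example.com",
#                          group="default", readd=False)
#   # mcpu then runs LUNodeAdd.ExpandNames/CheckPrereq/Exec as defined above.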
5779 class LUNodeSetParams(LogicalUnit):
5780 """Modifies the parameters of a node.
5782 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5783 to the node role (as _ROLE_*)
5784 @cvar _R2F: a dictionary from node role to tuples of flags
5785 @cvar _FLAGS: a list of attribute names corresponding to the flags
5788 HPATH = "node-modify"
5789 HTYPE = constants.HTYPE_NODE
5791 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5792   _F2R = {
5793     (True, False, False): _ROLE_CANDIDATE,
5794     (False, True, False): _ROLE_DRAINED,
5795     (False, False, True): _ROLE_OFFLINE,
5796     (False, False, False): _ROLE_REGULAR,
5797     }
5798 _R2F = dict((v, k) for k, v in _F2R.items())
5799 _FLAGS = ["master_candidate", "drained", "offline"]
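  # Illustrative only (derived from the definitions above, not original text):
  # _F2R resolves a (master_candidate, drained, offline) flag tuple to a
  # single role and _R2F inverts the mapping, e.g.
  #   _F2R[(False, True, False)] == _ROLE_DRAINED
  #   _R2F[_ROLE_REGULAR] == (False, False, False)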
5801 def CheckArguments(self):
5802 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5803 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5804 self.op.master_capable, self.op.vm_capable,
5805 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5807 if all_mods.count(None) == len(all_mods):
5808 raise errors.OpPrereqError("Please pass at least one modification",
5810 if all_mods.count(True) > 1:
5811 raise errors.OpPrereqError("Can't set the node into more than one"
5812 " state at the same time",
5815 # Boolean value that tells us whether we might be demoting from MC
5816 self.might_demote = (self.op.master_candidate == False or
5817 self.op.offline == True or
5818 self.op.drained == True or
5819 self.op.master_capable == False)
5821 if self.op.secondary_ip:
5822 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5823 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5824 " address" % self.op.secondary_ip,
5827 self.lock_all = self.op.auto_promote and self.might_demote
5828 self.lock_instances = self.op.secondary_ip is not None
5830 def _InstanceFilter(self, instance):
5831 """Filter for getting affected instances.
5834 return (instance.disk_template in constants.DTS_INT_MIRROR and
5835 self.op.node_name in instance.all_nodes)
5837 def ExpandNames(self):
5838     if self.lock_all:
5839       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5840     else:
5841       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5843 # Since modifying a node can have severe effects on currently running
5844 # operations the resource lock is at least acquired in shared mode
5845 self.needed_locks[locking.LEVEL_NODE_RES] = \
5846 self.needed_locks[locking.LEVEL_NODE]
5848 # Get node resource and instance locks in shared mode; they are not used
5849 # for anything but read-only access
5850 self.share_locks[locking.LEVEL_NODE_RES] = 1
5851 self.share_locks[locking.LEVEL_INSTANCE] = 1
5853 if self.lock_instances:
5854 self.needed_locks[locking.LEVEL_INSTANCE] = \
5855 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5857 def BuildHooksEnv(self):
5860 This runs on the master node.
5864 "OP_TARGET": self.op.node_name,
5865 "MASTER_CANDIDATE": str(self.op.master_candidate),
5866 "OFFLINE": str(self.op.offline),
5867 "DRAINED": str(self.op.drained),
5868 "MASTER_CAPABLE": str(self.op.master_capable),
5869 "VM_CAPABLE": str(self.op.vm_capable),
5872 def BuildHooksNodes(self):
5873 """Build hooks nodes.
5876 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5879 def CheckPrereq(self):
5880 """Check prerequisites.
5882 This only checks the instance list against the existing names.
5885 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5887 if self.lock_instances:
5888 affected_instances = \
5889 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5891 # Verify instance locks
5892 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5893 wanted_instances = frozenset(affected_instances.keys())
5894 if wanted_instances - owned_instances:
5895 raise errors.OpPrereqError("Instances affected by changing node %s's"
5896 " secondary IP address have changed since"
5897 " locks were acquired, wanted '%s', have"
5898 " '%s'; retry the operation" %
5900 utils.CommaJoin(wanted_instances),
5901 utils.CommaJoin(owned_instances)),
5902                                    errors.ECODE_STATE)
5903     else:
5904       affected_instances = None
5906 if (self.op.master_candidate is not None or
5907 self.op.drained is not None or
5908 self.op.offline is not None):
5909 # we can't change the master's node flags
5910 if self.op.node_name == self.cfg.GetMasterNode():
5911 raise errors.OpPrereqError("The master role can be changed"
5912 " only via master-failover",
5915 if self.op.master_candidate and not node.master_capable:
5916 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5917 " it a master candidate" % node.name,
5920 if self.op.vm_capable == False:
5921 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5923 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5924 " the vm_capable flag" % node.name,
5927 if node.master_candidate and self.might_demote and not self.lock_all:
5928 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5929       # check if after removing the current node, we're missing master
5930       # candidates
5931 (mc_remaining, mc_should, _) = \
5932 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5933 if mc_remaining < mc_should:
5934 raise errors.OpPrereqError("Not enough master candidates, please"
5935 " pass auto promote option to allow"
5936 " promotion (--auto-promote or RAPI"
5937 " auto_promote=True)", errors.ECODE_STATE)
5939 self.old_flags = old_flags = (node.master_candidate,
5940 node.drained, node.offline)
5941 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5942 self.old_role = old_role = self._F2R[old_flags]
5944 # Check for ineffective changes
5945 for attr in self._FLAGS:
5946 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5947 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5948 setattr(self.op, attr, None)
5950 # Past this point, any flag change to False means a transition
5951 # away from the respective state, as only real changes are kept
5953 # TODO: We might query the real power state if it supports OOB
5954 if _SupportsOob(self.cfg, node):
5955 if self.op.offline is False and not (node.powered or
5956 self.op.powered == True):
5957 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5958 " offline status can be reset") %
5960 elif self.op.powered is not None:
5961 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5962 " as it does not support out-of-band"
5963 " handling") % self.op.node_name)
5965 # If we're being deofflined/drained, we'll MC ourself if needed
5966 if (self.op.drained == False or self.op.offline == False or
5967 (self.op.master_capable and not node.master_capable)):
5968 if _DecideSelfPromotion(self):
5969 self.op.master_candidate = True
5970 self.LogInfo("Auto-promoting node to master candidate")
5972 # If we're no longer master capable, we'll demote ourselves from MC
5973 if self.op.master_capable == False and node.master_candidate:
5974 self.LogInfo("Demoting from master candidate")
5975 self.op.master_candidate = False
5978 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5979 if self.op.master_candidate:
5980 new_role = self._ROLE_CANDIDATE
5981 elif self.op.drained:
5982 new_role = self._ROLE_DRAINED
5983 elif self.op.offline:
5984 new_role = self._ROLE_OFFLINE
5985 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5986 # False is still in new flags, which means we're un-setting (the
5988 new_role = self._ROLE_REGULAR
5989     else: # no new flags, nothing, keep old role
5990       new_role = old_role
5992     self.new_role = new_role
5994 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5995 # Trying to transition out of offline status
5996 result = self.rpc.call_version([node.name])[node.name]
5997       if result.fail_msg:
5998         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5999                                    " to report its version: %s" %
6000                                    (node.name, result.fail_msg),
6001                                    errors.ECODE_ENVIRON)
6002       else:
6003         self.LogWarning("Transitioning node from offline to online state"
6004                         " without using re-add. Please make sure the node"
6005                         " is healthy!")
6007 if self.op.secondary_ip:
6008 # Ok even without locking, because this can't be changed by any LU
6009 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6010 master_singlehomed = master.secondary_ip == master.primary_ip
6011 if master_singlehomed and self.op.secondary_ip:
6012 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6013 " homed cluster", errors.ECODE_INVAL)
6015 assert not (frozenset(affected_instances) -
6016 self.owned_locks(locking.LEVEL_INSTANCE))
6018       if node.offline:
6019         if affected_instances:
6020           raise errors.OpPrereqError("Cannot change secondary IP address:"
6021                                      " offline node has instances (%s)"
6022                                      " configured to use it" %
6023                                      utils.CommaJoin(affected_instances.keys()))
6024       else:
6025         # On online nodes, check that no instances are running, and that
6026 # the node has the new ip and we can reach it.
6027 for instance in affected_instances.values():
6028 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6029 msg="cannot change secondary ip")
6031 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6032 if master.name != node.name:
6033 # check reachability from master secondary ip to new secondary ip
6034 if not netutils.TcpPing(self.op.secondary_ip,
6035 constants.DEFAULT_NODED_PORT,
6036 source=master.secondary_ip):
6037 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6038 " based ping to node daemon port",
6039 errors.ECODE_ENVIRON)
6041 if self.op.ndparams:
6042 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6043 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6044 self.new_ndparams = new_ndparams
6046 if self.op.hv_state:
6047 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6048 self.node.hv_state_static)
6050 if self.op.disk_state:
6051 self.new_disk_state = \
6052 _MergeAndVerifyDiskState(self.op.disk_state,
6053 self.node.disk_state_static)
6055 def Exec(self, feedback_fn):
6059     node = self.node
6060     old_role = self.old_role
6061     new_role = self.new_role
6063     result = []
6065 if self.op.ndparams:
6066 node.ndparams = self.new_ndparams
6068 if self.op.powered is not None:
6069 node.powered = self.op.powered
6071 if self.op.hv_state:
6072 node.hv_state_static = self.new_hv_state
6074 if self.op.disk_state:
6075 node.disk_state_static = self.new_disk_state
6077 for attr in ["master_capable", "vm_capable"]:
6078 val = getattr(self.op, attr)
6079       if val is not None:
6080         setattr(node, attr, val)
6081 result.append((attr, str(val)))
6083 if new_role != old_role:
6084 # Tell the node to demote itself, if no longer MC and not offline
6085 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6086 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6088 self.LogWarning("Node failed to demote itself: %s", msg)
6090 new_flags = self._R2F[new_role]
6091 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6092         if of != nf:
6093           result.append((desc, str(nf)))
6094 (node.master_candidate, node.drained, node.offline) = new_flags
6096 # we locked all nodes, we adjust the CP before updating this node
6097       if self.lock_all:
6098         _AdjustCandidatePool(self, [node.name])
6100 if self.op.secondary_ip:
6101 node.secondary_ip = self.op.secondary_ip
6102 result.append(("secondary_ip", self.op.secondary_ip))
6104 # this will trigger configuration file update, if needed
6105 self.cfg.Update(node, feedback_fn)
6107     # this will trigger job queue propagation or cleanup if the mc
6108     # flag changed
6109     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6110       self.context.ReaddNode(node)
6112     return result
6115 class LUNodePowercycle(NoHooksLU):
6116 """Powercycles a node.
6121 def CheckArguments(self):
6122 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6123 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6124 raise errors.OpPrereqError("The node is the master and the force"
6125 " parameter was not set",
6128 def ExpandNames(self):
6129 """Locking for PowercycleNode.
6131 This is a last-resort option and shouldn't block on other
6132 jobs. Therefore, we grab no locks.
6135 self.needed_locks = {}
6137 def Exec(self, feedback_fn):
6141 result = self.rpc.call_node_powercycle(self.op.node_name,
6142 self.cfg.GetHypervisorType())
6143 result.Raise("Failed to schedule the reboot")
6144 return result.payload
6147 class LUClusterQuery(NoHooksLU):
6148 """Query cluster configuration.
6153 def ExpandNames(self):
6154 self.needed_locks = {}
6156 def Exec(self, feedback_fn):
6157 """Return cluster config.
6160 cluster = self.cfg.GetClusterInfo()
6162     os_hvp = {}
6163     # Filter just for enabled hypervisors
6164 for os_name, hv_dict in cluster.os_hvp.items():
6165 os_hvp[os_name] = {}
6166 for hv_name, hv_params in hv_dict.items():
6167 if hv_name in cluster.enabled_hypervisors:
6168 os_hvp[os_name][hv_name] = hv_params
6170 # Convert ip_family to ip_version
6171 primary_ip_version = constants.IP4_VERSION
6172 if cluster.primary_ip_family == netutils.IP6Address.family:
6173 primary_ip_version = constants.IP6_VERSION
6176 "software_version": constants.RELEASE_VERSION,
6177 "protocol_version": constants.PROTOCOL_VERSION,
6178 "config_version": constants.CONFIG_VERSION,
6179 "os_api_version": max(constants.OS_API_VERSIONS),
6180 "export_version": constants.EXPORT_VERSION,
6181 "architecture": runtime.GetArchInfo(),
6182 "name": cluster.cluster_name,
6183 "master": cluster.master_node,
6184 "default_hypervisor": cluster.primary_hypervisor,
6185 "enabled_hypervisors": cluster.enabled_hypervisors,
6186 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6187 for hypervisor_name in cluster.enabled_hypervisors]),
6189 "beparams": cluster.beparams,
6190 "osparams": cluster.osparams,
6191 "ipolicy": cluster.ipolicy,
6192 "nicparams": cluster.nicparams,
6193 "ndparams": cluster.ndparams,
6194 "diskparams": cluster.diskparams,
6195 "candidate_pool_size": cluster.candidate_pool_size,
6196 "master_netdev": cluster.master_netdev,
6197 "master_netmask": cluster.master_netmask,
6198 "use_external_mip_script": cluster.use_external_mip_script,
6199 "volume_group_name": cluster.volume_group_name,
6200 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6201 "file_storage_dir": cluster.file_storage_dir,
6202 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6203 "maintain_node_health": cluster.maintain_node_health,
6204 "ctime": cluster.ctime,
6205 "mtime": cluster.mtime,
6206 "uuid": cluster.uuid,
6207 "tags": list(cluster.GetTags()),
6208 "uid_pool": cluster.uid_pool,
6209 "default_iallocator": cluster.default_iallocator,
6210 "reserved_lvs": cluster.reserved_lvs,
6211 "primary_ip_version": primary_ip_version,
6212 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6213 "hidden_os": cluster.hidden_os,
6214 "blacklisted_os": cluster.blacklisted_os,
6220 class LUClusterConfigQuery(NoHooksLU):
6221 """Return configuration values.
6226 def CheckArguments(self):
6227 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6229 def ExpandNames(self):
6230 self.cq.ExpandNames(self)
6232 def DeclareLocks(self, level):
6233 self.cq.DeclareLocks(self, level)
6235 def Exec(self, feedback_fn):
6236 result = self.cq.OldStyleQuery(self)
6238     assert len(result) == 1
6240     return result[0]
6243 class _ClusterQuery(_QueryBase):
6244 FIELDS = query.CLUSTER_FIELDS
6246 #: Do not sort (there is only one item)
6249 def ExpandNames(self, lu):
6250 lu.needed_locks = {}
6252 # The following variables interact with _QueryBase._GetNames
6253 self.wanted = locking.ALL_SET
6254 self.do_locking = self.use_locking
6256     if self.do_locking:
6257       raise errors.OpPrereqError("Can not use locking for cluster queries",
6258                                  errors.ECODE_INVAL)
6260 def DeclareLocks(self, lu, level):
6263 def _GetQueryData(self, lu):
6264 """Computes the list of nodes and their attributes.
6267 # Locking is not used
6268 assert not (compat.any(lu.glm.is_owned(level)
6269 for level in locking.LEVELS
6270 if level != locking.LEVEL_CLUSTER) or
6271 self.do_locking or self.use_locking)
6273 if query.CQ_CONFIG in self.requested_data:
6274 cluster = lu.cfg.GetClusterInfo()
6276 cluster = NotImplemented
6278 if query.CQ_QUEUE_DRAINED in self.requested_data:
6279 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6281 drain_flag = NotImplemented
6283 if query.CQ_WATCHER_PAUSE in self.requested_data:
6284 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6286 watcher_pause = NotImplemented
6288 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6291 class LUInstanceActivateDisks(NoHooksLU):
6292 """Bring up an instance's disks.
6297 def ExpandNames(self):
6298 self._ExpandAndLockInstance()
6299 self.needed_locks[locking.LEVEL_NODE] = []
6300 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6302 def DeclareLocks(self, level):
6303 if level == locking.LEVEL_NODE:
6304 self._LockInstancesNodes()
6306 def CheckPrereq(self):
6307 """Check prerequisites.
6309 This checks that the instance is in the cluster.
6312 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6313 assert self.instance is not None, \
6314 "Cannot retrieve locked instance %s" % self.op.instance_name
6315 _CheckNodeOnline(self, self.instance.primary_node)
6317 def Exec(self, feedback_fn):
6318 """Activate the disks.
6321 disks_ok, disks_info = \
6322       _AssembleInstanceDisks(self, self.instance,
6323                              ignore_size=self.op.ignore_size)
6324     if not disks_ok:
6325       raise errors.OpExecError("Cannot activate block devices")
6327     return disks_info
6330 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6331                            ignore_size=False):
6332 """Prepare the block devices for an instance.
6334 This sets up the block devices on all nodes.
6336 @type lu: L{LogicalUnit}
6337 @param lu: the logical unit on whose behalf we execute
6338 @type instance: L{objects.Instance}
6339 @param instance: the instance for whose disks we assemble
6340 @type disks: list of L{objects.Disk} or None
6341 @param disks: which disks to assemble (or all, if None)
6342 @type ignore_secondaries: boolean
6343 @param ignore_secondaries: if true, errors on secondary nodes
6344 won't result in an error return from the function
6345 @type ignore_size: boolean
6346 @param ignore_size: if true, the current known size of the disk
6347 will not be used during the disk activation, useful for cases
6348 when the size is wrong
6349 @return: False if the operation failed, otherwise a list of
6350 (host, instance_visible_name, node_visible_name)
6351 with the mapping from node devices to instance devices
6354   device_info = []
6355   disks_ok = True
6356   iname = instance.name
6357 disks = _ExpandCheckDisks(instance, disks)
6359   # With the two-pass mechanism we try to reduce the window of
6360   # opportunity for the race condition of switching DRBD to primary
6361   # before handshaking occurred, but we do not eliminate it
6363 # The proper fix would be to wait (with some limits) until the
6364 # connection has been made and drbd transitions from WFConnection
6365 # into any other network-connected state (Connected, SyncTarget,
6368 # 1st pass, assemble on all nodes in secondary mode
6369 for idx, inst_disk in enumerate(disks):
6370 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6371       if ignore_size:
6372         node_disk = node_disk.Copy()
6373         node_disk.UnsetSize()
6374 lu.cfg.SetDiskID(node_disk, node)
6375 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6377       msg = result.fail_msg
6378       if msg:
6379         is_offline_secondary = (node in instance.secondary_nodes and
6380                                 result.offline)
6381         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6382                            " (is_primary=False, pass=1): %s",
6383                            inst_disk.iv_name, node, msg)
6384         if not (ignore_secondaries or is_offline_secondary):
6385           disks_ok = False
6387 # FIXME: race condition on drbd migration to primary
6389 # 2nd pass, do only the primary node
6390 for idx, inst_disk in enumerate(disks):
6393 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6394       if node != instance.primary_node:
6395         continue
6396       if ignore_size:
6397         node_disk = node_disk.Copy()
6398 node_disk.UnsetSize()
6399 lu.cfg.SetDiskID(node_disk, node)
6400 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6402       msg = result.fail_msg
6403       if msg:
6404         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6405                            " (is_primary=True, pass=2): %s",
6406                            inst_disk.iv_name, node, msg)
6407         disks_ok = False
6408       else:
6409         dev_path = result.payload
6411 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6413 # leave the disks configured for the primary node
6414 # this is a workaround that would be fixed better by
6415 # improving the logical/physical id handling
6416   for disk in disks:
6417     lu.cfg.SetDiskID(disk, instance.primary_node)
6419 return disks_ok, device_info
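# Illustrative usage sketch (assumed, mirroring the callers above and below,
# e.g. LUInstanceActivateDisks and _StartInstanceDisks):
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("%s: disk %s visible as %s", node, iv_name, dev_path)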
6422 def _StartInstanceDisks(lu, instance, force):
6423 """Start the disks of an instance.
6426 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6427 ignore_secondaries=force)
6428   if not disks_ok:
6429     _ShutdownInstanceDisks(lu, instance)
6430 if force is not None and not force:
6431 lu.proc.LogWarning("", hint="If the message above refers to a"
6433 " you can retry the operation using '--force'.")
6434 raise errors.OpExecError("Disk consistency error")
6437 class LUInstanceDeactivateDisks(NoHooksLU):
6438 """Shutdown an instance's disks.
6443 def ExpandNames(self):
6444 self._ExpandAndLockInstance()
6445 self.needed_locks[locking.LEVEL_NODE] = []
6446 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6448 def DeclareLocks(self, level):
6449 if level == locking.LEVEL_NODE:
6450 self._LockInstancesNodes()
6452 def CheckPrereq(self):
6453 """Check prerequisites.
6455 This checks that the instance is in the cluster.
6458 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6459 assert self.instance is not None, \
6460 "Cannot retrieve locked instance %s" % self.op.instance_name
6462 def Exec(self, feedback_fn):
6463 """Deactivate the disks
6466 instance = self.instance
6467     if self.op.force:
6468       _ShutdownInstanceDisks(self, instance)
6469     else:
6470       _SafeShutdownInstanceDisks(self, instance)
6473 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6474 """Shutdown block devices of an instance.
6476 This function checks if an instance is running, before calling
6477 _ShutdownInstanceDisks.
6480 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6481 _ShutdownInstanceDisks(lu, instance, disks=disks)
6484 def _ExpandCheckDisks(instance, disks):
6485 """Return the instance disks selected by the disks list
6487 @type disks: list of L{objects.Disk} or None
6488 @param disks: selected disks
6489 @rtype: list of L{objects.Disk}
6490 @return: selected instance disks to act on
6493   if disks is None:
6494     return instance.disks
6496 if not set(disks).issubset(instance.disks):
6497     raise errors.ProgrammerError("Can only act on disks belonging to the"
6498                                  " target instance")
6499   return disks
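# Illustrative behaviour (derived from the helper above, not original text):
#   _ExpandCheckDisks(instance, None)               -> all of instance.disks
#   _ExpandCheckDisks(instance, instance.disks[:1]) -> the selected subset
#   passing a disk not belonging to the instance    -> ProgrammerError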
6502 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6503 """Shutdown block devices of an instance.
6505 This does the shutdown on all nodes of the instance.
6507   If ignore_primary is false, errors on the primary node are not
6508   ignored and cause the shutdown to be reported as failed.
6511   all_result = True
6512   disks = _ExpandCheckDisks(instance, disks)
6514   for disk in disks:
6515     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6516 lu.cfg.SetDiskID(top_disk, node)
6517 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6518 msg = result.fail_msg
6519       if msg:
6520         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6521 disk.iv_name, node, msg)
6522 if ((node == instance.primary_node and not ignore_primary) or
6523           (node != instance.primary_node and not result.offline)):
6524         all_result = False
6526   return all_result
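# Note on the two shutdown helpers above (summary, not original text):
# _SafeShutdownInstanceDisks first checks via _CheckInstanceState that the
# instance is down and then delegates here, while _ShutdownInstanceDisks
# shuts the devices down unconditionally and only tolerates failures on
# offline secondary nodes (and, with ignore_primary, on the primary node).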
6528 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6529 """Checks if a node has enough free memory.
6531   This function checks if a given node has the needed amount of free
6532   memory. In case the node has less memory or we cannot get the
6533   information from the node, this function raises an OpPrereqError
6534   exception.
6536 @type lu: C{LogicalUnit}
6537 @param lu: a logical unit from which we get configuration data
6539 @param node: the node to check
6540 @type reason: C{str}
6541 @param reason: string to use in the error message
6542 @type requested: C{int}
6543 @param requested: the amount of memory in MiB to check for
6544 @type hypervisor_name: C{str}
6545 @param hypervisor_name: the hypervisor to ask for memory stats
6547 @return: node current free memory
6548 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6549 we cannot check the node
6552 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6553 nodeinfo[node].Raise("Can't get data from node %s" % node,
6554 prereq=True, ecode=errors.ECODE_ENVIRON)
6555 (_, _, (hv_info, )) = nodeinfo[node].payload
6557 free_mem = hv_info.get("memory_free", None)
6558 if not isinstance(free_mem, int):
6559 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6560 " was '%s'" % (node, free_mem),
6561 errors.ECODE_ENVIRON)
6562 if requested > free_mem:
6563 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6564 " needed %s MiB, available %s MiB" %
6565                                (node, reason, requested, free_mem),
6566                                errors.ECODE_NORES)
6568   return free_mem
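# Illustrative call (assumed; it mirrors the real caller in
# LUInstanceStartup.CheckPrereq further below):
#
#   free = _CheckNodeFreeMemory(self, instance.primary_node,
#                               "starting instance %s" % instance.name,
#                               bep[constants.BE_MINMEM], instance.hypervisor)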
6570 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6571 """Checks if nodes have enough free disk space in the all VGs.
6573   This function checks if all given nodes have the needed amount of
6574   free disk. In case any node has less disk or we cannot get the
6575   information from the node, this function raises an OpPrereqError
6576   exception.
6578 @type lu: C{LogicalUnit}
6579 @param lu: a logical unit from which we get configuration data
6580 @type nodenames: C{list}
6581 @param nodenames: the list of node names to check
6582 @type req_sizes: C{dict}
6583 @param req_sizes: the hash of vg and corresponding amount of disk in
6585 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6586 or we cannot check the node
6589 for vg, req_size in req_sizes.items():
6590 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
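# Illustrative call (assumed): req_sizes maps a volume group name to the
# space to check in MiB, so checking for two 10 GiB volumes in a group
# named "xenvg" (placeholder name; pnode/snode are placeholders too) would be:
#
#   _CheckNodesFreeDiskPerVG(self, [pnode, snode], {"xenvg": 2 * 10240})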
6593 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6594 """Checks if nodes have enough free disk space in the specified VG.
6596   This function checks if all given nodes have the needed amount of
6597   free disk. In case any node has less disk or we cannot get the
6598   information from the node, this function raises an OpPrereqError
6599   exception.
6601 @type lu: C{LogicalUnit}
6602 @param lu: a logical unit from which we get configuration data
6603 @type nodenames: C{list}
6604 @param nodenames: the list of node names to check
6606 @param vg: the volume group to check
6607 @type requested: C{int}
6608 @param requested: the amount of disk in MiB to check for
6609 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6610 or we cannot check the node
6613 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6614 for node in nodenames:
6615 info = nodeinfo[node]
6616 info.Raise("Cannot get current information from node %s" % node,
6617 prereq=True, ecode=errors.ECODE_ENVIRON)
6618 (_, (vg_info, ), _) = info.payload
6619 vg_free = vg_info.get("vg_free", None)
6620 if not isinstance(vg_free, int):
6621 raise errors.OpPrereqError("Can't compute free disk space on node"
6622 " %s for vg %s, result was '%s'" %
6623 (node, vg, vg_free), errors.ECODE_ENVIRON)
6624 if requested > vg_free:
6625 raise errors.OpPrereqError("Not enough disk space on target node %s"
6626 " vg %s: required %d MiB, available %d MiB" %
6627 (node, vg, requested, vg_free),
6631 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6632 """Checks if nodes have enough physical CPUs
6634 This function checks if all given nodes have the needed number of
6635 physical CPUs. In case any node has less CPUs or we cannot get the
6636 information from the node, this function raises an OpPrereqError
6639 @type lu: C{LogicalUnit}
6640 @param lu: a logical unit from which we get configuration data
6641 @type nodenames: C{list}
6642 @param nodenames: the list of node names to check
6643 @type requested: C{int}
6644 @param requested: the minimum acceptable number of physical CPUs
6645 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6646 or we cannot check the node
6649 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6650 for node in nodenames:
6651 info = nodeinfo[node]
6652 info.Raise("Cannot get current information from node %s" % node,
6653 prereq=True, ecode=errors.ECODE_ENVIRON)
6654 (_, _, (hv_info, )) = info.payload
6655 num_cpus = hv_info.get("cpu_total", None)
6656 if not isinstance(num_cpus, int):
6657 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6658 " on node %s, result was '%s'" %
6659 (node, num_cpus), errors.ECODE_ENVIRON)
6660 if requested > num_cpus:
6661 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6662 "required" % (node, num_cpus, requested),
6666 class LUInstanceStartup(LogicalUnit):
6667 """Starts an instance.
6670 HPATH = "instance-start"
6671 HTYPE = constants.HTYPE_INSTANCE
6674 def CheckArguments(self):
6676 if self.op.beparams:
6677 # fill the beparams dict
6678 objects.UpgradeBeParams(self.op.beparams)
6679 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6681 def ExpandNames(self):
6682 self._ExpandAndLockInstance()
6683 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6685 def DeclareLocks(self, level):
6686 if level == locking.LEVEL_NODE_RES:
6687 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6689 def BuildHooksEnv(self):
6692 This runs on master, primary and secondary nodes of the instance.
6696 "FORCE": self.op.force,
6699 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6703 def BuildHooksNodes(self):
6704 """Build hooks nodes.
6707 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6710 def CheckPrereq(self):
6711 """Check prerequisites.
6713 This checks that the instance is in the cluster.
6716 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6717 assert self.instance is not None, \
6718 "Cannot retrieve locked instance %s" % self.op.instance_name
6721 if self.op.hvparams:
6722 # check hypervisor parameter syntax (locally)
6723 cluster = self.cfg.GetClusterInfo()
6724 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6725 filled_hvp = cluster.FillHV(instance)
6726 filled_hvp.update(self.op.hvparams)
6727 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6728 hv_type.CheckParameterSyntax(filled_hvp)
6729 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6731 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6733 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6735 if self.primary_offline and self.op.ignore_offline_nodes:
6736 self.proc.LogWarning("Ignoring offline primary node")
6738 if self.op.hvparams or self.op.beparams:
6739 self.proc.LogWarning("Overridden parameters are ignored")
6740     else:
6741       _CheckNodeOnline(self, instance.primary_node)
6743 bep = self.cfg.GetClusterInfo().FillBE(instance)
6744 bep.update(self.op.beparams)
6746 # check bridges existence
6747 _CheckInstanceBridgesExist(self, instance)
6749 remote_info = self.rpc.call_instance_info(instance.primary_node,
6750                                                 instance.name,
6751                                                 instance.hypervisor)
6752 remote_info.Raise("Error checking node %s" % instance.primary_node,
6753 prereq=True, ecode=errors.ECODE_ENVIRON)
6754 if not remote_info.payload: # not running already
6755 _CheckNodeFreeMemory(self, instance.primary_node,
6756 "starting instance %s" % instance.name,
6757 bep[constants.BE_MINMEM], instance.hypervisor)
6759 def Exec(self, feedback_fn):
6760 """Start the instance.
6763 instance = self.instance
6764 force = self.op.force
6766 if not self.op.no_remember:
6767 self.cfg.MarkInstanceUp(instance.name)
6769 if self.primary_offline:
6770 assert self.op.ignore_offline_nodes
6771 self.proc.LogInfo("Primary node offline, marked instance as started")
6772     else:
6773       node_current = instance.primary_node
6775 _StartInstanceDisks(self, instance, force)
6777       result = \
6778         self.rpc.call_instance_start(node_current,
6779                                      (instance, self.op.hvparams,
6780                                       self.op.beparams),
6781                                      self.op.startup_paused)
6782       msg = result.fail_msg
6783       if msg:
6784         _ShutdownInstanceDisks(self, instance)
6785         raise errors.OpExecError("Could not start instance: %s" % msg)
6788 class LUInstanceReboot(LogicalUnit):
6789 """Reboot an instance.
6792 HPATH = "instance-reboot"
6793 HTYPE = constants.HTYPE_INSTANCE
6796 def ExpandNames(self):
6797 self._ExpandAndLockInstance()
6799 def BuildHooksEnv(self):
6802 This runs on master, primary and secondary nodes of the instance.
6806 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6807 "REBOOT_TYPE": self.op.reboot_type,
6808 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6811 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6815 def BuildHooksNodes(self):
6816 """Build hooks nodes.
6819 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6822 def CheckPrereq(self):
6823 """Check prerequisites.
6825 This checks that the instance is in the cluster.
6828 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6829 assert self.instance is not None, \
6830 "Cannot retrieve locked instance %s" % self.op.instance_name
6831 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6832 _CheckNodeOnline(self, instance.primary_node)
6834 # check bridges existence
6835 _CheckInstanceBridgesExist(self, instance)
6837 def Exec(self, feedback_fn):
6838 """Reboot the instance.
6841 instance = self.instance
6842 ignore_secondaries = self.op.ignore_secondaries
6843 reboot_type = self.op.reboot_type
6845 remote_info = self.rpc.call_instance_info(instance.primary_node,
6846                                                 instance.name,
6847                                                 instance.hypervisor)
6848 remote_info.Raise("Error checking node %s" % instance.primary_node)
6849 instance_running = bool(remote_info.payload)
6851 node_current = instance.primary_node
6853 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6854 constants.INSTANCE_REBOOT_HARD]:
6855 for disk in instance.disks:
6856 self.cfg.SetDiskID(disk, node_current)
6857 result = self.rpc.call_instance_reboot(node_current, instance,
6858                                              reboot_type,
6859                                              self.op.shutdown_timeout)
6860 result.Raise("Could not reboot instance")
6861     else:
6862       if instance_running:
6863 result = self.rpc.call_instance_shutdown(node_current, instance,
6864 self.op.shutdown_timeout)
6865 result.Raise("Could not shutdown instance for full reboot")
6866 _ShutdownInstanceDisks(self, instance)
6868 self.LogInfo("Instance %s was already stopped, starting now",
6870 _StartInstanceDisks(self, instance, ignore_secondaries)
6871 result = self.rpc.call_instance_start(node_current,
6872 (instance, None, None), False)
6873 msg = result.fail_msg
6875 _ShutdownInstanceDisks(self, instance)
6876 raise errors.OpExecError("Could not start instance for"
6877 " full reboot: %s" % msg)
6879 self.cfg.MarkInstanceUp(instance.name)
6882 class LUInstanceShutdown(LogicalUnit):
6883 """Shutdown an instance.
6886 HPATH = "instance-stop"
6887 HTYPE = constants.HTYPE_INSTANCE
6890 def ExpandNames(self):
6891 self._ExpandAndLockInstance()
6893 def BuildHooksEnv(self):
6896 This runs on master, primary and secondary nodes of the instance.
6899 env = _BuildInstanceHookEnvByObject(self, self.instance)
6900 env["TIMEOUT"] = self.op.timeout
6903 def BuildHooksNodes(self):
6904 """Build hooks nodes.
6907 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6910 def CheckPrereq(self):
6911 """Check prerequisites.
6913 This checks that the instance is in the cluster.
6916 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6917 assert self.instance is not None, \
6918 "Cannot retrieve locked instance %s" % self.op.instance_name
6920 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6922 self.primary_offline = \
6923 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6925 if self.primary_offline and self.op.ignore_offline_nodes:
6926 self.proc.LogWarning("Ignoring offline primary node")
6928 _CheckNodeOnline(self, self.instance.primary_node)
6930 def Exec(self, feedback_fn):
6931 """Shutdown the instance.
6934 instance = self.instance
6935 node_current = instance.primary_node
6936 timeout = self.op.timeout
6938 if not self.op.no_remember:
6939 self.cfg.MarkInstanceDown(instance.name)
6941 if self.primary_offline:
6942 assert self.op.ignore_offline_nodes
6943 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6944     else:
6945       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6946 msg = result.fail_msg
6948 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6950 _ShutdownInstanceDisks(self, instance)
6953 class LUInstanceReinstall(LogicalUnit):
6954 """Reinstall an instance.
6957 HPATH = "instance-reinstall"
6958 HTYPE = constants.HTYPE_INSTANCE
6961 def ExpandNames(self):
6962 self._ExpandAndLockInstance()
6964 def BuildHooksEnv(self):
6967 This runs on master, primary and secondary nodes of the instance.
6970 return _BuildInstanceHookEnvByObject(self, self.instance)
6972 def BuildHooksNodes(self):
6973 """Build hooks nodes.
6976 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6979 def CheckPrereq(self):
6980 """Check prerequisites.
6982 This checks that the instance is in the cluster and is not running.
6985 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6986 assert instance is not None, \
6987 "Cannot retrieve locked instance %s" % self.op.instance_name
6988 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6989 " offline, cannot reinstall")
6991 if instance.disk_template == constants.DT_DISKLESS:
6992 raise errors.OpPrereqError("Instance '%s' has no disks" %
6993 self.op.instance_name,
6995 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6997 if self.op.os_type is not None:
6999 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7000 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7001 instance_os = self.op.os_type
7003 instance_os = instance.os
7005 nodelist = list(instance.all_nodes)
7007 if self.op.osparams:
7008 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7009 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7010       self.os_inst = i_osdict # the new dict (without defaults)
7011     else:
7012       self.os_inst = {}
7014     self.instance = instance
7016 def Exec(self, feedback_fn):
7017 """Reinstall the instance.
7020 inst = self.instance
7022 if self.op.os_type is not None:
7023 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7024 inst.os = self.op.os_type
7025 # Write to configuration
7026 self.cfg.Update(inst, feedback_fn)
7028 _StartInstanceDisks(self, inst, None)
7029     try:
7030       feedback_fn("Running the instance OS create scripts...")
7031 # FIXME: pass debug option from opcode to backend
7032 result = self.rpc.call_instance_os_add(inst.primary_node,
7033 (inst, self.os_inst), True,
7034 self.op.debug_level)
7035 result.Raise("Could not install OS for instance %s on node %s" %
7036 (inst.name, inst.primary_node))
7037     finally:
7038       _ShutdownInstanceDisks(self, inst)
7041 class LUInstanceRecreateDisks(LogicalUnit):
7042 """Recreate an instance's missing disks.
7045 HPATH = "instance-recreate-disks"
7046 HTYPE = constants.HTYPE_INSTANCE
7049 _MODIFYABLE = frozenset([
7050 constants.IDISK_SIZE,
7051     constants.IDISK_MODE,
7052     ])
7054 # New or changed disk parameters may have different semantics
7055 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7056 constants.IDISK_ADOPT,
7058 # TODO: Implement support changing VG while recreating
7060 constants.IDISK_METAVG,
7061 constants.IDISK_PROVIDER,
7064 def CheckArguments(self):
7065 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7066 # Normalize and convert deprecated list of disk indices
7067 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7069 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7070     if duplicates:
7071       raise errors.OpPrereqError("Some disks have been specified more than"
7072                                  " once: %s" % utils.CommaJoin(duplicates),
7073                                  errors.ECODE_INVAL)
7075 for (idx, params) in self.op.disks:
7076 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7077 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7079 raise errors.OpPrereqError("Parameters for disk %s try to change"
7080 " unmodifyable parameter(s): %s" %
7081 (idx, utils.CommaJoin(unsupported)),
7084 def ExpandNames(self):
7085 self._ExpandAndLockInstance()
7086 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7087     if self.op.nodes:
7088       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7089       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7090     else:
7091       self.needed_locks[locking.LEVEL_NODE] = []
7092 self.needed_locks[locking.LEVEL_NODE_RES] = []
7094 def DeclareLocks(self, level):
7095 if level == locking.LEVEL_NODE:
7096 # if we replace the nodes, we only need to lock the old primary,
7097 # otherwise we need to lock all nodes for disk re-creation
7098 primary_only = bool(self.op.nodes)
7099 self._LockInstancesNodes(primary_only=primary_only)
7100 elif level == locking.LEVEL_NODE_RES:
7102 self.needed_locks[locking.LEVEL_NODE_RES] = \
7103 self.needed_locks[locking.LEVEL_NODE][:]
7105 def BuildHooksEnv(self):
7108 This runs on master, primary and secondary nodes of the instance.
7111 return _BuildInstanceHookEnvByObject(self, self.instance)
7113 def BuildHooksNodes(self):
7114 """Build hooks nodes.
7117 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7120 def CheckPrereq(self):
7121 """Check prerequisites.
7123 This checks that the instance is in the cluster and is not running.
7126 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7127 assert instance is not None, \
7128 "Cannot retrieve locked instance %s" % self.op.instance_name
7129     if self.op.nodes:
7130       if len(self.op.nodes) != len(instance.all_nodes):
7131 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7132 " %d replacement nodes were specified" %
7133 (instance.name, len(instance.all_nodes),
7134 len(self.op.nodes)),
7136 assert instance.disk_template != constants.DT_DRBD8 or \
7137 len(self.op.nodes) == 2
7138 assert instance.disk_template != constants.DT_PLAIN or \
7139 len(self.op.nodes) == 1
7140 primary_node = self.op.nodes[0]
7141     else:
7142       primary_node = instance.primary_node
7143 _CheckNodeOnline(self, primary_node)
7145 if instance.disk_template == constants.DT_DISKLESS:
7146 raise errors.OpPrereqError("Instance '%s' has no disks" %
7147 self.op.instance_name, errors.ECODE_INVAL)
7149 # if we replace nodes *and* the old primary is offline, we don't
7151 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7152 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7153 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7154 if not (self.op.nodes and old_pnode.offline):
7155 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7156 msg="cannot recreate disks")
7158     if self.op.disks:
7159       self.disks = dict(self.op.disks)
7160     else:
7161       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7163 maxidx = max(self.disks.keys())
7164 if maxidx >= len(instance.disks):
7165 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7168 if (self.op.nodes and
7169 sorted(self.disks.keys()) != range(len(instance.disks))):
7170 raise errors.OpPrereqError("Can't recreate disks partially and"
7171 " change the nodes at the same time",
7174 self.instance = instance
7176 def Exec(self, feedback_fn):
7177 """Recreate the disks.
7180 instance = self.instance
7182 assert (self.owned_locks(locking.LEVEL_NODE) ==
7183 self.owned_locks(locking.LEVEL_NODE_RES))
7185     to_skip = []
7186     mods = [] # keeps track of needed changes
7188 for idx, disk in enumerate(instance.disks):
7189       try:
7190         changes = self.disks[idx]
7191       except KeyError:
7192         # Disk should not be recreated
7193         to_skip.append(idx)
7194         continue
7196 # update secondaries for disks, if needed
7197 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7198 # need to update the nodes and minors
7199 assert len(self.op.nodes) == 2
7200 assert len(disk.logical_id) == 6 # otherwise disk internals
7202 (_, _, old_port, _, _, old_secret) = disk.logical_id
7203 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7204 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7205 new_minors[0], new_minors[1], old_secret)
7206 assert len(disk.logical_id) == len(new_id)
7210 mods.append((idx, new_id, changes))
7212 # now that we have passed all asserts above, we can apply the mods
7213 # in a single run (to avoid partial changes)
7214 for idx, new_id, changes in mods:
7215 disk = instance.disks[idx]
7216 if new_id is not None:
7217 assert disk.dev_type == constants.LD_DRBD8
7218 disk.logical_id = new_id
7220 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7221 mode=changes.get(constants.IDISK_MODE, None))
7223 # change primary node, if needed
7224     if self.op.nodes:
7225       instance.primary_node = self.op.nodes[0]
7226 self.LogWarning("Changing the instance's nodes, you will have to"
7227 " remove any disks left on the older nodes manually")
7230 self.cfg.Update(instance, feedback_fn)
7232 _CreateDisks(self, instance, to_skip=to_skip)
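# Summary of the Exec flow above (not original text): "mods" collects one
# (disk index, new DRBD logical_id or None, requested changes) tuple per
# disk to touch, applied in a single pass after all checks, while "to_skip"
# lists the indices the user did not select; those are passed to
# _CreateDisks so that only the selected disks are recreated.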
7235 class LUInstanceRename(LogicalUnit):
7236 """Rename an instance.
7239 HPATH = "instance-rename"
7240 HTYPE = constants.HTYPE_INSTANCE
7242 def CheckArguments(self):
7246 if self.op.ip_check and not self.op.name_check:
7247 # TODO: make the ip check more flexible and not depend on the name check
7248 raise errors.OpPrereqError("IP address check requires a name check",
7251 def BuildHooksEnv(self):
7254 This runs on master, primary and secondary nodes of the instance.
7257 env = _BuildInstanceHookEnvByObject(self, self.instance)
7258 env["INSTANCE_NEW_NAME"] = self.op.new_name
7261 def BuildHooksNodes(self):
7262 """Build hooks nodes.
7265 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7268 def CheckPrereq(self):
7269 """Check prerequisites.
7271 This checks that the instance is in the cluster and is not running.
7274 self.op.instance_name = _ExpandInstanceName(self.cfg,
7275 self.op.instance_name)
7276 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7277 assert instance is not None
7278 _CheckNodeOnline(self, instance.primary_node)
7279 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7280 msg="cannot rename")
7281 self.instance = instance
7283 new_name = self.op.new_name
7284 if self.op.name_check:
7285 hostname = netutils.GetHostname(name=new_name)
7286 if hostname.name != new_name:
7287 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7288                      hostname.name)
7289       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7290 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7291 " same as given hostname '%s'") %
7292 (hostname.name, self.op.new_name),
7294 new_name = self.op.new_name = hostname.name
7295 if (self.op.ip_check and
7296 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7297 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7298 (hostname.ip, new_name),
7299 errors.ECODE_NOTUNIQUE)
7301 instance_list = self.cfg.GetInstanceList()
7302 if new_name in instance_list and new_name != instance.name:
7303 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7304 new_name, errors.ECODE_EXISTS)
7306 def Exec(self, feedback_fn):
7307 """Rename the instance.
7310 inst = self.instance
7311 old_name = inst.name
7313 rename_file_storage = False
7314 if (inst.disk_template in constants.DTS_FILEBASED and
7315 self.op.new_name != inst.name):
7316 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7317 rename_file_storage = True
7319 self.cfg.RenameInstance(inst.name, self.op.new_name)
7320 # Change the instance lock. This is definitely safe while we hold the BGL.
7321 # Otherwise the new lock would have to be added in acquired mode.
7323 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7324 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7326 # re-read the instance from the configuration after rename
7327 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7329 if rename_file_storage:
7330 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7331 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7332 old_file_storage_dir,
7333 new_file_storage_dir)
7334 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7335 " (but the instance has been renamed in Ganeti)" %
7336 (inst.primary_node, old_file_storage_dir,
7337 new_file_storage_dir))
7339 _StartInstanceDisks(self, inst, None)
7340     try:
7341       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7342 old_name, self.op.debug_level)
7343 msg = result.fail_msg
7345 msg = ("Could not run OS rename script for instance %s on node %s"
7346 " (but the instance has been renamed in Ganeti): %s" %
7347 (inst.name, inst.primary_node, msg))
7348 self.proc.LogWarning(msg)
7349     finally:
7350       _ShutdownInstanceDisks(self, inst)
7352     return inst.name
7355 class LUInstanceRemove(LogicalUnit):
7356 """Remove an instance.
7359 HPATH = "instance-remove"
7360 HTYPE = constants.HTYPE_INSTANCE
7363 def ExpandNames(self):
7364 self._ExpandAndLockInstance()
7365 self.needed_locks[locking.LEVEL_NODE] = []
7366 self.needed_locks[locking.LEVEL_NODE_RES] = []
7367 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7369 def DeclareLocks(self, level):
7370 if level == locking.LEVEL_NODE:
7371 self._LockInstancesNodes()
7372 elif level == locking.LEVEL_NODE_RES:
7374 self.needed_locks[locking.LEVEL_NODE_RES] = \
7375 self.needed_locks[locking.LEVEL_NODE][:]
7377 def BuildHooksEnv(self):
7380 This runs on master, primary and secondary nodes of the instance.
7383 env = _BuildInstanceHookEnvByObject(self, self.instance)
7384 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7387 def BuildHooksNodes(self):
7388 """Build hooks nodes.
7391 nl = [self.cfg.GetMasterNode()]
7392 nl_post = list(self.instance.all_nodes) + nl
7393 return (nl, nl_post)
7395 def CheckPrereq(self):
7396 """Check prerequisites.
7398 This checks that the instance is in the cluster.
7401 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7402 assert self.instance is not None, \
7403 "Cannot retrieve locked instance %s" % self.op.instance_name
7405 def Exec(self, feedback_fn):
7406 """Remove the instance.
7409 instance = self.instance
7410 logging.info("Shutting down instance %s on node %s",
7411 instance.name, instance.primary_node)
7413 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7414 self.op.shutdown_timeout)
7415 msg = result.fail_msg
7416     if msg:
7417       if self.op.ignore_failures:
7418         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7419       else:
7420         raise errors.OpExecError("Could not shutdown instance %s on"
7421                                  " node %s: %s" %
7422                                  (instance.name, instance.primary_node, msg))
7424 assert (self.owned_locks(locking.LEVEL_NODE) ==
7425 self.owned_locks(locking.LEVEL_NODE_RES))
7426 assert not (set(instance.all_nodes) -
7427 self.owned_locks(locking.LEVEL_NODE)), \
7428 "Not owning correct locks"
7430 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7433 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7434 """Utility function to remove an instance.
7437 logging.info("Removing block devices for instance %s", instance.name)
7439 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7440 if not ignore_failures:
7441 raise errors.OpExecError("Can't remove instance's disks")
7442 feedback_fn("Warning: can't remove instance's disks")
7444 logging.info("Removing instance %s out of cluster config", instance.name)
7446 lu.cfg.RemoveInstance(instance.name)
7448 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7449 "Instance lock removal conflict"
7451 # Remove lock for the instance
7452 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7455 class LUInstanceQuery(NoHooksLU):
7456 """Logical unit for querying instances.
7459 # pylint: disable=W0142
7462 def CheckArguments(self):
7463 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7464 self.op.output_fields, self.op.use_locking)
7466 def ExpandNames(self):
7467 self.iq.ExpandNames(self)
7469 def DeclareLocks(self, level):
7470 self.iq.DeclareLocks(self, level)
7472 def Exec(self, feedback_fn):
7473 return self.iq.OldStyleQuery(self)
7476 class LUInstanceFailover(LogicalUnit):
7477 """Failover an instance.
7480 HPATH = "instance-failover"
7481 HTYPE = constants.HTYPE_INSTANCE
7484 def CheckArguments(self):
7485 """Check the arguments.
7488 self.iallocator = getattr(self.op, "iallocator", None)
7489 self.target_node = getattr(self.op, "target_node", None)
7491 def ExpandNames(self):
7492 self._ExpandAndLockInstance()
7494 if self.op.target_node is not None:
7495 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7497 self.needed_locks[locking.LEVEL_NODE] = []
7498 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7500 self.needed_locks[locking.LEVEL_NODE_RES] = []
7501 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7503 ignore_consistency = self.op.ignore_consistency
7504 shutdown_timeout = self.op.shutdown_timeout
7505 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7508 ignore_consistency=ignore_consistency,
7509 shutdown_timeout=shutdown_timeout,
7510 ignore_ipolicy=self.op.ignore_ipolicy)
7511 self.tasklets = [self._migrater]
7513 def DeclareLocks(self, level):
7514 if level == locking.LEVEL_NODE:
7515 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7516 if instance.disk_template in constants.DTS_EXT_MIRROR:
7517 if self.op.target_node is None:
7518 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7520 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7521 self.op.target_node]
7522 del self.recalculate_locks[locking.LEVEL_NODE]
7524 self._LockInstancesNodes()
7525 elif level == locking.LEVEL_NODE_RES:
7527 self.needed_locks[locking.LEVEL_NODE_RES] = \
7528 self.needed_locks[locking.LEVEL_NODE][:]
7530 def BuildHooksEnv(self):
7533 This runs on master, primary and secondary nodes of the instance.
7536 instance = self._migrater.instance
7537 source_node = instance.primary_node
7538 target_node = self.op.target_node
7540 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7541 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7542 "OLD_PRIMARY": source_node,
7543 "NEW_PRIMARY": target_node,
7546 if instance.disk_template in constants.DTS_INT_MIRROR:
7547 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7548 env["NEW_SECONDARY"] = source_node
7550 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7552 env.update(_BuildInstanceHookEnvByObject(self, instance))
7556 def BuildHooksNodes(self):
7557 """Build hooks nodes.
7560 instance = self._migrater.instance
7561 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7562 return (nl, nl + [instance.primary_node])
7565 class LUInstanceMigrate(LogicalUnit):
7566 """Migrate an instance.
7568 This is migration without shutting down, compared to the failover,
7569 which is done with shutdown.
7572 HPATH = "instance-migrate"
7573 HTYPE = constants.HTYPE_INSTANCE
7576 def ExpandNames(self):
7577 self._ExpandAndLockInstance()
7579 if self.op.target_node is not None:
7580 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7582 self.needed_locks[locking.LEVEL_NODE] = []
7583 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7585 self.needed_locks[locking.LEVEL_NODE_RES] = []
7586 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7589 TLMigrateInstance(self, self.op.instance_name,
7590 cleanup=self.op.cleanup,
7592 fallback=self.op.allow_failover,
7593 allow_runtime_changes=self.op.allow_runtime_changes,
7594 ignore_ipolicy=self.op.ignore_ipolicy)
7595 self.tasklets = [self._migrater]
7597 def DeclareLocks(self, level):
7598 if level == locking.LEVEL_NODE:
7599 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7600 if instance.disk_template in constants.DTS_EXT_MIRROR:
7601 if self.op.target_node is None:
7602 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7604 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7605 self.op.target_node]
7606 del self.recalculate_locks[locking.LEVEL_NODE]
7608 self._LockInstancesNodes()
7609 elif level == locking.LEVEL_NODE_RES:
7611 self.needed_locks[locking.LEVEL_NODE_RES] = \
7612 self.needed_locks[locking.LEVEL_NODE][:]
7614 def BuildHooksEnv(self):
7617 This runs on master, primary and secondary nodes of the instance.
7620 instance = self._migrater.instance
7621 source_node = instance.primary_node
7622 target_node = self.op.target_node
7623 env = _BuildInstanceHookEnvByObject(self, instance)
7625 "MIGRATE_LIVE": self._migrater.live,
7626 "MIGRATE_CLEANUP": self.op.cleanup,
7627 "OLD_PRIMARY": source_node,
7628 "NEW_PRIMARY": target_node,
7629 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7632 if instance.disk_template in constants.DTS_INT_MIRROR:
7633 env["OLD_SECONDARY"] = target_node
7634 env["NEW_SECONDARY"] = source_node
7636 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7640 def BuildHooksNodes(self):
7641 """Build hooks nodes.
7644 instance = self._migrater.instance
7645 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7646 return (nl, nl + [instance.primary_node])
7649 class LUInstanceMove(LogicalUnit):
7650 """Move an instance by data-copying.
7653 HPATH = "instance-move"
7654 HTYPE = constants.HTYPE_INSTANCE
7657 def ExpandNames(self):
7658 self._ExpandAndLockInstance()
7659 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7660 self.op.target_node = target_node
7661 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7662 self.needed_locks[locking.LEVEL_NODE_RES] = []
7663 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7665 def DeclareLocks(self, level):
7666 if level == locking.LEVEL_NODE:
7667 self._LockInstancesNodes(primary_only=True)
7668 elif level == locking.LEVEL_NODE_RES:
7670 self.needed_locks[locking.LEVEL_NODE_RES] = \
7671 self.needed_locks[locking.LEVEL_NODE][:]
7673 def BuildHooksEnv(self):
7676 This runs on master, primary and secondary nodes of the instance.
7680 "TARGET_NODE": self.op.target_node,
7681 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7683 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7686 def BuildHooksNodes(self):
7687 """Build hooks nodes.
7691 self.cfg.GetMasterNode(),
7692 self.instance.primary_node,
7693 self.op.target_node,
7697 def CheckPrereq(self):
7698 """Check prerequisites.
7700 This checks that the instance is in the cluster.
7703 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7704 assert self.instance is not None, \
7705 "Cannot retrieve locked instance %s" % self.op.instance_name
7707 node = self.cfg.GetNodeInfo(self.op.target_node)
7708 assert node is not None, \
7709 "Cannot retrieve locked node %s" % self.op.target_node
7711 self.target_node = target_node = node.name
7713 if target_node == instance.primary_node:
7714 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7715 (instance.name, target_node),
7718 bep = self.cfg.GetClusterInfo().FillBE(instance)
7720 for idx, dsk in enumerate(instance.disks):
7721 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7722 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7723 " cannot copy" % idx, errors.ECODE_STATE)
7725 _CheckNodeOnline(self, target_node)
7726 _CheckNodeNotDrained(self, target_node)
7727 _CheckNodeVmCapable(self, target_node)
7728 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7729 self.cfg.GetNodeGroup(node.group))
7730 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7731 ignore=self.op.ignore_ipolicy)
7733 if instance.admin_state == constants.ADMINST_UP:
7734 # check memory requirements on the secondary node
7735 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7736 instance.name, bep[constants.BE_MAXMEM],
7737 instance.hypervisor)
7739 self.LogInfo("Not checking memory on the secondary node as"
7740 " instance will not be started")
7742 # check bridge existence
7743 _CheckInstanceBridgesExist(self, instance, node=target_node)
7745 def Exec(self, feedback_fn):
7746 """Move an instance.
7748 The move is done by shutting it down on its present node, copying
7749 the data over (slow) and starting it on the new node.
7752 instance = self.instance
7754 source_node = instance.primary_node
7755 target_node = self.target_node
7757 self.LogInfo("Shutting down instance %s on source node %s",
7758 instance.name, source_node)
7760 assert (self.owned_locks(locking.LEVEL_NODE) ==
7761 self.owned_locks(locking.LEVEL_NODE_RES))
7763 result = self.rpc.call_instance_shutdown(source_node, instance,
7764 self.op.shutdown_timeout)
7765 msg = result.fail_msg
7767 if self.op.ignore_consistency:
7768 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7769 " Proceeding anyway. Please make sure node"
7770 " %s is down. Error details: %s",
7771 instance.name, source_node, source_node, msg)
7773 raise errors.OpExecError("Could not shutdown instance %s on"
7775 (instance.name, source_node, msg))
7777 # create the target disks
7779 _CreateDisks(self, instance, target_node=target_node)
7780 except errors.OpExecError:
7781 self.LogWarning("Device creation failed, reverting...")
7783 _RemoveDisks(self, instance, target_node=target_node)
7785 self.cfg.ReleaseDRBDMinors(instance.name)
7788 cluster_name = self.cfg.GetClusterInfo().cluster_name
7791 # activate, get path, copy the data over
7792 for idx, disk in enumerate(instance.disks):
7793 self.LogInfo("Copying data for disk %d", idx)
7794 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7795 instance.name, True, idx)
7797 self.LogWarning("Can't assemble newly created disk %d: %s",
7798 idx, result.fail_msg)
7799 errs.append(result.fail_msg)
7801 dev_path = result.payload
7802 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7803 target_node, dev_path,
7806 self.LogWarning("Can't copy data over for disk %d: %s",
7807 idx, result.fail_msg)
7808 errs.append(result.fail_msg)
7812 self.LogWarning("Some disks failed to copy, aborting")
7814 _RemoveDisks(self, instance, target_node=target_node)
7816 self.cfg.ReleaseDRBDMinors(instance.name)
7817 raise errors.OpExecError("Errors during disk copy: %s" %
7820 instance.primary_node = target_node
7821 self.cfg.Update(instance, feedback_fn)
7823 self.LogInfo("Removing the disks on the original node")
7824 _RemoveDisks(self, instance, target_node=source_node)
7826 # Only start the instance if it's marked as up
7827 if instance.admin_state == constants.ADMINST_UP:
7828 self.LogInfo("Starting instance %s on node %s",
7829 instance.name, target_node)
7831 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7832 ignore_secondaries=True)
7834 _ShutdownInstanceDisks(self, instance)
7835 raise errors.OpExecError("Can't activate the instance's disks")
7837 result = self.rpc.call_instance_start(target_node,
7838 (instance, None, None), False)
7839 msg = result.fail_msg
7841 _ShutdownInstanceDisks(self, instance)
7842 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7843 (instance.name, target_node, msg))
7846 class LUNodeMigrate(LogicalUnit):
7847 """Migrate all instances from a node.
7850 HPATH = "node-migrate"
7851 HTYPE = constants.HTYPE_NODE
7854 def CheckArguments(self):
7857 def ExpandNames(self):
7858 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7860 self.share_locks = _ShareAll()
7861 self.needed_locks = {
7862 locking.LEVEL_NODE: [self.op.node_name],
7865 def BuildHooksEnv(self):
7868 This runs on the master, the primary and all the secondaries.
7872 "NODE_NAME": self.op.node_name,
7873 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7876 def BuildHooksNodes(self):
7877 """Build hooks nodes.
7880 nl = [self.cfg.GetMasterNode()]
7883 def CheckPrereq(self):
7886 def Exec(self, feedback_fn):
7887 # Prepare jobs for migrating instances
7888 allow_runtime_changes = self.op.allow_runtime_changes
7890 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7893 iallocator=self.op.iallocator,
7894 target_node=self.op.target_node,
7895 allow_runtime_changes=allow_runtime_changes,
7896 ignore_ipolicy=self.op.ignore_ipolicy)]
7897 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7900 # TODO: Run iallocator in this opcode and pass correct placement options to
7901 # OpInstanceMigrate. Since other jobs can modify the cluster between
7902 # running the iallocator and the actual migration, a good consistency model
7903 # will have to be found.
7905 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7906 frozenset([self.op.node_name]))
7908 return ResultWithJobs(jobs)
7911 class TLMigrateInstance(Tasklet):
7912 """Tasklet class for instance migration.
7915 @ivar live: whether the migration will be done live or non-live;
7916 this variable is initialized only after CheckPrereq has run
7917 @type cleanup: boolean
7918 @ivar cleanup: Whether we clean up after a failed migration
7919 @type iallocator: string
7920 @ivar iallocator: The iallocator used to determine target_node
7921 @type target_node: string
7922 @ivar target_node: If given, the target_node to reallocate the instance to
7923 @type failover: boolean
7924 @ivar failover: Whether operation results in failover or migration
7925 @type fallback: boolean
7926 @ivar fallback: Whether fallback to failover is allowed if migration not
7928 @type ignore_consistency: boolean
7929 @ivar ignore_consistency: Whether we should ignore consistency between source
7931 @type shutdown_timeout: int
7932 @ivar shutdown_timeout: In case of failover, the timeout used for the instance shutdown
7933 @type ignore_ipolicy: bool
7934 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7939 _MIGRATION_POLL_INTERVAL = 1 # seconds
7940 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7942 def __init__(self, lu, instance_name, cleanup=False,
7943 failover=False, fallback=False,
7944 ignore_consistency=False,
7945 allow_runtime_changes=True,
7946 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7947 ignore_ipolicy=False):
7948 """Initializes this class.
7951 Tasklet.__init__(self, lu)
7954 self.instance_name = instance_name
7955 self.cleanup = cleanup
7956 self.live = False # will be overridden later
7957 self.failover = failover
7958 self.fallback = fallback
7959 self.ignore_consistency = ignore_consistency
7960 self.shutdown_timeout = shutdown_timeout
7961 self.ignore_ipolicy = ignore_ipolicy
7962 self.allow_runtime_changes = allow_runtime_changes
7964 def CheckPrereq(self):
7965 """Check prerequisites.
7967 This checks that the instance is in the cluster.
7970 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7971 instance = self.cfg.GetInstanceInfo(instance_name)
7972 assert instance is not None
7973 self.instance = instance
7974 cluster = self.cfg.GetClusterInfo()
7976 if (not self.cleanup and
7977 not instance.admin_state == constants.ADMINST_UP and
7978 not self.failover and self.fallback):
7979 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7980 " switching to failover")
7981 self.failover = True
7983 if instance.disk_template not in constants.DTS_MIRRORED:
7988 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7989 " %s" % (instance.disk_template, text),
7992 if instance.disk_template in constants.DTS_EXT_MIRROR:
7993 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7995 if self.lu.op.iallocator:
7996 self._RunAllocator()
7998 # We set self.target_node as it is required by
8000 self.target_node = self.lu.op.target_node
8002 # Check that the target node is correct in terms of instance policy
8003 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8004 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8005 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8006 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8007 ignore=self.ignore_ipolicy)
8009 # self.target_node is already populated, either directly or by the
8011 target_node = self.target_node
8012 if self.target_node == instance.primary_node:
8013 raise errors.OpPrereqError("Cannot migrate instance %s"
8014 " to its primary (%s)" %
8015 (instance.name, instance.primary_node))
8017 if len(self.lu.tasklets) == 1:
8018 # It is safe to release locks only when we're the only tasklet
8020 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8021 keep=[instance.primary_node, self.target_node])
8024 secondary_nodes = instance.secondary_nodes
8025 if not secondary_nodes:
8026 raise errors.ConfigurationError("No secondary node but using"
8027 " %s disk template" %
8028 instance.disk_template)
8029 target_node = secondary_nodes[0]
8030 if self.lu.op.iallocator or (self.lu.op.target_node and
8031 self.lu.op.target_node != target_node):
8033 text = "failed over"
8036 raise errors.OpPrereqError("Instances with disk template %s cannot"
8037 " be %s to arbitrary nodes"
8038 " (neither an iallocator nor a target"
8039 " node can be passed)" %
8040 (instance.disk_template, text),
8042 nodeinfo = self.cfg.GetNodeInfo(target_node)
8043 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8044 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8045 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8046 ignore=self.ignore_ipolicy)
8048 i_be = cluster.FillBE(instance)
8050 # check memory requirements on the secondary node
8051 if (not self.cleanup and
8052 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8053 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8054 "migrating instance %s" %
8056 i_be[constants.BE_MINMEM],
8057 instance.hypervisor)
8059 self.lu.LogInfo("Not checking memory on the secondary node as"
8060 " instance will not be started")
8062 # check if failover must be forced instead of migration
8063 if (not self.cleanup and not self.failover and
8064 i_be[constants.BE_ALWAYS_FAILOVER]):
8066 self.lu.LogInfo("Instance configured to always failover; fallback"
8068 self.failover = True
8070 raise errors.OpPrereqError("This instance has been configured to"
8071 " always failover, please allow failover",
8074 # check bridge existence
8075 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8077 if not self.cleanup:
8078 _CheckNodeNotDrained(self.lu, target_node)
8079 if not self.failover:
8080 result = self.rpc.call_instance_migratable(instance.primary_node,
8082 if result.fail_msg and self.fallback:
8083 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8085 self.failover = True
8087 result.Raise("Can't migrate, please use failover",
8088 prereq=True, ecode=errors.ECODE_STATE)
8090 assert not (self.failover and self.cleanup)
8092 if not self.failover:
8093 if self.lu.op.live is not None and self.lu.op.mode is not None:
8094 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8095 " parameters are accepted",
8097 if self.lu.op.live is not None:
8099 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8101 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8102 # reset the 'live' parameter to None so that repeated
8103 # invocations of CheckPrereq do not raise an exception
8104 self.lu.op.live = None
8105 elif self.lu.op.mode is None:
8106 # read the default value from the hypervisor
8107 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8108 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8110 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8112 # Failover is never live
8115 if not (self.failover or self.cleanup):
8116 remote_info = self.rpc.call_instance_info(instance.primary_node,
8118 instance.hypervisor)
8119 remote_info.Raise("Error checking instance on node %s" %
8120 instance.primary_node)
8121 instance_running = bool(remote_info.payload)
8122 if instance_running:
8123 self.current_mem = int(remote_info.payload["memory"])
8125 def _RunAllocator(self):
8126 """Run the allocator based on input opcode.
8129 # FIXME: add a self.ignore_ipolicy option
8130 ial = IAllocator(self.cfg, self.rpc,
8131 mode=constants.IALLOCATOR_MODE_RELOC,
8132 name=self.instance_name,
8133 relocate_from=[self.instance.primary_node],
8136 ial.Run(self.lu.op.iallocator)
8139 raise errors.OpPrereqError("Can't compute nodes using"
8140 " iallocator '%s': %s" %
8141 (self.lu.op.iallocator, ial.info),
8143 if len(ial.result) != ial.required_nodes:
8144 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8145 " of nodes (%s), required %s" %
8146 (self.lu.op.iallocator, len(ial.result),
8147 ial.required_nodes), errors.ECODE_FAULT)
8148 self.target_node = ial.result[0]
8149 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8150 self.instance_name, self.lu.op.iallocator,
8151 utils.CommaJoin(ial.result))
8153 def _WaitUntilSync(self):
8154 """Poll with custom rpc for disk sync.
8156 This uses our own step-based rpc call.
8159 self.feedback_fn("* wait until resync is done")
8163 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8165 (self.instance.disks,
8168 for node, nres in result.items():
8169 nres.Raise("Cannot resync disks on node %s" % node)
8170 node_done, node_percent = nres.payload
8171 all_done = all_done and node_done
8172 if node_percent is not None:
8173 min_percent = min(min_percent, node_percent)
8175 if min_percent < 100:
8176 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8179 def _EnsureSecondary(self, node):
8180 """Demote a node to secondary.
8183 self.feedback_fn("* switching node %s to secondary mode" % node)
8185 for dev in self.instance.disks:
8186 self.cfg.SetDiskID(dev, node)
8188 result = self.rpc.call_blockdev_close(node, self.instance.name,
8189 self.instance.disks)
8190 result.Raise("Cannot change disk to secondary on node %s" % node)
8192 def _GoStandalone(self):
8193 """Disconnect from the network.
8196 self.feedback_fn("* changing into standalone mode")
8197 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8198 self.instance.disks)
8199 for node, nres in result.items():
8200 nres.Raise("Cannot disconnect disks on node %s" % node)
8202 def _GoReconnect(self, multimaster):
8203 """Reconnect to the network.
8209 msg = "single-master"
8210 self.feedback_fn("* changing disks into %s mode" % msg)
8211 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8212 (self.instance.disks, self.instance),
8213 self.instance.name, multimaster)
8214 for node, nres in result.items():
8215 nres.Raise("Cannot change disks config on node %s" % node)
8217 def _ExecCleanup(self):
8218 """Try to cleanup after a failed migration.
8220 The cleanup is done by:
8221 - check that the instance is running only on one node
8222 (and update the config if needed)
8223 - change disks on its secondary node to secondary
8224 - wait until disks are fully synchronized
8225 - disconnect from the network
8226 - change disks into single-master mode
8227 - wait again until disks are fully synchronized
8230 instance = self.instance
8231 target_node = self.target_node
8232 source_node = self.source_node
8234 # check running on only one node
8235 self.feedback_fn("* checking where the instance actually runs"
8236 " (if this hangs, the hypervisor might be in"
8238 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8239 for node, result in ins_l.items():
8240 result.Raise("Can't contact node %s" % node)
8242 runningon_source = instance.name in ins_l[source_node].payload
8243 runningon_target = instance.name in ins_l[target_node].payload
8245 if runningon_source and runningon_target:
8246 raise errors.OpExecError("Instance seems to be running on two nodes,"
8247 " or the hypervisor is confused; you will have"
8248 " to ensure manually that it runs only on one"
8249 " and restart this operation")
8251 if not (runningon_source or runningon_target):
8252 raise errors.OpExecError("Instance does not seem to be running at all;"
8253 " in this case it's safer to repair by"
8254 " running 'gnt-instance stop' to ensure disk"
8255 " shutdown, and then restarting it")
8257 if runningon_target:
8258 # the migration has actually succeeded, we need to update the config
8259 self.feedback_fn("* instance running on secondary node (%s),"
8260 " updating config" % target_node)
8261 instance.primary_node = target_node
8262 self.cfg.Update(instance, self.feedback_fn)
8263 demoted_node = source_node
8265 self.feedback_fn("* instance confirmed to be running on its"
8266 " primary node (%s)" % source_node)
8267 demoted_node = target_node
8269 if instance.disk_template in constants.DTS_INT_MIRROR:
8270 self._EnsureSecondary(demoted_node)
8272 self._WaitUntilSync()
8273 except errors.OpExecError:
8274 # we ignore errors here, since if the device is standalone, it
8275 # won't be able to sync
8277 self._GoStandalone()
8278 self._GoReconnect(False)
8279 self._WaitUntilSync()
8281 self.feedback_fn("* done")
8283 def _RevertDiskStatus(self):
8284 """Try to revert the disk status after a failed migration.
8287 target_node = self.target_node
8288 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8292 self._EnsureSecondary(target_node)
8293 self._GoStandalone()
8294 self._GoReconnect(False)
8295 self._WaitUntilSync()
8296 except errors.OpExecError, err:
8297 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8298 " please try to recover the instance manually;"
8299 " error '%s'" % str(err))
8301 def _AbortMigration(self):
8302 """Call the hypervisor code to abort a started migration.
8305 instance = self.instance
8306 target_node = self.target_node
8307 source_node = self.source_node
8308 migration_info = self.migration_info
8310 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8314 abort_msg = abort_result.fail_msg
8316 logging.error("Aborting migration failed on target node %s: %s",
8317 target_node, abort_msg)
8318 # Don't raise an exception here, as we still have to try to revert the
8319 # disk status, even if this step failed.
8321 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8322 instance, False, self.live)
8323 abort_msg = abort_result.fail_msg
8325 logging.error("Aborting migration failed on source node %s: %s",
8326 source_node, abort_msg)
8328 def _ExecMigration(self):
8329 """Migrate an instance.
8331 The migrate is done by:
8332 - change the disks into dual-master mode
8333 - wait until disks are fully synchronized again
8334 - migrate the instance
8335 - change disks on the new secondary node (the old primary) to secondary
8336 - wait until disks are fully synchronized
8337 - change disks into single-master mode
8340 instance = self.instance
8341 target_node = self.target_node
8342 source_node = self.source_node
8344 # Check for hypervisor version mismatch and warn the user.
8345 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8346 None, [self.instance.hypervisor])
8347 for ninfo in nodeinfo.values():
8348 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8350 (_, _, (src_info, )) = nodeinfo[source_node].payload
8351 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8353 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8354 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8355 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8356 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8357 if src_version != dst_version:
8358 self.feedback_fn("* warning: hypervisor version mismatch between"
8359 " source (%s) and target (%s) node" %
8360 (src_version, dst_version))
8362 self.feedback_fn("* checking disk consistency between source and target")
8363 for (idx, dev) in enumerate(instance.disks):
8364 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8365 raise errors.OpExecError("Disk %s is degraded or not fully"
8366 " synchronized on target node,"
8367 " aborting migration" % idx)
8369 if self.current_mem > self.tgt_free_mem:
8370 if not self.allow_runtime_changes:
8371 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8372 " free memory to fit instance %s on target"
8373 " node %s (have %dMB, need %dMB)" %
8374 (instance.name, target_node,
8375 self.tgt_free_mem, self.current_mem))
8376 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8377 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8380 rpcres.Raise("Cannot modify instance runtime memory")
8382 # First get the migration information from the remote node
8383 result = self.rpc.call_migration_info(source_node, instance)
8384 msg = result.fail_msg
8386 log_err = ("Failed fetching source migration information from %s: %s" %
8388 logging.error(log_err)
8389 raise errors.OpExecError(log_err)
8391 self.migration_info = migration_info = result.payload
8393 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8394 # Then switch the disks to master/master mode
8395 self._EnsureSecondary(target_node)
8396 self._GoStandalone()
8397 self._GoReconnect(True)
8398 self._WaitUntilSync()
8400 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8401 result = self.rpc.call_accept_instance(target_node,
8404 self.nodes_ip[target_node])
8406 msg = result.fail_msg
8408 logging.error("Instance pre-migration failed, trying to revert"
8409 " disk status: %s", msg)
8410 self.feedback_fn("Pre-migration failed, aborting")
8411 self._AbortMigration()
8412 self._RevertDiskStatus()
8413 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8414 (instance.name, msg))
8416 self.feedback_fn("* migrating instance to %s" % target_node)
8417 result = self.rpc.call_instance_migrate(source_node, instance,
8418 self.nodes_ip[target_node],
8420 msg = result.fail_msg
8422 logging.error("Instance migration failed, trying to revert"
8423 " disk status: %s", msg)
8424 self.feedback_fn("Migration failed, aborting")
8425 self._AbortMigration()
8426 self._RevertDiskStatus()
8427 raise errors.OpExecError("Could not migrate instance %s: %s" %
8428 (instance.name, msg))
8430 self.feedback_fn("* starting memory transfer")
8431 last_feedback = time.time()
8433 result = self.rpc.call_instance_get_migration_status(source_node,
8435 msg = result.fail_msg
8436 ms = result.payload # MigrationStatus instance
8437 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8438 logging.error("Instance migration failed, trying to revert"
8439 " disk status: %s", msg)
8440 self.feedback_fn("Migration failed, aborting")
8441 self._AbortMigration()
8442 self._RevertDiskStatus()
8443 raise errors.OpExecError("Could not migrate instance %s: %s" %
8444 (instance.name, msg))
8446 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8447 self.feedback_fn("* memory transfer complete")
8450 if (utils.TimeoutExpired(last_feedback,
8451 self._MIGRATION_FEEDBACK_INTERVAL) and
8452 ms.transferred_ram is not None):
8453 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8454 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8455 last_feedback = time.time()
8457 time.sleep(self._MIGRATION_POLL_INTERVAL)
8459 result = self.rpc.call_instance_finalize_migration_src(source_node,
8463 msg = result.fail_msg
8465 logging.error("Instance migration succeeded, but finalization failed"
8466 " on the source node: %s", msg)
8467 raise errors.OpExecError("Could not finalize instance migration: %s" %
8470 instance.primary_node = target_node
8472 # distribute new instance config to the other nodes
8473 self.cfg.Update(instance, self.feedback_fn)
8475 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8479 msg = result.fail_msg
8481 logging.error("Instance migration succeeded, but finalization failed"
8482 " on the target node: %s", msg)
8483 raise errors.OpExecError("Could not finalize instance migration: %s" %
8486 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8487 self._EnsureSecondary(source_node)
8488 self._WaitUntilSync()
8489 self._GoStandalone()
8490 self._GoReconnect(False)
8491 self._WaitUntilSync()
8493 # If the instance's disk template is `rbd' or `ext' and there was a
8494 # successful migration, unmap the device from the source node.
8495 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
8496 disks = _ExpandCheckDisks(instance, instance.disks)
8497 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8499 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8500 msg = result.fail_msg
8502 logging.error("Migration was successful, but couldn't unmap the"
8503 " block device %s on source node %s: %s",
8504 disk.iv_name, source_node, msg)
8505 logging.error("You need to unmap the device %s manually on %s",
8506 disk.iv_name, source_node)
8508 self.feedback_fn("* done")
8510 def _ExecFailover(self):
8511 """Failover an instance.
8513 The failover is done by shutting it down on its present node and
8514 starting it on the secondary.
8517 instance = self.instance
8518 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8520 source_node = instance.primary_node
8521 target_node = self.target_node
8523 if instance.admin_state == constants.ADMINST_UP:
8524 self.feedback_fn("* checking disk consistency between source and target")
8525 for (idx, dev) in enumerate(instance.disks):
8526 # for drbd, these are drbd over lvm
8527 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8529 if primary_node.offline:
8530 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8532 (primary_node.name, idx, target_node))
8533 elif not self.ignore_consistency:
8534 raise errors.OpExecError("Disk %s is degraded on target node,"
8535 " aborting failover" % idx)
8537 self.feedback_fn("* not checking disk consistency as instance is not"
8540 self.feedback_fn("* shutting down instance on source node")
8541 logging.info("Shutting down instance %s on node %s",
8542 instance.name, source_node)
8544 result = self.rpc.call_instance_shutdown(source_node, instance,
8545 self.shutdown_timeout)
8546 msg = result.fail_msg
8548 if self.ignore_consistency or primary_node.offline:
8549 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8550 " proceeding anyway; please make sure node"
8551 " %s is down; error details: %s",
8552 instance.name, source_node, source_node, msg)
8554 raise errors.OpExecError("Could not shutdown instance %s on"
8556 (instance.name, source_node, msg))
8558 self.feedback_fn("* deactivating the instance's disks on source node")
8559 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8560 raise errors.OpExecError("Can't shut down the instance's disks")
8562 instance.primary_node = target_node
8563 # distribute new instance config to the other nodes
8564 self.cfg.Update(instance, self.feedback_fn)
8566 # Only start the instance if it's marked as up
8567 if instance.admin_state == constants.ADMINST_UP:
8568 self.feedback_fn("* activating the instance's disks on target node %s" %
8570 logging.info("Starting instance %s on node %s",
8571 instance.name, target_node)
8573 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8574 ignore_secondaries=True)
8576 _ShutdownInstanceDisks(self.lu, instance)
8577 raise errors.OpExecError("Can't activate the instance's disks")
8579 self.feedback_fn("* starting the instance on the target node %s" %
8581 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8583 msg = result.fail_msg
8585 _ShutdownInstanceDisks(self.lu, instance)
8586 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8587 (instance.name, target_node, msg))
8589 def Exec(self, feedback_fn):
8590 """Perform the migration.
8593 self.feedback_fn = feedback_fn
8594 self.source_node = self.instance.primary_node
8596 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8597 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8598 self.target_node = self.instance.secondary_nodes[0]
8599 # Otherwise self.target_node has been populated either
8600 # directly, or through an iallocator.
8602 self.all_nodes = [self.source_node, self.target_node]
8603 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8604 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8607 feedback_fn("Failover instance %s" % self.instance.name)
8608 self._ExecFailover()
8610 feedback_fn("Migrating instance %s" % self.instance.name)
8613 return self._ExecCleanup()
8615 return self._ExecMigration()
8618 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8620 """Wrapper around L{_CreateBlockDevInner}.
8622 This method annotates the root device first.
8625 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8626 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8630 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8632 """Create a tree of block devices on a given node.
8634 If this device type has to be created on secondaries, create it and
8637 If not, just recurse to children keeping the same 'force' value.
8639 @attention: The device has to be annotated already.
8641 @param lu: the lu on whose behalf we execute
8642 @param node: the node on which to create the device
8643 @type instance: L{objects.Instance}
8644 @param instance: the instance which owns the device
8645 @type device: L{objects.Disk}
8646 @param device: the device to create
8647 @type force_create: boolean
8648 @param force_create: whether to force creation of this device; this
8649 will be changed to True whenever we find a device which has the
8650 CreateOnSecondary() attribute
8651 @param info: the extra 'metadata' we should attach to the device
8652 (this will be represented as a LVM tag)
8653 @type force_open: boolean
8654 @param force_open: this parameter will be passed to the
8655 L{backend.BlockdevCreate} function where it specifies
8656 whether we run on primary or not, and it affects both
8657 the child assembly and the device's own Open() execution
8660 if device.CreateOnSecondary():
8664 for child in device.children:
8665 _CreateBlockDevInner(lu, node, instance, child, force_create,
8668 if not force_create:
8671 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8674 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8675 """Create a single block device on a given node.
8677 This will not recurse over children of the device, so they must be
8680 @param lu: the lu on whose behalf we execute
8681 @param node: the node on which to create the device
8682 @type instance: L{objects.Instance}
8683 @param instance: the instance which owns the device
8684 @type device: L{objects.Disk}
8685 @param device: the device to create
8686 @param info: the extra 'metadata' we should attach to the device
8687 (this will be represented as a LVM tag)
8688 @type force_open: boolean
8689 @param force_open: this parameter will be passed to the
8690 L{backend.BlockdevCreate} function where it specifies
8691 whether we run on primary or not, and it affects both
8692 the child assembly and the device's own Open() execution
8695 lu.cfg.SetDiskID(device, node)
8696 result = lu.rpc.call_blockdev_create(node, device, device.size,
8697 instance.name, force_open, info)
8698 result.Raise("Can't create block device %s on"
8699 " node %s for instance %s" % (device, node, instance.name))
8700 if device.physical_id is None:
8701 device.physical_id = result.payload
8704 def _GenerateUniqueNames(lu, exts):
8705 """Generate a suitable LV name.
8707 This will generate a logical volume name for the given instance.
8712 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8713 results.append("%s%s" % (new_id, val))
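# For illustration: with exts == [".disk0", ".disk1"] this yields names such
# as "<unique-id>.disk0" and "<unique-id>.disk1", each extension getting its
# own freshly generated ID (the IDs shown here are placeholders).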
8717 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8718 iv_name, p_minor, s_minor):
8719 """Generate a drbd8 device complete with its children.
8722 assert len(vgnames) == len(names) == 2
8723 port = lu.cfg.AllocatePort()
8724 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8726 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8727 logical_id=(vgnames[0], names[0]),
8729 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8730 logical_id=(vgnames[1], names[1]),
8732 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8733 logical_id=(primary, secondary, port,
8736 children=[dev_data, dev_meta],
8737 iv_name=iv_name, params={})
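# The resulting DRBD8 disk has the data LV and the DRBD_META_SIZE meta LV as
# children; iv_name (e.g. "disk/0") is the name under which the instance sees
# the device.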
8741 _DISK_TEMPLATE_NAME_PREFIX = {
8742 constants.DT_PLAIN: "",
8743 constants.DT_RBD: ".rbd",
8744 constants.DT_EXT: ".ext",
8748 _DISK_TEMPLATE_DEVICE_TYPE = {
8749 constants.DT_PLAIN: constants.LD_LV,
8750 constants.DT_FILE: constants.LD_FILE,
8751 constants.DT_SHARED_FILE: constants.LD_FILE,
8752 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8753 constants.DT_RBD: constants.LD_RBD,
8754 constants.DT_EXT: constants.LD_EXT,
8758 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8759 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8760 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8761 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8762 """Generate the entire disk layout for a given template type.
8765 #TODO: compute space requirements
8767 vgname = lu.cfg.GetVGName()
8768 disk_count = len(disk_info)
8771 if template_name == constants.DT_DISKLESS:
8773 elif template_name == constants.DT_DRBD8:
8774 if len(secondary_nodes) != 1:
8775 raise errors.ProgrammerError("Wrong template configuration")
8776 remote_node = secondary_nodes[0]
8777 minors = lu.cfg.AllocateDRBDMinor(
8778 [primary_node, remote_node] * len(disk_info), instance_name)
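# Minors are allocated pairwise per disk: minors[2 * idx] is used on the
# primary node and minors[2 * idx + 1] on the remote node (see the
# _GenerateDRBD8Branch call below).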
8780 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8782 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8785 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8786 for i in range(disk_count)]):
8787 names.append(lv_prefix + "_data")
8788 names.append(lv_prefix + "_meta")
8789 for idx, disk in enumerate(disk_info):
8790 disk_index = idx + base_index
8791 data_vg = disk.get(constants.IDISK_VG, vgname)
8792 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8793 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8794 disk[constants.IDISK_SIZE],
8796 names[idx * 2:idx * 2 + 2],
8797 "disk/%d" % disk_index,
8798 minors[idx * 2], minors[idx * 2 + 1])
8799 disk_dev.mode = disk[constants.IDISK_MODE]
8800 disks.append(disk_dev)
8803 raise errors.ProgrammerError("Wrong template configuration")
8805 if template_name == constants.DT_FILE:
8807 elif template_name == constants.DT_SHARED_FILE:
8808 _req_shr_file_storage()
8810 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8811 if name_prefix is None:
8814 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8815 (name_prefix, base_index + i)
8816 for i in range(disk_count)])
8818 if template_name == constants.DT_PLAIN:
8819 def logical_id_fn(idx, _, disk):
8820 vg = disk.get(constants.IDISK_VG, vgname)
8821 return (vg, names[idx])
8822 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8824 lambda _, disk_index, disk: (file_driver,
8825 "%s/disk%d" % (file_storage_dir,
8827 elif template_name == constants.DT_BLOCK:
8829 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8830 disk[constants.IDISK_ADOPT])
8831 elif template_name == constants.DT_RBD:
8832 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8833 elif template_name == constants.DT_EXT:
8834 def logical_id_fn(idx, _, disk):
8835 provider = disk.get(constants.IDISK_PROVIDER, None)
8836 if provider is None:
8837 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
8838 " not found", constants.DT_EXT,
8839 constants.IDISK_PROVIDER)
8840 return (provider, names[idx])
8842 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8844 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8846 for idx, disk in enumerate(disk_info):
8848 # Only for the Ext template, add disk_info to params
8849 if template_name == constants.DT_EXT:
8850 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
8852 if key not in constants.IDISK_PARAMS:
8853 params[key] = disk[key]
8854 disk_index = idx + base_index
8855 size = disk[constants.IDISK_SIZE]
8856 feedback_fn("* disk %s, size %s" %
8857 (disk_index, utils.FormatUnit(size, "h")))
8858 disks.append(objects.Disk(dev_type=dev_type, size=size,
8859 logical_id=logical_id_fn(idx, disk_index, disk),
8860 iv_name="disk/%d" % disk_index,
8861 mode=disk[constants.IDISK_MODE],
8867 def _GetInstanceInfoText(instance):
8868 """Compute that text that should be added to the disk's metadata.
8871 return "originstname+%s" % instance.name
8874 def _CalcEta(time_taken, written, total_size):
8875 """Calculates the ETA based on size written and total size.
8877 @param time_taken: The time taken so far
8878 @param written: amount written so far
8879 @param total_size: The total size of data to be written
8880 @return: The remaining time in seconds
8883 avg_time = time_taken / float(written)
8884 return (total_size - written) * avg_time
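# For illustration: if 512 MB out of 2048 MB were written in 30 seconds,
# avg_time is 30.0 / 512 seconds per MB and the remaining 1536 MB give an
# ETA of 1536 * 30.0 / 512 = 90 seconds.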
8887 def _WipeDisks(lu, instance):
8888 """Wipes instance disks.
8890 @type lu: L{LogicalUnit}
8891 @param lu: the logical unit on whose behalf we execute
8892 @type instance: L{objects.Instance}
8893 @param instance: the instance whose disks we should create
8894 @return: the success of the wipe
8897 node = instance.primary_node
8899 for device in instance.disks:
8900 lu.cfg.SetDiskID(device, node)
8902 logging.info("Pause sync of instance %s disks", instance.name)
8903 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8904 (instance.disks, instance),
8906 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8908 for idx, success in enumerate(result.payload):
8910 logging.warn("pause-sync of instance %s for disks %d failed",
8914 for idx, device in enumerate(instance.disks):
8915 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8916 # at most MAX_WIPE_CHUNK
8917 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8918 constants.MIN_WIPE_CHUNK_PERCENT)
8919 # we _must_ make this an int, otherwise rounding errors will
8921 wipe_chunk_size = int(wipe_chunk_size)
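# For illustration, assuming MIN_WIPE_CHUNK_PERCENT == 10 and
# MAX_WIPE_CHUNK == 1024 (see constants.py for the actual values): a
# 2048 MB disk is wiped in 204 MB chunks, while a 100 GB disk is capped at
# 1024 MB chunks.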
8923 lu.LogInfo("* Wiping disk %d", idx)
8924 logging.info("Wiping disk %d for instance %s, node %s using"
8925 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8930 start_time = time.time()
8932 while offset < size:
8933 wipe_size = min(wipe_chunk_size, size - offset)
8934 logging.debug("Wiping disk %d, offset %s, chunk %s",
8935 idx, offset, wipe_size)
8936 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8938 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8939 (idx, offset, wipe_size))
8942 if now - last_output >= 60:
8943 eta = _CalcEta(now - start_time, offset, size)
8944 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8945 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8948 logging.info("Resume sync of instance %s disks", instance.name)
8950 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8951 (instance.disks, instance),
8955 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8956 " please have a look at the status and troubleshoot"
8957 " the issue: %s", node, result.fail_msg)
8959 for idx, success in enumerate(result.payload):
8961 lu.LogWarning("Resume sync of disk %d failed, please have a"
8962 " look at the status and troubleshoot the issue", idx)
8963 logging.warn("resume-sync of instance %s for disks %d failed",
8967 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8968 """Create all disks for an instance.
8970 This abstracts away some work from AddInstance.
8972 @type lu: L{LogicalUnit}
8973 @param lu: the logical unit on whose behalf we execute
8974 @type instance: L{objects.Instance}
8975 @param instance: the instance whose disks we should create
8977 @param to_skip: list of indices to skip
8978 @type target_node: string
8979 @param target_node: if passed, overrides the target node for creation
8981 @return: the success of the creation
8984 info = _GetInstanceInfoText(instance)
8985 if target_node is None:
8986 pnode = instance.primary_node
8987 all_nodes = instance.all_nodes
8992 if instance.disk_template in constants.DTS_FILEBASED:
8993 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8994 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8996 result.Raise("Failed to create directory '%s' on"
8997 " node %s" % (file_storage_dir, pnode))
8999 # Note: this needs to be kept in sync with adding of disks in
9000 # LUInstanceSetParams
9001 for idx, device in enumerate(instance.disks):
9002 if to_skip and idx in to_skip:
9004 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9006 for node in all_nodes:
9007 f_create = node == pnode
9008 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9011 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9012 """Remove all disks for an instance.
9014 This abstracts away some work from `AddInstance()` and
9015 `RemoveInstance()`. Note that in case some of the devices couldn't
9016 be removed, the removal will continue with the other ones (compare
9017 with `_CreateDisks()`).
9019 @type lu: L{LogicalUnit}
9020 @param lu: the logical unit on whose behalf we execute
9021 @type instance: L{objects.Instance}
9022 @param instance: the instance whose disks we should remove
9023 @type target_node: string
9024 @param target_node: used to override the node on which to remove the disks
9026 @return: the success of the removal
9029 logging.info("Removing block devices for instance %s", instance.name)
9032 ports_to_release = set()
9033 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9034 for (idx, device) in enumerate(anno_disks):
9036 edata = [(target_node, device)]
9038 edata = device.ComputeNodeTree(instance.primary_node)
9039 for node, disk in edata:
9040 lu.cfg.SetDiskID(disk, node)
9041 result = lu.rpc.call_blockdev_remove(node, disk)
9043 lu.LogWarning("Could not remove disk %s on node %s,"
9044 " continuing anyway: %s", idx, node, result.fail_msg)
9045 if not (result.offline and node != instance.primary_node):
9048 # if this is a DRBD disk, return its port to the pool
9049 if device.dev_type in constants.LDS_DRBD:
9050 ports_to_release.add(device.logical_id[2])
9052 if all_result or ignore_failures:
9053 for port in ports_to_release:
9054 lu.cfg.AddTcpUdpPort(port)
9056 if instance.disk_template == constants.DT_FILE:
9057 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9061 tgt = instance.primary_node
9062 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9064 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9065 file_storage_dir, instance.primary_node, result.fail_msg)
9071 def _ComputeDiskSizePerVG(disk_template, disks):
9072 """Compute disk size requirements in the volume group
9075 def _compute(disks, payload):
9076 """Universal algorithm.
9081 vgs[disk[constants.IDISK_VG]] = \
9082 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
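# For illustration: two 1024 MB disks in VG "xenvg" with a payload of
# DRBD_META_SIZE (128 MB) accumulate to {"xenvg": 2304}.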
9086 # Required free disk space as a function of disk and swap space
9088 constants.DT_DISKLESS: {},
9089 constants.DT_PLAIN: _compute(disks, 0),
9090 # 128 MB are added for drbd metadata for each disk
9091 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9092 constants.DT_FILE: {},
9093 constants.DT_SHARED_FILE: {},
9096 if disk_template not in req_size_dict:
9097 raise errors.ProgrammerError("Disk template '%s' size requirement"
9098 " is unknown" % disk_template)
9100 return req_size_dict[disk_template]
9103 def _ComputeDiskSize(disk_template, disks):
9104 """Compute disk size requirements according to disk template
9107 # Required free disk space as a function of disk and swap space
9109 constants.DT_DISKLESS: None,
9110 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9111 # 128 MB are added for drbd metadata for each disk
9113 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9114 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9115 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9116 constants.DT_BLOCK: 0,
9117 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9118 constants.DT_EXT: sum(d[constants.IDISK_SIZE] for d in disks),
9121 if disk_template not in req_size_dict:
9122 raise errors.ProgrammerError("Disk template '%s' size requirement"
9123 " is unknown" % disk_template)
9125 return req_size_dict[disk_template]
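# For illustration: two 1024 MB disks require 2048 MB with DT_PLAIN and
# (1024 + DRBD_META_SIZE) * 2 = 2304 MB with DT_DRBD8.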
9128 def _FilterVmNodes(lu, nodenames):
9129 """Filters out non-vm_capable nodes from a list.
9131 @type lu: L{LogicalUnit}
9132 @param lu: the logical unit for which we check
9133 @type nodenames: list
9134 @param nodenames: the list of nodes on which we should check
9136 @return: the list of vm-capable nodes
9139 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9140 return [name for name in nodenames if name not in vm_nodes]
9143 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9144 """Hypervisor parameter validation.
9146 This function abstracts the hypervisor parameter validation to be
9147 used in both instance create and instance modify.
9149 @type lu: L{LogicalUnit}
9150 @param lu: the logical unit for which we check
9151 @type nodenames: list
9152 @param nodenames: the list of nodes on which we should check
9153 @type hvname: string
9154 @param hvname: the name of the hypervisor we should use
9155 @type hvparams: dict
9156 @param hvparams: the parameters which we need to check
9157 @raise errors.OpPrereqError: if the parameters are not valid
9160 nodenames = _FilterVmNodes(lu, nodenames)
9162 cluster = lu.cfg.GetClusterInfo()
9163 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9165 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9166 for node in nodenames:
9170 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9173 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9174 """OS parameters validation.
9176 @type lu: L{LogicalUnit}
9177 @param lu: the logical unit for which we check
9178 @type required: boolean
9179 @param required: whether the validation should fail if the OS is not
9181 @type nodenames: list
9182 @param nodenames: the list of nodes on which we should check
9183 @type osname: string
9184 @param osname: the name of the OS we should use
9185 @type osparams: dict
9186 @param osparams: the parameters which we need to check
9187 @raise errors.OpPrereqError: if the parameters are not valid
9190 nodenames = _FilterVmNodes(lu, nodenames)
9191 result = lu.rpc.call_os_validate(nodenames, required, osname,
9192 [constants.OS_VALIDATE_PARAMETERS],
9194 for node, nres in result.items():
9195 # we don't check for offline cases since this should be run only
9196 # against the master node and/or an instance's nodes
9197 nres.Raise("OS Parameters validation failed on node %s" % node)
9198 if not nres.payload:
9199 lu.LogInfo("OS %s not found on node %s, validation skipped",
9203 class LUInstanceCreate(LogicalUnit):
9204 """Create an instance.
9207 HPATH = "instance-add"
9208 HTYPE = constants.HTYPE_INSTANCE
9211 def CheckArguments(self):
9215 # do not require name_check to ease forward/backward compatibility
9217 if self.op.no_install and self.op.start:
9218 self.LogInfo("No-installation mode selected, disabling startup")
9219 self.op.start = False
9220 # validate/normalize the instance name
9221 self.op.instance_name = \
9222 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9224 if self.op.ip_check and not self.op.name_check:
9225 # TODO: make the ip check more flexible and not depend on the name check
9226 raise errors.OpPrereqError("Cannot do IP address check without a name"
9227 " check", errors.ECODE_INVAL)
9229 # check nics' parameter names
9230 for nic in self.op.nics:
9231 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9233 # check disks: parameter names and consistent adopt/no-adopt strategy
9234 has_adopt = has_no_adopt = False
9235 for disk in self.op.disks:
9236 if self.op.disk_template != constants.DT_EXT:
9237 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9238 if constants.IDISK_ADOPT in disk:
9242 if has_adopt and has_no_adopt:
9243 raise errors.OpPrereqError("Either all disks are adopted or none is",
9246 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9247 raise errors.OpPrereqError("Disk adoption is not supported for the"
9248 " '%s' disk template" %
9249 self.op.disk_template,
9251 if self.op.iallocator is not None:
9252 raise errors.OpPrereqError("Disk adoption not allowed with an"
9253 " iallocator script", errors.ECODE_INVAL)
9254 if self.op.mode == constants.INSTANCE_IMPORT:
9255 raise errors.OpPrereqError("Disk adoption not allowed for"
9256 " instance import", errors.ECODE_INVAL)
9258 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9259 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9260 " but no 'adopt' parameter given" %
9261 self.op.disk_template,
9264 self.adopt_disks = has_adopt
9266 # instance name verification
9267 if self.op.name_check:
9268 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9269 self.op.instance_name = self.hostname1.name
9270 # used in CheckPrereq for ip ping check
9271 self.check_ip = self.hostname1.ip
9273 self.check_ip = None
9275 # file storage checks
9276 if (self.op.file_driver and
9277 not self.op.file_driver in constants.FILE_DRIVER):
9278 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9279 self.op.file_driver, errors.ECODE_INVAL)
9281 if self.op.disk_template == constants.DT_FILE:
9282 opcodes.RequireFileStorage()
9283 elif self.op.disk_template == constants.DT_SHARED_FILE:
9284 opcodes.RequireSharedFileStorage()
9286 ### Node/iallocator related checks
9287 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9289 if self.op.pnode is not None:
9290 if self.op.disk_template in constants.DTS_INT_MIRROR:
9291 if self.op.snode is None:
9292 raise errors.OpPrereqError("The networked disk templates need"
9293 " a mirror node", errors.ECODE_INVAL)
9295 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9297 self.op.snode = None
9299 self._cds = _GetClusterDomainSecret()
9301 if self.op.mode == constants.INSTANCE_IMPORT:
9302 # On import force_variant must be True, because if we forced it at
9303 # initial install, our only chance when importing it back is that it
9305 self.op.force_variant = True
9307 if self.op.no_install:
9308 self.LogInfo("No-installation mode has no effect during import")
9310 elif self.op.mode == constants.INSTANCE_CREATE:
9311 if self.op.os_type is None:
9312 raise errors.OpPrereqError("No guest OS specified",
9314 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9315 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9316 " installation" % self.op.os_type,
9318 if self.op.disk_template is None:
9319 raise errors.OpPrereqError("No disk template specified",
9322 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9323 # Check handshake to ensure both clusters have the same domain secret
9324 src_handshake = self.op.source_handshake
9325 if not src_handshake:
9326 raise errors.OpPrereqError("Missing source handshake",
9329 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9332 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9335 # Load and check source CA
9336 self.source_x509_ca_pem = self.op.source_x509_ca
9337 if not self.source_x509_ca_pem:
9338 raise errors.OpPrereqError("Missing source X509 CA",
9342 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9344 except OpenSSL.crypto.Error, err:
9345 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9346 (err, ), errors.ECODE_INVAL)
9348 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9349 if errcode is not None:
9350 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9353 self.source_x509_ca = cert
9355 src_instance_name = self.op.source_instance_name
9356 if not src_instance_name:
9357 raise errors.OpPrereqError("Missing source instance name",
9360 self.source_instance_name = \
9361 netutils.GetHostname(name=src_instance_name).name
9364 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9365 self.op.mode, errors.ECODE_INVAL)
9367 def ExpandNames(self):
9368 """ExpandNames for CreateInstance.
9370 Figure out the right locks for instance creation.
9373 self.needed_locks = {}
9375 instance_name = self.op.instance_name
9376 # this is just a preventive check, but someone might still add this
9377 # instance in the meantime, and creation will fail at lock-add time
9378 if instance_name in self.cfg.GetInstanceList():
9379 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9380 instance_name, errors.ECODE_EXISTS)
9382 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9384 if self.op.iallocator:
9385 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9386 # specifying a group on instance creation and then selecting nodes from
9388 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9389 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9391 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9392 nodelist = [self.op.pnode]
9393 if self.op.snode is not None:
9394 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9395 nodelist.append(self.op.snode)
9396 self.needed_locks[locking.LEVEL_NODE] = nodelist
9397 # Lock resources of instance's primary and secondary nodes (copy to
9398 # prevent accidental modification)
9399 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9401 # in case of import lock the source node too
9402 if self.op.mode == constants.INSTANCE_IMPORT:
9403 src_node = self.op.src_node
9404 src_path = self.op.src_path
9406 if src_path is None:
9407 self.op.src_path = src_path = self.op.instance_name
9409 if src_node is None:
9410 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9411 self.op.src_node = None
9412 if os.path.isabs(src_path):
9413 raise errors.OpPrereqError("Importing an instance from a path"
9414 " requires a source node option",
9417 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9418 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9419 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9420 if not os.path.isabs(src_path):
9421 self.op.src_path = src_path = \
9422 utils.PathJoin(constants.EXPORT_DIR, src_path)
9424 def _RunAllocator(self):
9425 """Run the allocator based on input opcode.
9428 nics = [n.ToDict() for n in self.nics]
9429 ial = IAllocator(self.cfg, self.rpc,
9430 mode=constants.IALLOCATOR_MODE_ALLOC,
9431 name=self.op.instance_name,
9432 disk_template=self.op.disk_template,
9435 vcpus=self.be_full[constants.BE_VCPUS],
9436 memory=self.be_full[constants.BE_MAXMEM],
9437 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9440 hypervisor=self.op.hypervisor,
9443 ial.Run(self.op.iallocator)
9446 raise errors.OpPrereqError("Can't compute nodes using"
9447 " iallocator '%s': %s" %
9448 (self.op.iallocator, ial.info),
9450 if len(ial.result) != ial.required_nodes:
9451 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9452 " of nodes (%s), required %s" %
9453 (self.op.iallocator, len(ial.result),
9454 ial.required_nodes), errors.ECODE_FAULT)
9455 self.op.pnode = ial.result[0]
9456 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9457 self.op.instance_name, self.op.iallocator,
9458 utils.CommaJoin(ial.result))
9459 if ial.required_nodes == 2:
9460 self.op.snode = ial.result[1]
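# Note: as the checks above imply, ial.result is a list of node names of
# length ial.required_nodes; the first entry becomes the primary node and,
# for mirrored (two-node) templates, the second becomes the secondary.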
9462 def BuildHooksEnv(self):
9465 This runs on master, primary and secondary nodes of the instance.
9469 "ADD_MODE": self.op.mode,
9471 if self.op.mode == constants.INSTANCE_IMPORT:
9472 env["SRC_NODE"] = self.op.src_node
9473 env["SRC_PATH"] = self.op.src_path
9474 env["SRC_IMAGES"] = self.src_images
9476 env.update(_BuildInstanceHookEnv(
9477 name=self.op.instance_name,
9478 primary_node=self.op.pnode,
9479 secondary_nodes=self.secondaries,
9480 status=self.op.start,
9481 os_type=self.op.os_type,
9482 minmem=self.be_full[constants.BE_MINMEM],
9483 maxmem=self.be_full[constants.BE_MAXMEM],
9484 vcpus=self.be_full[constants.BE_VCPUS],
9485 nics=_NICListToTuple(self, self.nics),
9486 disk_template=self.op.disk_template,
9487 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9488 for d in self.disks],
9491 hypervisor_name=self.op.hypervisor,
9497 def BuildHooksNodes(self):
9498 """Build hooks nodes.
9501 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
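# Sketch of what the hook scripts end up seeing (assumption: the hooks runner
# exports these with its usual GANETI_ prefix): ADD_MODE plus, for imports,
# SRC_NODE/SRC_PATH/SRC_IMAGES, together with the generic instance variables
# filled in by _BuildInstanceHookEnv; the hooks run on the master, the
# primary node and any secondaries computed above.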
9504 def _ReadExportInfo(self):
9505 """Reads the export information from disk.
9507 It will override the opcode source node and path with the actual
9508 information, if these two were not specified before.
9510 @return: the export information
9513 assert self.op.mode == constants.INSTANCE_IMPORT
9515 src_node = self.op.src_node
9516 src_path = self.op.src_path
9518 if src_node is None:
9519 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9520 exp_list = self.rpc.call_export_list(locked_nodes)
9522 for node in exp_list:
9523 if exp_list[node].fail_msg:
9525 if src_path in exp_list[node].payload:
9527 self.op.src_node = src_node = node
9528 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9532 raise errors.OpPrereqError("No export found for relative path %s" %
9533 src_path, errors.ECODE_INVAL)
9535 _CheckNodeOnline(self, src_node)
9536 result = self.rpc.call_export_info(src_node, src_path)
9537 result.Raise("No export or invalid export found in dir %s" % src_path)
9539 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9540 if not export_info.has_section(constants.INISECT_EXP):
9541 raise errors.ProgrammerError("Corrupted export config",
9542 errors.ECODE_ENVIRON)
9544 ei_version = export_info.get(constants.INISECT_EXP, "version")
9545 if (int(ei_version) != constants.EXPORT_VERSION):
9546 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9547 (ei_version, constants.EXPORT_VERSION),
9548 errors.ECODE_ENVIRON)
9551 def _ReadExportParams(self, einfo):
9552 """Use export parameters as defaults.
9554 In case the opcode doesn't specify (as in override) some instance
9555 parameters, then try to use them from the export information, if
9559 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9561 if self.op.disk_template is None:
9562 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9563 self.op.disk_template = einfo.get(constants.INISECT_INS,
9565 if self.op.disk_template not in constants.DISK_TEMPLATES:
9566 raise errors.OpPrereqError("Disk template specified in configuration"
9567 " file is not one of the allowed values:"
9568 " %s" % " ".join(constants.DISK_TEMPLATES))
9570 raise errors.OpPrereqError("No disk template specified and the export"
9571 " is missing the disk_template information",
9574 if not self.op.disks:
9576 # TODO: import the disk iv_name too
9577 for idx in range(constants.MAX_DISKS):
9578 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9579 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9580 disks.append({constants.IDISK_SIZE: disk_sz})
9581 self.op.disks = disks
9582 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9583 raise errors.OpPrereqError("No disk info specified and the export"
9584 " is missing the disk information",
9587 if not self.op.nics:
9589 for idx in range(constants.MAX_NICS):
9590 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9592 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9593 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9600 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9601 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9603 if (self.op.hypervisor is None and
9604 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9605 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9607 if einfo.has_section(constants.INISECT_HYP):
9608 # use the export parameters but do not override the ones
9609 # specified by the user
9610 for name, value in einfo.items(constants.INISECT_HYP):
9611 if name not in self.op.hvparams:
9612 self.op.hvparams[name] = value
9614 if einfo.has_section(constants.INISECT_BEP):
9615 # use the parameters, without overriding
9616 for name, value in einfo.items(constants.INISECT_BEP):
9617 if name not in self.op.beparams:
9618 self.op.beparams[name] = value
9619 # Compatibility for the old "memory" be param
9620 if name == constants.BE_MEMORY:
9621 if constants.BE_MAXMEM not in self.op.beparams:
9622 self.op.beparams[constants.BE_MAXMEM] = value
9623 if constants.BE_MINMEM not in self.op.beparams:
9624 self.op.beparams[constants.BE_MINMEM] = value
9626 # try to read the parameters old style, from the main section
9627 for name in constants.BES_PARAMETERS:
9628 if (name not in self.op.beparams and
9629 einfo.has_option(constants.INISECT_INS, name)):
9630 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9632 if einfo.has_section(constants.INISECT_OSP):
9633 # use the parameters, without overriding
9634 for name, value in einfo.items(constants.INISECT_OSP):
9635 if name not in self.op.osparams:
9636 self.op.osparams[name] = value
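# Rough shape of the export file read above (an INI-style file produced at
# export time; only the options consumed by this method are listed):
#
#   [constants.INISECT_EXP]  version
#   [constants.INISECT_INS]  name, disk_template, disk<N>_size, nic<N>_mac,
#                            nic<N>_<param>, tags, hypervisor, plus old-style
#                            backend parameters
#   [constants.INISECT_HYP]  hypervisor parameters, merged without overriding
#   [constants.INISECT_BEP]  backend parameters, merged without overriding
#   [constants.INISECT_OSP]  OS parameters, merged without overriding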
9638 def _RevertToDefaults(self, cluster):
9639 """Revert the instance parameters to the default values.
9643 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9644 for name in self.op.hvparams.keys():
9645 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9646 del self.op.hvparams[name]
9648 be_defs = cluster.SimpleFillBE({})
9649 for name in self.op.beparams.keys():
9650 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9651 del self.op.beparams[name]
9653 nic_defs = cluster.SimpleFillNIC({})
9654 for nic in self.op.nics:
9655 for name in constants.NICS_PARAMETERS:
9656 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9659 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9660 for name in self.op.osparams.keys():
9661 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9662 del self.op.osparams[name]
9664 def _CalculateFileStorageDir(self):
9665 """Calculate final instance file storage dir.
9668 # file storage dir calculation/check
9669 self.instance_file_storage_dir = None
9670 if self.op.disk_template in constants.DTS_FILEBASED:
9671 # build the full file storage dir path
9674 if self.op.disk_template == constants.DT_SHARED_FILE:
9675 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9677 get_fsd_fn = self.cfg.GetFileStorageDir
9679 cfg_storagedir = get_fsd_fn()
9680 if not cfg_storagedir:
9681 raise errors.OpPrereqError("Cluster file storage dir not defined")
9682 joinargs.append(cfg_storagedir)
9684 if self.op.file_storage_dir is not None:
9685 joinargs.append(self.op.file_storage_dir)
9687 joinargs.append(self.op.instance_name)
9689 # pylint: disable=W0142
9690 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
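# The resulting path therefore has the shape
#   <cluster (shared) file storage dir>[/<op.file_storage_dir>]/<instance name>
# e.g. (illustrative only) /srv/ganeti/file-storage/web/inst1.example.com when
# the opcode passes file_storage_dir="web".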
9692 def CheckPrereq(self): # pylint: disable=R0914
9693 """Check prerequisites.
9696 self._CalculateFileStorageDir()
9698 if self.op.mode == constants.INSTANCE_IMPORT:
9699 export_info = self._ReadExportInfo()
9700 self._ReadExportParams(export_info)
9701 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9703 self._old_instance_name = None
9705 if (not self.cfg.GetVGName() and
9706 self.op.disk_template not in constants.DTS_NOT_LVM):
9707 raise errors.OpPrereqError("Cluster does not support lvm-based"
9708 " instances", errors.ECODE_STATE)
9710 if (self.op.hypervisor is None or
9711 self.op.hypervisor == constants.VALUE_AUTO):
9712 self.op.hypervisor = self.cfg.GetHypervisorType()
9714 cluster = self.cfg.GetClusterInfo()
9715 enabled_hvs = cluster.enabled_hypervisors
9716 if self.op.hypervisor not in enabled_hvs:
9717 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9718 " cluster (%s)" % (self.op.hypervisor,
9719 ",".join(enabled_hvs)),
9722 # Check tag validity
9723 for tag in self.op.tags:
9724 objects.TaggableObject.ValidateTag(tag)
9726 # check hypervisor parameter syntax (locally)
9727 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9728 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9730 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9731 hv_type.CheckParameterSyntax(filled_hvp)
9732 self.hv_full = filled_hvp
9733 # check that we don't specify global parameters on an instance
9734 _CheckGlobalHvParams(self.op.hvparams)
9736 # fill and remember the beparams dict
9737 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9738 for param, value in self.op.beparams.iteritems():
9739 if value == constants.VALUE_AUTO:
9740 self.op.beparams[param] = default_beparams[param]
9741 objects.UpgradeBeParams(self.op.beparams)
9742 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9743 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9745 # build os parameters
9746 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9748 # now that hvp/bep are in final format, let's reset to defaults,
9750 if self.op.identify_defaults:
9751 self._RevertToDefaults(cluster)
9755 for idx, nic in enumerate(self.op.nics):
9756 nic_mode_req = nic.get(constants.INIC_MODE, None)
9757 nic_mode = nic_mode_req
9758 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9759 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9761 # in routed mode, for the first nic, the default ip is 'auto'
9762 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9763 default_ip_mode = constants.VALUE_AUTO
9765 default_ip_mode = constants.VALUE_NONE
9767 # ip validity checks
9768 ip = nic.get(constants.INIC_IP, default_ip_mode)
9769 if ip is None or ip.lower() == constants.VALUE_NONE:
9771 elif ip.lower() == constants.VALUE_AUTO:
9772 if not self.op.name_check:
9773 raise errors.OpPrereqError("IP address set to auto but name checks"
9774 " have been skipped",
9776 nic_ip = self.hostname1.ip
9778 if not netutils.IPAddress.IsValid(ip):
9779 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9783 # TODO: check the ip address for uniqueness
9784 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9785 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9788 # MAC address verification
9789 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9790 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9791 mac = utils.NormalizeAndValidateMac(mac)
9794 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9795 except errors.ReservationError:
9796 raise errors.OpPrereqError("MAC address %s already in use"
9797 " in cluster" % mac,
9798 errors.ECODE_NOTUNIQUE)
9800 # Build nic parameters
9801 link = nic.get(constants.INIC_LINK, None)
9802 if link == constants.VALUE_AUTO:
9803 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9806 nicparams[constants.NIC_MODE] = nic_mode
9808 nicparams[constants.NIC_LINK] = link
9810 check_params = cluster.SimpleFillNIC(nicparams)
9811 objects.NIC.CheckParameterSyntax(check_params)
9812 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9814 # disk checks/pre-build
9815 default_vg = self.cfg.GetVGName()
9817 for disk in self.op.disks:
9818 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9819 if mode not in constants.DISK_ACCESS_SET:
9820 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9821 mode, errors.ECODE_INVAL)
9822 size = disk.get(constants.IDISK_SIZE, None)
9824 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9827 except (TypeError, ValueError):
9828 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9831 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9832 if ext_provider and self.op.disk_template != constants.DT_EXT:
9833 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9834 " disk template, not %s" %
9835 (constants.IDISK_PROVIDER, constants.DT_EXT,
9836 self.op.disk_template), errors.ECODE_INVAL)
9838 data_vg = disk.get(constants.IDISK_VG, default_vg)
9840 constants.IDISK_SIZE: size,
9841 constants.IDISK_MODE: mode,
9842 constants.IDISK_VG: data_vg,
9845 if constants.IDISK_METAVG in disk:
9846 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9847 if constants.IDISK_ADOPT in disk:
9848 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9850 # For extstorage, demand the `provider' option and add any
9851 # additional parameters (ext-params) to the dict
9852 if self.op.disk_template == constants.DT_EXT:
9854 new_disk[constants.IDISK_PROVIDER] = ext_provider
9856 if key not in constants.IDISK_PARAMS:
9857 new_disk[key] = disk[key]
9859 raise errors.OpPrereqError("Missing provider for template '%s'" %
9860 constants.DT_EXT, errors.ECODE_INVAL)
9862 self.disks.append(new_disk)
9864 if self.op.mode == constants.INSTANCE_IMPORT:
9866 for idx in range(len(self.disks)):
9867 option = "disk%d_dump" % idx
9868 if export_info.has_option(constants.INISECT_INS, option):
9869 # FIXME: are the old os-es, disk sizes, etc. useful?
9870 export_name = export_info.get(constants.INISECT_INS, option)
9871 image = utils.PathJoin(self.op.src_path, export_name)
9872 disk_images.append(image)
9874 disk_images.append(False)
9876 self.src_images = disk_images
9878 if self.op.instance_name == self._old_instance_name:
9879 for idx, nic in enumerate(self.nics):
9880 if nic.mac == constants.VALUE_AUTO:
9881 nic_mac_ini = "nic%d_mac" % idx
9882 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9884 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9886 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9887 if self.op.ip_check:
9888 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9889 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9890 (self.check_ip, self.op.instance_name),
9891 errors.ECODE_NOTUNIQUE)
9893 #### mac address generation
9894 # By generating the MAC address here, both the allocator and the hooks get
9895 # the real final MAC address rather than the 'auto' or 'generate' value.
9896 # There is a race condition between the generation and the instance object
9897 # creation, which means that we know the mac is valid now, but we're not
9898 # sure it will be when we actually add the instance. If things go bad
9899 # adding the instance will abort because of a duplicate mac, and the
9900 # creation job will fail.
9901 for nic in self.nics:
9902 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9903 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9907 if self.op.iallocator is not None:
9908 self._RunAllocator()
9910 # Release all unneeded node locks
9911 _ReleaseLocks(self, locking.LEVEL_NODE,
9912 keep=filter(None, [self.op.pnode, self.op.snode,
9914 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9915 keep=filter(None, [self.op.pnode, self.op.snode,
9918 #### node related checks
9920 # check primary node
9921 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9922 assert self.pnode is not None, \
9923 "Cannot retrieve locked node %s" % self.op.pnode
9925 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9926 pnode.name, errors.ECODE_STATE)
9928 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9929 pnode.name, errors.ECODE_STATE)
9930 if not pnode.vm_capable:
9931 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9932 " '%s'" % pnode.name, errors.ECODE_STATE)
9934 self.secondaries = []
9936 # mirror node verification
9937 if self.op.disk_template in constants.DTS_INT_MIRROR:
9938 if self.op.snode == pnode.name:
9939 raise errors.OpPrereqError("The secondary node cannot be the"
9940 " primary node", errors.ECODE_INVAL)
9941 _CheckNodeOnline(self, self.op.snode)
9942 _CheckNodeNotDrained(self, self.op.snode)
9943 _CheckNodeVmCapable(self, self.op.snode)
9944 self.secondaries.append(self.op.snode)
9946 snode = self.cfg.GetNodeInfo(self.op.snode)
9947 if pnode.group != snode.group:
9948 self.LogWarning("The primary and secondary nodes are in two"
9949 " different node groups; the disk parameters"
9950 " from the first disk's node group will be"
9953 nodenames = [pnode.name] + self.secondaries
9955 # Verify instance specs
9956 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9958 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9959 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9960 constants.ISPEC_DISK_COUNT: len(self.disks),
9961 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9962 constants.ISPEC_NIC_COUNT: len(self.nics),
9963 constants.ISPEC_SPINDLE_USE: spindle_use,
9966 group_info = self.cfg.GetNodeGroup(pnode.group)
9967 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9968 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9969 if not self.op.ignore_ipolicy and res:
9970 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9971 " policy: %s") % (pnode.group,
9972 utils.CommaJoin(res)),
9975 if not self.adopt_disks:
9976 if self.op.disk_template == constants.DT_RBD:
9977 # _CheckRADOSFreeSpace() is just a placeholder.
9978 # Any function that checks prerequisites can be placed here.
9979 # Check if there is enough space on the RADOS cluster.
9980 _CheckRADOSFreeSpace()
9981 elif self.op.disk_template == constants.DT_EXT:
9982 # FIXME: Function that checks prereqs if needed
9985 # Check lv size requirements, if not adopting
9986 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9987 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9989 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9990 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9991 disk[constants.IDISK_ADOPT])
9992 for disk in self.disks])
9993 if len(all_lvs) != len(self.disks):
9994 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9996 for lv_name in all_lvs:
9998 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9999 # to ReserveLV uses the same syntax
10000 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10001 except errors.ReservationError:
10002 raise errors.OpPrereqError("LV named %s used by another instance" %
10003 lv_name, errors.ECODE_NOTUNIQUE)
10005 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10006 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10008 node_lvs = self.rpc.call_lv_list([pnode.name],
10009 vg_names.payload.keys())[pnode.name]
10010 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10011 node_lvs = node_lvs.payload
10013 delta = all_lvs.difference(node_lvs.keys())
10015 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10016 utils.CommaJoin(delta),
10017 errors.ECODE_INVAL)
10018 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10020 raise errors.OpPrereqError("Online logical volumes found, cannot"
10021 " adopt: %s" % utils.CommaJoin(online_lvs),
10022 errors.ECODE_STATE)
10023 # update the size of disk based on what is found
10024 for dsk in self.disks:
10025 dsk[constants.IDISK_SIZE] = \
10026 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10027 dsk[constants.IDISK_ADOPT])][0]))
10029 elif self.op.disk_template == constants.DT_BLOCK:
10030 # Normalize and de-duplicate device paths
10031 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10032 for disk in self.disks])
10033 if len(all_disks) != len(self.disks):
10034 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10035 errors.ECODE_INVAL)
10036 baddisks = [d for d in all_disks
10037 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10039 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10040 " cannot be adopted" %
10041 (", ".join(baddisks),
10042 constants.ADOPTABLE_BLOCKDEV_ROOT),
10043 errors.ECODE_INVAL)
10045 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10046 list(all_disks))[pnode.name]
10047 node_disks.Raise("Cannot get block device information from node %s" %
10049 node_disks = node_disks.payload
10050 delta = all_disks.difference(node_disks.keys())
10052 raise errors.OpPrereqError("Missing block device(s): %s" %
10053 utils.CommaJoin(delta),
10054 errors.ECODE_INVAL)
10055 for dsk in self.disks:
10056 dsk[constants.IDISK_SIZE] = \
10057 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
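# Summary of the adoption checks above: for DT_PLAIN each "vg/lv" pair is
# reserved and must exist but be offline on the primary node (the lv_list
# payload is keyed by "vg/lv", with the size in field 0 and the online flag
# in field 2), while for DT_BLOCK the device paths must be unique, live under
# constants.ADOPTABLE_BLOCKDEV_ROOT and be reported by bdev_sizes; in both
# cases the user-supplied sizes are replaced with the sizes found on the node.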
10059 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10061 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10062 # check OS parameters (remotely)
10063 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10065 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10067 #TODO: _CheckExtParams (remotely)
10068 # Check parameters for extstorage
10070 # memory check on primary node
10071 #TODO(dynmem): use MINMEM for checking
10073 _CheckNodeFreeMemory(self, self.pnode.name,
10074 "creating instance %s" % self.op.instance_name,
10075 self.be_full[constants.BE_MAXMEM],
10076 self.op.hypervisor)
10078 self.dry_run_result = list(nodenames)
10080 def Exec(self, feedback_fn):
10081 """Create and add the instance to the cluster.
10084 instance = self.op.instance_name
10085 pnode_name = self.pnode.name
10087 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10088 self.owned_locks(locking.LEVEL_NODE)), \
10089 "Node locks differ from node resource locks"
10091 ht_kind = self.op.hypervisor
10092 if ht_kind in constants.HTS_REQ_PORT:
10093 network_port = self.cfg.AllocatePort()
10095 network_port = None
10097 # This is ugly, but we have a chicken-and-egg problem here
10098 # We can only take the group disk parameters, as the instance
10099 # has no disks yet (we are generating them right here).
10100 node = self.cfg.GetNodeInfo(pnode_name)
10101 nodegroup = self.cfg.GetNodeGroup(node.group)
10102 disks = _GenerateDiskTemplate(self,
10103 self.op.disk_template,
10104 instance, pnode_name,
10107 self.instance_file_storage_dir,
10108 self.op.file_driver,
10111 self.cfg.GetGroupDiskParams(nodegroup))
10113 iobj = objects.Instance(name=instance, os=self.op.os_type,
10114 primary_node=pnode_name,
10115 nics=self.nics, disks=disks,
10116 disk_template=self.op.disk_template,
10117 admin_state=constants.ADMINST_DOWN,
10118 network_port=network_port,
10119 beparams=self.op.beparams,
10120 hvparams=self.op.hvparams,
10121 hypervisor=self.op.hypervisor,
10122 osparams=self.op.osparams,
10126 for tag in self.op.tags:
10129 if self.adopt_disks:
10130 if self.op.disk_template == constants.DT_PLAIN:
10131 # rename LVs to the newly-generated names; we need to construct
10132 # 'fake' LV disks with the old data, plus the new unique_id
10133 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10135 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10136 rename_to.append(t_dsk.logical_id)
10137 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10138 self.cfg.SetDiskID(t_dsk, pnode_name)
10139 result = self.rpc.call_blockdev_rename(pnode_name,
10140 zip(tmp_disks, rename_to))
10141 result.Raise("Failed to rename adopted LVs")
10143 feedback_fn("* creating instance disks...")
10145 _CreateDisks(self, iobj)
10146 except errors.OpExecError:
10147 self.LogWarning("Device creation failed, reverting...")
10149 _RemoveDisks(self, iobj)
10151 self.cfg.ReleaseDRBDMinors(instance)
10154 feedback_fn("adding instance %s to cluster config" % instance)
10156 self.cfg.AddInstance(iobj, self.proc.GetECId())
10158 # Declare that we don't want to remove the instance lock anymore, as we've
10159 # added the instance to the config
10160 del self.remove_locks[locking.LEVEL_INSTANCE]
10162 if self.op.mode == constants.INSTANCE_IMPORT:
10163 # Release unused nodes
10164 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10166 # Release all nodes
10167 _ReleaseLocks(self, locking.LEVEL_NODE)
10170 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10171 feedback_fn("* wiping instance disks...")
10173 _WipeDisks(self, iobj)
10174 except errors.OpExecError, err:
10175 logging.exception("Wiping disks failed")
10176 self.LogWarning("Wiping instance disks failed (%s)", err)
10180 # Something is already wrong with the disks, don't do anything else
10182 elif self.op.wait_for_sync:
10183 disk_abort = not _WaitForSync(self, iobj)
10184 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10185 # make sure the disks are not degraded (still sync-ing is ok)
10186 feedback_fn("* checking mirrors status")
10187 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10192 _RemoveDisks(self, iobj)
10193 self.cfg.RemoveInstance(iobj.name)
10194 # Make sure the instance lock gets removed
10195 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10196 raise errors.OpExecError("There are some degraded disks for"
10199 # Release all node resource locks
10200 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10202 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10203 # we need to set the disks ID to the primary node, since the
10204 # preceding code might or might not have done it, depending on
10205 # disk template and other options
10206 for disk in iobj.disks:
10207 self.cfg.SetDiskID(disk, pnode_name)
10208 if self.op.mode == constants.INSTANCE_CREATE:
10209 if not self.op.no_install:
10210 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10211 not self.op.wait_for_sync)
10213 feedback_fn("* pausing disk sync to install instance OS")
10214 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10217 for idx, success in enumerate(result.payload):
10219 logging.warn("pause-sync of instance %s for disk %d failed",
10222 feedback_fn("* running the instance OS create scripts...")
10223 # FIXME: pass debug option from opcode to backend
10225 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10226 self.op.debug_level)
10228 feedback_fn("* resuming disk sync")
10229 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10232 for idx, success in enumerate(result.payload):
10234 logging.warn("resume-sync of instance %s for disk %d failed",
10237 os_add_result.Raise("Could not add os for instance %s"
10238 " on node %s" % (instance, pnode_name))
10241 if self.op.mode == constants.INSTANCE_IMPORT:
10242 feedback_fn("* running the instance OS import scripts...")
10246 for idx, image in enumerate(self.src_images):
10250 # FIXME: pass debug option from opcode to backend
10251 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10252 constants.IEIO_FILE, (image, ),
10253 constants.IEIO_SCRIPT,
10254 (iobj.disks[idx], idx),
10256 transfers.append(dt)
10259 masterd.instance.TransferInstanceData(self, feedback_fn,
10260 self.op.src_node, pnode_name,
10261 self.pnode.secondary_ip,
10263 if not compat.all(import_result):
10264 self.LogWarning("Some disks for instance %s on node %s were not"
10265 " imported successfully" % (instance, pnode_name))
10267 rename_from = self._old_instance_name
10269 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10270 feedback_fn("* preparing remote import...")
10271 # The source cluster will stop the instance before attempting to make
10272 # a connection. In some cases stopping an instance can take a long
10273 # time, hence the shutdown timeout is added to the connection
10275 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10276 self.op.source_shutdown_timeout)
10277 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10279 assert iobj.primary_node == self.pnode.name
10281 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10282 self.source_x509_ca,
10283 self._cds, timeouts)
10284 if not compat.all(disk_results):
10285 # TODO: Should the instance still be started, even if some disks
10286 # failed to import (valid for local imports, too)?
10287 self.LogWarning("Some disks for instance %s on node %s were not"
10288 " imported successfully" % (instance, pnode_name))
10290 rename_from = self.source_instance_name
10293 # also checked in the prereq part
10294 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10297 # Run rename script on newly imported instance
10298 assert iobj.name == instance
10299 feedback_fn("Running rename script for %s" % instance)
10300 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10302 self.op.debug_level)
10303 if result.fail_msg:
10304 self.LogWarning("Failed to run rename script for %s on node"
10305 " %s: %s" % (instance, pnode_name, result.fail_msg))
10307 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10310 iobj.admin_state = constants.ADMINST_UP
10311 self.cfg.Update(iobj, feedback_fn)
10312 logging.info("Starting instance %s on node %s", instance, pnode_name)
10313 feedback_fn("* starting instance...")
10314 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10316 result.Raise("Could not start instance")
10318 return list(iobj.all_nodes)
10321 def _CheckRADOSFreeSpace():
10322 """Compute disk size requirements inside the RADOS cluster.
10325 # For the RADOS cluster we assume there is always enough space.
10329 class LUInstanceConsole(NoHooksLU):
10330 """Connect to an instance's console.
10332 This is somewhat special in that it returns the command line that
10333 you need to run on the master node in order to connect to the
10339 def ExpandNames(self):
10340 self.share_locks = _ShareAll()
10341 self._ExpandAndLockInstance()
10343 def CheckPrereq(self):
10344 """Check prerequisites.
10346 This checks that the instance is in the cluster.
10349 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10350 assert self.instance is not None, \
10351 "Cannot retrieve locked instance %s" % self.op.instance_name
10352 _CheckNodeOnline(self, self.instance.primary_node)
10354 def Exec(self, feedback_fn):
10355 """Connect to the console of an instance
10358 instance = self.instance
10359 node = instance.primary_node
10361 node_insts = self.rpc.call_instance_list([node],
10362 [instance.hypervisor])[node]
10363 node_insts.Raise("Can't get node information from %s" % node)
10365 if instance.name not in node_insts.payload:
10366 if instance.admin_state == constants.ADMINST_UP:
10367 state = constants.INSTST_ERRORDOWN
10368 elif instance.admin_state == constants.ADMINST_DOWN:
10369 state = constants.INSTST_ADMINDOWN
10371 state = constants.INSTST_ADMINOFFLINE
10372 raise errors.OpExecError("Instance %s is not running (state %s)" %
10373 (instance.name, state))
10375 logging.debug("Connecting to console of %s on %s", instance.name, node)
10377 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10380 def _GetInstanceConsole(cluster, instance):
10381 """Returns console information for an instance.
10383 @type cluster: L{objects.Cluster}
10384 @type instance: L{objects.Instance}
10388 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10389 # beparams and hvparams are passed separately, to avoid editing the
10390 # instance and then saving the defaults in the instance itself.
10391 hvparams = cluster.FillHV(instance)
10392 beparams = cluster.FillBE(instance)
10393 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10395 assert console.instance == instance.name
10396 assert console.Validate()
10398 return console.ToDict()
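# Minimal usage sketch (assuming the standard CLI wrapper): the dictionary
# returned here is what "gnt-instance console <instance>" consumes on the
# client side to build the actual console command; the hypervisor's
# GetInstanceConsole() decides whether that ends up being an SSH/serial
# command, a VNC endpoint or just an informational message.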
10401 class LUInstanceReplaceDisks(LogicalUnit):
10402 """Replace the disks of an instance.
10405 HPATH = "mirrors-replace"
10406 HTYPE = constants.HTYPE_INSTANCE
10409 def CheckArguments(self):
10410 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10411 self.op.iallocator)
10413 def ExpandNames(self):
10414 self._ExpandAndLockInstance()
10416 assert locking.LEVEL_NODE not in self.needed_locks
10417 assert locking.LEVEL_NODE_RES not in self.needed_locks
10418 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10420 assert self.op.iallocator is None or self.op.remote_node is None, \
10421 "Conflicting options"
10423 if self.op.remote_node is not None:
10424 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10426 # Warning: do not remove the locking of the new secondary here
10427 # unless DRBD8.AddChildren is changed to work in parallel;
10428 # currently it doesn't since parallel invocations of
10429 # FindUnusedMinor will conflict
10430 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10431 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10433 self.needed_locks[locking.LEVEL_NODE] = []
10434 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10436 if self.op.iallocator is not None:
10437 # iallocator will select a new node in the same group
10438 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10440 self.needed_locks[locking.LEVEL_NODE_RES] = []
10442 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10443 self.op.iallocator, self.op.remote_node,
10444 self.op.disks, False, self.op.early_release,
10445 self.op.ignore_ipolicy)
10447 self.tasklets = [self.replacer]
10449 def DeclareLocks(self, level):
10450 if level == locking.LEVEL_NODEGROUP:
10451 assert self.op.remote_node is None
10452 assert self.op.iallocator is not None
10453 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10455 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10456 # Lock all groups used by instance optimistically; this requires going
10457 # via the node before it's locked, requiring verification later on
10458 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10459 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10461 elif level == locking.LEVEL_NODE:
10462 if self.op.iallocator is not None:
10463 assert self.op.remote_node is None
10464 assert not self.needed_locks[locking.LEVEL_NODE]
10466 # Lock member nodes of all locked groups
10467 self.needed_locks[locking.LEVEL_NODE] = [node_name
10468 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10469 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10471 self._LockInstancesNodes()
10472 elif level == locking.LEVEL_NODE_RES:
10474 self.needed_locks[locking.LEVEL_NODE_RES] = \
10475 self.needed_locks[locking.LEVEL_NODE]
10477 def BuildHooksEnv(self):
10478 """Build hooks env.
10480 This runs on the master, the primary and all the secondaries.
10483 instance = self.replacer.instance
10485 "MODE": self.op.mode,
10486 "NEW_SECONDARY": self.op.remote_node,
10487 "OLD_SECONDARY": instance.secondary_nodes[0],
10489 env.update(_BuildInstanceHookEnvByObject(self, instance))
10492 def BuildHooksNodes(self):
10493 """Build hooks nodes.
10496 instance = self.replacer.instance
10498 self.cfg.GetMasterNode(),
10499 instance.primary_node,
10501 if self.op.remote_node is not None:
10502 nl.append(self.op.remote_node)
10505 def CheckPrereq(self):
10506 """Check prerequisites.
10509 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10510 self.op.iallocator is None)
10512 # Verify if node group locks are still correct
10513 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10515 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10517 return LogicalUnit.CheckPrereq(self)
10520 class TLReplaceDisks(Tasklet):
10521 """Replaces disks for an instance.
10523 Note: Locking is not within the scope of this class.
10526 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10527 disks, delay_iallocator, early_release, ignore_ipolicy):
10528 """Initializes this class.
10531 Tasklet.__init__(self, lu)
10534 self.instance_name = instance_name
10536 self.iallocator_name = iallocator_name
10537 self.remote_node = remote_node
10539 self.delay_iallocator = delay_iallocator
10540 self.early_release = early_release
10541 self.ignore_ipolicy = ignore_ipolicy
10544 self.instance = None
10545 self.new_node = None
10546 self.target_node = None
10547 self.other_node = None
10548 self.remote_node_info = None
10549 self.node_secondary_ip = None
10552 def CheckArguments(mode, remote_node, iallocator):
10553 """Helper function for users of this class.
10556 # check for valid parameter combination
10557 if mode == constants.REPLACE_DISK_CHG:
10558 if remote_node is None and iallocator is None:
10559 raise errors.OpPrereqError("When changing the secondary either an"
10560 " iallocator script must be used or the"
10561 " new node given", errors.ECODE_INVAL)
10563 if remote_node is not None and iallocator is not None:
10564 raise errors.OpPrereqError("Give either the iallocator or the new"
10565 " secondary, not both", errors.ECODE_INVAL)
10567 elif remote_node is not None or iallocator is not None:
10568 # Not replacing the secondary
10569 raise errors.OpPrereqError("The iallocator and new node options can"
10570 " only be used when changing the"
10571 " secondary node", errors.ECODE_INVAL)
10574 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10575 """Compute a new secondary node using an IAllocator.
10578 ial = IAllocator(lu.cfg, lu.rpc,
10579 mode=constants.IALLOCATOR_MODE_RELOC,
10580 name=instance_name,
10581 relocate_from=list(relocate_from))
10583 ial.Run(iallocator_name)
10585 if not ial.success:
10586 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10587 " %s" % (iallocator_name, ial.info),
10588 errors.ECODE_NORES)
10590 if len(ial.result) != ial.required_nodes:
10591 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10592 " of nodes (%s), required %s" %
10594 len(ial.result), ial.required_nodes),
10595 errors.ECODE_FAULT)
10597 remote_node_name = ial.result[0]
10599 lu.LogInfo("Selected new secondary for instance '%s': %s",
10600 instance_name, remote_node_name)
10602 return remote_node_name
10604 def _FindFaultyDisks(self, node_name):
10605 """Wrapper for L{_FindFaultyInstanceDisks}.
10608 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10611 def _CheckDisksActivated(self, instance):
10612 """Checks if the instance disks are activated.
10614 @param instance: The instance to check disks
10615 @return: True if they are activated, False otherwise
10618 nodes = instance.all_nodes
10620 for idx, dev in enumerate(instance.disks):
10622 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10623 self.cfg.SetDiskID(dev, node)
10625 result = _BlockdevFind(self, node, dev, instance)
10629 elif result.fail_msg or not result.payload:
10634 def CheckPrereq(self):
10635 """Check prerequisites.
10637 This checks that the instance is in the cluster.
10640 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10641 assert instance is not None, \
10642 "Cannot retrieve locked instance %s" % self.instance_name
10644 if instance.disk_template != constants.DT_DRBD8:
10645 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10646 " instances", errors.ECODE_INVAL)
10648 if len(instance.secondary_nodes) != 1:
10649 raise errors.OpPrereqError("The instance has a strange layout,"
10650 " expected one secondary but found %d" %
10651 len(instance.secondary_nodes),
10652 errors.ECODE_FAULT)
10654 if not self.delay_iallocator:
10655 self._CheckPrereq2()
10657 def _CheckPrereq2(self):
10658 """Check prerequisites, second part.
10660 This function should always be part of CheckPrereq. It was separated and is
10661 now called from Exec because during node evacuation iallocator was only
10662 called with an unmodified cluster model, not taking planned changes into
10666 instance = self.instance
10667 secondary_node = instance.secondary_nodes[0]
10669 if self.iallocator_name is None:
10670 remote_node = self.remote_node
10672 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10673 instance.name, instance.secondary_nodes)
10675 if remote_node is None:
10676 self.remote_node_info = None
10678 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10679 "Remote node '%s' is not locked" % remote_node
10681 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10682 assert self.remote_node_info is not None, \
10683 "Cannot retrieve locked node %s" % remote_node
10685 if remote_node == self.instance.primary_node:
10686 raise errors.OpPrereqError("The specified node is the primary node of"
10687 " the instance", errors.ECODE_INVAL)
10689 if remote_node == secondary_node:
10690 raise errors.OpPrereqError("The specified node is already the"
10691 " secondary node of the instance",
10692 errors.ECODE_INVAL)
10694 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10695 constants.REPLACE_DISK_CHG):
10696 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10697 errors.ECODE_INVAL)
10699 if self.mode == constants.REPLACE_DISK_AUTO:
10700 if not self._CheckDisksActivated(instance):
10701 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10702 " first" % self.instance_name,
10703 errors.ECODE_STATE)
10704 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10705 faulty_secondary = self._FindFaultyDisks(secondary_node)
10707 if faulty_primary and faulty_secondary:
10708 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10709 " one node and can not be repaired"
10710 " automatically" % self.instance_name,
10711 errors.ECODE_STATE)
10714 self.disks = faulty_primary
10715 self.target_node = instance.primary_node
10716 self.other_node = secondary_node
10717 check_nodes = [self.target_node, self.other_node]
10718 elif faulty_secondary:
10719 self.disks = faulty_secondary
10720 self.target_node = secondary_node
10721 self.other_node = instance.primary_node
10722 check_nodes = [self.target_node, self.other_node]
10728 # Non-automatic modes
10729 if self.mode == constants.REPLACE_DISK_PRI:
10730 self.target_node = instance.primary_node
10731 self.other_node = secondary_node
10732 check_nodes = [self.target_node, self.other_node]
10734 elif self.mode == constants.REPLACE_DISK_SEC:
10735 self.target_node = secondary_node
10736 self.other_node = instance.primary_node
10737 check_nodes = [self.target_node, self.other_node]
10739 elif self.mode == constants.REPLACE_DISK_CHG:
10740 self.new_node = remote_node
10741 self.other_node = instance.primary_node
10742 self.target_node = secondary_node
10743 check_nodes = [self.new_node, self.other_node]
10745 _CheckNodeNotDrained(self.lu, remote_node)
10746 _CheckNodeVmCapable(self.lu, remote_node)
10748 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10749 assert old_node_info is not None
10750 if old_node_info.offline and not self.early_release:
10751 # doesn't make sense to delay the release
10752 self.early_release = True
10753 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10754 " early-release mode", secondary_node)
10757 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10760 # If not specified all disks should be replaced
10762 self.disks = range(len(self.instance.disks))
10764 # TODO: This is ugly, but right now we can't distinguish between an internally
10765 # submitted opcode and an external one. We should fix that.
10766 if self.remote_node_info:
10767 # We change the node, lets verify it still meets instance policy
10768 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10769 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10771 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10772 ignore=self.ignore_ipolicy)
10774 for node in check_nodes:
10775 _CheckNodeOnline(self.lu, node)
10777 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10780 if node_name is not None)
10782 # Release unneeded node and node resource locks
10783 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10784 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10786 # Release any owned node group
10787 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10788 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10790 # Check whether disks are valid
10791 for disk_idx in self.disks:
10792 instance.FindDisk(disk_idx)
10794 # Get secondary node IP addresses
10795 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10796 in self.cfg.GetMultiNodeInfo(touched_nodes))
10798 def Exec(self, feedback_fn):
10799 """Execute disk replacement.
10801 This dispatches the disk replacement to the appropriate handler.
10804 if self.delay_iallocator:
10805 self._CheckPrereq2()
10808 # Verify owned locks before starting operation
10809 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10810 assert set(owned_nodes) == set(self.node_secondary_ip), \
10811 ("Incorrect node locks, owning %s, expected %s" %
10812 (owned_nodes, self.node_secondary_ip.keys()))
10813 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10814 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10816 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10817 assert list(owned_instances) == [self.instance_name], \
10818 "Instance '%s' not locked" % self.instance_name
10820 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10821 "Should not own any node group lock at this point"
10824 feedback_fn("No disks need replacement")
10827 feedback_fn("Replacing disk(s) %s for %s" %
10828 (utils.CommaJoin(self.disks), self.instance.name))
10830 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10832 # Activate the instance disks if we're replacing them on a down instance
10834 _StartInstanceDisks(self.lu, self.instance, True)
10837 # Should we replace the secondary node?
10838 if self.new_node is not None:
10839 fn = self._ExecDrbd8Secondary
10841 fn = self._ExecDrbd8DiskOnly
10843 result = fn(feedback_fn)
10845 # Deactivate the instance disks if we're replacing them on a
10848 _SafeShutdownInstanceDisks(self.lu, self.instance)
10850 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10853 # Verify owned locks
10854 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10855 nodes = frozenset(self.node_secondary_ip)
10856 assert ((self.early_release and not owned_nodes) or
10857 (not self.early_release and not (set(owned_nodes) - nodes))), \
10858 ("Not owning the correct locks, early_release=%s, owned=%r,"
10859 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10863 def _CheckVolumeGroup(self, nodes):
10864 self.lu.LogInfo("Checking volume groups")
10866 vgname = self.cfg.GetVGName()
10868 # Make sure volume group exists on all involved nodes
10869 results = self.rpc.call_vg_list(nodes)
10871 raise errors.OpExecError("Can't list volume groups on the nodes")
10874 res = results[node]
10875 res.Raise("Error checking node %s" % node)
10876 if vgname not in res.payload:
10877 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10880 def _CheckDisksExistence(self, nodes):
10881 # Check disk existence
10882 for idx, dev in enumerate(self.instance.disks):
10883 if idx not in self.disks:
10887 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10888 self.cfg.SetDiskID(dev, node)
10890 result = _BlockdevFind(self, node, dev, self.instance)
10892 msg = result.fail_msg
10893 if msg or not result.payload:
10895 msg = "disk not found"
10896 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10899 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10900 for idx, dev in enumerate(self.instance.disks):
10901 if idx not in self.disks:
10904 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10907 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10908 on_primary, ldisk=ldisk):
10909 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10910 " replace disks for instance %s" %
10911 (node_name, self.instance.name))
10913 def _CreateNewStorage(self, node_name):
10914 """Create new storage on the primary or secondary node.
10916 This is only used for same-node replaces, not for changing the
10917 secondary node, hence we don't want to modify the existing disk.
10922 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10923 for idx, dev in enumerate(disks):
10924 if idx not in self.disks:
10927 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10929 self.cfg.SetDiskID(dev, node_name)
10931 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10932 names = _GenerateUniqueNames(self.lu, lv_names)
10934 (data_disk, meta_disk) = dev.children
10935 vg_data = data_disk.logical_id[0]
10936 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10937 logical_id=(vg_data, names[0]),
10938 params=data_disk.params)
10939 vg_meta = meta_disk.logical_id[0]
10940 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10941 logical_id=(vg_meta, names[1]),
10942 params=meta_disk.params)
10944 new_lvs = [lv_data, lv_meta]
10945 old_lvs = [child.Copy() for child in dev.children]
10946 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10948 # we pass force_create=True to force the LVM creation
10949 for new_lv in new_lvs:
10950 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10951 _GetInstanceInfoText(self.instance), False)
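# For every disk being replaced, the loop above allocates a fresh data LV of
# the disk's size plus a metadata LV of DRBD_META_SIZE, in the same volume
# groups as the old children; the DRBD device is later re-attached to the new
# pair while the old pair is kept around (renamed) until the resync completes
# and the old storage is removed.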
10955 def _CheckDevices(self, node_name, iv_names):
10956 for name, (dev, _, _) in iv_names.iteritems():
10957 self.cfg.SetDiskID(dev, node_name)
10959 result = _BlockdevFind(self, node_name, dev, self.instance)
10961 msg = result.fail_msg
10962 if msg or not result.payload:
10964 msg = "disk not found"
10965 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10968 if result.payload.is_degraded:
10969 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10971 def _RemoveOldStorage(self, node_name, iv_names):
10972 for name, (_, old_lvs, _) in iv_names.iteritems():
10973 self.lu.LogInfo("Remove logical volumes for %s" % name)
10976 self.cfg.SetDiskID(lv, node_name)
10978 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10980 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10981 hint="remove unused LVs manually")
10983 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10984 """Replace a disk on the primary or secondary for DRBD 8.
10986 The algorithm for replace is quite complicated:
10988 1. for each disk to be replaced:
10990 1. create new LVs on the target node with unique names
10991 1. detach old LVs from the drbd device
10992 1. rename old LVs to name_replaced.<time_t>
10993 1. rename new LVs to old LVs
10994 1. attach the new LVs (with the old names now) to the drbd device
10996 1. wait for sync across all devices
10998 1. for each modified disk:
11000 1. remove old LVs (which have the name name_replaced.<time_t>)
11002 Failures are not very well handled.
11007 # Step: check device activation
11008 self.lu.LogStep(1, steps_total, "Check device existence")
11009 self._CheckDisksExistence([self.other_node, self.target_node])
11010 self._CheckVolumeGroup([self.target_node, self.other_node])
11012 # Step: check other node consistency
11013 self.lu.LogStep(2, steps_total, "Check peer consistency")
11014 self._CheckDisksConsistency(self.other_node,
11015 self.other_node == self.instance.primary_node,
11018 # Step: create new storage
11019 self.lu.LogStep(3, steps_total, "Allocate new storage")
11020 iv_names = self._CreateNewStorage(self.target_node)
11022 # Step: for each lv, detach+rename*2+attach
11023 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11024 for dev, old_lvs, new_lvs in iv_names.itervalues():
11025 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11027 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11028 old_lvs)
11029 result.Raise("Can't detach drbd from local storage on node"
11030 " %s for device %s" % (self.target_node, dev.iv_name))
11032 #cfg.Update(instance)
11034 # ok, we created the new LVs, so now we know we have the needed
11035 # storage; as such, we proceed on the target node to rename
11036 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11037 # using the assumption that logical_id == physical_id (which in
11038 # turn is the unique_id on that node)
11040 # FIXME(iustin): use a better name for the replaced LVs
11041 temp_suffix = int(time.time())
11042 ren_fn = lambda d, suff: (d.physical_id[0],
11043 d.physical_id[1] + "_replaced-%s" % suff)
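# ren_fn keeps the LV's volume group and only suffixes its name, so the
# renamed old volume stays in place and can still be removed later by name.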
11045 # Build the rename list based on what LVs exist on the node
11046 rename_old_to_new = []
11047 for to_ren in old_lvs:
11048 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11049 if not result.fail_msg and result.payload:
11051 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11053 self.lu.LogInfo("Renaming the old LVs on the target node")
11054 result = self.rpc.call_blockdev_rename(self.target_node,
11055 rename_old_to_new)
11056 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11058 # Now we rename the new LVs to the old LVs
11059 self.lu.LogInfo("Renaming the new LVs on the target node")
11060 rename_new_to_old = [(new, old.physical_id)
11061 for old, new in zip(old_lvs, new_lvs)]
11062 result = self.rpc.call_blockdev_rename(self.target_node,
11063 rename_new_to_old)
11064 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11066 # Intermediate steps of in memory modifications
11067 for old, new in zip(old_lvs, new_lvs):
11068 new.logical_id = old.logical_id
11069 self.cfg.SetDiskID(new, self.target_node)
11071 # We need to modify old_lvs so that removal later removes the
11072 # right LVs, not the newly added ones; note that old_lvs is a
11073 # copy here
11074 for disk in old_lvs:
11075 disk.logical_id = ren_fn(disk, temp_suffix)
11076 self.cfg.SetDiskID(disk, self.target_node)
11078 # Now that the new lvs have the old name, we can add them to the device
11079 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11080 result = self.rpc.call_blockdev_addchildren(self.target_node,
11081 (dev, self.instance), new_lvs)
11082 msg = result.fail_msg
11083 if msg:
11084 for new_lv in new_lvs:
11085 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11086 new_lv).fail_msg
11087 if msg2:
11088 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11089 hint=("cleanup manually the unused logical"
11090 " volumes"))
11091 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11093 cstep = itertools.count(5)
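# cstep continues the step numbering at 5; depending on early_release the
# "Removing old storage" step runs either before or after the sync wait below.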
11095 if self.early_release:
11096 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11097 self._RemoveOldStorage(self.target_node, iv_names)
11098 # TODO: Check if releasing locks early still makes sense
11099 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11100 else:
11101 # Release all resource locks except those used by the instance
11102 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11103 keep=self.node_secondary_ip.keys())
11105 # Release all node locks while waiting for sync
11106 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11108 # TODO: Can the instance lock be downgraded here? Take the optional disk
11109 # shutdown in the caller into consideration.
11112 # This can fail as the old devices are degraded and _WaitForSync
11113 # does a combined result over all disks, so we don't check its return value
11114 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11115 _WaitForSync(self.lu, self.instance)
11117 # Check all devices manually
11118 self._CheckDevices(self.instance.primary_node, iv_names)
11120 # Step: remove old storage
11121 if not self.early_release:
11122 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11123 self._RemoveOldStorage(self.target_node, iv_names)
11125 def _ExecDrbd8Secondary(self, feedback_fn):
11126 """Replace the secondary node for DRBD 8.
11128 The algorithm for replace is quite complicated:
11129 - for all disks of the instance:
11130 - create new LVs on the new node with same names
11131 - shutdown the drbd device on the old secondary
11132 - disconnect the drbd network on the primary
11133 - create the drbd device on the new secondary
11134 - network attach the drbd on the primary, using an artifice:
11135 the drbd code for Attach() will connect to the network if it
11136 finds a device which is connected to the good local disks but
11137 not network enabled
11138 - wait for sync across all devices
11139 - remove all disks from the old secondary
11141 Failures are not very well handled.
11143 """
11144 steps_total = 6
11146 pnode = self.instance.primary_node
11148 # Step: check device activation
11149 self.lu.LogStep(1, steps_total, "Check device existence")
11150 self._CheckDisksExistence([self.instance.primary_node])
11151 self._CheckVolumeGroup([self.instance.primary_node])
11153 # Step: check other node consistency
11154 self.lu.LogStep(2, steps_total, "Check peer consistency")
11155 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11157 # Step: create new storage
11158 self.lu.LogStep(3, steps_total, "Allocate new storage")
11159 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11160 for idx, dev in enumerate(disks):
11161 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11162 (self.new_node, idx))
11163 # we pass force_create=True to force LVM creation
11164 for new_lv in dev.children:
11165 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11166 True, _GetInstanceInfoText(self.instance), False)
11168 # Step 4: dbrd minors and drbd setups changes
11169 # after this, we must manually remove the drbd minors on both the
11170 # error and the success paths
11171 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11172 minors = self.cfg.AllocateDRBDMinor([self.new_node
11173 for dev in self.instance.disks],
11174 self.instance.name)
11175 logging.debug("Allocated minors %r", minors)
11177 iv_names = {}
11178 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11179 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11180 (self.new_node, idx))
11181 # create new devices on new_node; note that we create two IDs:
11182 # one without port, so the drbd will be activated without
11183 # networking information on the new node at this stage, and one
11184 # with network, for the latter activation in step 4
11185 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11186 if self.instance.primary_node == o_node1:
11187 p_minor = o_minor1
11188 else:
11189 assert self.instance.primary_node == o_node2, "Three-node instance?"
11190 p_minor = o_minor2
11192 new_alone_id = (self.instance.primary_node, self.new_node, None,
11193 p_minor, new_minor, o_secret)
11194 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11195 p_minor, new_minor, o_secret)
11197 iv_names[idx] = (dev, dev.children, new_net_id)
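# Here iv_names is indexed by disk index and records the original children
# together with the network-enabled logical ID that the configuration is
# switched to once the old secondary has been detached.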
11198 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11199 new_net_id)
11200 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11201 logical_id=new_alone_id,
11202 children=dev.children,
11203 size=dev.size,
11204 params={})
11205 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11206 self.cfg)
11207 try:
11208 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11209 anno_new_drbd,
11210 _GetInstanceInfoText(self.instance), False)
11211 except errors.GenericError:
11212 self.cfg.ReleaseDRBDMinors(self.instance.name)
11213 raise
11215 # We have new devices, shutdown the drbd on the old secondary
11216 for idx, dev in enumerate(self.instance.disks):
11217 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11218 self.cfg.SetDiskID(dev, self.target_node)
11219 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11220 (dev, self.instance)).fail_msg
11221 if msg:
11222 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11223 " node: %s" % (idx, msg),
11224 hint=("Please cleanup this device manually as"
11225 " soon as possible"))
11227 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11228 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11229 self.instance.disks)[pnode]
11231 msg = result.fail_msg
11232 if msg:
11233 # detaches didn't succeed (unlikely)
11234 self.cfg.ReleaseDRBDMinors(self.instance.name)
11235 raise errors.OpExecError("Can't detach the disks from the network on"
11236 " old node: %s" % (msg,))
11238 # if we managed to detach at least one, we update all the disks of
11239 # the instance to point to the new secondary
11240 self.lu.LogInfo("Updating instance configuration")
11241 for dev, _, new_logical_id in iv_names.itervalues():
11242 dev.logical_id = new_logical_id
11243 self.cfg.SetDiskID(dev, self.instance.primary_node)
11245 self.cfg.Update(self.instance, feedback_fn)
11247 # Release all node locks (the configuration has been updated)
11248 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11250 # and now perform the drbd attach
11251 self.lu.LogInfo("Attaching primary drbds to new secondary"
11252 " (standalone => connected)")
11253 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11254 self.new_node],
11255 self.node_secondary_ip,
11256 (self.instance.disks, self.instance),
11257 self.instance.name,
11258 False)
11259 for to_node, to_result in result.items():
11260 msg = to_result.fail_msg
11261 if msg:
11262 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11263 to_node, msg,
11264 hint=("please do a gnt-instance info to see the"
11265 " status of disks"))
11267 cstep = itertools.count(5)
11269 if self.early_release:
11270 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11271 self._RemoveOldStorage(self.target_node, iv_names)
11272 # TODO: Check if releasing locks early still makes sense
11273 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11274 else:
11275 # Release all resource locks except those used by the instance
11276 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11277 keep=self.node_secondary_ip.keys())
11279 # TODO: Can the instance lock be downgraded here? Take the optional disk
11280 # shutdown in the caller into consideration.
11283 # This can fail as the old devices are degraded and _WaitForSync
11284 # does a combined result over all disks, so we don't check its return value
11285 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11286 _WaitForSync(self.lu, self.instance)
11288 # Check all devices manually
11289 self._CheckDevices(self.instance.primary_node, iv_names)
11291 # Step: remove old storage
11292 if not self.early_release:
11293 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11294 self._RemoveOldStorage(self.target_node, iv_names)
11297 class LURepairNodeStorage(NoHooksLU):
11298 """Repairs the volume group on a node.
11303 def CheckArguments(self):
11304 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11306 storage_type = self.op.storage_type
11308 if (constants.SO_FIX_CONSISTENCY not in
11309 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11310 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11311 " repaired" % storage_type,
11312 errors.ECODE_INVAL)
11314 def ExpandNames(self):
11315 self.needed_locks = {
11316 locking.LEVEL_NODE: [self.op.node_name],
11317 }
11319 def _CheckFaultyDisks(self, instance, node_name):
11320 """Ensure faulty disks abort the opcode or at least warn."""
11321 try:
11322 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11323 node_name, True):
11324 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11325 " node '%s'" % (instance.name, node_name),
11326 errors.ECODE_STATE)
11327 except errors.OpPrereqError, err:
11328 if self.op.ignore_consistency:
11329 self.proc.LogWarning(str(err.args[0]))
11330 else:
11331 raise
11333 def CheckPrereq(self):
11334 """Check prerequisites.
11337 # Check whether any instance on this node has faulty disks
11338 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11339 if inst.admin_state != constants.ADMINST_UP:
11340 continue
11341 check_nodes = set(inst.all_nodes)
11342 check_nodes.discard(self.op.node_name)
11343 for inst_node_name in check_nodes:
11344 self._CheckFaultyDisks(inst, inst_node_name)
11346 def Exec(self, feedback_fn):
11347 feedback_fn("Repairing storage unit '%s' on %s ..." %
11348 (self.op.name, self.op.node_name))
11350 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11351 result = self.rpc.call_storage_execute(self.op.node_name,
11352 self.op.storage_type, st_args,
11353 self.op.name,
11354 constants.SO_FIX_CONSISTENCY)
11355 result.Raise("Failed to repair storage unit '%s' on %s" %
11356 (self.op.name, self.op.node_name))
11359 class LUNodeEvacuate(NoHooksLU):
11360 """Evacuates instances off a list of nodes.
11365 _MODE2IALLOCATOR = {
11366 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11367 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11368 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11369 }
11370 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11371 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11372 constants.IALLOCATOR_NEVAC_MODES)
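# These class-level assertions keep _MODE2IALLOCATOR in sync with the
# constants: adding a new evacuation or NEVAC mode without updating the map
# above trips an assertion as soon as the module is loaded (when assertions
# are enabled).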
11374 def CheckArguments(self):
11375 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11377 def ExpandNames(self):
11378 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11380 if self.op.remote_node is not None:
11381 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11382 assert self.op.remote_node
11384 if self.op.remote_node == self.op.node_name:
11385 raise errors.OpPrereqError("Can not use evacuated node as a new"
11386 " secondary node", errors.ECODE_INVAL)
11388 if self.op.mode != constants.NODE_EVAC_SEC:
11389 raise errors.OpPrereqError("Without the use of an iallocator only"
11390 " secondary instances can be evacuated",
11391 errors.ECODE_INVAL)
11394 self.share_locks = _ShareAll()
11395 self.needed_locks = {
11396 locking.LEVEL_INSTANCE: [],
11397 locking.LEVEL_NODEGROUP: [],
11398 locking.LEVEL_NODE: [],
11399 }
11401 # Determine nodes (via group) optimistically, needs verification once locks
11402 # have been acquired
11403 self.lock_nodes = self._DetermineNodes()
11405 def _DetermineNodes(self):
11406 """Gets the list of nodes to operate on.
11409 if self.op.remote_node is None:
11410 # Iallocator will choose any node(s) in the same group
11411 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11412 else:
11413 group_nodes = frozenset([self.op.remote_node])
11415 # Determine nodes to be locked
11416 return set([self.op.node_name]) | group_nodes
11418 def _DetermineInstances(self):
11419 """Builds list of instances to operate on.
11422 assert self.op.mode in constants.NODE_EVAC_MODES
11424 if self.op.mode == constants.NODE_EVAC_PRI:
11425 # Primary instances only
11426 inst_fn = _GetNodePrimaryInstances
11427 assert self.op.remote_node is None, \
11428 "Evacuating primary instances requires iallocator"
11429 elif self.op.mode == constants.NODE_EVAC_SEC:
11430 # Secondary instances only
11431 inst_fn = _GetNodeSecondaryInstances
11432 else:
11433 # All instances
11434 assert self.op.mode == constants.NODE_EVAC_ALL
11435 inst_fn = _GetNodeInstances
11436 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11438 raise errors.OpPrereqError("Due to an issue with the iallocator"
11439 " interface it is not possible to evacuate"
11440 " all instances at once; specify explicitly"
11441 " whether to evacuate primary or secondary"
11443 errors.ECODE_INVAL)
11445 return inst_fn(self.cfg, self.op.node_name)
11447 def DeclareLocks(self, level):
11448 if level == locking.LEVEL_INSTANCE:
11449 # Lock instances optimistically, needs verification once node and group
11450 # locks have been acquired
11451 self.needed_locks[locking.LEVEL_INSTANCE] = \
11452 set(i.name for i in self._DetermineInstances())
11454 elif level == locking.LEVEL_NODEGROUP:
11455 # Lock node groups for all potential target nodes optimistically, needs
11456 # verification once nodes have been acquired
11457 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11458 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11460 elif level == locking.LEVEL_NODE:
11461 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11463 def CheckPrereq(self):
11465 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11466 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11467 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11469 need_nodes = self._DetermineNodes()
11471 if not owned_nodes.issuperset(need_nodes):
11472 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11473 " locks were acquired, current nodes are"
11474 " are '%s', used to be '%s'; retry the"
11476 (self.op.node_name,
11477 utils.CommaJoin(need_nodes),
11478 utils.CommaJoin(owned_nodes)),
11479 errors.ECODE_STATE)
11481 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11482 if owned_groups != wanted_groups:
11483 raise errors.OpExecError("Node groups changed since locks were acquired,"
11484 " current groups are '%s', used to be '%s';"
11485 " retry the operation" %
11486 (utils.CommaJoin(wanted_groups),
11487 utils.CommaJoin(owned_groups)))
11489 # Determine affected instances
11490 self.instances = self._DetermineInstances()
11491 self.instance_names = [i.name for i in self.instances]
11493 if set(self.instance_names) != owned_instances:
11494 raise errors.OpExecError("Instances on node '%s' changed since locks"
11495 " were acquired, current instances are '%s',"
11496 " used to be '%s'; retry the operation" %
11497 (self.op.node_name,
11498 utils.CommaJoin(self.instance_names),
11499 utils.CommaJoin(owned_instances)))
11501 if self.instance_names:
11502 self.LogInfo("Evacuating instances from node '%s': %s",
11504 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11506 self.LogInfo("No instances to evacuate from node '%s'",
11509 if self.op.remote_node is not None:
11510 for i in self.instances:
11511 if i.primary_node == self.op.remote_node:
11512 raise errors.OpPrereqError("Node %s is the primary node of"
11513 " instance %s, cannot use it as"
11515 (self.op.remote_node, i.name),
11516 errors.ECODE_INVAL)
11518 def Exec(self, feedback_fn):
11519 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11521 if not self.instance_names:
11522 # No instances to evacuate
11523 jobs = []
11525 elif self.op.iallocator is not None:
11526 # TODO: Implement relocation to other group
11527 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11528 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11529 instances=list(self.instance_names))
11531 ial.Run(self.op.iallocator)
11533 if not ial.success:
11534 raise errors.OpPrereqError("Can't compute node evacuation using"
11535 " iallocator '%s': %s" %
11536 (self.op.iallocator, ial.info),
11537 errors.ECODE_NORES)
11539 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
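# The iallocator result is turned into one job (a list of opcodes) per moved
# instance; ResultWithJobs below returns them so they can be submitted as
# separate jobs.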
11541 elif self.op.remote_node is not None:
11542 assert self.op.mode == constants.NODE_EVAC_SEC
11543 jobs = [
11544 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11545 remote_node=self.op.remote_node,
11546 disks=[],
11547 mode=constants.REPLACE_DISK_CHG,
11548 early_release=self.op.early_release)]
11549 for instance_name in self.instance_names
11550 ]
11552 else:
11553 raise errors.ProgrammerError("No iallocator or remote node")
11555 return ResultWithJobs(jobs)
11558 def _SetOpEarlyRelease(early_release, op):
11559 """Sets C{early_release} flag on opcodes if available.
11563 op.early_release = early_release
11564 except AttributeError:
11565 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11570 def _NodeEvacDest(use_nodes, group, nodes):
11571 """Returns group or nodes depending on caller's choice.
11575 return utils.CommaJoin(nodes)
11580 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11581 """Unpacks the result of change-group and node-evacuate iallocator requests.
11583 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11584 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11586 @type lu: L{LogicalUnit}
11587 @param lu: Logical unit instance
11588 @type alloc_result: tuple/list
11589 @param alloc_result: Result from iallocator
11590 @type early_release: bool
11591 @param early_release: Whether to release locks early if possible
11592 @type use_nodes: bool
11593 @param use_nodes: Whether to display node names instead of groups
11595 """
11596 (moved, failed, jobs) = alloc_result
11598 if failed:
11599 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11600 for (name, reason) in failed)
11601 lu.LogWarning("Unable to evacuate instances %s", failreason)
11602 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11605 lu.LogInfo("Instances to be moved: %s",
11606 utils.CommaJoin("%s (to %s)" %
11607 (name, _NodeEvacDest(use_nodes, group, nodes))
11608 for (name, group, nodes) in moved))
11610 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11611 map(opcodes.OpCode.LoadOpCode, ops))
11612 for ops in jobs]
11615 class LUInstanceGrowDisk(LogicalUnit):
11616 """Grow a disk of an instance.
11619 HPATH = "disk-grow"
11620 HTYPE = constants.HTYPE_INSTANCE
11623 def ExpandNames(self):
11624 self._ExpandAndLockInstance()
11625 self.needed_locks[locking.LEVEL_NODE] = []
11626 self.needed_locks[locking.LEVEL_NODE_RES] = []
11627 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11628 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11630 def DeclareLocks(self, level):
11631 if level == locking.LEVEL_NODE:
11632 self._LockInstancesNodes()
11633 elif level == locking.LEVEL_NODE_RES:
11635 self.needed_locks[locking.LEVEL_NODE_RES] = \
11636 self.needed_locks[locking.LEVEL_NODE][:]
11638 def BuildHooksEnv(self):
11639 """Build hooks env.
11641 This runs on the master, the primary and all the secondaries.
11643 """
11644 env = {
11645 "DISK": self.op.disk,
11646 "AMOUNT": self.op.amount,
11647 "ABSOLUTE": self.op.absolute,
11648 }
11649 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11650 return env
11652 def BuildHooksNodes(self):
11653 """Build hooks nodes.
11656 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11659 def CheckPrereq(self):
11660 """Check prerequisites.
11662 This checks that the instance is in the cluster.
11664 """
11665 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11666 assert instance is not None, \
11667 "Cannot retrieve locked instance %s" % self.op.instance_name
11668 nodenames = list(instance.all_nodes)
11669 for node in nodenames:
11670 _CheckNodeOnline(self, node)
11672 self.instance = instance
11674 if instance.disk_template not in constants.DTS_GROWABLE:
11675 raise errors.OpPrereqError("Instance's disk layout does not support"
11676 " growing", errors.ECODE_INVAL)
11678 self.disk = instance.FindDisk(self.op.disk)
11680 if self.op.absolute:
11681 self.target = self.op.amount
11682 self.delta = self.target - self.disk.size
11683 if self.delta < 0:
11684 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11685 "current disk size (%s)" %
11686 (utils.FormatUnit(self.target, "h"),
11687 utils.FormatUnit(self.disk.size, "h")),
11688 errors.ECODE_STATE)
11689 else:
11690 self.delta = self.op.amount
11691 self.target = self.disk.size + self.delta
11692 if self.delta < 0:
11693 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11694 utils.FormatUnit(self.delta, "h"),
11695 errors.ECODE_INVAL)
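# In both the absolute and the relative case, self.delta ends up holding the
# size increase and self.target the resulting disk size; both are used by
# Exec below.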
11697 if instance.disk_template not in (constants.DT_FILE,
11698 constants.DT_SHARED_FILE,
11699 constants.DT_RBD,
11700 constants.DT_EXT):
11701 # TODO: check the free disk space for file, when that feature will be
11702 # supported
11703 _CheckNodesFreeDiskPerVG(self, nodenames,
11704 self.disk.ComputeGrowth(self.delta))
11706 def Exec(self, feedback_fn):
11707 """Execute disk grow.
11710 instance = self.instance
11713 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11714 assert (self.owned_locks(locking.LEVEL_NODE) ==
11715 self.owned_locks(locking.LEVEL_NODE_RES))
11717 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11718 if not disks_ok:
11719 raise errors.OpExecError("Cannot activate block device to grow")
11721 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11722 (self.op.disk, instance.name,
11723 utils.FormatUnit(self.delta, "h"),
11724 utils.FormatUnit(self.target, "h")))
11726 # First run all grow ops in dry-run mode
11727 for node in instance.all_nodes:
11728 self.cfg.SetDiskID(disk, node)
11729 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11730 True)
11731 result.Raise("Grow request failed to node %s" % node)
11733 # We know that (as far as we can test) operations across different
11734 # nodes will succeed, time to run it for real
11735 for node in instance.all_nodes:
11736 self.cfg.SetDiskID(disk, node)
11737 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11738 False)
11739 result.Raise("Grow request failed to node %s" % node)
11741 # TODO: Rewrite code to work properly
11742 # DRBD goes into sync mode for a short amount of time after executing the
11743 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11744 # calling "resize" in sync mode fails. Sleeping for a short amount of
11745 # time is a work-around.
11746 time.sleep(5)
11748 disk.RecordGrow(self.delta)
11749 self.cfg.Update(instance, feedback_fn)
11751 # Changes have been recorded, release node lock
11752 _ReleaseLocks(self, locking.LEVEL_NODE)
11754 # Downgrade lock while waiting for sync
11755 self.glm.downgrade(locking.LEVEL_INSTANCE)
11757 if self.op.wait_for_sync:
11758 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11759 if disk_abort:
11760 self.proc.LogWarning("Disk sync-ing has not returned a good"
11761 " status; please check the instance")
11762 if instance.admin_state != constants.ADMINST_UP:
11763 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11764 elif instance.admin_state != constants.ADMINST_UP:
11765 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11766 " not supposed to be running because no wait for"
11767 " sync mode was requested")
11769 assert self.owned_locks(locking.LEVEL_NODE_RES)
11770 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11773 class LUInstanceQueryData(NoHooksLU):
11774 """Query runtime instance data.
11779 def ExpandNames(self):
11780 self.needed_locks = {}
11782 # Use locking if requested or when non-static information is wanted
11783 if not (self.op.static or self.op.use_locking):
11784 self.LogWarning("Non-static data requested, locks need to be acquired")
11785 self.op.use_locking = True
11787 if self.op.instances or not self.op.use_locking:
11788 # Expand instance names right here
11789 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11790 else:
11791 # Will use acquired locks
11792 self.wanted_names = None
11794 if self.op.use_locking:
11795 self.share_locks = _ShareAll()
11797 if self.wanted_names is None:
11798 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11799 else:
11800 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11802 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11803 self.needed_locks[locking.LEVEL_NODE] = []
11804 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11806 def DeclareLocks(self, level):
11807 if self.op.use_locking:
11808 if level == locking.LEVEL_NODEGROUP:
11809 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11811 # Lock all groups used by instances optimistically; this requires going
11812 # via the node before it's locked, requiring verification later on
11813 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11814 frozenset(group_uuid
11815 for instance_name in owned_instances
11816 for group_uuid in
11817 self.cfg.GetInstanceNodeGroups(instance_name))
11819 elif level == locking.LEVEL_NODE:
11820 self._LockInstancesNodes()
11822 def CheckPrereq(self):
11823 """Check prerequisites.
11825 This only checks the optional instance list against the existing names.
11827 """
11828 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11829 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11830 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11832 if self.wanted_names is None:
11833 assert self.op.use_locking, "Locking was not used"
11834 self.wanted_names = owned_instances
11836 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11838 if self.op.use_locking:
11839 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11840 None)
11841 else:
11842 assert not (owned_instances or owned_groups or owned_nodes)
11844 self.wanted_instances = instances.values()
11846 def _ComputeBlockdevStatus(self, node, instance, dev):
11847 """Returns the status of a block device
11850 if self.op.static or not node:
11853 self.cfg.SetDiskID(dev, node)
11855 result = self.rpc.call_blockdev_find(node, dev)
11859 result.Raise("Can't compute disk status for %s" % instance.name)
11861 status = result.payload
11865 return (status.dev_path, status.major, status.minor,
11866 status.sync_percent, status.estimated_time,
11867 status.is_degraded, status.ldisk_status)
11869 def _ComputeDiskStatus(self, instance, snode, dev):
11870 """Compute block device status.
11873 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11875 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11877 def _ComputeDiskStatusInner(self, instance, snode, dev):
11878 """Compute block device status.
11880 @attention: The device has to be annotated already.
11882 """
11883 if dev.dev_type in constants.LDS_DRBD:
11884 # we change the snode then (otherwise we use the one passed in)
11885 if dev.logical_id[0] == instance.primary_node:
11886 snode = dev.logical_id[1]
11887 else:
11888 snode = dev.logical_id[0]
11890 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11891 instance, dev)
11892 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11894 if dev.children:
11895 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11896 instance, snode),
11897 dev.children)
11898 else:
11899 dev_children = []
11901 return {
11902 "iv_name": dev.iv_name,
11903 "dev_type": dev.dev_type,
11904 "logical_id": dev.logical_id,
11905 "physical_id": dev.physical_id,
11906 "pstatus": dev_pstatus,
11907 "sstatus": dev_sstatus,
11908 "children": dev_children,
11913 def Exec(self, feedback_fn):
11914 """Gather and return data"""
11917 cluster = self.cfg.GetClusterInfo()
11919 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11920 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11922 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11923 for node in nodes.values()))
11925 group2name_fn = lambda uuid: groups[uuid].name
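# All node and node-group objects needed below are fetched in bulk up front;
# group2name_fn then resolves group UUIDs to names without further config
# lookups inside the per-instance loop.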
11927 for instance in self.wanted_instances:
11928 pnode = nodes[instance.primary_node]
11930 if self.op.static or pnode.offline:
11931 remote_state = None
11932 if pnode.offline:
11933 self.LogWarning("Primary node %s is marked offline, returning static"
11934 " information only for instance %s" %
11935 (pnode.name, instance.name))
11936 else:
11937 remote_info = self.rpc.call_instance_info(instance.primary_node,
11938 instance.name,
11939 instance.hypervisor)
11940 remote_info.Raise("Error checking node %s" % instance.primary_node)
11941 remote_info = remote_info.payload
11942 if remote_info and "state" in remote_info:
11943 remote_state = "up"
11945 if instance.admin_state == constants.ADMINST_UP:
11946 remote_state = "down"
11948 remote_state = instance.admin_state
11950 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11951 instance.disks)
11953 snodes_group_uuids = [nodes[snode_name].group
11954 for snode_name in instance.secondary_nodes]
11956 result[instance.name] = {
11957 "name": instance.name,
11958 "config_state": instance.admin_state,
11959 "run_state": remote_state,
11960 "pnode": instance.primary_node,
11961 "pnode_group_uuid": pnode.group,
11962 "pnode_group_name": group2name_fn(pnode.group),
11963 "snodes": instance.secondary_nodes,
11964 "snodes_group_uuids": snodes_group_uuids,
11965 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11967 # this happens to be the same format used for hooks
11968 "nics": _NICListToTuple(self, instance.nics),
11969 "disk_template": instance.disk_template,
11971 "hypervisor": instance.hypervisor,
11972 "network_port": instance.network_port,
11973 "hv_instance": instance.hvparams,
11974 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11975 "be_instance": instance.beparams,
11976 "be_actual": cluster.FillBE(instance),
11977 "os_instance": instance.osparams,
11978 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11979 "serial_no": instance.serial_no,
11980 "mtime": instance.mtime,
11981 "ctime": instance.ctime,
11982 "uuid": instance.uuid,
11988 def PrepareContainerMods(mods, private_fn):
11989 """Prepares a list of container modifications by adding a private data field.
11991 @type mods: list of tuples; (operation, index, parameters)
11992 @param mods: List of modifications
11993 @type private_fn: callable or None
11994 @param private_fn: Callable for constructing a private data field for a
11995 modification
11997 """
11999 if private_fn is None:
12000 fn = lambda: None
12001 else:
12002 fn = private_fn
12004 return [(op, idx, params, fn()) for (op, idx, params) in mods]
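# Illustrative example (not from the original source): with private_fn=None,
#   PrepareContainerMods([(constants.DDM_MODIFY, 0, {"mode": "ro"})], None)
# yields [(constants.DDM_MODIFY, 0, {"mode": "ro"}, None)], i.e. every
# modification gains a private slot that ApplyContainerMods hands back to its
# callbacks.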
12007 #: Type description for changes as returned by L{ApplyContainerMods}'s
12008 #: callbacks
12009 _TApplyContModsCbChanges = \
12010 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12011 ht.TNonEmptyString,
12012 ht.TAny,
12013 ])))
12016 def ApplyContainerMods(kind, container, chgdesc, mods,
12017 create_fn, modify_fn, remove_fn):
12018 """Applies descriptions in C{mods} to C{container}.
12021 @param kind: One-word item description
12022 @type container: list
12023 @param container: Container to modify
12024 @type chgdesc: None or list
12025 @param chgdesc: List of applied changes
12027 @param mods: Modifications as returned by L{PrepareContainerMods}
12028 @type create_fn: callable
12029 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12030 receives absolute item index, parameters and private data object as added
12031 by L{PrepareContainerMods}, returns tuple containing new item and changes
12033 @type modify_fn: callable
12034 @param modify_fn: Callback for modifying an existing item
12035 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12036 and private data object as added by L{PrepareContainerMods}, returns
12038 @type remove_fn: callable
12039 @param remove_fn: Callback on removing item; receives absolute item index,
12040 item and private data object as added by L{PrepareContainerMods}
12042 """
12043 for (op, idx, params, private) in mods:
12044 if idx == -1:
12045 # Append
12046 absidx = len(container) - 1
12047 elif idx < 0:
12048 raise IndexError("Not accepting negative indices other than -1")
12049 elif idx > len(container):
12050 raise IndexError("Got %s index %s, but there are only %s" %
12051 (kind, idx, len(container)))
12052 else:
12053 absidx = idx
12055 changes = None
12057 if op == constants.DDM_ADD:
12058 # Calculate where item will be added
12059 if idx == -1:
12060 addidx = len(container)
12061 else:
12062 addidx = idx
12064 if create_fn is None:
12065 item = params
12066 else:
12067 (item, changes) = create_fn(addidx, params, private)
12069 if idx == -1:
12070 container.append(item)
12071 else:
12073 assert idx <= len(container)
12074 # list.insert does so before the specified index
12075 container.insert(idx, item)
12076 else:
12077 # Retrieve existing item
12078 try:
12079 item = container[absidx]
12080 except IndexError:
12081 raise IndexError("Invalid %s index %s" % (kind, idx))
12083 if op == constants.DDM_REMOVE:
12084 assert not params
12086 if remove_fn is not None:
12087 remove_fn(absidx, item, private)
12089 changes = [("%s/%s" % (kind, absidx), "remove")]
12091 assert container[absidx] == item
12092 del container[absidx]
12093 elif op == constants.DDM_MODIFY:
12094 if modify_fn is not None:
12095 changes = modify_fn(absidx, item, params, private)
12096 else:
12097 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12099 assert _TApplyContModsCbChanges(changes)
12101 if not (chgdesc is None or changes is None):
12102 chgdesc.extend(changes)
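# ApplyContainerMods is driven by LUInstanceSetParams: in CheckPrereq it runs
# on copies of the instance's disks/NICs purely for validation, and again (in
# Exec, not shown here) with the callbacks that actually apply the changes and
# collect the change descriptions reported back to the caller.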
12105 def _UpdateIvNames(base_index, disks):
12106 """Updates the C{iv_name} attribute of disks.
12108 @type disks: list of L{objects.Disk}
12110 """
12111 for (idx, disk) in enumerate(disks):
12112 disk.iv_name = "disk/%s" % (base_index + idx, )
12115 class _InstNicModPrivate:
12116 """Data structure for network interface modifications.
12118 Used by L{LUInstanceSetParams}.
12120 """
12121 def __init__(self):
12122 self.params = None
12123 self.filled = None
12126 class LUInstanceSetParams(LogicalUnit):
12127 """Modifies an instances's parameters.
12130 HPATH = "instance-modify"
12131 HTYPE = constants.HTYPE_INSTANCE
12135 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12136 assert ht.TList(mods)
12137 assert not mods or len(mods[0]) in (2, 3)
12139 if mods and len(mods[0]) == 2:
12140 result = []
12142 addremove = 0
12143 for op, params in mods:
12144 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12145 result.append((op, -1, params))
12146 addremove += 1
12148 if addremove > 1:
12149 raise errors.OpPrereqError("Only one %s add or remove operation is"
12150 " supported at a time" % kind,
12151 errors.ECODE_INVAL)
12152 else:
12153 result.append((constants.DDM_MODIFY, op, params))
12155 assert verify_fn(result)
12156 else:
12157 result = mods
12159 return result
12161 @staticmethod
12162 def _CheckMods(kind, mods, key_types, item_fn):
12163 """Ensures requested disk/NIC modifications are valid.
12166 for (op, _, params) in mods:
12167 assert ht.TDict(params)
12169 # If key_types is an empty dict, we assume we have an 'ext' template
12170 # and thus do not ForceDictType
12172 utils.ForceDictType(params, key_types)
12174 if op == constants.DDM_REMOVE:
12176 raise errors.OpPrereqError("No settings should be passed when"
12177 " removing a %s" % kind,
12178 errors.ECODE_INVAL)
12179 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12180 item_fn(op, params)
12182 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12185 def _VerifyDiskModification(op, params):
12186 """Verifies a disk modification.
12189 if op == constants.DDM_ADD:
12190 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12191 if mode not in constants.DISK_ACCESS_SET:
12192 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12193 errors.ECODE_INVAL)
12195 size = params.get(constants.IDISK_SIZE, None)
12196 if size is None:
12197 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12198 constants.IDISK_SIZE, errors.ECODE_INVAL)
12200 try:
12201 size = int(size)
12202 except (TypeError, ValueError), err:
12203 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12204 errors.ECODE_INVAL)
12206 params[constants.IDISK_SIZE] = size
12208 elif op == constants.DDM_MODIFY:
12209 if constants.IDISK_SIZE in params:
12210 raise errors.OpPrereqError("Disk size change not possible, use"
12211 " grow-disk", errors.ECODE_INVAL)
12212 if constants.IDISK_MODE not in params:
12213 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
12214 " modification supported, but missing",
12215 errors.ECODE_NOENT)
12216 if len(params) > 1:
12217 raise errors.OpPrereqError("Disk modification doesn't support"
12218 " additional arbitrary parameters",
12219 errors.ECODE_INVAL)
12221 @staticmethod
12222 def _VerifyNicModification(op, params):
12223 """Verifies a network interface modification.
12225 """
12226 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12227 ip = params.get(constants.INIC_IP, None)
12228 if ip is None:
12229 pass
12230 elif ip.lower() == constants.VALUE_NONE:
12231 params[constants.INIC_IP] = None
12232 elif not netutils.IPAddress.IsValid(ip):
12233 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12234 errors.ECODE_INVAL)
12236 bridge = params.get("bridge", None)
12237 link = params.get(constants.INIC_LINK, None)
12238 if bridge and link:
12239 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12240 " at the same time", errors.ECODE_INVAL)
12241 elif bridge and bridge.lower() == constants.VALUE_NONE:
12242 params["bridge"] = None
12243 elif link and link.lower() == constants.VALUE_NONE:
12244 params[constants.INIC_LINK] = None
12246 if op == constants.DDM_ADD:
12247 macaddr = params.get(constants.INIC_MAC, None)
12248 if macaddr is None:
12249 params[constants.INIC_MAC] = constants.VALUE_AUTO
12251 if constants.INIC_MAC in params:
12252 macaddr = params[constants.INIC_MAC]
12253 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12254 macaddr = utils.NormalizeAndValidateMac(macaddr)
12256 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12257 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12258 " modifying an existing NIC",
12259 errors.ECODE_INVAL)
12261 def CheckArguments(self):
12262 if not (self.op.nics or self.op.disks or self.op.disk_template or
12263 self.op.hvparams or self.op.beparams or self.op.os_name or
12264 self.op.offline is not None or self.op.runtime_mem):
12265 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12267 if self.op.hvparams:
12268 _CheckGlobalHvParams(self.op.hvparams)
12270 if self.op.allow_arbit_params:
12271 self.op.disks = \
12272 self._UpgradeDiskNicMods("disk", self.op.disks,
12273 opcodes.OpInstanceSetParams.TestExtDiskModifications)
12274 else:
12275 self.op.disks = \
12276 self._UpgradeDiskNicMods("disk", self.op.disks,
12277 opcodes.OpInstanceSetParams.TestDiskModifications)
12279 self.op.nics = \
12280 self._UpgradeDiskNicMods("NIC", self.op.nics,
12281 opcodes.OpInstanceSetParams.TestNicModifications)
12283 # Check disk modifications
12284 if self.op.allow_arbit_params:
12285 self._CheckMods("disk", self.op.disks, {},
12286 self._VerifyDiskModification)
12287 else:
12288 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12289 self._VerifyDiskModification)
12291 if self.op.disks and self.op.disk_template is not None:
12292 raise errors.OpPrereqError("Disk template conversion and other disk"
12293 " changes not supported at the same time",
12294 errors.ECODE_INVAL)
12296 if (self.op.disk_template and
12297 self.op.disk_template in constants.DTS_INT_MIRROR and
12298 self.op.remote_node is None):
12299 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12300 " one requires specifying a secondary node",
12301 errors.ECODE_INVAL)
12303 # Check NIC modifications
12304 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12305 self._VerifyNicModification)
12307 def ExpandNames(self):
12308 self._ExpandAndLockInstance()
12309 # Can't even acquire node locks in shared mode as upcoming changes in
12310 # Ganeti 2.6 will start to modify the node object on disk conversion
12311 self.needed_locks[locking.LEVEL_NODE] = []
12312 self.needed_locks[locking.LEVEL_NODE_RES] = []
12313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12315 def DeclareLocks(self, level):
12316 # TODO: Acquire group lock in shared mode (disk parameters)
12317 if level == locking.LEVEL_NODE:
12318 self._LockInstancesNodes()
12319 if self.op.disk_template and self.op.remote_node:
12320 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12321 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12322 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12324 self.needed_locks[locking.LEVEL_NODE_RES] = \
12325 self.needed_locks[locking.LEVEL_NODE][:]
12327 def BuildHooksEnv(self):
12328 """Build hooks env.
12330 This runs on the master, primary and secondaries.
12332 """
12333 args = {}
12334 if constants.BE_MINMEM in self.be_new:
12335 args["minmem"] = self.be_new[constants.BE_MINMEM]
12336 if constants.BE_MAXMEM in self.be_new:
12337 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12338 if constants.BE_VCPUS in self.be_new:
12339 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12340 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12341 # information at all.
12343 if self._new_nics is not None:
12344 nics = []
12346 for nic in self._new_nics:
12347 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12348 mode = nicparams[constants.NIC_MODE]
12349 link = nicparams[constants.NIC_LINK]
12350 nics.append((nic.ip, nic.mac, mode, link))
12352 args["nics"] = nics
12354 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12355 if self.op.disk_template:
12356 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12357 if self.op.runtime_mem:
12358 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12362 def BuildHooksNodes(self):
12363 """Build hooks nodes.
12366 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12369 def _PrepareNicModification(self, params, private, old_ip, old_params,
12371 update_params_dict = dict([(key, params[key])
12372 for key in constants.NICS_PARAMETERS
12375 if "bridge" in params:
12376 update_params_dict[constants.NIC_LINK] = params["bridge"]
12378 new_params = _GetUpdatedParams(old_params, update_params_dict)
12379 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12381 new_filled_params = cluster.SimpleFillNIC(new_params)
12382 objects.NIC.CheckParameterSyntax(new_filled_params)
12384 new_mode = new_filled_params[constants.NIC_MODE]
12385 if new_mode == constants.NIC_MODE_BRIDGED:
12386 bridge = new_filled_params[constants.NIC_LINK]
12387 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12388 if msg:
12389 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12390 if self.op.force:
12391 self.warn.append(msg)
12392 else:
12393 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12395 elif new_mode == constants.NIC_MODE_ROUTED:
12396 ip = params.get(constants.INIC_IP, old_ip)
12397 if ip is None:
12398 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12399 " on a routed NIC", errors.ECODE_INVAL)
12401 if constants.INIC_MAC in params:
12402 mac = params[constants.INIC_MAC]
12403 if mac is None:
12404 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12405 errors.ECODE_INVAL)
12406 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12407 # otherwise generate the MAC address
12408 params[constants.INIC_MAC] = \
12409 self.cfg.GenerateMAC(self.proc.GetECId())
12410 else:
12411 # or validate/reserve the current one
12412 try:
12413 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12414 except errors.ReservationError:
12415 raise errors.OpPrereqError("MAC address '%s' already in use"
12416 " in cluster" % mac,
12417 errors.ECODE_NOTUNIQUE)
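# The fully validated parameters are stashed on the private object
# (_InstNicModPrivate) so that the step which actually applies the NIC
# modification can reuse them without recomputing the checks above.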
12419 private.params = new_params
12420 private.filled = new_filled_params
12422 def CheckPrereq(self):
12423 """Check prerequisites.
12425 This only checks the instance list against the existing names.
12427 """
12428 # checking the new params on the primary/secondary nodes
12430 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12431 cluster = self.cluster = self.cfg.GetClusterInfo()
12432 assert self.instance is not None, \
12433 "Cannot retrieve locked instance %s" % self.op.instance_name
12434 pnode = instance.primary_node
12435 nodelist = list(instance.all_nodes)
12436 pnode_info = self.cfg.GetNodeInfo(pnode)
12437 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12439 # Prepare disk/NIC modifications
12440 self.diskmod = PrepareContainerMods(self.op.disks, None)
12441 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12443 # Check the validity of the `provider' parameter
12444 if instance.disk_template in constants.DT_EXT:
12445 for mod in self.diskmod:
12446 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
12447 if mod[0] == constants.DDM_ADD:
12448 if ext_provider is None:
12449 raise errors.OpPrereqError("Instance template is '%s' and parameter"
12450 " '%s' missing, during disk add" %
12452 constants.IDISK_PROVIDER),
12453 errors.ECODE_NOENT)
12454 elif mod[0] == constants.DDM_MODIFY:
12455 if ext_provider:
12456 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
12457 " modification" % constants.IDISK_PROVIDER,
12458 errors.ECODE_INVAL)
12459 else:
12460 for mod in self.diskmod:
12461 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
12462 if ext_provider is not None:
12463 raise errors.OpPrereqError("Parameter '%s' is only valid for instances"
12464 " of type '%s'" % (constants.IDISK_PROVIDER,
12465 constants.DT_EXT), errors.ECODE_INVAL)
12468 if self.op.os_name and not self.op.force:
12469 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12470 self.op.force_variant)
12471 instance_os = self.op.os_name
12473 instance_os = instance.os
12475 assert not (self.op.disk_template and self.op.disks), \
12476 "Can't modify disk template and apply disk changes at the same time"
12478 if self.op.disk_template:
12479 if instance.disk_template == self.op.disk_template:
12480 raise errors.OpPrereqError("Instance already has disk template %s" %
12481 instance.disk_template, errors.ECODE_INVAL)
12483 if (instance.disk_template,
12484 self.op.disk_template) not in self._DISK_CONVERSIONS:
12485 raise errors.OpPrereqError("Unsupported disk template conversion from"
12486 " %s to %s" % (instance.disk_template,
12487 self.op.disk_template),
12488 errors.ECODE_INVAL)
12489 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12490 msg="cannot change disk template")
12491 if self.op.disk_template in constants.DTS_INT_MIRROR:
12492 if self.op.remote_node == pnode:
12493 raise errors.OpPrereqError("Given new secondary node %s is the same"
12494 " as the primary node of the instance" %
12495 self.op.remote_node, errors.ECODE_STATE)
12496 _CheckNodeOnline(self, self.op.remote_node)
12497 _CheckNodeNotDrained(self, self.op.remote_node)
12498 # FIXME: here we assume that the old instance type is DT_PLAIN
12499 assert instance.disk_template == constants.DT_PLAIN
12500 disks = [{constants.IDISK_SIZE: d.size,
12501 constants.IDISK_VG: d.logical_id[0]}
12502 for d in instance.disks]
12503 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12504 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12506 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12507 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12508 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12509 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12510 ignore=self.op.ignore_ipolicy)
12511 if pnode_info.group != snode_info.group:
12512 self.LogWarning("The primary and secondary nodes are in two"
12513 " different node groups; the disk parameters"
12514 " from the first disk's node group will be"
12517 # hvparams processing
12518 if self.op.hvparams:
12519 hv_type = instance.hypervisor
12520 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12521 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12522 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12525 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12526 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12527 self.hv_proposed = self.hv_new = hv_new # the new actual values
12528 self.hv_inst = i_hvdict # the new dict (without defaults)
12529 else:
12530 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12531 instance.hvparams)
12532 self.hv_new = self.hv_inst = {}
12534 # beparams processing
12535 if self.op.beparams:
12536 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12537 use_none=True)
12538 objects.UpgradeBeParams(i_bedict)
12539 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12540 be_new = cluster.SimpleFillBE(i_bedict)
12541 self.be_proposed = self.be_new = be_new # the new actual values
12542 self.be_inst = i_bedict # the new dict (without defaults)
12543 else:
12544 self.be_new = self.be_inst = {}
12545 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12546 be_old = cluster.FillBE(instance)
12548 # CPU param validation -- checking every time a parameter is
12549 # changed to cover all cases where either CPU mask or vcpus have
12550 # changed
12551 if (constants.BE_VCPUS in self.be_proposed and
12552 constants.HV_CPU_MASK in self.hv_proposed):
12553 cpu_list = \
12554 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12555 # Verify mask is consistent with number of vCPUs. Can skip this
12556 # test if only 1 entry in the CPU mask, which means same mask
12557 # is applied to all vCPUs.
12558 if (len(cpu_list) > 1 and
12559 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12560 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12562 (self.be_proposed[constants.BE_VCPUS],
12563 self.hv_proposed[constants.HV_CPU_MASK]),
12564 errors.ECODE_INVAL)
12566 # Only perform this test if a new CPU mask is given
12567 if constants.HV_CPU_MASK in self.hv_new:
12568 # Calculate the largest CPU number requested
12569 max_requested_cpu = max(map(max, cpu_list))
12570 # Check that all of the instance's nodes have enough physical CPUs to
12571 # satisfy the requested CPU mask
12572 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12573 max_requested_cpu + 1, instance.hypervisor)
12575 # osparams processing
12576 if self.op.osparams:
12577 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12578 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12579 self.os_inst = i_osdict # the new dict (without defaults)
12580 else:
12581 self.os_inst = {}
12583 self.warn = []
12585 #TODO(dynmem): do the appropriate check involving MINMEM
12586 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12587 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12588 mem_check_list = [pnode]
12589 if be_new[constants.BE_AUTO_BALANCE]:
12590 # either we changed auto_balance to yes or it was from before
12591 mem_check_list.extend(instance.secondary_nodes)
12592 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12593 instance.hypervisor)
12594 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12595 [instance.hypervisor])
12596 pninfo = nodeinfo[pnode]
12597 msg = pninfo.fail_msg
12598 if msg:
12599 # Assume the primary node is unreachable and go ahead
12600 self.warn.append("Can't get info from primary node %s: %s" %
12601 (pnode, msg))
12602 else:
12603 (_, _, (pnhvinfo, )) = pninfo.payload
12604 if not isinstance(pnhvinfo.get("memory_free", None), int):
12605 self.warn.append("Node data from primary node %s doesn't contain"
12606 " free memory information" % pnode)
12607 elif instance_info.fail_msg:
12608 self.warn.append("Can't get instance runtime information: %s" %
12609 instance_info.fail_msg)
12610 else:
12611 if instance_info.payload:
12612 current_mem = int(instance_info.payload["memory"])
12613 else:
12614 # Assume instance not running
12615 # (there is a slight race condition here, but it's not very
12616 # probable, and we have no other way to check)
12617 # TODO: Describe race condition
12618 current_mem = 0
12619 #TODO(dynmem): do the appropriate check involving MINMEM
12620 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12621 pnhvinfo["memory_free"])
12623 raise errors.OpPrereqError("This change will prevent the instance"
12624 " from starting, due to %d MB of memory"
12625 " missing on its primary node" %
12627 errors.ECODE_NORES)
12629 if be_new[constants.BE_AUTO_BALANCE]:
12630 for node, nres in nodeinfo.items():
12631 if node not in instance.secondary_nodes:
12632 continue
12633 nres.Raise("Can't get info from secondary node %s" % node,
12634 prereq=True, ecode=errors.ECODE_STATE)
12635 (_, _, (nhvinfo, )) = nres.payload
12636 if not isinstance(nhvinfo.get("memory_free", None), int):
12637 raise errors.OpPrereqError("Secondary node %s didn't return free"
12638 " memory information" % node,
12639 errors.ECODE_STATE)
12640 #TODO(dynmem): do the appropriate check involving MINMEM
12641 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12642 raise errors.OpPrereqError("This change will prevent the instance"
12643 " from failover to its secondary node"
12644 " %s, due to not enough memory" % node,
12645 errors.ECODE_STATE)
12647 if self.op.runtime_mem:
12648 remote_info = self.rpc.call_instance_info(instance.primary_node,
12649 instance.name,
12650 instance.hypervisor)
12651 remote_info.Raise("Error checking node %s" % instance.primary_node)
12652 if not remote_info.payload: # not running already
12653 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12654 errors.ECODE_STATE)
12656 current_memory = remote_info.payload["memory"]
12657 if (not self.op.force and
12658 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12659 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12660 raise errors.OpPrereqError("Instance %s must have memory between %d"
12661 " and %d MB of memory unless --force is"
12662 " given" % (instance.name,
12663 self.be_proposed[constants.BE_MINMEM],
12664 self.be_proposed[constants.BE_MAXMEM]),
12665 errors.ECODE_INVAL)
12667 if self.op.runtime_mem > current_memory:
12668 _CheckNodeFreeMemory(self, instance.primary_node,
12669 "ballooning memory for instance %s" %
12671 self.op.memory - current_memory,
12672 instance.hypervisor)
12674 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12675 raise errors.OpPrereqError("Disk operations not supported for"
12676 " diskless instances",
12677 errors.ECODE_INVAL)
12679 def _PrepareNicCreate(_, params, private):
12680 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12681 return (None, None)
12683 def _PrepareNicMod(_, nic, params, private):
12684 self._PrepareNicModification(params, private, nic.ip,
12685 nic.nicparams, cluster, pnode)
12686 return None
12688 # Verify NIC changes (operating on copy)
12689 nics = instance.nics[:]
12690 ApplyContainerMods("NIC", nics, None, self.nicmod,
12691 _PrepareNicCreate, _PrepareNicMod, None)
12692 if len(nics) > constants.MAX_NICS:
12693 raise errors.OpPrereqError("Instance has too many network interfaces"
12694 " (%d), cannot add more" % constants.MAX_NICS,
12695 errors.ECODE_STATE)
12697 # Verify disk changes (operating on a copy)
12698 disks = instance.disks[:]
12699 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12700 if len(disks) > constants.MAX_DISKS:
12701 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12702 " more" % constants.MAX_DISKS,
12703 errors.ECODE_STATE)
12705 if self.op.offline is not None:
12706 if self.op.offline:
12707 msg = "can't change to offline"
12709 msg = "can't change to online"
12710 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12712 # Pre-compute NIC changes (necessary to use result in hooks)
12713 self._nic_chgdesc = []
12715 # Operate on copies as this is still in prereq
12716 nics = [nic.Copy() for nic in instance.nics]
12717 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12718 self._CreateNewNic, self._ApplyNicMods, None)
12719 self._new_nics = nics
12721 self._new_nics = None
12723 def _ConvertPlainToDrbd(self, feedback_fn):
12724 """Converts an instance from plain to drbd.
12727 feedback_fn("Converting template to drbd")
12728 instance = self.instance
12729 pnode = instance.primary_node
12730 snode = self.op.remote_node
12732 assert instance.disk_template == constants.DT_PLAIN
12734 # create a fake disk info for _GenerateDiskTemplate
12735 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12736 constants.IDISK_VG: d.logical_id[0]}
12737 for d in instance.disks]
12738 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12739 instance.name, pnode, [snode],
12740 disk_info, None, None, 0, feedback_fn,
12742 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12744 info = _GetInstanceInfoText(instance)
12745 feedback_fn("Creating additional volumes...")
12746 # first, create the missing data and meta devices
12747 for disk in anno_disks:
12748 # unfortunately this is... not too nice
12749 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12751 for child in disk.children:
12752 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12753 # at this stage, all new LVs have been created, we can rename the old ones
12755 feedback_fn("Renaming original volumes...")
12756 rename_list = [(o, n.children[0].logical_id)
12757 for (o, n) in zip(instance.disks, new_disks)]
12758 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12759 result.Raise("Failed to rename original LVs")
12761 feedback_fn("Initializing DRBD devices...")
12762 # all child devices are in place, we can now create the DRBD devices
12763 for disk in anno_disks:
12764 for node in [pnode, snode]:
12765 f_create = node == pnode
12766 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12768 # at this point, the instance has been modified
12769 instance.disk_template = constants.DT_DRBD8
12770 instance.disks = new_disks
12771 self.cfg.Update(instance, feedback_fn)
12773 # Release node locks while waiting for sync
12774 _ReleaseLocks(self, locking.LEVEL_NODE)
12776 # disks are created, waiting for sync
12777 disk_abort = not _WaitForSync(self, instance,
12778 oneshot=not self.op.wait_for_sync)
12780 raise errors.OpExecError("There are some degraded disks for"
12781 " this instance, please cleanup manually")
12783 # Node resource locks will be released by caller
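# Illustrative note (values are hypothetical): for a plain instance with a
# single 10 GiB volume in volume group "xenvg", the fake disk_info built at
# the top of this method would look like
#   [{constants.IDISK_SIZE: 10240,
#     constants.IDISK_MODE: "rw",
#     constants.IDISK_VG: "xenvg"}]
# which _GenerateDiskTemplate then turns into the matching DRBD8 disks.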
12785 def _ConvertDrbdToPlain(self, feedback_fn):
12786 """Converts an instance from drbd to plain.
12789 instance = self.instance
12791 assert len(instance.secondary_nodes) == 1
12792 assert instance.disk_template == constants.DT_DRBD8
12794 pnode = instance.primary_node
12795 snode = instance.secondary_nodes[0]
12796 feedback_fn("Converting template to plain")
12798 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12799 new_disks = [d.children[0] for d in instance.disks]
12801 # copy over size and mode
12802 for parent, child in zip(old_disks, new_disks):
12803 child.size = parent.size
12804 child.mode = parent.mode
12806 # this is a DRBD disk, return its port to the pool
12807 # NOTE: this must be done right before the call to cfg.Update!
12808 for disk in old_disks:
12809 tcp_port = disk.logical_id[2]
12810 self.cfg.AddTcpUdpPort(tcp_port)
12812 # update instance structure
12813 instance.disks = new_disks
12814 instance.disk_template = constants.DT_PLAIN
12815 self.cfg.Update(instance, feedback_fn)
12817 # Release locks in case removing disks takes a while
12818 _ReleaseLocks(self, locking.LEVEL_NODE)
12820 feedback_fn("Removing volumes on the secondary node...")
12821 for disk in old_disks:
12822 self.cfg.SetDiskID(disk, snode)
12823 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12825 self.LogWarning("Could not remove block device %s on node %s,"
12826 " continuing anyway: %s", disk.iv_name, snode, msg)
12828 feedback_fn("Removing unneeded volumes on the primary node...")
12829 for idx, disk in enumerate(old_disks):
12830 meta = disk.children[1]
12831 self.cfg.SetDiskID(meta, pnode)
12832 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12834 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12835 " continuing anyway: %s", idx, pnode, msg)
12837 def _CreateNewDisk(self, idx, params, _):
12838 """Creates a new disk.
12841 instance = self.instance
12844 if instance.disk_template in constants.DTS_FILEBASED:
12845 (file_driver, file_path) = instance.disks[0].logical_id
12846 file_path = os.path.dirname(file_path)
12848 file_driver = file_path = None
12851 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12852 instance.primary_node, instance.secondary_nodes,
12853 [params], file_path, file_driver, idx,
12854 self.Log, self.diskparams)[0]
12856 info = _GetInstanceInfoText(instance)
12858 logging.info("Creating volume %s for instance %s",
12859 disk.iv_name, instance.name)
12860 # Note: this needs to be kept in sync with _CreateDisks
12862 for node in instance.all_nodes:
12863 f_create = (node == instance.primary_node)
12865 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12866 except errors.OpExecError, err:
12867 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12868 disk.iv_name, disk, node, err)
12871 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12875 def _ModifyDisk(idx, disk, params, _):
12876 """Modifies a disk.
12879 disk.mode = params[constants.IDISK_MODE]
12882 ("disk.mode/%d" % idx, disk.mode),
12885 def _RemoveDisk(self, idx, root, _):
12889 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12890 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12891 self.cfg.SetDiskID(disk, node)
12892 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12894 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12895 " continuing anyway", idx, node, msg)
12897 # if this is a DRBD disk, return its port to the pool
12898 if root.dev_type in constants.LDS_DRBD:
12899 self.cfg.AddTcpUdpPort(root.logical_id[2])
12902 def _CreateNewNic(idx, params, private):
12903 """Creates data structure for a new network interface.
12906 mac = params[constants.INIC_MAC]
12907 ip = params.get(constants.INIC_IP, None)
12908 nicparams = private.params
12910 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12912 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12913 (mac, ip, private.filled[constants.NIC_MODE],
12914 private.filled[constants.NIC_LINK])),
12918 def _ApplyNicMods(idx, nic, params, private):
12919 """Modifies a network interface.
12924 for key in [constants.INIC_MAC, constants.INIC_IP]:
12926 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12927 setattr(nic, key, params[key])
12930 nic.nicparams = private.params
12932 for (key, val) in params.items():
12933 changes.append(("nic.%s/%d" % (key, idx), val))
12937 def Exec(self, feedback_fn):
12938 """Modifies an instance.
12940 All parameters take effect only at the next restart of the instance.
12943 # Process here the warnings from CheckPrereq, as we don't have a
12944 # feedback_fn there.
12945 # TODO: Replace with self.LogWarning
12946 for warn in self.warn:
12947 feedback_fn("WARNING: %s" % warn)
12949 assert ((self.op.disk_template is None) ^
12950 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12951 "Not owning any node resource locks"
12954 instance = self.instance
12957 if self.op.runtime_mem:
12958 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12960 self.op.runtime_mem)
12961 rpcres.Raise("Cannot modify instance runtime memory")
12962 result.append(("runtime_memory", self.op.runtime_mem))
12964 # Apply disk changes
12965 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12966 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12967 _UpdateIvNames(0, instance.disks)
12969 if self.op.disk_template:
12971 check_nodes = set(instance.all_nodes)
12972 if self.op.remote_node:
12973 check_nodes.add(self.op.remote_node)
12974 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12975 owned = self.owned_locks(level)
12976 assert not (check_nodes - owned), \
12977 ("Not owning the correct locks, owning %r, expected at least %r" %
12978 (owned, check_nodes))
12980 r_shut = _ShutdownInstanceDisks(self, instance)
12982 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12983 " proceed with disk template conversion")
12984 mode = (instance.disk_template, self.op.disk_template)
12986 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12988 self.cfg.ReleaseDRBDMinors(instance.name)
12990 result.append(("disk_template", self.op.disk_template))
12992 assert instance.disk_template == self.op.disk_template, \
12993 ("Expected disk template '%s', found '%s'" %
12994 (self.op.disk_template, instance.disk_template))
12996 # Release node and resource locks if there are any (they might already have
12997 # been released during disk conversion)
12998 _ReleaseLocks(self, locking.LEVEL_NODE)
12999 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13001 # Apply NIC changes
13002 if self._new_nics is not None:
13003 instance.nics = self._new_nics
13004 result.extend(self._nic_chgdesc)
13007 if self.op.hvparams:
13008 instance.hvparams = self.hv_inst
13009 for key, val in self.op.hvparams.iteritems():
13010 result.append(("hv/%s" % key, val))
13013 if self.op.beparams:
13014 instance.beparams = self.be_inst
13015 for key, val in self.op.beparams.iteritems():
13016 result.append(("be/%s" % key, val))
13019 if self.op.os_name:
13020 instance.os = self.op.os_name
13023 if self.op.osparams:
13024 instance.osparams = self.os_inst
13025 for key, val in self.op.osparams.iteritems():
13026 result.append(("os/%s" % key, val))
13028 if self.op.offline is None:
13031 elif self.op.offline:
13032 # Mark instance as offline
13033 self.cfg.MarkInstanceOffline(instance.name)
13034 result.append(("admin_state", constants.ADMINST_OFFLINE))
13036 # Mark instance as online, but stopped
13037 self.cfg.MarkInstanceDown(instance.name)
13038 result.append(("admin_state", constants.ADMINST_DOWN))
13040 self.cfg.Update(instance, feedback_fn)
13042 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13043 self.owned_locks(locking.LEVEL_NODE)), \
13044 "All node locks should have been released by now"
13048 _DISK_CONVERSIONS = {
13049 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13050 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
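# Dispatch example, mirroring Exec above: the conversion routine is chosen as
#   mode = (instance.disk_template, self.op.disk_template)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)
# so only the two plain <-> drbd conversions listed in this mapping are
# supported.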
13054 class LUInstanceChangeGroup(LogicalUnit):
13055 HPATH = "instance-change-group"
13056 HTYPE = constants.HTYPE_INSTANCE
13059 def ExpandNames(self):
13060 self.share_locks = _ShareAll()
13061 self.needed_locks = {
13062 locking.LEVEL_NODEGROUP: [],
13063 locking.LEVEL_NODE: [],
13066 self._ExpandAndLockInstance()
13068 if self.op.target_groups:
13069 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13070 self.op.target_groups)
13072 self.req_target_uuids = None
13074 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13076 def DeclareLocks(self, level):
13077 if level == locking.LEVEL_NODEGROUP:
13078 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13080 if self.req_target_uuids:
13081 lock_groups = set(self.req_target_uuids)
13083 # Lock all groups used by instance optimistically; this requires going
13084 # via the node before it's locked, requiring verification later on
13085 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13086 lock_groups.update(instance_groups)
13088 # No target groups, need to lock all of them
13089 lock_groups = locking.ALL_SET
13091 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13093 elif level == locking.LEVEL_NODE:
13094 if self.req_target_uuids:
13095 # Lock all nodes used by instances
13096 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13097 self._LockInstancesNodes()
13099 # Lock all nodes in all potential target groups
13100 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13101 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13102 member_nodes = [node_name
13103 for group in lock_groups
13104 for node_name in self.cfg.GetNodeGroup(group).members]
13105 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13107 # Lock all nodes as all groups are potential targets
13108 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13110 def CheckPrereq(self):
13111 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13112 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13113 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13115 assert (self.req_target_uuids is None or
13116 owned_groups.issuperset(self.req_target_uuids))
13117 assert owned_instances == set([self.op.instance_name])
13119 # Get instance information
13120 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13122 # Check if node groups for locked instance are still correct
13123 assert owned_nodes.issuperset(self.instance.all_nodes), \
13124 ("Instance %s's nodes changed while we kept the lock" %
13125 self.op.instance_name)
13127 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13130 if self.req_target_uuids:
13131 # User requested specific target groups
13132 self.target_uuids = frozenset(self.req_target_uuids)
13134 # All groups except those used by the instance are potential targets
13135 self.target_uuids = owned_groups - inst_groups
13137 conflicting_groups = self.target_uuids & inst_groups
13138 if conflicting_groups:
13139 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13140 " used by the instance '%s'" %
13141 (utils.CommaJoin(conflicting_groups),
13142 self.op.instance_name),
13143 errors.ECODE_INVAL)
13145 if not self.target_uuids:
13146 raise errors.OpPrereqError("There are no possible target groups",
13147 errors.ECODE_INVAL)
13149 def BuildHooksEnv(self):
13150 """Build hooks env.
13153 assert self.target_uuids
13156 "TARGET_GROUPS": " ".join(self.target_uuids),
13159 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13163 def BuildHooksNodes(self):
13164 """Build hooks nodes.
13167 mn = self.cfg.GetMasterNode()
13168 return ([mn], [mn])
13170 def Exec(self, feedback_fn):
13171 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13173 assert instances == [self.op.instance_name], "Instance not locked"
13175 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13176 instances=instances, target_groups=list(self.target_uuids))
13178 ial.Run(self.op.iallocator)
13180 if not ial.success:
13181 raise errors.OpPrereqError("Can't compute solution for changing group of"
13182 " instance '%s' using iallocator '%s': %s" %
13183 (self.op.instance_name, self.op.iallocator,
13185 errors.ECODE_NORES)
13187 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13189 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13190 " instance '%s'", len(jobs), self.op.instance_name)
13192 return ResultWithJobs(jobs)
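# Illustrative usage sketch (an assumption about the surrounding opcode
# machinery, not something defined in this LU): this logical unit is driven
# by an instance change-group opcode along the lines of
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["rack2"],
#                                      iallocator="hail")
# The ResultWithJobs returned above makes mcpu submit the relocation jobs
# computed by the iallocator and report their job IDs back to the caller.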
13195 class LUBackupQuery(NoHooksLU):
13196 """Query the exports list
13201 def CheckArguments(self):
13202 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13203 ["node", "export"], self.op.use_locking)
13205 def ExpandNames(self):
13206 self.expq.ExpandNames(self)
13208 def DeclareLocks(self, level):
13209 self.expq.DeclareLocks(self, level)
13211 def Exec(self, feedback_fn):
13214 for (node, expname) in self.expq.OldStyleQuery(self):
13215 if expname is None:
13216 result[node] = False
13218 result.setdefault(node, []).append(expname)
13223 class _ExportQuery(_QueryBase):
13224 FIELDS = query.EXPORT_FIELDS
13226 #: The node name is not a unique key for this query
13227 SORT_FIELD = "node"
13229 def ExpandNames(self, lu):
13230 lu.needed_locks = {}
13232 # The following variables interact with _QueryBase._GetNames
13234 self.wanted = _GetWantedNodes(lu, self.names)
13236 self.wanted = locking.ALL_SET
13238 self.do_locking = self.use_locking
13240 if self.do_locking:
13241 lu.share_locks = _ShareAll()
13242 lu.needed_locks = {
13243 locking.LEVEL_NODE: self.wanted,
13246 def DeclareLocks(self, lu, level):
13249 def _GetQueryData(self, lu):
13250 """Computes the list of nodes and their attributes.
13253 # Locking is not used
13255 assert not (compat.any(lu.glm.is_owned(level)
13256 for level in locking.LEVELS
13257 if level != locking.LEVEL_CLUSTER) or
13258 self.do_locking or self.use_locking)
13260 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13264 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13266 result.append((node, None))
13268 result.extend((node, expname) for expname in nres.payload)
13273 class LUBackupPrepare(NoHooksLU):
13274 """Prepares an instance for an export and returns useful information.
13279 def ExpandNames(self):
13280 self._ExpandAndLockInstance()
13282 def CheckPrereq(self):
13283 """Check prerequisites.
13286 instance_name = self.op.instance_name
13288 self.instance = self.cfg.GetInstanceInfo(instance_name)
13289 assert self.instance is not None, \
13290 "Cannot retrieve locked instance %s" % self.op.instance_name
13291 _CheckNodeOnline(self, self.instance.primary_node)
13293 self._cds = _GetClusterDomainSecret()
13295 def Exec(self, feedback_fn):
13296 """Prepares an instance for an export.
13299 instance = self.instance
13301 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13302 salt = utils.GenerateSecret(8)
13304 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13305 result = self.rpc.call_x509_cert_create(instance.primary_node,
13306 constants.RIE_CERT_VALIDITY)
13307 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13309 (name, cert_pem) = result.payload
13311 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13315 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13316 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13318 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13324 class LUBackupExport(LogicalUnit):
13325 """Export an instance to an image in the cluster.
13328 HPATH = "instance-export"
13329 HTYPE = constants.HTYPE_INSTANCE
13332 def CheckArguments(self):
13333 """Check the arguments.
13336 self.x509_key_name = self.op.x509_key_name
13337 self.dest_x509_ca_pem = self.op.destination_x509_ca
13339 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13340 if not self.x509_key_name:
13341 raise errors.OpPrereqError("Missing X509 key name for encryption",
13342 errors.ECODE_INVAL)
13344 if not self.dest_x509_ca_pem:
13345 raise errors.OpPrereqError("Missing destination X509 CA",
13346 errors.ECODE_INVAL)
13348 def ExpandNames(self):
13349 self._ExpandAndLockInstance()
13351 # Lock all nodes for local exports
13352 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13353 # FIXME: lock only instance primary and destination node
13355 # Sad but true, for now we have to lock all nodes, as we don't know where
13356 # the previous export might be, and in this LU we search for it and
13357 # remove it from its current node. In the future we could fix this by:
13358 # - making a tasklet to search (share-lock all), then create the
13359 # new one, then one to remove, after
13360 # - removing the removal operation altogether
13361 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13363 def DeclareLocks(self, level):
13364 """Last minute lock declaration."""
13365 # All nodes are locked anyway, so nothing to do here.
13367 def BuildHooksEnv(self):
13368 """Build hooks env.
13370 This will run on the master, primary node and target node.
13374 "EXPORT_MODE": self.op.mode,
13375 "EXPORT_NODE": self.op.target_node,
13376 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13377 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13378 # TODO: Generic function for boolean env variables
13379 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13382 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13386 def BuildHooksNodes(self):
13387 """Build hooks nodes.
13390 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13392 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13393 nl.append(self.op.target_node)
13397 def CheckPrereq(self):
13398 """Check prerequisites.
13400 This checks that the instance and node names are valid.
13403 instance_name = self.op.instance_name
13405 self.instance = self.cfg.GetInstanceInfo(instance_name)
13406 assert self.instance is not None, \
13407 "Cannot retrieve locked instance %s" % self.op.instance_name
13408 _CheckNodeOnline(self, self.instance.primary_node)
13410 if (self.op.remove_instance and
13411 self.instance.admin_state == constants.ADMINST_UP and
13412 not self.op.shutdown):
13413 raise errors.OpPrereqError("Can not remove instance without shutting it"
13416 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13417 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13418 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13419 assert self.dst_node is not None
13421 _CheckNodeOnline(self, self.dst_node.name)
13422 _CheckNodeNotDrained(self, self.dst_node.name)
13425 self.dest_disk_info = None
13426 self.dest_x509_ca = None
13428 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13429 self.dst_node = None
13431 if len(self.op.target_node) != len(self.instance.disks):
13432 raise errors.OpPrereqError(("Received destination information for %s"
13433 " disks, but instance %s has %s disks") %
13434 (len(self.op.target_node), instance_name,
13435 len(self.instance.disks)),
13436 errors.ECODE_INVAL)
13438 cds = _GetClusterDomainSecret()
13440 # Check X509 key name
13442 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13443 except (TypeError, ValueError), err:
13444 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13446 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13447 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13448 errors.ECODE_INVAL)
13450 # Load and verify CA
13452 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13453 except OpenSSL.crypto.Error, err:
13454 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13455 (err, ), errors.ECODE_INVAL)
13457 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13458 if errcode is not None:
13459 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13460 (msg, ), errors.ECODE_INVAL)
13462 self.dest_x509_ca = cert
13464 # Verify target information
13466 for idx, disk_data in enumerate(self.op.target_node):
13468 (host, port, magic) = \
13469 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13470 except errors.GenericError, err:
13471 raise errors.OpPrereqError("Target info for disk %s: %s" %
13472 (idx, err), errors.ECODE_INVAL)
13474 disk_info.append((host, port, magic))
13476 assert len(disk_info) == len(self.op.target_node)
13477 self.dest_disk_info = disk_info
13480 raise errors.ProgrammerError("Unhandled export mode %r" %
13483 # instance disk type verification
13484 # TODO: Implement export support for file-based disks
13485 for disk in self.instance.disks:
13486 if disk.dev_type == constants.LD_FILE:
13487 raise errors.OpPrereqError("Export not supported for instances with"
13488 " file-based disks", errors.ECODE_INVAL)
13490 def _CleanupExports(self, feedback_fn):
13491 """Removes exports of current instance from all other nodes.
13493 If an instance in a cluster with nodes A..D was exported to node C, its
13494 exports will be removed from the nodes A, B and D.
13497 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13499 nodelist = self.cfg.GetNodeList()
13500 nodelist.remove(self.dst_node.name)
13502 # on one-node clusters nodelist will be empty after the removal
13503 # if we proceed the backup would be removed because OpBackupQuery
13504 # substitutes an empty list with the full cluster node list.
13505 iname = self.instance.name
13507 feedback_fn("Removing old exports for instance %s" % iname)
13508 exportlist = self.rpc.call_export_list(nodelist)
13509 for node in exportlist:
13510 if exportlist[node].fail_msg:
13512 if iname in exportlist[node].payload:
13513 msg = self.rpc.call_export_remove(node, iname).fail_msg
13515 self.LogWarning("Could not remove older export for instance %s"
13516 " on node %s: %s", iname, node, msg)
13518 def Exec(self, feedback_fn):
13519 """Export an instance to an image in the cluster.
13522 assert self.op.mode in constants.EXPORT_MODES
13524 instance = self.instance
13525 src_node = instance.primary_node
13527 if self.op.shutdown:
13528 # shutdown the instance, but not the disks
13529 feedback_fn("Shutting down instance %s" % instance.name)
13530 result = self.rpc.call_instance_shutdown(src_node, instance,
13531 self.op.shutdown_timeout)
13532 # TODO: Maybe ignore failures if ignore_remove_failures is set
13533 result.Raise("Could not shutdown instance %s on"
13534 " node %s" % (instance.name, src_node))
13536 # set the disks ID correctly since call_instance_start needs the
13537 # correct drbd minor to create the symlinks
13538 for disk in instance.disks:
13539 self.cfg.SetDiskID(disk, src_node)
13541 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13544 # Activate the instance disks if we're exporting a stopped instance
13545 feedback_fn("Activating disks for %s" % instance.name)
13546 _StartInstanceDisks(self, instance, None)
13549 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13552 helper.CreateSnapshots()
13554 if (self.op.shutdown and
13555 instance.admin_state == constants.ADMINST_UP and
13556 not self.op.remove_instance):
13557 assert not activate_disks
13558 feedback_fn("Starting instance %s" % instance.name)
13559 result = self.rpc.call_instance_start(src_node,
13560 (instance, None, None), False)
13561 msg = result.fail_msg
13563 feedback_fn("Failed to start instance: %s" % msg)
13564 _ShutdownInstanceDisks(self, instance)
13565 raise errors.OpExecError("Could not start instance: %s" % msg)
13567 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13568 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13569 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13570 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13571 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13573 (key_name, _, _) = self.x509_key_name
13576 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13579 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13580 key_name, dest_ca_pem,
13585 # Check for backwards compatibility
13586 assert len(dresults) == len(instance.disks)
13587 assert compat.all(isinstance(i, bool) for i in dresults), \
13588 "Not all results are boolean: %r" % dresults
13592 feedback_fn("Deactivating disks for %s" % instance.name)
13593 _ShutdownInstanceDisks(self, instance)
13595 if not (compat.all(dresults) and fin_resu):
13598 failures.append("export finalization")
13599 if not compat.all(dresults):
13600 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13602 failures.append("disk export: disk(s) %s" % fdsk)
13604 raise errors.OpExecError("Export failed, errors in %s" %
13605 utils.CommaJoin(failures))
13607 # At this point, the export was successful, we can cleanup/finish
13609 # Remove instance if requested
13610 if self.op.remove_instance:
13611 feedback_fn("Removing instance %s" % instance.name)
13612 _RemoveInstance(self, feedback_fn, instance,
13613 self.op.ignore_remove_failures)
13615 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13616 self._CleanupExports(feedback_fn)
13618 return fin_resu, dresults
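# Example of the return value (hypothetical outcome): a two-disk export that
# finalized correctly returns (True, [True, True]). A False entry in the
# per-disk list, or a false fin_resu, is reported through the OpExecError
# raised earlier and never reaches this return.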
13621 class LUBackupRemove(NoHooksLU):
13622 """Remove exports related to the named instance.
13627 def ExpandNames(self):
13628 self.needed_locks = {}
13629 # We need all nodes to be locked in order for RemoveExport to work, but we
13630 # don't need to lock the instance itself, as nothing will happen to it (and
13631 # we can remove exports also for a removed instance)
13632 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13634 def Exec(self, feedback_fn):
13635 """Remove any export.
13638 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13639 # If the instance was not found we'll try with the name that was passed in.
13640 # This will only work if it was an FQDN, though.
13642 if not instance_name:
13644 instance_name = self.op.instance_name
13646 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13647 exportlist = self.rpc.call_export_list(locked_nodes)
13649 for node in exportlist:
13650 msg = exportlist[node].fail_msg
13652 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13654 if instance_name in exportlist[node].payload:
13656 result = self.rpc.call_export_remove(node, instance_name)
13657 msg = result.fail_msg
13659 logging.error("Could not remove export for instance %s"
13660 " on node %s: %s", instance_name, node, msg)
13662 if fqdn_warn and not found:
13663 feedback_fn("Export not found. If trying to remove an export belonging"
13664 " to a deleted instance please use its Fully Qualified"
13668 class LUGroupAdd(LogicalUnit):
13669 """Logical unit for creating node groups.
13672 HPATH = "group-add"
13673 HTYPE = constants.HTYPE_GROUP
13676 def ExpandNames(self):
13677 # We need the new group's UUID here so that we can create and acquire the
13678 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13679 # that it should not check whether the UUID exists in the configuration.
13680 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13681 self.needed_locks = {}
13682 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13684 def CheckPrereq(self):
13685 """Check prerequisites.
13687 This checks that the given group name is not already an existing node group.
13692 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13693 except errors.OpPrereqError:
13696 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13697 " node group (UUID: %s)" %
13698 (self.op.group_name, existing_uuid),
13699 errors.ECODE_EXISTS)
13701 if self.op.ndparams:
13702 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13704 if self.op.hv_state:
13705 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13707 self.new_hv_state = None
13709 if self.op.disk_state:
13710 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13712 self.new_disk_state = None
13714 if self.op.diskparams:
13715 for templ in constants.DISK_TEMPLATES:
13716 if templ in self.op.diskparams:
13717 utils.ForceDictType(self.op.diskparams[templ],
13718 constants.DISK_DT_TYPES)
13719 self.new_diskparams = self.op.diskparams
13721 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13722 except errors.OpPrereqError, err:
13723 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13724 errors.ECODE_INVAL)
13726 self.new_diskparams = {}
13728 if self.op.ipolicy:
13729 cluster = self.cfg.GetClusterInfo()
13730 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13732 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13733 except errors.ConfigurationError, err:
13734 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13735 errors.ECODE_INVAL)
13737 def BuildHooksEnv(self):
13738 """Build hooks env.
13742 "GROUP_NAME": self.op.group_name,
13745 def BuildHooksNodes(self):
13746 """Build hooks nodes.
13749 mn = self.cfg.GetMasterNode()
13750 return ([mn], [mn])
13752 def Exec(self, feedback_fn):
13753 """Add the node group to the cluster.
13756 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13757 uuid=self.group_uuid,
13758 alloc_policy=self.op.alloc_policy,
13759 ndparams=self.op.ndparams,
13760 diskparams=self.new_diskparams,
13761 ipolicy=self.op.ipolicy,
13762 hv_state_static=self.new_hv_state,
13763 disk_state_static=self.new_disk_state)
13765 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13766 del self.remove_locks[locking.LEVEL_NODEGROUP]
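# Minimal usage sketch (assumed opcode-level interface, hypothetical values):
#   op = opcodes.OpGroupAdd(group_name="rack2",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED)
# Submitting such an opcode ends up here, creating the NodeGroup object with
# the ndparams/diskparams/ipolicy verified in CheckPrereq and registering it
# in the configuration.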
13769 class LUGroupAssignNodes(NoHooksLU):
13770 """Logical unit for assigning nodes to groups.
13775 def ExpandNames(self):
13776 # These raise errors.OpPrereqError on their own:
13777 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13778 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13780 # We want to lock all the affected nodes and groups. We have readily
13781 # available the list of nodes, and the *destination* group. To gather the
13782 # list of "source" groups, we need to fetch node information later on.
13783 self.needed_locks = {
13784 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13785 locking.LEVEL_NODE: self.op.nodes,
13788 def DeclareLocks(self, level):
13789 if level == locking.LEVEL_NODEGROUP:
13790 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13792 # Try to get all affected nodes' groups without having the group or node
13793 # lock yet. Needs verification later in the code flow.
13794 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13796 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13798 def CheckPrereq(self):
13799 """Check prerequisites.
13802 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13803 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13804 frozenset(self.op.nodes))
13806 expected_locks = (set([self.group_uuid]) |
13807 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13808 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13809 if actual_locks != expected_locks:
13810 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13811 " current groups are '%s', used to be '%s'" %
13812 (utils.CommaJoin(expected_locks),
13813 utils.CommaJoin(actual_locks)))
13815 self.node_data = self.cfg.GetAllNodesInfo()
13816 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13817 instance_data = self.cfg.GetAllInstancesInfo()
13819 if self.group is None:
13820 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13821 (self.op.group_name, self.group_uuid))
13823 (new_splits, previous_splits) = \
13824 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13825 for node in self.op.nodes],
13826 self.node_data, instance_data)
13829 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13831 if not self.op.force:
13832 raise errors.OpExecError("The following instances get split by this"
13833 " change and --force was not given: %s" %
13836 self.LogWarning("This operation will split the following instances: %s",
13839 if previous_splits:
13840 self.LogWarning("In addition, these already-split instances continue"
13841 " to be split across groups: %s",
13842 utils.CommaJoin(utils.NiceSort(previous_splits)))
13844 def Exec(self, feedback_fn):
13845 """Assign nodes to a new group.
13848 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13850 self.cfg.AssignGroupNodes(mods)
13853 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13854 """Check for split instances after a node assignment.
13856 This method considers a series of node assignments as an atomic operation,
13857 and returns information about split instances after applying the set of changes.
13860 In particular, it returns information about newly split instances, and
13861 instances that were already split, and remain so after the change.
13863 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
13866 @type changes: list of (node_name, new_group_uuid) pairs.
13867 @param changes: list of node assignments to consider.
13868 @param node_data: a dict with data for all nodes
13869 @param instance_data: a dict with all instances to consider
13870 @rtype: a two-tuple
13871 @return: a list of instances that were previously okay and become split as a
13872 consequence of this change, and a list of instances that were previously
13873 split and this change does not fix.
13876 changed_nodes = dict((node, group) for node, group in changes
13877 if node_data[node].group != group)
13879 all_split_instances = set()
13880 previously_split_instances = set()
13882 def InstanceNodes(instance):
13883 return [instance.primary_node] + list(instance.secondary_nodes)
13885 for inst in instance_data.values():
13886 if inst.disk_template not in constants.DTS_INT_MIRROR:
13889 instance_nodes = InstanceNodes(inst)
13891 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13892 previously_split_instances.add(inst.name)
13894 if len(set(changed_nodes.get(node, node_data[node].group)
13895 for node in instance_nodes)) > 1:
13896 all_split_instances.add(inst.name)
13898 return (list(all_split_instances - previously_split_instances),
13899 list(previously_split_instances & all_split_instances))
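# Worked example (hypothetical cluster layout): assume nodes n1 and n2 in
# group A, node n3 in group B, and a DRBD instance "inst1" with primary n1
# and secondary n2. For changes = [("n2", <uuid of B>)] the instance's nodes
# would end up in two different groups, so "inst1" is returned in the first
# list (newly split). Had it already spanned A and B before the change and
# still did afterwards, it would appear in the second list instead.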
13902 class _GroupQuery(_QueryBase):
13903 FIELDS = query.GROUP_FIELDS
13905 def ExpandNames(self, lu):
13906 lu.needed_locks = {}
13908 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13909 self._cluster = lu.cfg.GetClusterInfo()
13910 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13913 self.wanted = [name_to_uuid[name]
13914 for name in utils.NiceSort(name_to_uuid.keys())]
13916 # Accept names to be either names or UUIDs.
13919 all_uuid = frozenset(self._all_groups.keys())
13921 for name in self.names:
13922 if name in all_uuid:
13923 self.wanted.append(name)
13924 elif name in name_to_uuid:
13925 self.wanted.append(name_to_uuid[name])
13927 missing.append(name)
13930 raise errors.OpPrereqError("Some groups do not exist: %s" %
13931 utils.CommaJoin(missing),
13932 errors.ECODE_NOENT)
13934 def DeclareLocks(self, lu, level):
13937 def _GetQueryData(self, lu):
13938 """Computes the list of node groups and their attributes.
13941 do_nodes = query.GQ_NODE in self.requested_data
13942 do_instances = query.GQ_INST in self.requested_data
13944 group_to_nodes = None
13945 group_to_instances = None
13947 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13948 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13949 # latter GetAllInstancesInfo() is not enough, for we have to go through
13950 # instance->node. Hence, we will need to process nodes even if we only need
13951 # instance information.
13952 if do_nodes or do_instances:
13953 all_nodes = lu.cfg.GetAllNodesInfo()
13954 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13957 for node in all_nodes.values():
13958 if node.group in group_to_nodes:
13959 group_to_nodes[node.group].append(node.name)
13960 node_to_group[node.name] = node.group
13963 all_instances = lu.cfg.GetAllInstancesInfo()
13964 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13966 for instance in all_instances.values():
13967 node = instance.primary_node
13968 if node in node_to_group:
13969 group_to_instances[node_to_group[node]].append(instance.name)
13972 # Do not pass on node information if it was not requested.
13973 group_to_nodes = None
13975 return query.GroupQueryData(self._cluster,
13976 [self._all_groups[uuid]
13977 for uuid in self.wanted],
13978 group_to_nodes, group_to_instances,
13979 query.GQ_DISKPARAMS in self.requested_data)
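# Shape of the optional mappings computed above (identifiers are examples):
#   group_to_nodes     = {"<group-uuid>": ["node1.example.com", ...]}
#   group_to_instances = {"<group-uuid>": ["inst1.example.com", ...]}
# Either mapping stays None when the corresponding GQ_NODE/GQ_INST data was
# not requested, and query.GroupQueryData receives None in that case.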
13982 class LUGroupQuery(NoHooksLU):
13983 """Logical unit for querying node groups.
13988 def CheckArguments(self):
13989 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13990 self.op.output_fields, False)
13992 def ExpandNames(self):
13993 self.gq.ExpandNames(self)
13995 def DeclareLocks(self, level):
13996 self.gq.DeclareLocks(self, level)
13998 def Exec(self, feedback_fn):
13999 return self.gq.OldStyleQuery(self)
14002 class LUGroupSetParams(LogicalUnit):
14003 """Modifies the parameters of a node group.
14006 HPATH = "group-modify"
14007 HTYPE = constants.HTYPE_GROUP
14010 def CheckArguments(self):
14013 self.op.diskparams,
14014 self.op.alloc_policy,
14016 self.op.disk_state,
14020 if all_changes.count(None) == len(all_changes):
14021 raise errors.OpPrereqError("Please pass at least one modification",
14022 errors.ECODE_INVAL)
14024 def ExpandNames(self):
14025 # This raises errors.OpPrereqError on its own:
14026 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14028 self.needed_locks = {
14029 locking.LEVEL_INSTANCE: [],
14030 locking.LEVEL_NODEGROUP: [self.group_uuid],
14033 self.share_locks[locking.LEVEL_INSTANCE] = 1
14035 def DeclareLocks(self, level):
14036 if level == locking.LEVEL_INSTANCE:
14037 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14039 # Lock instances optimistically, needs verification once group lock has
14041 self.needed_locks[locking.LEVEL_INSTANCE] = \
14042 self.cfg.GetNodeGroupInstances(self.group_uuid)
14045 def _UpdateAndVerifyDiskParams(old, new):
14046 """Updates and verifies disk parameters.
14049 new_params = _GetUpdatedParams(old, new)
14050 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14053 def CheckPrereq(self):
14054 """Check prerequisites.
14057 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14059 # Check if locked instances are still correct
14060 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14062 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14063 cluster = self.cfg.GetClusterInfo()
14065 if self.group is None:
14066 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14067 (self.op.group_name, self.group_uuid))
14069 if self.op.ndparams:
14070 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14071 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14072 self.new_ndparams = new_ndparams
14074 if self.op.diskparams:
14075 diskparams = self.group.diskparams
14076 uavdp = self._UpdateAndVerifyDiskParams
14077 # For each disktemplate subdict update and verify the values
14078 new_diskparams = dict((dt,
14079 uavdp(diskparams.get(dt, {}),
14080 self.op.diskparams[dt]))
14081 for dt in constants.DISK_TEMPLATES
14082 if dt in self.op.diskparams)
14083 # Now that all subdicts of diskparams are ready, let's merge the actual
14084 # dict with all updated subdicts
14085 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14087 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14088 except errors.OpPrereqError, err:
14089 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14090 errors.ECODE_INVAL)
14092 if self.op.hv_state:
14093 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14094 self.group.hv_state_static)
14096 if self.op.disk_state:
14097 self.new_disk_state = \
14098 _MergeAndVerifyDiskState(self.op.disk_state,
14099 self.group.disk_state_static)
14101 if self.op.ipolicy:
14102 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14106 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14107 inst_filter = lambda inst: inst.name in owned_instances
14108 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14110 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14112 new_ipolicy, instances)
14115 self.LogWarning("After the ipolicy change the following instances"
14116 " violate them: %s",
14117 utils.CommaJoin(violations))
14119 def BuildHooksEnv(self):
14120 """Build hooks env.
14124 "GROUP_NAME": self.op.group_name,
14125 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14128 def BuildHooksNodes(self):
14129 """Build hooks nodes.
14132 mn = self.cfg.GetMasterNode()
14133 return ([mn], [mn])
14135 def Exec(self, feedback_fn):
14136 """Modifies the node group.
14141 if self.op.ndparams:
14142 self.group.ndparams = self.new_ndparams
14143 result.append(("ndparams", str(self.group.ndparams)))
14145 if self.op.diskparams:
14146 self.group.diskparams = self.new_diskparams
14147 result.append(("diskparams", str(self.group.diskparams)))
14149 if self.op.alloc_policy:
14150 self.group.alloc_policy = self.op.alloc_policy
14152 if self.op.hv_state:
14153 self.group.hv_state_static = self.new_hv_state
14155 if self.op.disk_state:
14156 self.group.disk_state_static = self.new_disk_state
14158 if self.op.ipolicy:
14159 self.group.ipolicy = self.new_ipolicy
14161 self.cfg.Update(self.group, feedback_fn)
14165 class LUGroupRemove(LogicalUnit):
14166 HPATH = "group-remove"
14167 HTYPE = constants.HTYPE_GROUP
14170 def ExpandNames(self):
14171 # This raises errors.OpPrereqError on its own:
14172 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14173 self.needed_locks = {
14174 locking.LEVEL_NODEGROUP: [self.group_uuid],
14177 def CheckPrereq(self):
14178 """Check prerequisites.
14180 This checks that the given group name exists as a node group, that it is
14181 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
14185 # Verify that the group is empty.
14186 group_nodes = [node.name
14187 for node in self.cfg.GetAllNodesInfo().values()
14188 if node.group == self.group_uuid]
14191 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14193 (self.op.group_name,
14194 utils.CommaJoin(utils.NiceSort(group_nodes))),
14195 errors.ECODE_STATE)
14197 # Verify the cluster would not be left group-less.
14198 if len(self.cfg.GetNodeGroupList()) == 1:
14199 raise errors.OpPrereqError("Group '%s' is the only group,"
14200 " cannot be removed" %
14201 self.op.group_name,
14202 errors.ECODE_STATE)
14204 def BuildHooksEnv(self):
14205 """Build hooks env.
14209 "GROUP_NAME": self.op.group_name,
14212 def BuildHooksNodes(self):
14213 """Build hooks nodes.
14216 mn = self.cfg.GetMasterNode()
14217 return ([mn], [mn])
14219 def Exec(self, feedback_fn):
14220 """Remove the node group.
14224 self.cfg.RemoveNodeGroup(self.group_uuid)
14225 except errors.ConfigurationError:
14226 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14227 (self.op.group_name, self.group_uuid))
14229 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14232 class LUGroupRename(LogicalUnit):
14233 HPATH = "group-rename"
14234 HTYPE = constants.HTYPE_GROUP
14237 def ExpandNames(self):
14238 # This raises errors.OpPrereqError on its own:
14239 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14241 self.needed_locks = {
14242 locking.LEVEL_NODEGROUP: [self.group_uuid],
14245 def CheckPrereq(self):
14246 """Check prerequisites.
14248 Ensures requested new name is not yet used.
14252 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14253 except errors.OpPrereqError:
14256 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14257 " node group (UUID: %s)" %
14258 (self.op.new_name, new_name_uuid),
14259 errors.ECODE_EXISTS)
14261 def BuildHooksEnv(self):
14262 """Build hooks env.
14266 "OLD_NAME": self.op.group_name,
14267 "NEW_NAME": self.op.new_name,
14270 def BuildHooksNodes(self):
14271 """Build hooks nodes.
14274 mn = self.cfg.GetMasterNode()
14276 all_nodes = self.cfg.GetAllNodesInfo()
14277 all_nodes.pop(mn, None)
14280 run_nodes.extend(node.name for node in all_nodes.values()
14281 if node.group == self.group_uuid)
14283 return (run_nodes, run_nodes)
14285 def Exec(self, feedback_fn):
14286 """Rename the node group.
14289 group = self.cfg.GetNodeGroup(self.group_uuid)
14292 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14293 (self.op.group_name, self.group_uuid))
14295 group.name = self.op.new_name
14296 self.cfg.Update(group, feedback_fn)
14298 return self.op.new_name
14301 class LUGroupEvacuate(LogicalUnit):
14302 HPATH = "group-evacuate"
14303 HTYPE = constants.HTYPE_GROUP
14306 def ExpandNames(self):
14307 # This raises errors.OpPrereqError on its own:
14308 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14310 if self.op.target_groups:
14311 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14312 self.op.target_groups)
14314 self.req_target_uuids = []
14316 if self.group_uuid in self.req_target_uuids:
14317 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14318 " as a target group (targets are %s)" %
14320 utils.CommaJoin(self.req_target_uuids)),
14321 errors.ECODE_INVAL)
14323 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14325 self.share_locks = _ShareAll()
14326 self.needed_locks = {
14327 locking.LEVEL_INSTANCE: [],
14328 locking.LEVEL_NODEGROUP: [],
14329 locking.LEVEL_NODE: [],
14332 def DeclareLocks(self, level):
14333 if level == locking.LEVEL_INSTANCE:
14334 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14336 # Lock instances optimistically, needs verification once node and group
14337 # locks have been acquired
14338 self.needed_locks[locking.LEVEL_INSTANCE] = \
14339 self.cfg.GetNodeGroupInstances(self.group_uuid)
14341 elif level == locking.LEVEL_NODEGROUP:
14342 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14344 if self.req_target_uuids:
14345 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14347 # Lock all groups used by instances optimistically; this requires going
14348 # via the node before it's locked, requiring verification later on
14349 lock_groups.update(group_uuid
14350 for instance_name in
14351 self.owned_locks(locking.LEVEL_INSTANCE)
14353 self.cfg.GetInstanceNodeGroups(instance_name))
14355 # No target groups, need to lock all of them
14356 lock_groups = locking.ALL_SET
14358 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14360 elif level == locking.LEVEL_NODE:
14361 # This will only lock the nodes in the group to be evacuated which
14362 # contain actual instances
14363 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14364 self._LockInstancesNodes()
14366 # Lock all nodes in group to be evacuated and target groups
14367 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14368 assert self.group_uuid in owned_groups
14369 member_nodes = [node_name
14370 for group in owned_groups
14371 for node_name in self.cfg.GetNodeGroup(group).members]
14372 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14374 def CheckPrereq(self):
14375 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14376 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14377 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14379 assert owned_groups.issuperset(self.req_target_uuids)
14380 assert self.group_uuid in owned_groups
14382 # Check if locked instances are still correct
14383 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14385 # Get instance information
14386 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14388 # Check if node groups for locked instances are still correct
14389 _CheckInstancesNodeGroups(self.cfg, self.instances,
14390 owned_groups, owned_nodes, self.group_uuid)
14392 if self.req_target_uuids:
14393 # User requested specific target groups
14394 self.target_uuids = self.req_target_uuids
14396 # All groups except the one to be evacuated are potential targets
14397 self.target_uuids = [group_uuid for group_uuid in owned_groups
14398 if group_uuid != self.group_uuid]
14400 if not self.target_uuids:
14401 raise errors.OpPrereqError("There are no possible target groups",
14402 errors.ECODE_INVAL)
14404 def BuildHooksEnv(self):
14405 """Build hooks env.
14409 "GROUP_NAME": self.op.group_name,
14410 "TARGET_GROUPS": " ".join(self.target_uuids),
14413 def BuildHooksNodes(self):
14414 """Build hooks nodes.
14417 mn = self.cfg.GetMasterNode()
14419 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14421 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14423 return (run_nodes, run_nodes)
14425 def Exec(self, feedback_fn):
14426 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14428 assert self.group_uuid not in self.target_uuids
14430 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14431 instances=instances, target_groups=self.target_uuids)
14433 ial.Run(self.op.iallocator)
14435 if not ial.success:
14436 raise errors.OpPrereqError("Can't compute group evacuation using"
14437 " iallocator '%s': %s" %
14438 (self.op.iallocator, ial.info),
14439 errors.ECODE_NORES)
14441 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14443 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14444 len(jobs), self.op.group_name)
14446 return ResultWithJobs(jobs)
14449 class TagsLU(NoHooksLU): # pylint: disable=W0223
14450 """Generic tags LU.
14452 This is an abstract class which is the parent of all the other tags LUs.
14455 def ExpandNames(self):
14456 self.group_uuid = None
14457 self.needed_locks = {}
14459 if self.op.kind == constants.TAG_NODE:
14460 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14461 lock_level = locking.LEVEL_NODE
14462 lock_name = self.op.name
14463 elif self.op.kind == constants.TAG_INSTANCE:
14464 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14465 lock_level = locking.LEVEL_INSTANCE
14466 lock_name = self.op.name
14467 elif self.op.kind == constants.TAG_NODEGROUP:
14468 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14469 lock_level = locking.LEVEL_NODEGROUP
14470 lock_name = self.group_uuid
14475 if lock_level and getattr(self.op, "use_locking", True):
14476 self.needed_locks[lock_level] = lock_name
14478 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14479 # not possible to acquire the BGL based on opcode parameters)
14481 def CheckPrereq(self):
14482 """Check prerequisites.
14485 if self.op.kind == constants.TAG_CLUSTER:
14486 self.target = self.cfg.GetClusterInfo()
14487 elif self.op.kind == constants.TAG_NODE:
14488 self.target = self.cfg.GetNodeInfo(self.op.name)
14489 elif self.op.kind == constants.TAG_INSTANCE:
14490 self.target = self.cfg.GetInstanceInfo(self.op.name)
14491 elif self.op.kind == constants.TAG_NODEGROUP:
14492 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14494 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14495 str(self.op.kind), errors.ECODE_INVAL)
14498 class LUTagsGet(TagsLU):
14499 """Returns the tags of a given object.
14504 def ExpandNames(self):
14505 TagsLU.ExpandNames(self)
14507 # Share locks as this is only a read operation
14508 self.share_locks = _ShareAll()
14510 def Exec(self, feedback_fn):
14511 """Returns the tag list.
14514 return list(self.target.GetTags())
14517 class LUTagsSearch(NoHooksLU):
14518 """Searches the tags for a given pattern.
14523 def ExpandNames(self):
14524 self.needed_locks = {}
14526 def CheckPrereq(self):
14527 """Check prerequisites.
14529 This checks the pattern passed for validity by compiling it.
14533 self.re = re.compile(self.op.pattern)
14534 except re.error, err:
14535 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14536 (self.op.pattern, err), errors.ECODE_INVAL)
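# Example (hypothetical pattern and tag): with self.op.pattern = "^staging",
# the regexp compiled above is matched in Exec against every tag of the
# cluster, nodes, instances and node groups, yielding (path, tag) pairs such
# as ("/instances/web1.example.com", "staging").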
14538 def Exec(self, feedback_fn):
14539 """Returns the tag list.
14543 tgts = [("/cluster", cfg.GetClusterInfo())]
14544 ilist = cfg.GetAllInstancesInfo().values()
14545 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14546 nlist = cfg.GetAllNodesInfo().values()
14547 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14548 tgts.extend(("/nodegroup/%s" % n.name, n)
14549 for n in cfg.GetAllNodeGroupsInfo().values())
14551 for path, target in tgts:
14552 for tag in target.GetTags():
14553 if self.re.search(tag):
14554 results.append((path, tag))
14558 class LUTagsSet(TagsLU):
14559 """Sets a tag on a given object.
14564 def CheckPrereq(self):
14565 """Check prerequisites.
14567 This checks the type and length of the tag name and value.
14570 TagsLU.CheckPrereq(self)
14571 for tag in self.op.tags:
14572 objects.TaggableObject.ValidateTag(tag)
14574 def Exec(self, feedback_fn):
14579 for tag in self.op.tags:
14580 self.target.AddTag(tag)
14581 except errors.TagError, err:
14582 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14583 self.cfg.Update(self.target, feedback_fn)
14586 class LUTagsDel(TagsLU):
14587 """Delete a list of tags from a given object.
14592 def CheckPrereq(self):
14593 """Check prerequisites.
14595 This checks that we have the given tag.
14598 TagsLU.CheckPrereq(self)
14599 for tag in self.op.tags:
14600 objects.TaggableObject.ValidateTag(tag)
14601 del_tags = frozenset(self.op.tags)
14602 cur_tags = self.target.GetTags()
14604 diff_tags = del_tags - cur_tags
14605 if diff_tags:
14606 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14607 raise errors.OpPrereqError("Tag(s) %s not found" %
14608 (utils.CommaJoin(diff_names), ),
14609 errors.ECODE_NOENT)
14611 def Exec(self, feedback_fn):
14612 """Remove the tag from the object.
14615 for tag in self.op.tags:
14616 self.target.RemoveTag(tag)
14617 self.cfg.Update(self.target, feedback_fn)
14620 class LUTestDelay(NoHooksLU):
14621 """Sleep for a specified amount of time.
14623 This LU sleeps on the master and/or nodes for a specified amount of
14624 time.
14629 def ExpandNames(self):
14630 """Expand names and set required locks.
14632 This expands the node list, if any.
14635 self.needed_locks = {}
14636 if self.op.on_nodes:
14637 # _GetWantedNodes can be used here, but is not always appropriate to use
14638 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14639 # more information.
14640 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14641 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14643 def _TestDelay(self):
14644 """Do the actual sleep.
14647 if self.op.on_master:
14648 if not utils.TestDelay(self.op.duration):
14649 raise errors.OpExecError("Error during master delay test")
14650 if self.op.on_nodes:
14651 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14652 for node, node_result in result.items():
14653 node_result.Raise("Failure during rpc call to node %s" % node)
14655 def Exec(self, feedback_fn):
14656 """Execute the test delay opcode, with the wanted repetitions.
14659 if self.op.repeat == 0:
14660 self._TestDelay()
14661 else:
14662 top_value = self.op.repeat - 1
14663 for i in range(self.op.repeat):
14664 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14665 self._TestDelay()
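# Usage note (not part of the original source): this opcode is what the
# "gnt-debug delay" command submits; repeat=0 runs a single delay, while any
# other value repeats the sleep that many times, as implemented above.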
14668 class LUTestJqueue(NoHooksLU):
14669 """Utility LU to test some aspects of the job queue.
14674 # Must be lower than default timeout for WaitForJobChange to see whether it
14675 # notices changed jobs
14676 _CLIENT_CONNECT_TIMEOUT = 20.0
14677 _CLIENT_CONFIRM_TIMEOUT = 60.0
14679 @classmethod
14680 def _NotifyUsingSocket(cls, cb, errcls):
14681 """Opens a Unix socket and waits for another program to connect.
14684 @param cb: Callback to send socket name to client
14685 @type errcls: class
14686 @param errcls: Exception class to use for errors
14689 # Using a temporary directory as there's no easy way to create temporary
14690 # sockets without writing a custom loop around tempfile.mktemp and
14692 tmpdir = tempfile.mkdtemp()
14693 try:
14694 tmpsock = utils.PathJoin(tmpdir, "sock")
14696 logging.debug("Creating temporary socket at %s", tmpsock)
14697 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14698 try:
14699 sock.bind(tmpsock)
14700 sock.listen(1)
14702 # Send details to client
14703 cb(tmpsock)
14705 # Wait for client to connect before continuing
14706 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14707 try:
14708 (conn, _) = sock.accept()
14709 except socket.error, err:
14710 raise errcls("Client didn't connect in time (%s)" % err)
14711 finally:
14712 sock.close()
14713 finally:
14714 # Remove as soon as client is connected
14715 shutil.rmtree(tmpdir)
14717 # Wait for client to close
14718 try:
14719 try:
14720 # pylint: disable=E1101
14721 # Instance of '_socketobject' has no ... member
14722 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14723 conn.recv(1)
14724 except socket.error, err:
14725 raise errcls("Client failed to confirm notification (%s)" % err)
14726 finally:
14727 conn.close()
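# Illustrative client-side sketch (an assumption, not part of the original
# source): a test client that received the socket path through the job-queue
# log message could acknowledge the notification roughly like this:
#
#   import socket
#   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   client.connect(sockname)  # sockname is the path sent via cb(tmpsock)
#   client.close()            # closing the connection completes the recv(1)
#                             # wait above and confirms the notification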
14729 def _SendNotification(self, test, arg, sockname):
14730 """Sends a notification to the client.
14733 @param test: Test name
14734 @param arg: Test argument (depends on test)
14735 @type sockname: string
14736 @param sockname: Socket path
14739 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14741 def _Notify(self, prereq, test, arg):
14742 """Notifies the client of a test.
14745 @param prereq: Whether this is a prereq-phase test
14747 @param test: Test name
14748 @param arg: Test argument (depends on test)
14750 """
14751 if prereq:
14752 errcls = errors.OpPrereqError
14753 else:
14754 errcls = errors.OpExecError
14756 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14757 test, arg),
14758 errcls)
14760 def CheckArguments(self):
14761 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14762 self.expandnames_calls = 0
14764 def ExpandNames(self):
14765 checkargs_calls = getattr(self, "checkargs_calls", 0)
14766 if checkargs_calls < 1:
14767 raise errors.ProgrammerError("CheckArguments was not called")
14769 self.expandnames_calls += 1
14771 if self.op.notify_waitlock:
14772 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14774 self.LogInfo("Expanding names")
14776 # Get lock on master node (just to get a lock, not for a particular reason)
14777 self.needed_locks = {
14778 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14779 }
14781 def Exec(self, feedback_fn):
14782 if self.expandnames_calls < 1:
14783 raise errors.ProgrammerError("ExpandNames was not called")
14785 if self.op.notify_exec:
14786 self._Notify(False, constants.JQT_EXEC, None)
14788 self.LogInfo("Executing")
14790 if self.op.log_messages:
14791 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14792 for idx, msg in enumerate(self.op.log_messages):
14793 self.LogInfo("Sending log message %s", idx + 1)
14794 feedback_fn(constants.JQT_MSGPREFIX + msg)
14795 # Report how many test messages have been sent
14796 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14798 if self.op.fail:
14799 raise errors.OpExecError("Opcode failure was requested")
14801 return True
14804 class IAllocator(object):
14805 """IAllocator framework.
14807 An IAllocator instance has four sets of attributes:
14808 - cfg that is needed to query the cluster
14809 - input data (all members of the _KEYS class attribute are required)
14810 - four buffer attributes (in|out_data|text), that represent the
14811 input (to the external script) in text and data structure format,
14812 and the output from it, again in two formats
14813 - the result variables from the script (success, info, nodes) for
14814 easy usage
14816 """
14817 # pylint: disable=R0902
14818 # lots of instance attributes
14820 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14821 self.cfg = cfg
14822 self.rpc = rpc_runner
self.mode = mode
14823 # init buffer variables
14824 self.in_text = self.out_text = self.in_data = self.out_data = None
14825 # init all input fields so that pylint is happy
14827 self.memory = self.disks = self.disk_template = self.spindle_use = None
14828 self.os = self.tags = self.nics = self.vcpus = None
14829 self.hypervisor = None
14830 self.relocate_from = None
14832 self.instances = None
14833 self.evac_mode = None
14834 self.target_groups = []
14836 self.required_nodes = None
14837 # init result fields
14838 self.success = self.info = self.result = None
14840 try:
14841 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14842 except KeyError:
14843 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14844 " IAllocator" % self.mode)
14846 keyset = [n for (n, _) in keydata]
14848 for key in kwargs:
14849 if key not in keyset:
14850 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14851 " IAllocator" % key)
14852 setattr(self, key, kwargs[key])
14854 for key in keyset:
14855 if key not in kwargs:
14856 raise errors.ProgrammerError("Missing input parameter '%s' to"
14857 " IAllocator" % key)
14858 self._BuildInputData(compat.partial(fn, self), keydata)
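# Illustrative sketch (an assumption; instance and node names are made up):
# callers construct an IAllocator with the keyword arguments required by the
# keydata list registered for the chosen mode in _MODE_DATA, for example a
# relocation request:
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")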
14860 def _ComputeClusterData(self):
14861 """Compute the generic allocator input data.
14863 This is the data that is independent of the actual operation.
14865 """
14866 cfg = self.cfg
14867 cluster_info = cfg.GetClusterInfo()
14869 data = {
14870 "version": constants.IALLOCATOR_VERSION,
14871 "cluster_name": cfg.GetClusterName(),
14872 "cluster_tags": list(cluster_info.GetTags()),
14873 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14874 "ipolicy": cluster_info.ipolicy,
14875 }
14876 ninfo = cfg.GetAllNodesInfo()
14877 iinfo = cfg.GetAllInstancesInfo().values()
14878 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14881 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14883 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14884 hypervisor_name = self.hypervisor
14885 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14886 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14887 else:
14888 hypervisor_name = cluster_info.primary_hypervisor
14890 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14891 [hypervisor_name])
14892 node_iinfo = \
14893 self.rpc.call_all_instances_info(node_list,
14894 cluster_info.enabled_hypervisors)
14896 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14898 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14899 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14900 i_list, config_ndata)
14901 assert len(data["nodes"]) == len(ninfo), \
14902 "Incomplete node data computed"
14904 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14906 self.in_data = data
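# For orientation (reconstructed from the assignments above, not a verbatim
# copy of the iallocator protocol documentation): self.in_data now holds
# roughly
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": ..., "cluster_tags": [...],
#    "enabled_hypervisors": [...], "ipolicy": {...},
#    "nodegroups": {...},   # from _ComputeNodeGroupData
#    "nodes": {...},        # static + dynamic per-node data
#    "instances": {...}}    # from _ComputeInstanceData
# and _BuildInputData later adds the mode-specific "request" key.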
14908 @staticmethod
14909 def _ComputeNodeGroupData(cfg):
14910 """Compute node groups data.
14913 cluster = cfg.GetClusterInfo()
14914 ng = dict((guuid, {
14915 "name": gdata.name,
14916 "alloc_policy": gdata.alloc_policy,
14917 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14919 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14924 def _ComputeBasicNodeData(cfg, node_cfg):
14925 """Compute global node data.
14928 @returns: a dict of name: (node dict, node config)
14931 # fill in static (config-based) values
14932 node_results = dict((ninfo.name, {
14933 "tags": list(ninfo.GetTags()),
14934 "primary_ip": ninfo.primary_ip,
14935 "secondary_ip": ninfo.secondary_ip,
14936 "offline": ninfo.offline,
14937 "drained": ninfo.drained,
14938 "master_candidate": ninfo.master_candidate,
14939 "group": ninfo.group,
14940 "master_capable": ninfo.master_capable,
14941 "vm_capable": ninfo.vm_capable,
14942 "ndparams": cfg.GetNdParams(ninfo),
14943 })
14944 for ninfo in node_cfg.values())
14946 return node_results
14948 @staticmethod
14949 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14950 node_results):
14951 """Compute global node data.
14953 @param node_results: the basic node structures as filled from the config
14956 #TODO(dynmem): compute the right data on MAX and MIN memory
14957 # make a copy of the current dict
14958 node_results = dict(node_results)
14959 for nname, nresult in node_data.items():
14960 assert nname in node_results, "Missing basic data for node %s" % nname
14961 ninfo = node_cfg[nname]
14963 if not (ninfo.offline or ninfo.drained):
14964 nresult.Raise("Can't get data for node %s" % nname)
14965 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14966 nname)
14967 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14969 for attr in ["memory_total", "memory_free", "memory_dom0",
14970 "vg_size", "vg_free", "cpu_total"]:
14971 if attr not in remote_info:
14972 raise errors.OpExecError("Node '%s' didn't return attribute"
14973 " '%s'" % (nname, attr))
14974 if not isinstance(remote_info[attr], int):
14975 raise errors.OpExecError("Node '%s' returned invalid value"
14976 " for '%s': %s" %
14977 (nname, attr, remote_info[attr]))
14978 # compute memory used by primary instances
14979 i_p_mem = i_p_up_mem = 0
14980 for iinfo, beinfo in i_list:
14981 if iinfo.primary_node == nname:
14982 i_p_mem += beinfo[constants.BE_MAXMEM]
14983 if iinfo.name not in node_iinfo[nname].payload:
14984 i_used_mem = 0
14985 else:
14986 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14987 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14988 remote_info["memory_free"] -= max(0, i_mem_diff)
14990 if iinfo.admin_state == constants.ADMINST_UP:
14991 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14993 # compute memory used by instances
14995 "total_memory": remote_info["memory_total"],
14996 "reserved_memory": remote_info["memory_dom0"],
14997 "free_memory": remote_info["memory_free"],
14998 "total_disk": remote_info["vg_size"],
14999 "free_disk": remote_info["vg_free"],
15000 "total_cpus": remote_info["cpu_total"],
15001 "i_pri_memory": i_p_mem,
15002 "i_pri_up_memory": i_p_up_mem,
15003 }
15004 pnr_dyn.update(node_results[nname])
15005 node_results[nname] = pnr_dyn
15007 return node_results
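# Illustrative example of a single online node entry (keys taken from the
# code above, values made up):
#   node_results["node1.example.com"] == {
#     "tags": [], "primary_ip": "192.0.2.10", "secondary_ip": "198.51.100.10",
#     "offline": False, "drained": False, "master_candidate": True,
#     "group": "<group uuid>", "master_capable": True, "vm_capable": True,
#     "ndparams": {...},
#     "total_memory": 32768, "reserved_memory": 1024, "free_memory": 20480,
#     "total_disk": 512000, "free_disk": 256000, "total_cpus": 8,
#     "i_pri_memory": 8192, "i_pri_up_memory": 4096,
#   }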
15009 @staticmethod
15010 def _ComputeInstanceData(cluster_info, i_list):
15011 """Compute global instance data.
15013 """
15014 instance_data = {}
15015 for iinfo, beinfo in i_list:
15016 nic_data = []
15017 for nic in iinfo.nics:
15018 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15019 nic_dict = {
15020 "mac": nic.mac,
15021 "ip": nic.ip,
15022 "mode": filled_params[constants.NIC_MODE],
15023 "link": filled_params[constants.NIC_LINK],
15024 }
15025 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15026 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15027 nic_data.append(nic_dict)
15029 "tags": list(iinfo.GetTags()),
15030 "admin_state": iinfo.admin_state,
15031 "vcpus": beinfo[constants.BE_VCPUS],
15032 "memory": beinfo[constants.BE_MAXMEM],
15033 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15035 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15037 "disks": [{constants.IDISK_SIZE: dsk.size,
15038 constants.IDISK_MODE: dsk.mode}
15039 for dsk in iinfo.disks],
15040 "disk_template": iinfo.disk_template,
15041 "hypervisor": iinfo.hypervisor,
15043 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15045 instance_data[iinfo.name] = pir
15047 return instance_data
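# Illustrative example of one instance entry (keys taken from the code above,
# values made up):
#   instance_data["inst1.example.com"] == {
#     "tags": [], "admin_state": "up", "vcpus": 2, "memory": 2048,
#     "spindle_use": 1, "os": "debian-image",
#     "nodes": ["node1.example.com", "node2.example.com"],
#     "nics": [{"mac": "aa:00:00:11:22:33", "ip": None,
#               "mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd", "hypervisor": "xen-pvm",
#     "disk_space_total": 10368,
#   }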
15049 def _AddNewInstance(self):
15050 """Add new instance data to allocator structure.
15052 This in combination with _AllocatorGetClusterData will create the
15053 correct structure needed as input for the allocator.
15055 The checks for the completeness of the opcode must have already been
15056 done.
15058 """
15059 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15061 if self.disk_template in constants.DTS_INT_MIRROR:
15062 self.required_nodes = 2
15063 else:
15064 self.required_nodes = 1
15068 "disk_template": self.disk_template,
15071 "vcpus": self.vcpus,
15072 "memory": self.memory,
15073 "spindle_use": self.spindle_use,
15074 "disks": self.disks,
15075 "disk_space_total": disk_space,
15077 "required_nodes": self.required_nodes,
15078 "hypervisor": self.hypervisor,
15083 def _AddRelocateInstance(self):
15084 """Add relocate instance data to allocator structure.
15086 This in combination with _IAllocatorGetClusterData will create the
15087 correct structure needed as input for the allocator.
15089 The checks for the completeness of the opcode must have already been
15090 done.
15092 """
15093 instance = self.cfg.GetInstanceInfo(self.name)
15094 if instance is None:
15095 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15096 " IAllocator" % self.name)
15098 if instance.disk_template not in constants.DTS_MIRRORED:
15099 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15100 errors.ECODE_INVAL)
15102 if instance.disk_template in constants.DTS_INT_MIRROR and \
15103 len(instance.secondary_nodes) != 1:
15104 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15105 errors.ECODE_STATE)
15107 self.required_nodes = 1
15108 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15109 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15111 request = {
15112 "name": self.name,
15113 "disk_space_total": disk_space,
15114 "required_nodes": self.required_nodes,
15115 "relocate_from": self.relocate_from,
15116 }
15118 return request
15119 def _AddNodeEvacuate(self):
15120 """Get data for node-evacuate requests.
15124 "instances": self.instances,
15125 "evac_mode": self.evac_mode,
15128 def _AddChangeGroup(self):
15129 """Get data for node-evacuate requests.
15133 "instances": self.instances,
15134 "target_groups": self.target_groups,
15137 def _BuildInputData(self, fn, keydata):
15138 """Build input data structures.
15141 self._ComputeClusterData()
15144 request["type"] = self.mode
15145 for keyname, keytype in keydata:
15146 if keyname not in request:
15147 raise errors.ProgrammerError("Request parameter %s is missing" %
15148 keyname)
15149 val = request[keyname]
15150 if not keytype(val):
15151 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15152 " validation, value %s, expected"
15153 " type %s" % (keyname, val, keytype))
15154 self.in_data["request"] = request
15156 self.in_text = serializer.Dump(self.in_data)
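# Illustrative sketch (an assumption; the exact field values depend on the
# iallocator protocol version and the made-up example data): for an
# allocation the serialized text sent to the external script contains the
# cluster data computed above plus something like
#   "request": {"type": constants.IALLOCATOR_MODE_ALLOC,
#               "name": "inst1.example.com", "required_nodes": 2,
#               "disk_space_total": 10368, "memory": 2048, "vcpus": 2, ...}
# serialized with serializer.Dump.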
15158 _STRING_LIST = ht.TListOf(ht.TString)
15159 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15160 # pylint: disable=E1101
15161 # Class '...' has no 'OP_ID' member
15162 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15163 opcodes.OpInstanceMigrate.OP_ID,
15164 opcodes.OpInstanceReplaceDisks.OP_ID])
15165 })))
15167 _NEVAC_MOVED = \
15168 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15169 ht.TItems([ht.TNonEmptyString,
15170 ht.TNonEmptyString,
15171 ht.TListOf(ht.TNonEmptyString),
15172 ])))
15173 _NEVAC_FAILED = \
15174 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15175 ht.TItems([ht.TNonEmptyString,
15176 ht.TMaybeString,
15177 ])))
15178 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15179 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15181 _MODE_DATA = {
15182 constants.IALLOCATOR_MODE_ALLOC:
15183 (_AddNewInstance,
15184 [
15185 ("name", ht.TString),
15186 ("memory", ht.TInt),
15187 ("spindle_use", ht.TInt),
15188 ("disks", ht.TListOf(ht.TDict)),
15189 ("disk_template", ht.TString),
15190 ("os", ht.TString),
15191 ("tags", _STRING_LIST),
15192 ("nics", ht.TListOf(ht.TDict)),
15193 ("vcpus", ht.TInt),
15194 ("hypervisor", ht.TString),
15195 ], ht.TList),
15196 constants.IALLOCATOR_MODE_RELOC:
15197 (_AddRelocateInstance,
15198 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15199 ht.TList),
15200 constants.IALLOCATOR_MODE_NODE_EVAC:
15201 (_AddNodeEvacuate, [
15202 ("instances", _STRING_LIST),
15203 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15204 ], _NEVAC_RESULT),
15205 constants.IALLOCATOR_MODE_CHG_GROUP:
15206 (_AddChangeGroup, [
15207 ("instances", _STRING_LIST),
15208 ("target_groups", _STRING_LIST),
15209 ], _NEVAC_RESULT),
15210 }
15212 def Run(self, name, validate=True, call_fn=None):
15213 """Run an instance allocator and return the results.
15216 if call_fn is None:
15217 call_fn = self.rpc.call_iallocator_runner
15219 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15220 result.Raise("Failure while running the iallocator script")
15222 self.out_text = result.payload
15223 if validate:
15224 self._ValidateResult()
15226 def _ValidateResult(self):
15227 """Process the allocator results.
15229 This will process and if successful save the result in
15230 self.out_data and the other parameters.
15232 """
15233 try:
15234 rdict = serializer.Load(self.out_text)
15235 except Exception, err:
15236 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15238 if not isinstance(rdict, dict):
15239 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15241 # TODO: remove backwards compatibility in later versions
15242 if "nodes" in rdict and "result" not in rdict:
15243 rdict["result"] = rdict["nodes"]
15246 for key in "success", "info", "result":
15247 if key not in rdict:
15248 raise errors.OpExecError("Can't parse iallocator results:"
15249 " missing key '%s'" % key)
15250 setattr(self, key, rdict[key])
15252 if not self._result_check(self.result):
15253 raise errors.OpExecError("Iallocator returned invalid result,"
15254 " expected %s, got %s" %
15255 (self._result_check, self.result),
15256 errors.ECODE_INVAL)
15258 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15259 assert self.relocate_from is not None
15260 assert self.required_nodes == 1
15262 node2group = dict((name, ndata["group"])
15263 for (name, ndata) in self.in_data["nodes"].items())
15265 fn = compat.partial(self._NodesToGroups, node2group,
15266 self.in_data["nodegroups"])
15268 instance = self.cfg.GetInstanceInfo(self.name)
15269 request_groups = fn(self.relocate_from + [instance.primary_node])
15270 result_groups = fn(rdict["result"] + [instance.primary_node])
15272 if self.success and not set(result_groups).issubset(request_groups):
15273 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15274 " differ from original groups (%s)" %
15275 (utils.CommaJoin(result_groups),
15276 utils.CommaJoin(request_groups)))
15278 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15279 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15281 self.out_data = rdict
15283 @staticmethod
15284 def _NodesToGroups(node2group, groups, nodes):
15285 """Returns a list of unique group names for a list of nodes.
15287 @type node2group: dict
15288 @param node2group: Map from node name to group UUID
15290 @param groups: Group information
15292 @param nodes: Node names
15294 """
15295 result = set()
15297 for node in nodes:
15298 try:
15299 group_uuid = node2group[node]
15300 except KeyError:
15301 # Ignore unknown node
15302 continue
15303 else:
15304 try:
15305 group = groups[group_uuid]
15306 except KeyError:
15307 # Can't find group, let's use UUID
15308 group_name = group_uuid
15309 else:
15310 group_name = group["name"]
15312 result.add(group_name)
15314 return sorted(result)
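# Worked example (made-up data): with
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
# _NodesToGroups(node2group, groups, ["node1", "node2", "node3"]) returns
# ["default", "uuid-b"]: unknown nodes are skipped and a group missing from
# the groups dict falls back to its UUID.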
15317 class LUTestAllocator(NoHooksLU):
15318 """Run allocator tests.
15320 This LU runs the allocator tests
15323 def CheckPrereq(self):
15324 """Check prerequisites.
15326 This checks the opcode parameters depending on the direction and mode of the test.
15329 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15330 for attr in ["memory", "disks", "disk_template",
15331 "os", "tags", "nics", "vcpus"]:
15332 if not hasattr(self.op, attr):
15333 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15334 attr, errors.ECODE_INVAL)
15335 iname = self.cfg.ExpandInstanceName(self.op.name)
15336 if iname is not None:
15337 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15338 iname, errors.ECODE_EXISTS)
15339 if not isinstance(self.op.nics, list):
15340 raise errors.OpPrereqError("Invalid parameter 'nics'",
15341 errors.ECODE_INVAL)
15342 if not isinstance(self.op.disks, list):
15343 raise errors.OpPrereqError("Invalid parameter 'disks'",
15344 errors.ECODE_INVAL)
15345 for row in self.op.disks:
15346 if (not isinstance(row, dict) or
15347 constants.IDISK_SIZE not in row or
15348 not isinstance(row[constants.IDISK_SIZE], int) or
15349 constants.IDISK_MODE not in row or
15350 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15351 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15352 " parameter", errors.ECODE_INVAL)
15353 if self.op.hypervisor is None:
15354 self.op.hypervisor = self.cfg.GetHypervisorType()
15355 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15356 fname = _ExpandInstanceName(self.cfg, self.op.name)
15357 self.op.name = fname
15358 self.relocate_from = \
15359 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15360 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15361 constants.IALLOCATOR_MODE_NODE_EVAC):
15362 if not self.op.instances:
15363 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15364 self.op.instances = _GetWantedInstances(self, self.op.instances)
15365 else:
15366 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15367 self.op.mode, errors.ECODE_INVAL)
15369 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15370 if self.op.allocator is None:
15371 raise errors.OpPrereqError("Missing allocator name",
15372 errors.ECODE_INVAL)
15373 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15374 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15375 self.op.direction, errors.ECODE_INVAL)
15377 def Exec(self, feedback_fn):
15378 """Run the allocator test.
15381 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15382 ial = IAllocator(self.cfg, self.rpc,
15383 mode=self.op.mode,
15384 name=self.op.name,
15385 memory=self.op.memory,
15386 disks=self.op.disks,
15387 disk_template=self.op.disk_template,
15388 os=self.op.os,
15389 tags=self.op.tags,
15390 nics=self.op.nics,
15391 vcpus=self.op.vcpus,
15392 hypervisor=self.op.hypervisor,
15393 spindle_use=self.op.spindle_use)
15394 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15395 ial = IAllocator(self.cfg, self.rpc,
15396 mode=self.op.mode,
15397 name=self.op.name,
15398 relocate_from=list(self.relocate_from))
15400 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15401 ial = IAllocator(self.cfg, self.rpc,
15402 mode=self.op.mode,
15403 instances=self.op.instances,
15404 target_groups=self.op.target_groups)
15405 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15406 ial = IAllocator(self.cfg, self.rpc,
15407 mode=self.op.mode,
15408 instances=self.op.instances,
15409 evac_mode=self.op.evac_mode)
15410 else:
15411 raise errors.ProgrammerError("Unhandled mode %s in"
15412 " LUTestAllocator.Exec", self.op.mode)
15414 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15415 result = ial.in_text
15416 else:
15417 ial.Run(self.op.allocator, validate=False)
15418 result = ial.out_text
15420 return result
15422 #: Query type implementations
15423 _QUERY_IMPL = {
15424 constants.QR_CLUSTER: _ClusterQuery,
15425 constants.QR_INSTANCE: _InstanceQuery,
15426 constants.QR_NODE: _NodeQuery,
15427 constants.QR_GROUP: _GroupQuery,
15428 constants.QR_OS: _OsQuery,
15429 constants.QR_EXPORT: _ExportQuery,
15430 }
15432 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15435 def _GetQueryImplementation(name):
15436 """Returns the implemtnation for a query type.
15438 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15442 return _QUERY_IMPL[name]
15444 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15445 errors.ECODE_INVAL)