4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensure
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separate is better because:
176 - ExpandNames is left as as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. No nodes should be returned as an
318 empty list (and not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused argument and could
345 # be a function warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 If should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we're really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @returns The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @param use_default: boolean
755 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @param use_none: boolean
758 @type use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of a instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if not value or value == [constants.VALUE_DEFAULT]:
800 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
801 " on the cluster'" % key,
804 if key in constants.IPOLICY_PARAMETERS:
805 # FIXME: we assume all such values are float
807 ipolicy[key] = float(value)
808 except (TypeError, ValueError), err:
809 raise errors.OpPrereqError("Invalid value for attribute"
810 " '%s': '%s', error: %s" %
811 (key, value, err), errors.ECODE_INVAL)
813 # FIXME: we assume all others are lists; this should be redone
815 ipolicy[key] = list(value)
817 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
818 except errors.ConfigurationError, err:
819 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
824 def _UpdateAndVerifySubDict(base, updates, type_check):
825 """Updates and verifies a dict with sub dicts of the same type.
827 @param base: The dict with the old data
828 @param updates: The dict with the new data
829 @param type_check: Dict suitable to ForceDictType to verify correct types
830 @returns: A new dict with updated and verified values
834 new = _GetUpdatedParams(old, value)
835 utils.ForceDictType(new, type_check)
838 ret = copy.deepcopy(base)
839 ret.update(dict((key, fn(base.get(key, {}), value))
840 for key, value in updates.items()))
844 def _MergeAndVerifyHvState(op_input, obj_input):
845 """Combines the hv state from an opcode with the one of the object
847 @param op_input: The input dict from the opcode
848 @param obj_input: The input dict from the objects
849 @return: The verified and updated dict
853 invalid_hvs = set(op_input) - constants.HYPER_TYPES
855 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
856 " %s" % utils.CommaJoin(invalid_hvs),
858 if obj_input is None:
860 type_check = constants.HVSTS_PARAMETER_TYPES
861 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
866 def _MergeAndVerifyDiskState(op_input, obj_input):
867 """Combines the disk state from an opcode with the one of the object
869 @param op_input: The input dict from the opcode
870 @param obj_input: The input dict from the objects
871 @return: The verified and updated dict
874 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
876 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
877 utils.CommaJoin(invalid_dst),
879 type_check = constants.DSS_PARAMETER_TYPES
880 if obj_input is None:
882 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
884 for key, value in op_input.items())
889 def _ReleaseLocks(lu, level, names=None, keep=None):
890 """Releases locks owned by an LU.
892 @type lu: L{LogicalUnit}
893 @param level: Lock level
894 @type names: list or None
895 @param names: Names of locks to release
896 @type keep: list or None
897 @param keep: Names of locks to retain
900 assert not (keep is not None and names is not None), \
901 "Only one of the 'names' and the 'keep' parameters can be given"
903 if names is not None:
904 should_release = names.__contains__
906 should_release = lambda name: name not in keep
908 should_release = None
910 owned = lu.owned_locks(level)
912 # Not owning any lock at this level, do nothing
919 # Determine which locks to release
921 if should_release(name):
926 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
928 # Release just some locks
929 lu.glm.release(level, names=release)
931 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
934 lu.glm.release(level)
936 assert not lu.glm.is_owned(level), "No locks should be owned"
939 def _MapInstanceDisksToNodes(instances):
940 """Creates a map from (node, volume) to instance name.
942 @type instances: list of L{objects.Instance}
943 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
946 return dict(((node, vol), inst.name)
947 for inst in instances
948 for (node, vols) in inst.MapLVsByNode().items()
952 def _RunPostHook(lu, node_name):
953 """Runs the post-hook for an opcode on a single node.
956 hm = lu.proc.BuildHooksManager(lu)
958 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
960 # pylint: disable=W0702
961 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
964 def _CheckOutputFields(static, dynamic, selected):
965 """Checks whether all selected fields are valid.
967 @type static: L{utils.FieldSet}
968 @param static: static fields set
969 @type dynamic: L{utils.FieldSet}
970 @param dynamic: dynamic fields set
977 delta = f.NonMatching(selected)
979 raise errors.OpPrereqError("Unknown output fields selected: %s"
980 % ",".join(delta), errors.ECODE_INVAL)
983 def _CheckGlobalHvParams(params):
984 """Validates that given hypervisor params are not global ones.
986 This will ensure that instances don't get customised versions of
990 used_globals = constants.HVC_GLOBALS.intersection(params)
992 msg = ("The following hypervisor parameters are global and cannot"
993 " be customized at instance level, please modify them at"
994 " cluster level: %s" % utils.CommaJoin(used_globals))
995 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
998 def _CheckNodeOnline(lu, node, msg=None):
999 """Ensure that a given node is online.
1001 @param lu: the LU on behalf of which we make the check
1002 @param node: the node to check
1003 @param msg: if passed, should be a message to replace the default one
1004 @raise errors.OpPrereqError: if the node is offline
1008 msg = "Can't use offline node"
1009 if lu.cfg.GetNodeInfo(node).offline:
1010 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1013 def _CheckNodeNotDrained(lu, node):
1014 """Ensure that a given node is not drained.
1016 @param lu: the LU on behalf of which we make the check
1017 @param node: the node to check
1018 @raise errors.OpPrereqError: if the node is drained
1021 if lu.cfg.GetNodeInfo(node).drained:
1022 raise errors.OpPrereqError("Can't use drained node %s" % node,
1026 def _CheckNodeVmCapable(lu, node):
1027 """Ensure that a given node is vm capable.
1029 @param lu: the LU on behalf of which we make the check
1030 @param node: the node to check
1031 @raise errors.OpPrereqError: if the node is not vm capable
1034 if not lu.cfg.GetNodeInfo(node).vm_capable:
1035 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1039 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1040 """Ensure that a node supports a given OS.
1042 @param lu: the LU on behalf of which we make the check
1043 @param node: the node to check
1044 @param os_name: the OS to query about
1045 @param force_variant: whether to ignore variant errors
1046 @raise errors.OpPrereqError: if the node is not supporting the OS
1049 result = lu.rpc.call_os_get(node, os_name)
1050 result.Raise("OS '%s' not in supported OS list for node %s" %
1052 prereq=True, ecode=errors.ECODE_INVAL)
1053 if not force_variant:
1054 _CheckOSVariant(result.payload, os_name)
1057 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1058 """Ensure that a node has the given secondary ip.
1060 @type lu: L{LogicalUnit}
1061 @param lu: the LU on behalf of which we make the check
1063 @param node: the node to check
1064 @type secondary_ip: string
1065 @param secondary_ip: the ip to check
1066 @type prereq: boolean
1067 @param prereq: whether to throw a prerequisite or an execute error
1068 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1069 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1072 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1073 result.Raise("Failure checking secondary ip on node %s" % node,
1074 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1075 if not result.payload:
1076 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1077 " please fix and re-run this command" % secondary_ip)
1079 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1081 raise errors.OpExecError(msg)
1084 def _GetClusterDomainSecret():
1085 """Reads the cluster domain secret.
1088 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1092 def _CheckInstanceState(lu, instance, req_states, msg=None):
1093 """Ensure that an instance is in one of the required states.
1095 @param lu: the LU on behalf of which we make the check
1096 @param instance: the instance to check
1097 @param msg: if passed, should be a message to replace the default one
1098 @raise errors.OpPrereqError: if the instance is not in the required state
1102 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1103 if instance.admin_state not in req_states:
1104 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1105 (instance.name, instance.admin_state, msg),
1108 if constants.ADMINST_UP not in req_states:
1109 pnode = instance.primary_node
1110 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1111 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1112 prereq=True, ecode=errors.ECODE_ENVIRON)
1114 if instance.name in ins_l.payload:
1115 raise errors.OpPrereqError("Instance %s is running, %s" %
1116 (instance.name, msg), errors.ECODE_STATE)
1119 def _ComputeMinMaxSpec(name, ipolicy, value):
1120 """Computes if value is in the desired range.
1122 @param name: name of the parameter for which we perform the check
1123 @param ipolicy: dictionary containing min, max and std values
1124 @param value: actual value that we want to use
1125 @return: None or element not meeting the criteria
1129 if value in [None, constants.VALUE_AUTO]:
1131 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1132 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1133 if value > max_v or min_v > value:
1134 return ("%s value %s is not in range [%s, %s]" %
1135 (name, value, min_v, max_v))
1139 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1140 nic_count, disk_sizes, spindle_use,
1141 _compute_fn=_ComputeMinMaxSpec):
1142 """Verifies ipolicy against provided specs.
1145 @param ipolicy: The ipolicy
1147 @param mem_size: The memory size
1148 @type cpu_count: int
1149 @param cpu_count: Used cpu cores
1150 @type disk_count: int
1151 @param disk_count: Number of disks used
1152 @type nic_count: int
1153 @param nic_count: Number of nics used
1154 @type disk_sizes: list of ints
1155 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1156 @type spindle_use: int
1157 @param spindle_use: The number of spindles this instance uses
1158 @param _compute_fn: The compute function (unittest only)
1159 @return: A list of violations, or an empty list of no violations are found
1162 assert disk_count == len(disk_sizes)
1165 (constants.ISPEC_MEM_SIZE, mem_size),
1166 (constants.ISPEC_CPU_COUNT, cpu_count),
1167 (constants.ISPEC_DISK_COUNT, disk_count),
1168 (constants.ISPEC_NIC_COUNT, nic_count),
1169 (constants.ISPEC_SPINDLE_USE, spindle_use),
1170 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1173 (_compute_fn(name, ipolicy, value)
1174 for (name, value) in test_settings))
1177 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1178 _compute_fn=_ComputeIPolicySpecViolation):
1179 """Compute if instance meets the specs of ipolicy.
1182 @param ipolicy: The ipolicy to verify against
1183 @type instance: L{objects.Instance}
1184 @param instance: The instance to verify
1185 @param _compute_fn: The function to verify ipolicy (unittest only)
1186 @see: L{_ComputeIPolicySpecViolation}
1189 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1190 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1191 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1192 disk_count = len(instance.disks)
1193 disk_sizes = [disk.size for disk in instance.disks]
1194 nic_count = len(instance.nics)
1196 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1197 disk_sizes, spindle_use)
1200 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1201 _compute_fn=_ComputeIPolicySpecViolation):
1202 """Compute if instance specs meets the specs of ipolicy.
1205 @param ipolicy: The ipolicy to verify against
1206 @param instance_spec: dict
1207 @param instance_spec: The instance spec to verify
1208 @param _compute_fn: The function to verify ipolicy (unittest only)
1209 @see: L{_ComputeIPolicySpecViolation}
1212 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1213 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1214 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1215 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1216 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1217 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1219 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1220 disk_sizes, spindle_use)
1223 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1225 _compute_fn=_ComputeIPolicyInstanceViolation):
1226 """Compute if instance meets the specs of the new target group.
1228 @param ipolicy: The ipolicy to verify
1229 @param instance: The instance object to verify
1230 @param current_group: The current group of the instance
1231 @param target_group: The new group of the instance
1232 @param _compute_fn: The function to verify ipolicy (unittest only)
1233 @see: L{_ComputeIPolicySpecViolation}
1236 if current_group == target_group:
1239 return _compute_fn(ipolicy, instance)
1242 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1243 _compute_fn=_ComputeIPolicyNodeViolation):
1244 """Checks that the target node is correct in terms of instance policy.
1246 @param ipolicy: The ipolicy to verify
1247 @param instance: The instance object to verify
1248 @param node: The new node to relocate
1249 @param ignore: Ignore violations of the ipolicy
1250 @param _compute_fn: The function to verify ipolicy (unittest only)
1251 @see: L{_ComputeIPolicySpecViolation}
1254 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1255 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1258 msg = ("Instance does not meet target node group's (%s) instance"
1259 " policy: %s") % (node.group, utils.CommaJoin(res))
1263 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1266 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1267 """Computes a set of any instances that would violate the new ipolicy.
1269 @param old_ipolicy: The current (still in-place) ipolicy
1270 @param new_ipolicy: The new (to become) ipolicy
1271 @param instances: List of instances to verify
1272 @return: A list of instances which violates the new ipolicy but did not before
1275 return (_ComputeViolatingInstances(old_ipolicy, instances) -
1276 _ComputeViolatingInstances(new_ipolicy, instances))
1279 def _ExpandItemName(fn, name, kind):
1280 """Expand an item name.
1282 @param fn: the function to use for expansion
1283 @param name: requested item name
1284 @param kind: text description ('Node' or 'Instance')
1285 @return: the resolved (full) name
1286 @raise errors.OpPrereqError: if the item is not found
1289 full_name = fn(name)
1290 if full_name is None:
1291 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1296 def _ExpandNodeName(cfg, name):
1297 """Wrapper over L{_ExpandItemName} for nodes."""
1298 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1301 def _ExpandInstanceName(cfg, name):
1302 """Wrapper over L{_ExpandItemName} for instance."""
1303 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1306 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1307 minmem, maxmem, vcpus, nics, disk_template, disks,
1308 bep, hvp, hypervisor_name, tags):
1309 """Builds instance related env variables for hooks
1311 This builds the hook environment from individual variables.
1314 @param name: the name of the instance
1315 @type primary_node: string
1316 @param primary_node: the name of the instance's primary node
1317 @type secondary_nodes: list
1318 @param secondary_nodes: list of secondary nodes as strings
1319 @type os_type: string
1320 @param os_type: the name of the instance's OS
1321 @type status: string
1322 @param status: the desired status of the instance
1323 @type minmem: string
1324 @param minmem: the minimum memory size of the instance
1325 @type maxmem: string
1326 @param maxmem: the maximum memory size of the instance
1328 @param vcpus: the count of VCPUs the instance has
1330 @param nics: list of tuples (ip, mac, mode, link) representing
1331 the NICs the instance has
1332 @type disk_template: string
1333 @param disk_template: the disk template of the instance
1335 @param disks: the list of (size, mode) pairs
1337 @param bep: the backend parameters for the instance
1339 @param hvp: the hypervisor parameters for the instance
1340 @type hypervisor_name: string
1341 @param hypervisor_name: the hypervisor for the instance
1343 @param tags: list of instance tags as strings
1345 @return: the hook environment for this instance
1350 "INSTANCE_NAME": name,
1351 "INSTANCE_PRIMARY": primary_node,
1352 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1353 "INSTANCE_OS_TYPE": os_type,
1354 "INSTANCE_STATUS": status,
1355 "INSTANCE_MINMEM": minmem,
1356 "INSTANCE_MAXMEM": maxmem,
1357 # TODO(2.7) remove deprecated "memory" value
1358 "INSTANCE_MEMORY": maxmem,
1359 "INSTANCE_VCPUS": vcpus,
1360 "INSTANCE_DISK_TEMPLATE": disk_template,
1361 "INSTANCE_HYPERVISOR": hypervisor_name,
1364 nic_count = len(nics)
1365 for idx, (ip, mac, mode, link) in enumerate(nics):
1368 env["INSTANCE_NIC%d_IP" % idx] = ip
1369 env["INSTANCE_NIC%d_MAC" % idx] = mac
1370 env["INSTANCE_NIC%d_MODE" % idx] = mode
1371 env["INSTANCE_NIC%d_LINK" % idx] = link
1372 if mode == constants.NIC_MODE_BRIDGED:
1373 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1377 env["INSTANCE_NIC_COUNT"] = nic_count
1380 disk_count = len(disks)
1381 for idx, (size, mode) in enumerate(disks):
1382 env["INSTANCE_DISK%d_SIZE" % idx] = size
1383 env["INSTANCE_DISK%d_MODE" % idx] = mode
1387 env["INSTANCE_DISK_COUNT"] = disk_count
1392 env["INSTANCE_TAGS"] = " ".join(tags)
1394 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1395 for key, value in source.items():
1396 env["INSTANCE_%s_%s" % (kind, key)] = value
1401 def _NICListToTuple(lu, nics):
1402 """Build a list of nic information tuples.
1404 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1405 value in LUInstanceQueryData.
1407 @type lu: L{LogicalUnit}
1408 @param lu: the logical unit on whose behalf we execute
1409 @type nics: list of L{objects.NIC}
1410 @param nics: list of nics to convert to hooks tuples
1414 cluster = lu.cfg.GetClusterInfo()
1418 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1419 mode = filled_params[constants.NIC_MODE]
1420 link = filled_params[constants.NIC_LINK]
1421 hooks_nics.append((ip, mac, mode, link))
1425 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1426 """Builds instance related env variables for hooks from an object.
1428 @type lu: L{LogicalUnit}
1429 @param lu: the logical unit on whose behalf we execute
1430 @type instance: L{objects.Instance}
1431 @param instance: the instance for which we should build the
1433 @type override: dict
1434 @param override: dictionary with key/values that will override
1437 @return: the hook environment dictionary
1440 cluster = lu.cfg.GetClusterInfo()
1441 bep = cluster.FillBE(instance)
1442 hvp = cluster.FillHV(instance)
1444 "name": instance.name,
1445 "primary_node": instance.primary_node,
1446 "secondary_nodes": instance.secondary_nodes,
1447 "os_type": instance.os,
1448 "status": instance.admin_state,
1449 "maxmem": bep[constants.BE_MAXMEM],
1450 "minmem": bep[constants.BE_MINMEM],
1451 "vcpus": bep[constants.BE_VCPUS],
1452 "nics": _NICListToTuple(lu, instance.nics),
1453 "disk_template": instance.disk_template,
1454 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1457 "hypervisor_name": instance.hypervisor,
1458 "tags": instance.tags,
1461 args.update(override)
1462 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1465 def _AdjustCandidatePool(lu, exceptions):
1466 """Adjust the candidate pool after node operations.
1469 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1471 lu.LogInfo("Promoted nodes to master candidate role: %s",
1472 utils.CommaJoin(node.name for node in mod_list))
1473 for name in mod_list:
1474 lu.context.ReaddNode(name)
1475 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1477 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1481 def _DecideSelfPromotion(lu, exceptions=None):
1482 """Decide whether I should promote myself as a master candidate.
1485 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1486 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1487 # the new node will increase mc_max with one, so:
1488 mc_should = min(mc_should + 1, cp_size)
1489 return mc_now < mc_should
1492 def _CalculateGroupIPolicy(cluster, group):
1493 """Calculate instance policy for group.
1496 return cluster.SimpleFillIPolicy(group.ipolicy)
1499 def _ComputeViolatingInstances(ipolicy, instances):
1500 """Computes a set of instances who violates given ipolicy.
1502 @param ipolicy: The ipolicy to verify
1503 @type instances: object.Instance
1504 @param instances: List of instances to verify
1505 @return: A frozenset of instance names violating the ipolicy
1508 return frozenset([inst.name for inst in instances
1509 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1512 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1513 """Check that the brigdes needed by a list of nics exist.
1516 cluster = lu.cfg.GetClusterInfo()
1517 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1518 brlist = [params[constants.NIC_LINK] for params in paramslist
1519 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1521 result = lu.rpc.call_bridges_exist(target_node, brlist)
1522 result.Raise("Error checking bridges on destination node '%s'" %
1523 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1526 def _CheckInstanceBridgesExist(lu, instance, node=None):
1527 """Check that the brigdes needed by an instance exist.
1531 node = instance.primary_node
1532 _CheckNicsBridgesExist(lu, instance.nics, node)
1535 def _CheckOSVariant(os_obj, name):
1536 """Check whether an OS name conforms to the os variants specification.
1538 @type os_obj: L{objects.OS}
1539 @param os_obj: OS object to check
1541 @param name: OS name passed by the user, to check for validity
1544 variant = objects.OS.GetVariant(name)
1545 if not os_obj.supported_variants:
1547 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1548 " passed)" % (os_obj.name, variant),
1552 raise errors.OpPrereqError("OS name must include a variant",
1555 if variant not in os_obj.supported_variants:
1556 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1559 def _GetNodeInstancesInner(cfg, fn):
1560 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1563 def _GetNodeInstances(cfg, node_name):
1564 """Returns a list of all primary and secondary instances on a node.
1568 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1571 def _GetNodePrimaryInstances(cfg, node_name):
1572 """Returns primary instances on a node.
1575 return _GetNodeInstancesInner(cfg,
1576 lambda inst: node_name == inst.primary_node)
1579 def _GetNodeSecondaryInstances(cfg, node_name):
1580 """Returns secondary instances on a node.
1583 return _GetNodeInstancesInner(cfg,
1584 lambda inst: node_name in inst.secondary_nodes)
1587 def _GetStorageTypeArgs(cfg, storage_type):
1588 """Returns the arguments for a storage type.
1591 # Special case for file storage
1592 if storage_type == constants.ST_FILE:
1593 # storage.FileStorage wants a list of storage directories
1594 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1599 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1602 for dev in instance.disks:
1603 cfg.SetDiskID(dev, node_name)
1605 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1606 result.Raise("Failed to get disk status from node %s" % node_name,
1607 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1609 for idx, bdev_status in enumerate(result.payload):
1610 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1616 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1617 """Check the sanity of iallocator and node arguments and use the
1618 cluster-wide iallocator if appropriate.
1620 Check that at most one of (iallocator, node) is specified. If none is
1621 specified, then the LU's opcode's iallocator slot is filled with the
1622 cluster-wide default iallocator.
1624 @type iallocator_slot: string
1625 @param iallocator_slot: the name of the opcode iallocator slot
1626 @type node_slot: string
1627 @param node_slot: the name of the opcode target node slot
1630 node = getattr(lu.op, node_slot, None)
1631 iallocator = getattr(lu.op, iallocator_slot, None)
1633 if node is not None and iallocator is not None:
1634 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1636 elif node is None and iallocator is None:
1637 default_iallocator = lu.cfg.GetDefaultIAllocator()
1638 if default_iallocator:
1639 setattr(lu.op, iallocator_slot, default_iallocator)
1641 raise errors.OpPrereqError("No iallocator or node given and no"
1642 " cluster-wide default iallocator found;"
1643 " please specify either an iallocator or a"
1644 " node, or set a cluster-wide default"
1648 def _GetDefaultIAllocator(cfg, iallocator):
1649 """Decides on which iallocator to use.
1651 @type cfg: L{config.ConfigWriter}
1652 @param cfg: Cluster configuration object
1653 @type iallocator: string or None
1654 @param iallocator: Iallocator specified in opcode
1656 @return: Iallocator name
1660 # Use default iallocator
1661 iallocator = cfg.GetDefaultIAllocator()
1664 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1665 " opcode nor as a cluster-wide default",
1671 class LUClusterPostInit(LogicalUnit):
1672 """Logical unit for running hooks after cluster initialization.
1675 HPATH = "cluster-init"
1676 HTYPE = constants.HTYPE_CLUSTER
1678 def BuildHooksEnv(self):
1683 "OP_TARGET": self.cfg.GetClusterName(),
1686 def BuildHooksNodes(self):
1687 """Build hooks nodes.
1690 return ([], [self.cfg.GetMasterNode()])
1692 def Exec(self, feedback_fn):
1699 class LUClusterDestroy(LogicalUnit):
1700 """Logical unit for destroying the cluster.
1703 HPATH = "cluster-destroy"
1704 HTYPE = constants.HTYPE_CLUSTER
1706 def BuildHooksEnv(self):
1711 "OP_TARGET": self.cfg.GetClusterName(),
1714 def BuildHooksNodes(self):
1715 """Build hooks nodes.
1720 def CheckPrereq(self):
1721 """Check prerequisites.
1723 This checks whether the cluster is empty.
1725 Any errors are signaled by raising errors.OpPrereqError.
1728 master = self.cfg.GetMasterNode()
1730 nodelist = self.cfg.GetNodeList()
1731 if len(nodelist) != 1 or nodelist[0] != master:
1732 raise errors.OpPrereqError("There are still %d node(s) in"
1733 " this cluster." % (len(nodelist) - 1),
1735 instancelist = self.cfg.GetInstanceList()
1737 raise errors.OpPrereqError("There are still %d instance(s) in"
1738 " this cluster." % len(instancelist),
1741 def Exec(self, feedback_fn):
1742 """Destroys the cluster.
1745 master_params = self.cfg.GetMasterNetworkParameters()
1747 # Run post hooks on master node before it's removed
1748 _RunPostHook(self, master_params.name)
1750 ems = self.cfg.GetUseExternalMipScript()
1751 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1754 self.LogWarning("Error disabling the master IP address: %s",
1757 return master_params.name
1760 def _VerifyCertificate(filename):
1761 """Verifies a certificate for L{LUClusterVerifyConfig}.
1763 @type filename: string
1764 @param filename: Path to PEM file
1768 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1769 utils.ReadFile(filename))
1770 except Exception, err: # pylint: disable=W0703
1771 return (LUClusterVerifyConfig.ETYPE_ERROR,
1772 "Failed to load X509 certificate %s: %s" % (filename, err))
1775 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1776 constants.SSL_CERT_EXPIRATION_ERROR)
1779 fnamemsg = "While verifying %s: %s" % (filename, msg)
1784 return (None, fnamemsg)
1785 elif errcode == utils.CERT_WARNING:
1786 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1787 elif errcode == utils.CERT_ERROR:
1788 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1790 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1793 def _GetAllHypervisorParameters(cluster, instances):
1794 """Compute the set of all hypervisor parameters.
1796 @type cluster: L{objects.Cluster}
1797 @param cluster: the cluster object
1798 @param instances: list of L{objects.Instance}
1799 @param instances: additional instances from which to obtain parameters
1800 @rtype: list of (origin, hypervisor, parameters)
1801 @return: a list with all parameters found, indicating the hypervisor they
1802 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1807 for hv_name in cluster.enabled_hypervisors:
1808 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1810 for os_name, os_hvp in cluster.os_hvp.items():
1811 for hv_name, hv_params in os_hvp.items():
1813 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1814 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1816 # TODO: collapse identical parameter values in a single one
1817 for instance in instances:
1818 if instance.hvparams:
1819 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1820 cluster.FillHV(instance)))
1825 class _VerifyErrors(object):
1826 """Mix-in for cluster/group verify LUs.
1828 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1829 self.op and self._feedback_fn to be available.)
1833 ETYPE_FIELD = "code"
1834 ETYPE_ERROR = "ERROR"
1835 ETYPE_WARNING = "WARNING"
1837 def _Error(self, ecode, item, msg, *args, **kwargs):
1838 """Format an error message.
1840 Based on the opcode's error_codes parameter, either format a
1841 parseable error code, or a simpler error string.
1843 This must be called only from Exec and functions called from Exec.
1846 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1847 itype, etxt, _ = ecode
1848 # first complete the msg
1851 # then format the whole message
1852 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1853 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1859 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1860 # and finally report it via the feedback_fn
1861 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1863 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1864 """Log an error message if the passed condition is True.
1868 or self.op.debug_simulate_errors) # pylint: disable=E1101
1870 # If the error code is in the list of ignored errors, demote the error to a
1872 (_, etxt, _) = ecode
1873 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1874 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1877 self._Error(ecode, *args, **kwargs)
1879 # do not mark the operation as failed for WARN cases only
1880 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1881 self.bad = self.bad or cond
1884 class LUClusterVerify(NoHooksLU):
1885 """Submits all jobs necessary to verify the cluster.
1890 def ExpandNames(self):
1891 self.needed_locks = {}
1893 def Exec(self, feedback_fn):
1896 if self.op.group_name:
1897 groups = [self.op.group_name]
1898 depends_fn = lambda: None
1900 groups = self.cfg.GetNodeGroupList()
1902 # Verify global configuration
1904 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1907 # Always depend on global verification
1908 depends_fn = lambda: [(-len(jobs), [])]
1910 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1911 ignore_errors=self.op.ignore_errors,
1912 depends=depends_fn())]
1913 for group in groups)
1915 # Fix up all parameters
1916 for op in itertools.chain(*jobs): # pylint: disable=W0142
1917 op.debug_simulate_errors = self.op.debug_simulate_errors
1918 op.verbose = self.op.verbose
1919 op.error_codes = self.op.error_codes
1921 op.skip_checks = self.op.skip_checks
1922 except AttributeError:
1923 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1925 return ResultWithJobs(jobs)
1928 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1929 """Verifies the cluster config.
1934 def _VerifyHVP(self, hvp_data):
1935 """Verifies locally the syntax of the hypervisor parameters.
1938 for item, hv_name, hv_params in hvp_data:
1939 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1942 hv_class = hypervisor.GetHypervisor(hv_name)
1943 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1944 hv_class.CheckParameterSyntax(hv_params)
1945 except errors.GenericError, err:
1946 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1948 def ExpandNames(self):
1949 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1950 self.share_locks = _ShareAll()
1952 def CheckPrereq(self):
1953 """Check prerequisites.
1956 # Retrieve all information
1957 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1958 self.all_node_info = self.cfg.GetAllNodesInfo()
1959 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1961 def Exec(self, feedback_fn):
1962     """Verify integrity of cluster, performing various tests on nodes.
1964     """
1965     self.bad = False
1966     self._feedback_fn = feedback_fn
1968 feedback_fn("* Verifying cluster config")
1970 for msg in self.cfg.VerifyConfig():
1971 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1973 feedback_fn("* Verifying cluster certificate files")
1975 for cert_filename in constants.ALL_CERT_FILES:
1976 (errcode, msg) = _VerifyCertificate(cert_filename)
1977 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1979 feedback_fn("* Verifying hypervisor parameters")
1981 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1982 self.all_inst_info.values()))
1984 feedback_fn("* Verifying all nodes belong to an existing group")
1986 # We do this verification here because, should this bogus circumstance
1987 # occur, it would never be caught by VerifyGroup, which only acts on
1988 # nodes/instances reachable from existing node groups.
1990 dangling_nodes = set(node.name for node in self.all_node_info.values()
1991 if node.group not in self.all_group_info)
1993 dangling_instances = {}
1994 no_node_instances = []
1996 for inst in self.all_inst_info.values():
1997 if inst.primary_node in dangling_nodes:
1998 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1999 elif inst.primary_node not in self.all_node_info:
2000 no_node_instances.append(inst.name)
2002     pretty_dangling = [
2003         "%s (%s)" %
2004         (node.name,
2005          utils.CommaJoin(dangling_instances.get(node.name,
2006                                                 ["no instances"])))
2007         for node in dangling_nodes]
2009 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2010                   None,
2011                   "the following nodes (and their instances) belong to a non"
2012 " existing group: %s", utils.CommaJoin(pretty_dangling))
2014 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2015                   None,
2016                   "the following instances have a non-existing primary-node:"
2017 " %s", utils.CommaJoin(no_node_instances))
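# Illustration (hypothetical names): a node "node4" whose node group UUID is
# absent from all_group_info ends up in dangling_nodes, and an instance with
# primary_node "node4" is listed under it, yielding output like:
#
#   - ERROR: cluster: the following nodes (and their instances) belong to a
#     non existing group: node4 (inst1)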
2022 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2023 """Verifies the status of a node group.
2026 HPATH = "cluster-verify"
2027   HTYPE = constants.HTYPE_CLUSTER
2028   REQ_BGL = False
2030 _HOOKS_INDENT_RE = re.compile("^", re.M)
2032 class NodeImage(object):
2033 """A class representing the logical and physical status of a node.
2036 @ivar name: the node name to which this object refers
2037 @ivar volumes: a structure as returned from
2038 L{ganeti.backend.GetVolumeList} (runtime)
2039 @ivar instances: a list of running instances (runtime)
2040 @ivar pinst: list of configured primary instances (config)
2041 @ivar sinst: list of configured secondary instances (config)
2042 @ivar sbp: dictionary of {primary-node: list of instances} for all
2043 instances for which this node is secondary (config)
2044 @ivar mfree: free memory, as reported by hypervisor (runtime)
2045 @ivar dfree: free disk, as reported by the node (runtime)
2046 @ivar offline: the offline status (config)
2047 @type rpc_fail: boolean
2048     @ivar rpc_fail: whether the RPC verify call was successful (overall,
2049 not whether the individual keys were correct) (runtime)
2050 @type lvm_fail: boolean
2051 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2052 @type hyp_fail: boolean
2053 @ivar hyp_fail: whether the RPC call didn't return the instance list
2054 @type ghost: boolean
2055 @ivar ghost: whether this is a known node or not (config)
2056 @type os_fail: boolean
2057 @ivar os_fail: whether the RPC call didn't return valid OS data
2058     @type oslist: list
2059     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2060 @type vm_capable: boolean
2061 @ivar vm_capable: whether the node can host instances
2064     def __init__(self, offline=False, name=None, vm_capable=True):
2065       self.name = name
2066       self.volumes = {}
2067       self.instances = []
2068       self.pinst = []
2069       self.sinst = []
2070       self.sbp = {}
2071       self.mfree = 0
2072       self.dfree = 0
2073       self.offline = offline
2074 self.vm_capable = vm_capable
2075 self.rpc_fail = False
2076 self.lvm_fail = False
2077 self.hyp_fail = False
2078       self.ghost = False
2079       self.os_fail = False
2080       self.oslist = {}
2082 def ExpandNames(self):
2083 # This raises errors.OpPrereqError on its own:
2084 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2086 # Get instances in node group; this is unsafe and needs verification later
2087     inst_names = \
2088       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2090 self.needed_locks = {
2091 locking.LEVEL_INSTANCE: inst_names,
2092 locking.LEVEL_NODEGROUP: [self.group_uuid],
2093 locking.LEVEL_NODE: [],
2094       }
2096     self.share_locks = _ShareAll()
2098 def DeclareLocks(self, level):
2099 if level == locking.LEVEL_NODE:
2100 # Get members of node group; this is unsafe and needs verification later
2101 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2103 all_inst_info = self.cfg.GetAllInstancesInfo()
2105 # In Exec(), we warn about mirrored instances that have primary and
2106 # secondary living in separate node groups. To fully verify that
2107 # volumes for these instances are healthy, we will need to do an
2108       # extra call to their secondaries. We ensure here those nodes will
2109       # be locked.
2110 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2111 # Important: access only the instances whose lock is owned
2112 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2113 nodes.update(all_inst_info[inst].secondary_nodes)
2115 self.needed_locks[locking.LEVEL_NODE] = nodes
2117 def CheckPrereq(self):
2118 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2119 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2121 group_nodes = set(self.group_info.members)
2122     group_instances = \
2123       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2125     unlocked_nodes = \
2126       group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2128 unlocked_instances = \
2129 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2131     if unlocked_nodes:
2132       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2133                                  utils.CommaJoin(unlocked_nodes),
2134                                  errors.ECODE_STATE)
2136 if unlocked_instances:
2137 raise errors.OpPrereqError("Missing lock for instances: %s" %
2138                                  utils.CommaJoin(unlocked_instances),
2139                                  errors.ECODE_STATE)
2141 self.all_node_info = self.cfg.GetAllNodesInfo()
2142 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2144 self.my_node_names = utils.NiceSort(group_nodes)
2145 self.my_inst_names = utils.NiceSort(group_instances)
2147 self.my_node_info = dict((name, self.all_node_info[name])
2148 for name in self.my_node_names)
2150 self.my_inst_info = dict((name, self.all_inst_info[name])
2151 for name in self.my_inst_names)
2153 # We detect here the nodes that will need the extra RPC calls for verifying
2154 # split LV volumes; they should be locked.
2155 extra_lv_nodes = set()
2157 for inst in self.my_inst_info.values():
2158 if inst.disk_template in constants.DTS_INT_MIRROR:
2159 for nname in inst.all_nodes:
2160 if self.all_node_info[nname].group != self.group_uuid:
2161 extra_lv_nodes.add(nname)
2163 unlocked_lv_nodes = \
2164 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2166 if unlocked_lv_nodes:
2167 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2168                                  utils.CommaJoin(unlocked_lv_nodes),
2169                                  errors.ECODE_STATE)
2170 self.extra_lv_nodes = list(extra_lv_nodes)
2172 def _VerifyNode(self, ninfo, nresult):
2173 """Perform some basic validation on data returned from a node.
2175       - check the result data structure is well formed and has all the
2176         expected fields
2177 - check ganeti version
2179 @type ninfo: L{objects.Node}
2180 @param ninfo: the node to check
2181 @param nresult: the results from the node
2183 @return: whether overall this call was successful (and we can expect
2184         reasonable values in the response)
2186     """
2187     node = ninfo.name
2188     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2190 # main result, nresult should be a non-empty dict
2191 test = not nresult or not isinstance(nresult, dict)
2192 _ErrorIf(test, constants.CV_ENODERPC, node,
2193 "unable to verify node: no data returned")
2194     if test:
2195       return False
2197     # compares ganeti version
2198 local_version = constants.PROTOCOL_VERSION
2199 remote_version = nresult.get("version", None)
2200 test = not (remote_version and
2201 isinstance(remote_version, (list, tuple)) and
2202 len(remote_version) == 2)
2203 _ErrorIf(test, constants.CV_ENODERPC, node,
2204 "connection to node returned invalid data")
2205     if test:
2206       return False
2208     test = local_version != remote_version[0]
2209 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2210 "incompatible protocol versions: master %s,"
2211 " node %s", local_version, remote_version[0])
2212     if test:
2213       return False
2215     # node seems compatible, we can actually try to look into its results
2217 # full package version
2218 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2219 constants.CV_ENODEVERSION, node,
2220 "software version mismatch: master %s, node %s",
2221 constants.RELEASE_VERSION, remote_version[1],
2222 code=self.ETYPE_WARNING)
2224 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2225 if ninfo.vm_capable and isinstance(hyp_result, dict):
2226 for hv_name, hv_result in hyp_result.iteritems():
2227 test = hv_result is not None
2228 _ErrorIf(test, constants.CV_ENODEHV, node,
2229 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2231 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2232 if ninfo.vm_capable and isinstance(hvp_result, list):
2233 for item, hv_name, hv_result in hvp_result:
2234 _ErrorIf(True, constants.CV_ENODEHV, node,
2235 "hypervisor %s parameter verify failure (source %s): %s",
2236 hv_name, item, hv_result)
2238 test = nresult.get(constants.NV_NODESETUP,
2239 ["Missing NODESETUP results"])
2240     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2241              "; ".join(test))
2243     return True
2245 def _VerifyNodeTime(self, ninfo, nresult,
2246 nvinfo_starttime, nvinfo_endtime):
2247 """Check the node time.
2249 @type ninfo: L{objects.Node}
2250 @param ninfo: the node to check
2251 @param nresult: the remote results for the node
2252 @param nvinfo_starttime: the start time of the RPC call
2253 @param nvinfo_endtime: the end time of the RPC call
2255     """
2256     node = ninfo.name
2257     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2259 ntime = nresult.get(constants.NV_TIME, None)
2260     try:
2261       ntime_merged = utils.MergeTime(ntime)
2262 except (ValueError, TypeError):
2263       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2264       return
2266 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2267 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2268 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2269 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2270     else:
2271       ntime_diff = None
2273     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2274              "Node time diverges by at least %s from master node time",
2275              ntime_diff)
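  # Worked example with illustrative numbers: assuming a clock skew allowance
  # of 150s, an RPC window of [1000.0, 1002.0] and a node whose merged time is
  # 800.0, the node is below 1000.0 - 150 = 850.0, so ntime_diff becomes
  # "200.0s" and a CV_ENODETIME error is reported; any answer within
  # [850.0, 1152.0] passes.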
2277 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2278 """Check the node LVM results.
2280 @type ninfo: L{objects.Node}
2281 @param ninfo: the node to check
2282 @param nresult: the remote results for the node
2283 @param vg_name: the configured VG name
2285     """
2286     if vg_name is None:
2287       return
2289     node = ninfo.name
2290     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2292 # checks vg existence and size > 20G
2293 vglist = nresult.get(constants.NV_VGLIST, None)
2294     test = vglist is None
2295     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2296     if not test:
2297       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2298 constants.MIN_VG_SIZE)
2299 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2302 pvlist = nresult.get(constants.NV_PVLIST, None)
2303 test = pvlist is None
2304 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2305     if not test:
2306       # check that ':' is not present in PV names, since it's a
2307       # special character for lvcreate (denotes the range of PEs to
2308       # use on the PV)
2309       for _, pvname, owner_vg in pvlist:
2310 test = ":" in pvname
2311 _ErrorIf(test, constants.CV_ENODELVM, node,
2312                  "Invalid character ':' in PV '%s' of VG '%s'",
2313                  pvname, owner_vg)
2315 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2316 """Check the node bridges.
2318 @type ninfo: L{objects.Node}
2319 @param ninfo: the node to check
2320 @param nresult: the remote results for the node
2321 @param bridges: the expected list of bridges
2323     """
2324     if not bridges:
2325       return
2327     node = ninfo.name
2328     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2330 missing = nresult.get(constants.NV_BRIDGES, None)
2331 test = not isinstance(missing, list)
2332 _ErrorIf(test, constants.CV_ENODENET, node,
2333 "did not return valid bridge information")
2334     if not test:
2335       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2336 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2338 def _VerifyNodeUserScripts(self, ninfo, nresult):
2339 """Check the results of user scripts presence and executability on the node
2341 @type ninfo: L{objects.Node}
2342 @param ninfo: the node to check
2343 @param nresult: the remote results for the node
2345     """
2346     node = ninfo.name
2348     test = not constants.NV_USERSCRIPTS in nresult
2349 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2350 "did not return user scripts information")
2352 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2353     if broken_scripts:
2354       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2355 "user scripts not present or not executable: %s" %
2356 utils.CommaJoin(sorted(broken_scripts)))
2358 def _VerifyNodeNetwork(self, ninfo, nresult):
2359 """Check the node network connectivity results.
2361 @type ninfo: L{objects.Node}
2362 @param ninfo: the node to check
2363 @param nresult: the remote results for the node
2365     """
2366     node = ninfo.name
2367     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2369 test = constants.NV_NODELIST not in nresult
2370 _ErrorIf(test, constants.CV_ENODESSH, node,
2371 "node hasn't returned node ssh connectivity data")
2372     if not test:
2373       if nresult[constants.NV_NODELIST]:
2374 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2375 _ErrorIf(True, constants.CV_ENODESSH, node,
2376 "ssh communication with node '%s': %s", a_node, a_msg)
2378 test = constants.NV_NODENETTEST not in nresult
2379 _ErrorIf(test, constants.CV_ENODENET, node,
2380 "node hasn't returned node tcp connectivity data")
2381     if not test:
2382       if nresult[constants.NV_NODENETTEST]:
2383         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2384         for anode in nlist:
2385           _ErrorIf(True, constants.CV_ENODENET, node,
2386 "tcp communication with node '%s': %s",
2387 anode, nresult[constants.NV_NODENETTEST][anode])
2389 test = constants.NV_MASTERIP not in nresult
2390 _ErrorIf(test, constants.CV_ENODENET, node,
2391 "node hasn't returned node master IP reachability data")
2392     if not test:
2393       if not nresult[constants.NV_MASTERIP]:
2394 if node == self.master_node:
2395 msg = "the master node cannot reach the master IP (not configured?)"
2396         else:
2397           msg = "cannot reach the master IP"
2398 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2400   def _VerifyInstance(self, instance, instanceconfig, node_image,
2401                       diskstatus):
2402 """Verify an instance.
2404 This function checks to see if the required block devices are
2405 available on the instance's node.
2408 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2409 node_current = instanceconfig.primary_node
2411 node_vol_should = {}
2412 instanceconfig.MapLVsByNode(node_vol_should)
2414 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2415 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2416 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2418 for node in node_vol_should:
2419 n_img = node_image[node]
2420 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2421         # ignore missing volumes on offline or broken nodes
2422         continue
2423 for volume in node_vol_should[node]:
2424 test = volume not in n_img.volumes
2425 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2426 "volume %s missing on node %s", volume, node)
2428 if instanceconfig.admin_state == constants.ADMINST_UP:
2429 pri_img = node_image[node_current]
2430 test = instance not in pri_img.instances and not pri_img.offline
2431 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2432                "instance not running on its primary node %s",
2433                node_current)
2435 diskdata = [(nname, success, status, idx)
2436 for (nname, disks) in diskstatus.items()
2437 for idx, (success, status) in enumerate(disks)]
2439 for nname, success, bdev_status, idx in diskdata:
2440       # the 'ghost node' construction in Exec() ensures that we have a
2441       # node here
2442 snode = node_image[nname]
2443 bad_snode = snode.ghost or snode.offline
2444 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2445 not success and not bad_snode,
2446 constants.CV_EINSTANCEFAULTYDISK, instance,
2447 "couldn't retrieve status for disk/%s on %s: %s",
2448 idx, nname, bdev_status)
2449 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2450 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2451 constants.CV_EINSTANCEFAULTYDISK, instance,
2452 "disk/%s on %s is faulty", idx, nname)
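  # Sketch of the diskstatus argument (hypothetical values), as produced by
  # _CollectDiskInfo: a dict mapping node name to one (success, status) pair
  # per disk index, e.g.
  #   {"node1": [(True, bdev_status), (False, "disk not found")]}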
2454 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2455 """Verify if there are any unknown volumes in the cluster.
2457 The .os, .swap and backup volumes are ignored. All other volumes are
2458 reported as unknown.
2460 @type reserved: L{ganeti.utils.FieldSet}
2461 @param reserved: a FieldSet of reserved volume names
2464 for node, n_img in node_image.items():
2465 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2466 self.all_node_info[node].group != self.group_uuid):
2467         # skip non-healthy nodes
2468         continue
2469 for volume in n_img.volumes:
2470 test = ((node not in node_vol_should or
2471 volume not in node_vol_should[node]) and
2472 not reserved.Matches(volume))
2473 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2474 "volume %s is unknown", volume)
2476 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2477 """Verify N+1 Memory Resilience.
2479 Check that if one single node dies we can still start all the
2480 instances it was primary for.
2483 cluster_info = self.cfg.GetClusterInfo()
2484 for node, n_img in node_image.items():
2485 # This code checks that every node which is now listed as
2486 # secondary has enough memory to host all instances it is
2487 # supposed to should a single other node in the cluster fail.
2488 # FIXME: not ready for failover to an arbitrary node
2489 # FIXME: does not support file-backed instances
2490 # WARNING: we currently take into account down instances as well
2491 # as up ones, considering that even if they're down someone
2492 # might want to start them even in the event of a node failure.
2493 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2494 # we're skipping nodes marked offline and nodes in other groups from
2495 # the N+1 warning, since most likely we don't have good memory
2496         # information from them; we already list instances living on such
2497         # nodes, and that's enough warning
2498         continue
2499 #TODO(dynmem): also consider ballooning out other instances
2500 for prinode, instances in n_img.sbp.items():
2501         needed_mem = 0
2502         for instance in instances:
2503 bep = cluster_info.FillBE(instance_cfg[instance])
2504 if bep[constants.BE_AUTO_BALANCE]:
2505 needed_mem += bep[constants.BE_MINMEM]
2506 test = n_img.mfree < needed_mem
2507 self._ErrorIf(test, constants.CV_ENODEN1, node,
2508                       "not enough memory to accommodate instance failovers"
2509 " should node %s fail (%dMiB needed, %dMiB available)",
2510 prinode, needed_mem, n_img.mfree)
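  # Worked example with illustrative figures: if node B is secondary for
  # instances inst1 (BE_MINMEM 512) and inst2 (BE_MINMEM 1024), both
  # auto-balanced with primary node A, then needed_mem for prinode A is
  # 1536 MiB; with n_img.mfree == 1024 MiB the CV_ENODEN1 error fires,
  # because B could not host both instances should A fail.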
2512   @classmethod
2513   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2514 (files_all, files_opt, files_mc, files_vm)):
2515 """Verifies file checksums collected from all nodes.
2517 @param errorif: Callback for reporting errors
2518 @param nodeinfo: List of L{objects.Node} objects
2519 @param master_node: Name of master node
2520 @param all_nvinfo: RPC results
2523 # Define functions determining which nodes to consider for a file
2524     files2nodefn = [
2525       (files_all, None),
2526       (files_mc, lambda node: (node.master_candidate or
2527                                node.name == master_node)),
2528       (files_vm, lambda node: node.vm_capable),
2529       ]
2531 # Build mapping from filename to list of nodes which should have the file
2532     nodefiles = {}
2533     for (files, fn) in files2nodefn:
2534       if fn is None:
2535         filenodes = nodeinfo
2536       else:
2537         filenodes = filter(fn, nodeinfo)
2538 nodefiles.update((filename,
2539 frozenset(map(operator.attrgetter("name"), filenodes)))
2540 for filename in files)
2542 assert set(nodefiles) == (files_all | files_mc | files_vm)
2544 fileinfo = dict((filename, {}) for filename in nodefiles)
2545 ignore_nodes = set()
2547 for node in nodeinfo:
2548       if node.offline:
2549         ignore_nodes.add(node.name)
2550         continue
2552 nresult = all_nvinfo[node.name]
2554       if nresult.fail_msg or not nresult.payload:
2555         node_files = None
2556       else:
2557         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2559 test = not (node_files and isinstance(node_files, dict))
2560 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2561 "Node did not return file checksum data")
2562       if test:
2563         ignore_nodes.add(node.name)
2564         continue
2566 # Build per-checksum mapping from filename to nodes having it
2567 for (filename, checksum) in node_files.items():
2568 assert filename in nodefiles
2569 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2571 for (filename, checksums) in fileinfo.items():
2572 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2574 # Nodes having the file
2575 with_file = frozenset(node_name
2576 for nodes in fileinfo[filename].values()
2577 for node_name in nodes) - ignore_nodes
2579 expected_nodes = nodefiles[filename] - ignore_nodes
2581 # Nodes missing file
2582 missing_file = expected_nodes - with_file
2584 if filename in files_opt:
2585         # All or no nodes
2586         errorif(missing_file and missing_file != expected_nodes,
2587 constants.CV_ECLUSTERFILECHECK, None,
2588 "File %s is optional, but it must exist on all or no"
2589 " nodes (not found on %s)",
2590 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2591       else:
2592         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2593 "File %s is missing from node(s) %s", filename,
2594 utils.CommaJoin(utils.NiceSort(missing_file)))
2596 # Warn if a node has a file it shouldn't
2597 unexpected = with_file - expected_nodes
2598       errorif(unexpected,
2599               constants.CV_ECLUSTERFILECHECK, None,
2600 "File %s should not exist on node(s) %s",
2601 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2603 # See if there are multiple versions of the file
2604 test = len(checksums) > 1
2605       if test:
2606         variants = ["variant %s on %s" %
2607 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2608 for (idx, (checksum, nodes)) in
2609 enumerate(sorted(checksums.items()))]
2610       else:
2611         variants = []
2613       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2614 "File %s found with %s different checksums (%s)",
2615 filename, len(checksums), "; ".join(variants))
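  # Sketch of the intermediate structures (hypothetical file and nodes):
  #   nodefiles = {"/etc/example.conf": frozenset(["node1", "node2"])}
  #   fileinfo  = {"/etc/example.conf": {"0f1e...": set(["node1"]),
  #                                      "9a8b...": set(["node2"])}}
  # Two checksums for the same filename trigger the "found with 2 different
  # checksums" error, listing each variant with the nodes reporting it.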
2617   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2618                       drbd_map):
2619     """Verifies the node DRBD status.
2621 @type ninfo: L{objects.Node}
2622 @param ninfo: the node to check
2623 @param nresult: the remote results for the node
2624 @param instanceinfo: the dict of instances
2625 @param drbd_helper: the configured DRBD usermode helper
2626 @param drbd_map: the DRBD map as returned by
2627 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2629     """
2630     node = ninfo.name
2631     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2633     if drbd_helper:
2634       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2635       test = (helper_result is None)
2636 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2637 "no drbd usermode helper returned")
2638       if helper_result:
2639         status, payload = helper_result
2640         test = not status
2641         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2642 "drbd usermode helper check unsuccessful: %s", payload)
2643 test = status and (payload != drbd_helper)
2644 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2645 "wrong drbd usermode helper: %s", payload)
2647 # compute the DRBD minors
2648     node_drbd = {}
2649     for minor, instance in drbd_map[node].items():
2650 test = instance not in instanceinfo
2651 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2652 "ghost instance '%s' in temporary DRBD map", instance)
2653 # ghost instance should not be running, but otherwise we
2654 # don't give double warnings (both ghost instance and
2655 # unallocated minor in use)
2656       if test:
2657         node_drbd[minor] = (instance, False)
2658       else:
2659         instance = instanceinfo[instance]
2660 node_drbd[minor] = (instance.name,
2661 instance.admin_state == constants.ADMINST_UP)
2663 # and now check them
2664 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2665 test = not isinstance(used_minors, (tuple, list))
2666 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2667 "cannot parse drbd status file: %s", str(used_minors))
2668     if test:
2669       # we cannot check drbd status
2670       return
2672 for minor, (iname, must_exist) in node_drbd.items():
2673 test = minor not in used_minors and must_exist
2674 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2675 "drbd minor %d of instance %s is not active", minor, iname)
2676 for minor in used_minors:
2677 test = minor not in node_drbd
2678 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2679 "unallocated drbd minor %d is in use", minor)
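  # Sketch (hypothetical DRBD map): with drbd_map[node] == {0: "inst1"} and
  # the node reporting used_minors == [0, 3], minor 0 is accounted for while
  # minor 3 raises "unallocated drbd minor 3 is in use"; if "inst1" were up
  # but minor 0 absent from used_minors, the "not active" error would fire
  # instead.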
2681 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2682 """Builds the node OS structures.
2684 @type ninfo: L{objects.Node}
2685 @param ninfo: the node to check
2686 @param nresult: the remote results for the node
2687 @param nimg: the node image object
2689     """
2690     node = ninfo.name
2691     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2693 remote_os = nresult.get(constants.NV_OSLIST, None)
2694 test = (not isinstance(remote_os, list) or
2695 not compat.all(isinstance(v, list) and len(v) == 7
2696 for v in remote_os))
2698     _ErrorIf(test, constants.CV_ENODEOS, node,
2699              "node hasn't returned valid OS data")
2701     nimg.os_fail = test
2702     if test:
2703       return
2705     os_dict = {}
2708     for (name, os_path, status, diagnose,
2709 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2711       if name not in os_dict:
2712         os_dict[name] = []
2714 # parameters is a list of lists instead of list of tuples due to
2715 # JSON lacking a real tuple type, fix it:
2716 parameters = [tuple(v) for v in parameters]
2717 os_dict[name].append((os_path, status, diagnose,
2718 set(variants), set(parameters), set(api_ver)))
2720 nimg.oslist = os_dict
2722 def _VerifyNodeOS(self, ninfo, nimg, base):
2723 """Verifies the node OS list.
2725 @type ninfo: L{objects.Node}
2726 @param ninfo: the node to check
2727 @param nimg: the node image object
2728 @param base: the 'template' node we match against (e.g. from the master)
2730     """
2731     node = ninfo.name
2732     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2734 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2736 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2737 for os_name, os_data in nimg.oslist.items():
2738 assert os_data, "Empty OS status for OS %s?!" % os_name
2739 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2740 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2741 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2742 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2743 "OS '%s' has multiple entries (first one shadows the rest): %s",
2744 os_name, utils.CommaJoin([v[0] for v in os_data]))
2745 # comparisons with the 'base' image
2746 test = os_name not in base.oslist
2747 _ErrorIf(test, constants.CV_ENODEOS, node,
2748                "Extra OS %s not present on reference node (%s)",
2749                os_name, base.name)
2750       if test:
2751         continue
2752 assert base.oslist[os_name], "Base node has empty OS status?"
2753 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2754       if not b_status:
2755         # base OS is invalid, skipping
2756         continue
2757 for kind, a, b in [("API version", f_api, b_api),
2758 ("variants list", f_var, b_var),
2759 ("parameters", beautify_params(f_param),
2760 beautify_params(b_param))]:
2761 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2762 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2763 kind, os_name, base.name,
2764 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2766 # check any missing OSes
2767 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2768 _ErrorIf(missing, constants.CV_ENODEOS, node,
2769 "OSes present on reference node %s but missing on this node: %s",
2770 base.name, utils.CommaJoin(missing))
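  # Sketch of nimg.oslist as built by _UpdateNodeOS (hypothetical OS):
  #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
  #                     set(["default"]), set([("dhcp", "use DHCP")]),
  #                     set([20]))]}
  # Each OS is then compared field by field (API versions, variants,
  # parameters) against the same entry on the reference node.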
2772 def _VerifyOob(self, ninfo, nresult):
2773 """Verifies out of band functionality of a node.
2775 @type ninfo: L{objects.Node}
2776 @param ninfo: the node to check
2777 @param nresult: the remote results for the node
2779     """
2780     node = ninfo.name
2781     # We just have to verify the paths on master and/or master candidates
2782 # as the oob helper is invoked on the master
2783 if ((ninfo.master_candidate or ninfo.master_capable) and
2784 constants.NV_OOB_PATHS in nresult):
2785 for path_result in nresult[constants.NV_OOB_PATHS]:
2786 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2788 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2789 """Verifies and updates the node volume data.
2791 This function will update a L{NodeImage}'s internal structures
2792 with data from the remote call.
2794 @type ninfo: L{objects.Node}
2795 @param ninfo: the node to check
2796 @param nresult: the remote results for the node
2797 @param nimg: the node image object
2798 @param vg_name: the configured VG name
2800     """
2801     node = ninfo.name
2802     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2804 nimg.lvm_fail = True
2805 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2806     if vg_name is None:
2807       pass
2808     elif isinstance(lvdata, basestring):
2809 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2810 utils.SafeEncode(lvdata))
2811 elif not isinstance(lvdata, dict):
2812 _ErrorIf(True, constants.CV_ENODELVM, node,
2813 "rpc call to node failed (lvlist)")
2814     else:
2815       nimg.volumes = lvdata
2816 nimg.lvm_fail = False
2818 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2819 """Verifies and updates the node instance list.
2821 If the listing was successful, then updates this node's instance
2822     list. Otherwise, it marks the RPC call as failed for the instance
2823     list key.
2825 @type ninfo: L{objects.Node}
2826 @param ninfo: the node to check
2827 @param nresult: the remote results for the node
2828 @param nimg: the node image object
2831 idata = nresult.get(constants.NV_INSTANCELIST, None)
2832 test = not isinstance(idata, list)
2833 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2834 "rpc call to node failed (instancelist): %s",
2835 utils.SafeEncode(str(idata)))
2836     if test:
2837       nimg.hyp_fail = True
2838     else:
2839       nimg.instances = idata
2841 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2842 """Verifies and computes a node information map
2844 @type ninfo: L{objects.Node}
2845 @param ninfo: the node to check
2846 @param nresult: the remote results for the node
2847 @param nimg: the node image object
2848 @param vg_name: the configured VG name
2850     """
2851     node = ninfo.name
2852     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2854 # try to read free memory (from the hypervisor)
2855 hv_info = nresult.get(constants.NV_HVINFO, None)
2856 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2857 _ErrorIf(test, constants.CV_ENODEHV, node,
2858 "rpc call to node failed (hvinfo)")
2859     if not test:
2860       try:
2861         nimg.mfree = int(hv_info["memory_free"])
2862 except (ValueError, TypeError):
2863 _ErrorIf(True, constants.CV_ENODERPC, node,
2864 "node returned invalid nodeinfo, check hypervisor")
2866 # FIXME: devise a free space model for file based instances as well
2867 if vg_name is not None:
2868 test = (constants.NV_VGLIST not in nresult or
2869 vg_name not in nresult[constants.NV_VGLIST])
2870 _ErrorIf(test, constants.CV_ENODELVM, node,
2871 "node didn't return data for the volume group '%s'"
2872 " - it is either missing or broken", vg_name)
2873       if not test:
2874         try:
2875           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2876 except (ValueError, TypeError):
2877 _ErrorIf(True, constants.CV_ENODERPC, node,
2878 "node returned invalid LVM info, check LVM status")
2880 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2881 """Gets per-disk status information for all instances.
2883 @type nodelist: list of strings
2884 @param nodelist: Node names
2885 @type node_image: dict of (name, L{objects.Node})
2886 @param node_image: Node objects
2887 @type instanceinfo: dict of (name, L{objects.Instance})
2888 @param instanceinfo: Instance objects
2889     @rtype: {instance: {node: [(success, payload)]}}
2890 @return: a dictionary of per-instance dictionaries with nodes as
2891 keys and disk information as values; the disk information is a
2892 list of tuples (success, payload)
2895 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2897     node_disks = {}
2898     node_disks_devonly = {}
2899 diskless_instances = set()
2900 diskless = constants.DT_DISKLESS
2902 for nname in nodelist:
2903 node_instances = list(itertools.chain(node_image[nname].pinst,
2904 node_image[nname].sinst))
2905 diskless_instances.update(inst for inst in node_instances
2906 if instanceinfo[inst].disk_template == diskless)
2907 disks = [(inst, disk)
2908 for inst in node_instances
2909 for disk in instanceinfo[inst].disks]
2911       if not disks:
2912         # No need to collect data
2913         continue
2915       node_disks[nname] = disks
2917 # Creating copies as SetDiskID below will modify the objects and that can
2918 # lead to incorrect data returned from nodes
2919 devonly = [dev.Copy() for (_, dev) in disks]
2921       for dev in devonly:
2922         self.cfg.SetDiskID(dev, nname)
2924 node_disks_devonly[nname] = devonly
2926 assert len(node_disks) == len(node_disks_devonly)
2928 # Collect data from all nodes with disks
2929     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2930                                                           node_disks_devonly)
2932 assert len(result) == len(node_disks)
2934     instdisk = {}
2936     for (nname, nres) in result.items():
2937 disks = node_disks[nname]
2939       if nres.offline:
2940         # No data from this node
2941         data = len(disks) * [(False, "node offline")]
2942       else:
2943         msg = nres.fail_msg
2944         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2945                  "while getting disk information: %s", msg)
2946         if msg:
2947           # No data from this node
2948           data = len(disks) * [(False, msg)]
2949         else:
2950           data = []
2951           for idx, i in enumerate(nres.payload):
2952             if isinstance(i, (tuple, list)) and len(i) == 2:
2953               data.append(i)
2954             else:
2955               logging.warning("Invalid result from node %s, entry %d: %s",
2956                               nname, idx, i)
2957               data.append((False, "Invalid result from the remote node"))
2959 for ((inst, _), status) in zip(disks, data):
2960 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2962 # Add empty entries for diskless instances.
2963 for inst in diskless_instances:
2964       assert inst not in instdisk
2965       instdisk[inst] = {}
2967 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2968 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2969 compat.all(isinstance(s, (tuple, list)) and
2970 len(s) == 2 for s in statuses)
2971 for inst, nnames in instdisk.items()
2972 for nname, statuses in nnames.items())
2973 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2975     return instdisk
2977   @staticmethod
2978   def _SshNodeSelector(group_uuid, all_nodes):
2979 """Create endless iterators for all potential SSH check hosts.
2982 nodes = [node for node in all_nodes
2983              if (node.group != group_uuid and
2984                  not node.offline)]
2985 keyfunc = operator.attrgetter("group")
2987 return map(itertools.cycle,
2988 [sorted(map(operator.attrgetter("name"), names))
2989 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2990                                                   keyfunc)])
2992   @classmethod
2993   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2994 """Choose which nodes should talk to which other nodes.
2996     We will make nodes contact all nodes in their group, and one node from
2997     every other group.
2999 @warning: This algorithm has a known issue if one node group is much
3000 smaller than others (e.g. just one node). In such a case all other
3001 nodes will talk to the single node.
3004 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3005 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3007 return (online_nodes,
3008 dict((name, sorted([i.next() for i in sel]))
3009 for name in online_nodes))
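  # Sketch for a two-group cluster (hypothetical names): with group members
  # node1/node2 and a foreign group containing only node3,
  # _SelectSshCheckNodes returns roughly
  #   (["node1", "node2"], {"node1": ["node3"], "node2": ["node3"]})
  # i.e. every online node checks one node per foreign group, cycling through
  # that group's members to spread the load.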
3011   def BuildHooksEnv(self):
3012     """Build hooks env.
3014     Cluster-Verify hooks are run only in the post phase; their failure makes
3015     the output be logged in the verify output and the verification fail.
3017     """
3018     env = {
3019       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3020       }
3022 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3023                for node in self.my_node_info.values())
3025     return env
3027 def BuildHooksNodes(self):
3028 """Build hooks nodes.
3031 return ([], self.my_node_names)
3033 def Exec(self, feedback_fn):
3034     """Verify integrity of the node group, performing various tests on nodes.
3037 # This method has too many local variables. pylint: disable=R0914
3038 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3040 if not self.my_node_names:
3041       # empty node group
3042       feedback_fn("* Empty node group, skipping verification")
3043       return True
3045     self.bad = False
3046     _ErrorIf = self._ErrorIf # pylint: disable=C0103
3047 verbose = self.op.verbose
3048 self._feedback_fn = feedback_fn
3050 vg_name = self.cfg.GetVGName()
3051 drbd_helper = self.cfg.GetDRBDHelper()
3052 cluster = self.cfg.GetClusterInfo()
3053 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3054 hypervisors = cluster.enabled_hypervisors
3055 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3057 i_non_redundant = [] # Non redundant instances
3058 i_non_a_balanced = [] # Non auto-balanced instances
3059 i_offline = 0 # Count of offline instances
3060 n_offline = 0 # Count of offline nodes
3061 n_drained = 0 # Count of nodes being drained
3062 node_vol_should = {}
3064 # FIXME: verify OS list
3066     # File verification
3067     filemap = _ComputeAncillaryFiles(cluster, False)
3069 # do local checksums
3070 master_node = self.master_node = self.cfg.GetMasterNode()
3071 master_ip = self.cfg.GetMasterIP()
3073 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3075     user_scripts = []
3076     if self.cfg.GetUseExternalMipScript():
3077 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3079 node_verify_param = {
3080 constants.NV_FILELIST:
3081 utils.UniqueSequence(filename
3082 for files in filemap
3083 for filename in files),
3084 constants.NV_NODELIST:
3085 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3086 self.all_node_info.values()),
3087 constants.NV_HYPERVISOR: hypervisors,
3088 constants.NV_HVPARAMS:
3089 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3090 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3091 for node in node_data_list
3092 if not node.offline],
3093 constants.NV_INSTANCELIST: hypervisors,
3094 constants.NV_VERSION: None,
3095 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3096 constants.NV_NODESETUP: None,
3097 constants.NV_TIME: None,
3098 constants.NV_MASTERIP: (master_node, master_ip),
3099 constants.NV_OSLIST: None,
3100 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3101 constants.NV_USERSCRIPTS: user_scripts,
3102       }
3104     if vg_name is not None:
3105 node_verify_param[constants.NV_VGLIST] = None
3106 node_verify_param[constants.NV_LVLIST] = vg_name
3107 node_verify_param[constants.NV_PVLIST] = [vg_name]
3108 node_verify_param[constants.NV_DRBDLIST] = None
3110     if drbd_helper:
3111       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3114 # FIXME: this needs to be changed per node-group, not cluster-wide
3115     bridges = set()
3116     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3117 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3118 bridges.add(default_nicpp[constants.NIC_LINK])
3119 for instance in self.my_inst_info.values():
3120 for nic in instance.nics:
3121 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3122 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3123 bridges.add(full_nic[constants.NIC_LINK])
3125     if bridges:
3126       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3128 # Build our expected cluster state
3129 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3130                                                  name=node.name,
3131                                                  vm_capable=node.vm_capable))
3132 for node in node_data_list)
3134     # Gather OOB paths
3135     oob_paths = []
3136     for node in self.all_node_info.values():
3137 path = _SupportsOob(self.cfg, node)
3138 if path and path not in oob_paths:
3139 oob_paths.append(path)
3141     if oob_paths:
3142       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3144 for instance in self.my_inst_names:
3145 inst_config = self.my_inst_info[instance]
3147 for nname in inst_config.all_nodes:
3148 if nname not in node_image:
3149 gnode = self.NodeImage(name=nname)
3150 gnode.ghost = (nname not in self.all_node_info)
3151 node_image[nname] = gnode
3153 inst_config.MapLVsByNode(node_vol_should)
3155 pnode = inst_config.primary_node
3156 node_image[pnode].pinst.append(instance)
3158 for snode in inst_config.secondary_nodes:
3159 nimg = node_image[snode]
3160 nimg.sinst.append(instance)
3161 if pnode not in nimg.sbp:
3162 nimg.sbp[pnode] = []
3163 nimg.sbp[pnode].append(instance)
3165 # At this point, we have the in-memory data structures complete,
3166 # except for the runtime information, which we'll gather next
3168 # Due to the way our RPC system works, exact response times cannot be
3169 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3170     # time before and after executing the request, we can at least have a time
3171     # window.
3172 nvinfo_starttime = time.time()
3173 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3174                                            node_verify_param,
3175                                            self.cfg.GetClusterName())
3176 nvinfo_endtime = time.time()
3178 if self.extra_lv_nodes and vg_name is not None:
3179       extra_lv_nvinfo = \
3180           self.rpc.call_node_verify(self.extra_lv_nodes,
3181 {constants.NV_LVLIST: vg_name},
3182 self.cfg.GetClusterName())
3183     else:
3184       extra_lv_nvinfo = {}
3186 all_drbd_map = self.cfg.ComputeDRBDMap()
3188 feedback_fn("* Gathering disk information (%s nodes)" %
3189 len(self.my_node_names))
3190     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3191                                      self.my_inst_info)
3193 feedback_fn("* Verifying configuration file consistency")
3195 # If not all nodes are being checked, we need to make sure the master node
3196 # and a non-checked vm_capable node are in the list.
3197 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3198     if absent_nodes:
3199       vf_nvinfo = all_nvinfo.copy()
3200 vf_node_info = list(self.my_node_info.values())
3201 additional_nodes = []
3202 if master_node not in self.my_node_info:
3203 additional_nodes.append(master_node)
3204 vf_node_info.append(self.all_node_info[master_node])
3205 # Add the first vm_capable node we find which is not included
3206 for node in absent_nodes:
3207 nodeinfo = self.all_node_info[node]
3208 if nodeinfo.vm_capable and not nodeinfo.offline:
3209 additional_nodes.append(node)
3210           vf_node_info.append(self.all_node_info[node])
3211           break
3212       key = constants.NV_FILELIST
3213 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3214 {key: node_verify_param[key]},
3215 self.cfg.GetClusterName()))
3216     else:
3217       vf_nvinfo = all_nvinfo
3218 vf_node_info = self.my_node_info.values()
3220 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3222 feedback_fn("* Verifying node status")
3224     refos_img = None
3226     for node_i in node_data_list:
3227       node = node_i.name
3228       nimg = node_image[node]
3230       if node_i.offline:
3231         if verbose:
3232           feedback_fn("* Skipping offline node %s" % (node,))
3233         n_offline += 1
3234         continue
3236       if node == master_node:
3237         ntype = "master"
3238       elif node_i.master_candidate:
3239         ntype = "master candidate"
3240       elif node_i.drained:
3241         ntype = "drained"
3242         n_drained += 1
3243       else:
3244         ntype = "regular"
3245       if verbose:
3246         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3248 msg = all_nvinfo[node].fail_msg
3249       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3250                msg)
3251       if msg:
3252         nimg.rpc_fail = True
3253         continue
3255 nresult = all_nvinfo[node].payload
3257 nimg.call_ok = self._VerifyNode(node_i, nresult)
3258 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3259 self._VerifyNodeNetwork(node_i, nresult)
3260 self._VerifyNodeUserScripts(node_i, nresult)
3261 self._VerifyOob(node_i, nresult)
3263       if nimg.vm_capable:
3264         self._VerifyNodeLVM(node_i, nresult, vg_name)
3265         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3266                              all_drbd_map)
3268 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3269 self._UpdateNodeInstances(node_i, nresult, nimg)
3270 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3271 self._UpdateNodeOS(node_i, nresult, nimg)
3273 if not nimg.os_fail:
3274 if refos_img is None:
3275             refos_img = nimg
3276           self._VerifyNodeOS(node_i, nimg, refos_img)
3277 self._VerifyNodeBridges(node_i, nresult, bridges)
3279 # Check whether all running instancies are primary for the node. (This
3280 # can no longer be done from _VerifyInstance below, since some of the
3281 # wrong instances could be from other node groups.)
3282 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3284 for inst in non_primary_inst:
3285 # FIXME: investigate best way to handle offline insts
3286 if inst.admin_state == constants.ADMINST_OFFLINE:
3287           if verbose:
3288             feedback_fn("* Skipping offline instance %s" % inst.name)
3289           i_offline += 1
3290           continue
3291 test = inst in self.all_inst_info
3292 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3293 "instance should not run on node %s", node_i.name)
3294 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3295 "node is running unknown instance %s", inst)
3297 for node, result in extra_lv_nvinfo.items():
3298 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3299 node_image[node], vg_name)
3301 feedback_fn("* Verifying instance status")
3302 for instance in self.my_inst_names:
3303       if verbose:
3304         feedback_fn("* Verifying instance %s" % instance)
3305 inst_config = self.my_inst_info[instance]
3306       self._VerifyInstance(instance, inst_config, node_image,
3307                            instdisk[instance])
3308 inst_nodes_offline = []
3310 pnode = inst_config.primary_node
3311 pnode_img = node_image[pnode]
3312 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3313 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3314 " primary node failed", instance)
3316 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3317                pnode_img.offline,
3318                constants.CV_EINSTANCEBADNODE, instance,
3319 "instance is marked as running and lives on offline node %s",
3320 inst_config.primary_node)
3322 # If the instance is non-redundant we cannot survive losing its primary
3323 # node, so we are not N+1 compliant. On the other hand we have no disk
3324       # templates with more than one secondary so that situation is not well
3325       # supported either.
3326 # FIXME: does not support file-backed instances
3327 if not inst_config.secondary_nodes:
3328 i_non_redundant.append(instance)
3330 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3331 constants.CV_EINSTANCELAYOUT,
3332 instance, "instance has multiple secondary nodes: %s",
3333 utils.CommaJoin(inst_config.secondary_nodes),
3334 code=self.ETYPE_WARNING)
3336 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3337 pnode = inst_config.primary_node
3338 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3339 instance_groups = {}
3341 for node in instance_nodes:
3342         instance_groups.setdefault(self.all_node_info[node].group,
3343                                    []).append(node)
3345       pretty_list = [
3346 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3347 # Sort so that we always list the primary node first.
3348 for group, nodes in sorted(instance_groups.items(),
3349                                    key=lambda (_, nodes): pnode in nodes,
3350                                    reverse=True)]
3352 self._ErrorIf(len(instance_groups) > 1,
3353 constants.CV_EINSTANCESPLITGROUPS,
3354 instance, "instance has primary and secondary nodes in"
3355 " different groups: %s", utils.CommaJoin(pretty_list),
3356 code=self.ETYPE_WARNING)
3358 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3359 i_non_a_balanced.append(instance)
3361 for snode in inst_config.secondary_nodes:
3362 s_img = node_image[snode]
3363 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3364 snode, "instance %s, connection to secondary node failed",
3365                  instance)
3367         if s_img.offline:
3368           inst_nodes_offline.append(snode)
3370 # warn that the instance lives on offline nodes
3371 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3372 "instance has offline secondary node(s) %s",
3373 utils.CommaJoin(inst_nodes_offline))
3374 # ... or ghost/non-vm_capable nodes
3375 for node in inst_config.all_nodes:
3376 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3377 instance, "instance lives on ghost node %s", node)
3378 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3379 instance, "instance lives on non-vm_capable node %s", node)
3381 feedback_fn("* Verifying orphan volumes")
3382 reserved = utils.FieldSet(*cluster.reserved_lvs)
3384 # We will get spurious "unknown volume" warnings if any node of this group
3385 # is secondary for an instance whose primary is in another group. To avoid
3386 # them, we find these instances and add their volumes to node_vol_should.
3387 for inst in self.all_inst_info.values():
3388 for secondary in inst.secondary_nodes:
3389 if (secondary in self.my_node_info
3390 and inst.name not in self.my_inst_info):
3391 inst.MapLVsByNode(node_vol_should)
3394 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3396 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3397 feedback_fn("* Verifying N+1 Memory redundancy")
3398 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3400 feedback_fn("* Other Notes")
3401     if i_non_redundant:
3402       feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3403 % len(i_non_redundant))
3405 if i_non_a_balanced:
3406 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3407 % len(i_non_a_balanced))
3409     if i_offline:
3410       feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3412     if n_offline:
3413       feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3415     if n_drained:
3416       feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3418     return not self.bad
3420   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3421 """Analyze the post-hooks' result
3423 This method analyses the hook result, handles it, and sends some
3424 nicely-formatted feedback back to the user.
3426 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3427 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3428 @param hooks_results: the results of the multi-node hooks rpc call
3429 @param feedback_fn: function used send feedback back to the caller
3430 @param lu_result: previous Exec result
3431 @return: the new Exec result, based on the previous result
3435 # We only really run POST phase hooks, only for non-empty groups,
3436 # and are only interested in their results
3437 if not self.my_node_names:
3438       # empty node group
3439       pass
3440     elif phase == constants.HOOKS_PHASE_POST:
3441 # Used to change hooks' output to proper indentation
3442 feedback_fn("* Hooks Results")
3443 assert hooks_results, "invalid result from hooks"
3445 for node_name in hooks_results:
3446 res = hooks_results[node_name]
3447         msg = res.fail_msg
3448         test = msg and not res.offline
3449 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3450 "Communication failure in hooks execution: %s", msg)
3451 if res.offline or msg:
3452           # No need to investigate payload if node is offline or gave
3453           # an error message
3454           continue
3455 for script, hkr, output in res.payload:
3456 test = hkr == constants.HKR_FAIL
3457 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3458 "Script %s failed, output:", script)
3459           if test:
3460             output = self._HOOKS_INDENT_RE.sub("      ", output)
3461             feedback_fn("%s" % output)
3462             lu_result = False
3464     return lu_result
3467 class LUClusterVerifyDisks(NoHooksLU):
3468   """Verifies the cluster disks status.
3470   """
3472   REQ_BGL = False
3473 def ExpandNames(self):
3474 self.share_locks = _ShareAll()
3475 self.needed_locks = {
3476       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3477       }
3479 def Exec(self, feedback_fn):
3480 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3482 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3483 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3484 for group in group_names])
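# Sketch: on a cluster with node groups "default" and "rack1" (names
# illustrative), Exec() submits one job per group, equivalent to
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="rack1")]])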
3487 class LUGroupVerifyDisks(NoHooksLU):
3488   """Verifies the status of all disks in a node group.
3490   """
3492   REQ_BGL = False
3493 def ExpandNames(self):
3494 # Raises errors.OpPrereqError on its own if group can't be found
3495 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3497 self.share_locks = _ShareAll()
3498 self.needed_locks = {
3499 locking.LEVEL_INSTANCE: [],
3500 locking.LEVEL_NODEGROUP: [],
3501       locking.LEVEL_NODE: [],
3502       }
3504 def DeclareLocks(self, level):
3505 if level == locking.LEVEL_INSTANCE:
3506 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3508 # Lock instances optimistically, needs verification once node and group
3509 # locks have been acquired
3510 self.needed_locks[locking.LEVEL_INSTANCE] = \
3511 self.cfg.GetNodeGroupInstances(self.group_uuid)
3513 elif level == locking.LEVEL_NODEGROUP:
3514 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3516 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3517 set([self.group_uuid] +
3518 # Lock all groups used by instances optimistically; this requires
3519 # going via the node before it's locked, requiring verification
3520             # later on
3521             [group_uuid
3522              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3523 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3525 elif level == locking.LEVEL_NODE:
3526       # This will only lock the nodes in the group to be verified which contain
3527       # actual instances
3528 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3529 self._LockInstancesNodes()
3531 # Lock all nodes in group to be verified
3532 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3533 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3534 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3536 def CheckPrereq(self):
3537 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3538 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3539 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3541 assert self.group_uuid in owned_groups
3543 # Check if locked instances are still correct
3544 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3546 # Get instance information
3547 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3549 # Check if node groups for locked instances are still correct
3550 _CheckInstancesNodeGroups(self.cfg, self.instances,
3551 owned_groups, owned_nodes, self.group_uuid)
3553 def Exec(self, feedback_fn):
3554 """Verify integrity of cluster disks.
3556 @rtype: tuple of three items
3557 @return: a tuple of (dict of node-to-node_error, list of instances
3558 which need activate-disks, dict of instance: (node, volume) for
3559         missing volumes
3561     """
3562     res_nodes = {}
3563     res_instances = set()
3564     res_missing = {}
3566 nv_dict = _MapInstanceDisksToNodes([inst
3567 for inst in self.instances.values()
3568 if inst.admin_state == constants.ADMINST_UP])
3570     if nv_dict:
3571       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3572 set(self.cfg.GetVmCapableNodeList()))
3574 node_lvs = self.rpc.call_lv_list(nodes, [])
3576 for (node, node_res) in node_lvs.items():
3577         if node_res.offline:
3578           continue
3580 msg = node_res.fail_msg
3581         if msg:
3582           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3583           res_nodes[node] = msg
3584           continue
3586 for lv_name, (_, _, lv_online) in node_res.payload.items():
3587 inst = nv_dict.pop((node, lv_name), None)
3588 if not (lv_online or inst is None):
3589 res_instances.add(inst)
3591     # any leftover items in nv_dict are missing LVs, let's arrange the data
3592     # better
3593 for key, inst in nv_dict.iteritems():
3594 res_missing.setdefault(inst, []).append(list(key))
3596 return (res_nodes, list(res_instances), res_missing)
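# Sketch of a possible return value (hypothetical names):
#   ({"node2": "Error 111: connection refused"},  # node-to-node_error
#    ["inst1"],                                   # need activate-disks
#    {"inst2": [["node3", "xenvg/disk0"]]})       # missing LVs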
3599 class LUClusterRepairDiskSizes(NoHooksLU):
3600   """Verifies the cluster disk sizes.
3602   """
3604   REQ_BGL = False
3605 def ExpandNames(self):
3606 if self.op.instances:
3607 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3608 self.needed_locks = {
3609 locking.LEVEL_NODE_RES: [],
3610         locking.LEVEL_INSTANCE: self.wanted_names,
3611         }
3612 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3613     else:
3614       self.wanted_names = None
3615 self.needed_locks = {
3616 locking.LEVEL_NODE_RES: locking.ALL_SET,
3617         locking.LEVEL_INSTANCE: locking.ALL_SET,
3618         }
3619 self.share_locks = {
3620 locking.LEVEL_NODE_RES: 1,
3621       locking.LEVEL_INSTANCE: 0,
3622       }
3624 def DeclareLocks(self, level):
3625 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3626 self._LockInstancesNodes(primary_only=True, level=level)
3628 def CheckPrereq(self):
3629 """Check prerequisites.
3631 This only checks the optional instance list against the existing names.
3634 if self.wanted_names is None:
3635 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3637 self.wanted_instances = \
3638 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3640 def _EnsureChildSizes(self, disk):
3641 """Ensure children of the disk have the needed disk size.
3643 This is valid mainly for DRBD8 and fixes an issue where the
3644 children have smaller disk size.
3646 @param disk: an L{ganeti.objects.Disk} object
3649 if disk.dev_type == constants.LD_DRBD8:
3650 assert disk.children, "Empty children for DRBD8?"
3651 fchild = disk.children[0]
3652 mismatch = fchild.size < disk.size
3653       if mismatch:
3654         self.LogInfo("Child disk has size %d, parent %d, fixing",
3655 fchild.size, disk.size)
3656 fchild.size = disk.size
3658 # and we recurse on this child only, not on the metadev
3659 return self._EnsureChildSizes(fchild) or mismatch
3660     else:
3661       return False
3663   def Exec(self, feedback_fn):
3664 """Verify the size of cluster disks.
3667 # TODO: check child disks too
3668 # TODO: check differences in size between primary/secondary nodes
3669     per_node_disks = {}
3670     for instance in self.wanted_instances:
3671 pnode = instance.primary_node
3672 if pnode not in per_node_disks:
3673 per_node_disks[pnode] = []
3674 for idx, disk in enumerate(instance.disks):
3675 per_node_disks[pnode].append((instance, idx, disk))
3677 assert not (frozenset(per_node_disks.keys()) -
3678 self.owned_locks(locking.LEVEL_NODE_RES)), \
3679 "Not owning correct locks"
3680 assert not self.owned_locks(locking.LEVEL_NODE)
3682     changed = []
3683     for node, dskl in per_node_disks.items():
3684 newl = [v[2].Copy() for v in dskl]
3685       for dsk in newl:
3686         self.cfg.SetDiskID(dsk, node)
3687 result = self.rpc.call_blockdev_getsize(node, newl)
3688       if result.fail_msg:
3689         self.LogWarning("Failure in blockdev_getsize call to node"
3690                         " %s, ignoring", node)
3691         continue
3692 if len(result.payload) != len(dskl):
3693 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3694 " result.payload=%s", node, len(dskl), result.payload)
3695         self.LogWarning("Invalid result from node %s, ignoring node results",
3696                         node)
3697         continue
3698 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3699         if size is None:
3700           self.LogWarning("Disk %d of instance %s did not return size"
3701                           " information, ignoring", idx, instance.name)
3702           continue
3703 if not isinstance(size, (int, long)):
3704 self.LogWarning("Disk %d of instance %s did not return valid"
3705 " size information, ignoring", idx, instance.name)
3708 if size != disk.size:
3709 self.LogInfo("Disk %d of instance %s has mismatched size,"
3710 " correcting: recorded %d, actual %d", idx,
3711 instance.name, disk.size, size)
3713 self.cfg.Update(instance, feedback_fn)
3714 changed.append((instance.name, idx, size))
3715 if self._EnsureChildSizes(disk):
3716 self.cfg.Update(instance, feedback_fn)
3717 changed.append((instance.name, idx, disk.size))
3721 class LUClusterRename(LogicalUnit):
3722 """Rename the cluster.
3725 HPATH = "cluster-rename"
3726 HTYPE = constants.HTYPE_CLUSTER
3728 def BuildHooksEnv(self):
3733 "OP_TARGET": self.cfg.GetClusterName(),
3734 "NEW_NAME": self.op.name,
3737 def BuildHooksNodes(self):
3738 """Build hooks nodes.
3741 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3743 def CheckPrereq(self):
3744 """Verify that the passed name is a valid one.
3747 hostname = netutils.GetHostname(name=self.op.name,
3748 family=self.cfg.GetPrimaryIPFamily())
3750 new_name = hostname.name
3751 self.ip = new_ip = hostname.ip
3752 old_name = self.cfg.GetClusterName()
3753 old_ip = self.cfg.GetMasterIP()
3754 if new_name == old_name and new_ip == old_ip:
3755 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3756 " cluster has changed",
3758 if new_ip != old_ip:
3759 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3760 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3761 " reachable on the network" %
3762 new_ip, errors.ECODE_NOTUNIQUE)
3764 self.op.name = new_name
3766 def Exec(self, feedback_fn):
3767 """Rename the cluster.
3770 clustername = self.op.name
3773 # shutdown the master IP
3774 master_params = self.cfg.GetMasterNetworkParameters()
3775 ems = self.cfg.GetUseExternalMipScript()
3776 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3778 result.Raise("Could not disable the master role")
3781 cluster = self.cfg.GetClusterInfo()
3782 cluster.cluster_name = clustername
3783 cluster.master_ip = new_ip
3784 self.cfg.Update(cluster, feedback_fn)
3786 # update the known hosts file
3787 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3788     node_list = self.cfg.GetOnlineNodeList()
3789     try:
3790       node_list.remove(master_params.name)
3791     except ValueError:
3792       pass
3793     _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3795 master_params.ip = new_ip
3796 result = self.rpc.call_node_activate_master_ip(master_params.name,
3798 msg = result.fail_msg
3800 self.LogWarning("Could not re-enable the master role on"
3801 " the master, please restart manually: %s", msg)
3806 def _ValidateNetmask(cfg, netmask):
3807 """Checks if a netmask is valid.
3809 @type cfg: L{config.ConfigWriter}
3810 @param cfg: The cluster configuration
3812 @param netmask: the netmask to be verified
3813 @raise errors.OpPrereqError: if the validation fails
3816   ip_family = cfg.GetPrimaryIPFamily()
3817   try:
3818     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3819   except errors.ProgrammerError:
3820     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3821                                ip_family, errors.ECODE_INVAL)
3822   if not ipcls.ValidateNetmask(netmask):
3823     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3824                                (netmask,), errors.ECODE_INVAL)
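# Hedged usage sketch for _ValidateNetmask (assumes an IPv4 cluster and a
# ConfigWriter instance named `cfg`; not executed by this module):
#
#   _ValidateNetmask(cfg, 24)   # a valid IPv4 CIDR prefix length, passes
#   _ValidateNetmask(cfg, 99)   # out of range, raises errors.OpPrereqError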
3827 class LUClusterSetParams(LogicalUnit):
3828 """Change the parameters of the cluster.
3831 HPATH = "cluster-modify"
3832 HTYPE = constants.HTYPE_CLUSTER
3835 def CheckArguments(self):
3839 if self.op.uid_pool:
3840 uidpool.CheckUidPool(self.op.uid_pool)
3842 if self.op.add_uids:
3843 uidpool.CheckUidPool(self.op.add_uids)
3845 if self.op.remove_uids:
3846 uidpool.CheckUidPool(self.op.remove_uids)
3848 if self.op.master_netmask is not None:
3849 _ValidateNetmask(self.cfg, self.op.master_netmask)
3851 if self.op.diskparams:
3852 for dt_params in self.op.diskparams.values():
3853 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3855 def ExpandNames(self):
3856 # FIXME: in the future maybe other cluster params won't require checking on
3857 # all nodes to be modified.
3858 self.needed_locks = {
3859 locking.LEVEL_NODE: locking.ALL_SET,
3860 locking.LEVEL_INSTANCE: locking.ALL_SET,
3861 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3863 self.share_locks = {
3864 locking.LEVEL_NODE: 1,
3865 locking.LEVEL_INSTANCE: 1,
3866 locking.LEVEL_NODEGROUP: 1,
3869 def BuildHooksEnv(self):
3874 "OP_TARGET": self.cfg.GetClusterName(),
3875 "NEW_VG_NAME": self.op.vg_name,
3878 def BuildHooksNodes(self):
3879 """Build hooks nodes.
3882 mn = self.cfg.GetMasterNode()
3885 def CheckPrereq(self):
3886 """Check prerequisites.
3888 This checks whether the given params don't conflict and
3889 if the given volume group is valid.
3892 if self.op.vg_name is not None and not self.op.vg_name:
3893 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3894 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3895 " instances exist", errors.ECODE_INVAL)
3897 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3898 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3899 raise errors.OpPrereqError("Cannot disable drbd helper while"
3900 " drbd-based instances exist",
3903 node_list = self.owned_locks(locking.LEVEL_NODE)
3905     # if vg_name is not None, check the given volume group on all nodes
3907 vglist = self.rpc.call_vg_list(node_list)
3908 for node in node_list:
3909 msg = vglist[node].fail_msg
3911 # ignoring down node
3912 self.LogWarning("Error while gathering data on node %s"
3913 " (ignoring node): %s", node, msg)
3915 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3917 constants.MIN_VG_SIZE)
3919 raise errors.OpPrereqError("Error on node '%s': %s" %
3920 (node, vgstatus), errors.ECODE_ENVIRON)
3922 if self.op.drbd_helper:
3923 # checks given drbd helper on all nodes
3924 helpers = self.rpc.call_drbd_helper(node_list)
3925 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3927 self.LogInfo("Not checking drbd helper on offline node %s", node)
3929 msg = helpers[node].fail_msg
3931 raise errors.OpPrereqError("Error checking drbd helper on node"
3932 " '%s': %s" % (node, msg),
3933 errors.ECODE_ENVIRON)
3934 node_helper = helpers[node].payload
3935 if node_helper != self.op.drbd_helper:
3936 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3937 (node, node_helper), errors.ECODE_ENVIRON)
3939 self.cluster = cluster = self.cfg.GetClusterInfo()
3940 # validate params changes
3941 if self.op.beparams:
3942 objects.UpgradeBeParams(self.op.beparams)
3943 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3944 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3946 if self.op.ndparams:
3947 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3948 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3950 # TODO: we need a more general way to handle resetting
3951 # cluster-level parameters to default values
3952 if self.new_ndparams["oob_program"] == "":
3953 self.new_ndparams["oob_program"] = \
3954 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3956 if self.op.hv_state:
3957 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3958 self.cluster.hv_state_static)
3959 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3960 for hv, values in new_hv_state.items())
3962 if self.op.disk_state:
3963 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3964 self.cluster.disk_state_static)
3965 self.new_disk_state = \
3966 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3967 for name, values in svalues.items()))
3968 for storage, svalues in new_disk_state.items())
3971 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3974 all_instances = self.cfg.GetAllInstancesInfo().values()
3976 for group in self.cfg.GetAllNodeGroupsInfo().values():
3977 instances = frozenset([inst for inst in all_instances
3978 if compat.any(node in group.members
3979 for node in inst.all_nodes)])
3980 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3981 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3983 new_ipolicy, instances)
3985 violations.update(new)
3988 self.LogWarning("After the ipolicy change the following instances"
3989 " violate them: %s",
3990 utils.CommaJoin(violations))
3992 if self.op.nicparams:
3993 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3994 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3995 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3998 # check all instances for consistency
3999 for instance in self.cfg.GetAllInstancesInfo().values():
4000 for nic_idx, nic in enumerate(instance.nics):
4001 params_copy = copy.deepcopy(nic.nicparams)
4002 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4004           # check parameter syntax
4005           try:
4006             objects.NIC.CheckParameterSyntax(params_filled)
4007           except errors.ConfigurationError, err:
4008 nic_errors.append("Instance %s, nic/%d: %s" %
4009 (instance.name, nic_idx, err))
4011 # if we're moving instances to routed, check that they have an ip
4012 target_mode = params_filled[constants.NIC_MODE]
4013 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4014 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4015 " address" % (instance.name, nic_idx))
4017 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4018 "\n".join(nic_errors))
4020 # hypervisor list/parameters
4021 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4022 if self.op.hvparams:
4023 for hv_name, hv_dict in self.op.hvparams.items():
4024         if hv_name not in self.new_hvparams:
4025           self.new_hvparams[hv_name] = hv_dict
4026         else:
4027           self.new_hvparams[hv_name].update(hv_dict)
4029 # disk template parameters
4030 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4031 if self.op.diskparams:
4032       for dt_name, dt_params in self.op.diskparams.items():
4033         if dt_name not in self.new_diskparams:
4034           self.new_diskparams[dt_name] = dt_params
4035         else:
4036           self.new_diskparams[dt_name].update(dt_params)
4038 # os hypervisor parameters
4039 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4040     if self.op.os_hvp:
4041       for os_name, hvs in self.op.os_hvp.items():
4042         if os_name not in self.new_os_hvp:
4043           self.new_os_hvp[os_name] = hvs
4044         else:
4045           for hv_name, hv_dict in hvs.items():
4046             if hv_name not in self.new_os_hvp[os_name]:
4047               self.new_os_hvp[os_name][hv_name] = hv_dict
4048             else:
4049               self.new_os_hvp[os_name][hv_name].update(hv_dict)
4052 self.new_osp = objects.FillDict(cluster.osparams, {})
4053 if self.op.osparams:
4054 for os_name, osp in self.op.osparams.items():
4055 if os_name not in self.new_osp:
4056 self.new_osp[os_name] = {}
4058 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4061 if not self.new_osp[os_name]:
4062 # we removed all parameters
4063 del self.new_osp[os_name]
4065 # check the parameter validity (remote check)
4066 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4067 os_name, self.new_osp[os_name])
4069 # changes to the hypervisor list
4070 if self.op.enabled_hypervisors is not None:
4071 self.hv_list = self.op.enabled_hypervisors
4072 for hv in self.hv_list:
4073         # if the hypervisor doesn't already exist in the cluster
4074         # hvparams, we initialize it to empty, and then (in both
4075         # cases) we make sure to fill the defaults, as we might not
4076         # have a complete defaults list if the hypervisor wasn't
4077         # enabled before
4078         if hv not in new_hvp:
4079           new_hvp[hv] = {}
4080         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4081 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4083 self.hv_list = cluster.enabled_hypervisors
4085 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4086 # either the enabled list has changed, or the parameters have, validate
4087 for hv_name, hv_params in self.new_hvparams.items():
4088 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4089 (self.op.enabled_hypervisors and
4090 hv_name in self.op.enabled_hypervisors)):
4091 # either this is a new hypervisor, or its parameters have changed
4092 hv_class = hypervisor.GetHypervisor(hv_name)
4093 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4094 hv_class.CheckParameterSyntax(hv_params)
4095 _CheckHVParams(self, node_list, hv_name, hv_params)
4098 # no need to check any newly-enabled hypervisors, since the
4099 # defaults have already been checked in the above code-block
4100 for os_name, os_hvp in self.new_os_hvp.items():
4101 for hv_name, hv_params in os_hvp.items():
4102 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4103 # we need to fill in the new os_hvp on top of the actual hv_p
4104 cluster_defaults = self.new_hvparams.get(hv_name, {})
4105 new_osp = objects.FillDict(cluster_defaults, hv_params)
4106 hv_class = hypervisor.GetHypervisor(hv_name)
4107 hv_class.CheckParameterSyntax(new_osp)
4108 _CheckHVParams(self, node_list, hv_name, new_osp)
4110 if self.op.default_iallocator:
4111 alloc_script = utils.FindFile(self.op.default_iallocator,
4112 constants.IALLOCATOR_SEARCH_PATH,
4114 if alloc_script is None:
4115 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4116 " specified" % self.op.default_iallocator,
4119 def Exec(self, feedback_fn):
4120 """Change the parameters of the cluster.
4123 if self.op.vg_name is not None:
4124 new_volume = self.op.vg_name
4127 if new_volume != self.cfg.GetVGName():
4128         self.cfg.SetVGName(new_volume)
4129       else:
4130         feedback_fn("Cluster LVM configuration already in desired"
4131 " state, not changing")
4132 if self.op.drbd_helper is not None:
4133 new_helper = self.op.drbd_helper
4136 if new_helper != self.cfg.GetDRBDHelper():
4137         self.cfg.SetDRBDHelper(new_helper)
4138       else:
4139         feedback_fn("Cluster DRBD helper already in desired state,"
4140                     " not changing")
4141 if self.op.hvparams:
4142 self.cluster.hvparams = self.new_hvparams
4143     if self.op.os_hvp:
4144       self.cluster.os_hvp = self.new_os_hvp
4145 if self.op.enabled_hypervisors is not None:
4146 self.cluster.hvparams = self.new_hvparams
4147 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4148 if self.op.beparams:
4149 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4150 if self.op.nicparams:
4151 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4152     if self.op.ipolicy:
4153       self.cluster.ipolicy = self.new_ipolicy
4154 if self.op.osparams:
4155 self.cluster.osparams = self.new_osp
4156 if self.op.ndparams:
4157 self.cluster.ndparams = self.new_ndparams
4158 if self.op.diskparams:
4159 self.cluster.diskparams = self.new_diskparams
4160 if self.op.hv_state:
4161 self.cluster.hv_state_static = self.new_hv_state
4162 if self.op.disk_state:
4163 self.cluster.disk_state_static = self.new_disk_state
4165 if self.op.candidate_pool_size is not None:
4166 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4167 # we need to update the pool size here, otherwise the save will fail
4168 _AdjustCandidatePool(self, [])
4170 if self.op.maintain_node_health is not None:
4171 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4172 feedback_fn("Note: CONFD was disabled at build time, node health"
4173 " maintenance is not useful (still enabling it)")
4174 self.cluster.maintain_node_health = self.op.maintain_node_health
4176 if self.op.prealloc_wipe_disks is not None:
4177 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4179 if self.op.add_uids is not None:
4180 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4182 if self.op.remove_uids is not None:
4183 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4185 if self.op.uid_pool is not None:
4186 self.cluster.uid_pool = self.op.uid_pool
4188 if self.op.default_iallocator is not None:
4189 self.cluster.default_iallocator = self.op.default_iallocator
4191 if self.op.reserved_lvs is not None:
4192 self.cluster.reserved_lvs = self.op.reserved_lvs
4194 if self.op.use_external_mip_script is not None:
4195 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4197 def helper_os(aname, mods, desc):
4199 lst = getattr(self.cluster, aname)
4200 for key, val in mods:
4201 if key == constants.DDM_ADD:
4203 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4206 elif key == constants.DDM_REMOVE:
4210 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4212 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4214 if self.op.hidden_os:
4215 helper_os("hidden_os", self.op.hidden_os, "hidden")
4217 if self.op.blacklisted_os:
4218 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4220 if self.op.master_netdev:
4221 master_params = self.cfg.GetMasterNetworkParameters()
4222 ems = self.cfg.GetUseExternalMipScript()
4223 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4224 self.cluster.master_netdev)
4225 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4227 result.Raise("Could not disable the master ip")
4228 feedback_fn("Changing master_netdev from %s to %s" %
4229 (master_params.netdev, self.op.master_netdev))
4230 self.cluster.master_netdev = self.op.master_netdev
4232 if self.op.master_netmask:
4233 master_params = self.cfg.GetMasterNetworkParameters()
4234 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4235 result = self.rpc.call_node_change_master_netmask(master_params.name,
4236 master_params.netmask,
4237 self.op.master_netmask,
4239 master_params.netdev)
4241 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4244 self.cluster.master_netmask = self.op.master_netmask
4246 self.cfg.Update(self.cluster, feedback_fn)
4248 if self.op.master_netdev:
4249 master_params = self.cfg.GetMasterNetworkParameters()
4250 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4251 self.op.master_netdev)
4252 ems = self.cfg.GetUseExternalMipScript()
4253 result = self.rpc.call_node_activate_master_ip(master_params.name,
4256 self.LogWarning("Could not re-enable the master ip on"
4257 " the master, please restart manually: %s",
4261 def _UploadHelper(lu, nodes, fname):
4262 """Helper for uploading a file and showing warnings.
4265 if os.path.exists(fname):
4266 result = lu.rpc.call_upload_file(nodes, fname)
4267 for to_node, to_result in result.items():
4268 msg = to_result.fail_msg
4270 msg = ("Copy of file %s to node %s failed: %s" %
4271 (fname, to_node, msg))
4272 lu.proc.LogWarning(msg)
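# Minimal usage sketch for _UploadHelper (hypothetical node names): copy one
# ancillary file to two nodes, logging failures as warnings instead of
# aborting:
#
#   _UploadHelper(lu, ["node1.example.com", "node2.example.com"],
#                 constants.ETC_HOSTS)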
4275 def _ComputeAncillaryFiles(cluster, redist):
4276 """Compute files external to Ganeti which need to be consistent.
4278 @type redist: boolean
4279 @param redist: Whether to include files which need to be redistributed
4282 # Compute files for all nodes
4284 constants.SSH_KNOWN_HOSTS_FILE,
4285 constants.CONFD_HMAC_KEY,
4286 constants.CLUSTER_DOMAIN_SECRET_FILE,
4287 constants.SPICE_CERT_FILE,
4288 constants.SPICE_CACERT_FILE,
4289 constants.RAPI_USERS_FILE,
4293 files_all.update(constants.ALL_CERT_FILES)
4294 files_all.update(ssconf.SimpleStore().GetFileList())
4296 # we need to ship at least the RAPI certificate
4297 files_all.add(constants.RAPI_CERT_FILE)
4299 if cluster.modify_etc_hosts:
4300 files_all.add(constants.ETC_HOSTS)
4302 # Files which are optional, these must:
4303 # - be present in one other category as well
4304 # - either exist or not exist on all nodes of that category (mc, vm all)
4306 constants.RAPI_USERS_FILE,
4309 # Files which should only be on master candidates
4313 files_mc.add(constants.CLUSTER_CONF_FILE)
4315 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4317 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4319 # Files which should only be on VM-capable nodes
4320 files_vm = set(filename
4321 for hv_name in cluster.enabled_hypervisors
4322 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4324 files_opt |= set(filename
4325 for hv_name in cluster.enabled_hypervisors
4326 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4328 # Filenames in each category must be unique
4329 all_files_set = files_all | files_mc | files_vm
4330 assert (len(all_files_set) ==
4331 sum(map(len, [files_all, files_mc, files_vm]))), \
4332 "Found file listed in more than one file list"
4334 # Optional files must be present in one other category
4335 assert all_files_set.issuperset(files_opt), \
4336 "Optional file not in a different required list"
4338 return (files_all, files_opt, files_mc, files_vm)
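# Sketch of how the returned categories are typically consumed (mirrors
# _RedistributeAncillaryFiles below):
#
#   (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
#   # files_all -> every node, files_mc -> master candidates only,
#   # files_vm -> vm-capable nodes only; optional files may be absent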
4341 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4342 """Distribute additional files which are part of the cluster configuration.
4344 ConfigWriter takes care of distributing the config and ssconf files, but
4345 there are more files which should be distributed to all nodes. This function
4346 makes sure those are copied.
4348 @param lu: calling logical unit
4349 @param additional_nodes: list of nodes not in the config to distribute to
4350 @type additional_vm: boolean
4351 @param additional_vm: whether the additional nodes are vm-capable or not
4354 # Gather target nodes
4355 cluster = lu.cfg.GetClusterInfo()
4356 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4358 online_nodes = lu.cfg.GetOnlineNodeList()
4359 vm_nodes = lu.cfg.GetVmCapableNodeList()
4361 if additional_nodes is not None:
4362 online_nodes.extend(additional_nodes)
4364 vm_nodes.extend(additional_nodes)
4366 # Never distribute to master node
4367 for nodelist in [online_nodes, vm_nodes]:
4368 if master_info.name in nodelist:
4369 nodelist.remove(master_info.name)
4372 (files_all, _, files_mc, files_vm) = \
4373 _ComputeAncillaryFiles(cluster, True)
4375 # Never re-distribute configuration file from here
4376 assert not (constants.CLUSTER_CONF_FILE in files_all or
4377 constants.CLUSTER_CONF_FILE in files_vm)
4378 assert not files_mc, "Master candidates not handled in this function"
4381 (online_nodes, files_all),
4382 (vm_nodes, files_vm),
4386 for (node_list, files) in filemap:
4387     for fname in files:
4388       _UploadHelper(lu, node_list, fname)
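# Hedged example: after adding a new vm-capable node (hypothetical name) that
# is not yet in the configuration, a caller would redistribute with:
#
#   _RedistributeAncillaryFiles(lu, additional_nodes=["newnode.example.com"],
#                               additional_vm=True)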
4391 class LUClusterRedistConf(NoHooksLU):
4392 """Force the redistribution of cluster configuration.
4394 This is a very simple LU.
4399 def ExpandNames(self):
4400 self.needed_locks = {
4401 locking.LEVEL_NODE: locking.ALL_SET,
4403 self.share_locks[locking.LEVEL_NODE] = 1
4405 def Exec(self, feedback_fn):
4406 """Redistribute the configuration.
4409 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4410 _RedistributeAncillaryFiles(self)
4413 class LUClusterActivateMasterIp(NoHooksLU):
4414 """Activate the master IP on the master node.
4417 def Exec(self, feedback_fn):
4418 """Activate the master IP.
4421 master_params = self.cfg.GetMasterNetworkParameters()
4422 ems = self.cfg.GetUseExternalMipScript()
4423 result = self.rpc.call_node_activate_master_ip(master_params.name,
4425 result.Raise("Could not activate the master IP")
4428 class LUClusterDeactivateMasterIp(NoHooksLU):
4429 """Deactivate the master IP on the master node.
4432 def Exec(self, feedback_fn):
4433 """Deactivate the master IP.
4436 master_params = self.cfg.GetMasterNetworkParameters()
4437 ems = self.cfg.GetUseExternalMipScript()
4438 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4440 result.Raise("Could not deactivate the master IP")
4443 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4444 """Sleep and poll for an instance's disk to sync.
4447 if not instance.disks or disks is not None and not disks:
4450 disks = _ExpandCheckDisks(instance, disks)
4453 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4455 node = instance.primary_node
4458 lu.cfg.SetDiskID(dev, node)
4460 # TODO: Convert to utils.Retry
4463 degr_retries = 10 # in seconds, as we sleep 1 second each time
4467 cumul_degraded = False
4468 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4469 msg = rstats.fail_msg
4471 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4474 raise errors.RemoteError("Can't contact node %s for mirror data,"
4475 " aborting." % node)
4478 rstats = rstats.payload
4480 for i, mstat in enumerate(rstats):
4482 lu.LogWarning("Can't compute data for node %s/%s",
4483 node, disks[i].iv_name)
4486 cumul_degraded = (cumul_degraded or
4487 (mstat.is_degraded and mstat.sync_percent is None))
4488 if mstat.sync_percent is not None:
4490 if mstat.estimated_time is not None:
4491 rem_time = ("%s remaining (estimated)" %
4492 utils.FormatSeconds(mstat.estimated_time))
4493 max_time = mstat.estimated_time
4495 rem_time = "no time estimate"
4496 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4497 (disks[i].iv_name, mstat.sync_percent, rem_time))
4499 # if we're done but degraded, let's do a few small retries, to
4500 # make sure we see a stable and not transient situation; therefore
4501 # we force restart of the loop
4502 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4503 logging.info("Degraded disks found, %d retries left", degr_retries)
4511 time.sleep(min(60, max_time))
4514 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4515 return not cumul_degraded
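# Typical call pattern (sketch): block until the instance's disks are in
# sync and warn if they remain degraded after the retries above:
#
#   if not _WaitForSync(lu, instance):
#     lu.LogWarning("Some disks of instance %s are degraded", instance.name)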
4518 def _BlockdevFind(lu, node, dev, instance):
4519 """Wrapper around call_blockdev_find to annotate diskparams.
4521 @param lu: A reference to the lu object
4522 @param node: The node to call out
4523 @param dev: The device to find
4524 @param instance: The instance object the device belongs to
4525 @returns The result of the rpc call
4528 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4529 return lu.rpc.call_blockdev_find(node, disk)
4532 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4533 """Wrapper around L{_CheckDiskConsistencyInner}.
4536 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4537 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4541 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4543 """Check that mirrors are not degraded.
4545 @attention: The device has to be annotated already.
4547 The ldisk parameter, if True, will change the test from the
4548 is_degraded attribute (which represents overall non-ok status for
4549 the device(s)) to the ldisk (representing the local storage status).
4552 lu.cfg.SetDiskID(dev, node)
4556 if on_primary or dev.AssembleOnSecondary():
4557 rstats = lu.rpc.call_blockdev_find(node, dev)
4558 msg = rstats.fail_msg
4560 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4562 elif not rstats.payload:
4563 lu.LogWarning("Can't find disk on node %s", node)
4567 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4569 result = result and not rstats.payload.is_degraded
4572 for child in dev.children:
4573 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4579 class LUOobCommand(NoHooksLU):
4580 """Logical unit for OOB handling.
4584 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4586 def ExpandNames(self):
4587 """Gather locks we need.
4590 if self.op.node_names:
4591 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4592 lock_names = self.op.node_names
4594 lock_names = locking.ALL_SET
4596 self.needed_locks = {
4597 locking.LEVEL_NODE: lock_names,
4600 def CheckPrereq(self):
4601 """Check prerequisites.
4604 - the node exists in the configuration
4607 Any errors are signaled by raising errors.OpPrereqError.
4611 self.master_node = self.cfg.GetMasterNode()
4613 assert self.op.power_delay >= 0.0
4615 if self.op.node_names:
4616 if (self.op.command in self._SKIP_MASTER and
4617 self.master_node in self.op.node_names):
4618 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4619 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4621 if master_oob_handler:
4622 additional_text = ("run '%s %s %s' if you want to operate on the"
4623 " master regardless") % (master_oob_handler,
4627 additional_text = "it does not support out-of-band operations"
4629 raise errors.OpPrereqError(("Operating on the master node %s is not"
4630 " allowed for %s; %s") %
4631 (self.master_node, self.op.command,
4632 additional_text), errors.ECODE_INVAL)
4634 self.op.node_names = self.cfg.GetNodeList()
4635 if self.op.command in self._SKIP_MASTER:
4636 self.op.node_names.remove(self.master_node)
4638 if self.op.command in self._SKIP_MASTER:
4639 assert self.master_node not in self.op.node_names
4641 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4643 raise errors.OpPrereqError("Node %s not found" % node_name,
4646 self.nodes.append(node)
4648 if (not self.op.ignore_status and
4649 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4650 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4651 " not marked offline") % node_name,
4654 def Exec(self, feedback_fn):
4655 """Execute OOB and return result if we expect any.
4658 master_node = self.master_node
4661 for idx, node in enumerate(utils.NiceSort(self.nodes,
4662 key=lambda node: node.name)):
4663 node_entry = [(constants.RS_NORMAL, node.name)]
4664 ret.append(node_entry)
4666 oob_program = _SupportsOob(self.cfg, node)
4669 node_entry.append((constants.RS_UNAVAIL, None))
4672 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4673 self.op.command, oob_program, node.name)
4674 result = self.rpc.call_run_oob(master_node, oob_program,
4675 self.op.command, node.name,
4679 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4680 node.name, result.fail_msg)
4681 node_entry.append((constants.RS_NODATA, None))
4684 self._CheckPayload(result)
4685 except errors.OpExecError, err:
4686 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4688 node_entry.append((constants.RS_NODATA, None))
4690 if self.op.command == constants.OOB_HEALTH:
4691 # For health we should log important events
4692 for item, status in result.payload:
4693 if status in [constants.OOB_STATUS_WARNING,
4694 constants.OOB_STATUS_CRITICAL]:
4695 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4696 item, node.name, status)
4698 if self.op.command == constants.OOB_POWER_ON:
4700 elif self.op.command == constants.OOB_POWER_OFF:
4701 node.powered = False
4702 elif self.op.command == constants.OOB_POWER_STATUS:
4703 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4704 if powered != node.powered:
4705 logging.warning(("Recorded power state (%s) of node '%s' does not"
4706 " match actual power state (%s)"), node.powered,
4709 # For configuration changing commands we should update the node
4710 if self.op.command in (constants.OOB_POWER_ON,
4711 constants.OOB_POWER_OFF):
4712 self.cfg.Update(node, feedback_fn)
4714 node_entry.append((constants.RS_NORMAL, result.payload))
4716 if (self.op.command == constants.OOB_POWER_ON and
4717 idx < len(self.nodes) - 1):
4718 time.sleep(self.op.power_delay)
4722 def _CheckPayload(self, result):
4723 """Checks if the payload is valid.
4725 @param result: RPC result
4726 @raises errors.OpExecError: If payload is not valid
4730 if self.op.command == constants.OOB_HEALTH:
4731 if not isinstance(result.payload, list):
4732 errs.append("command 'health' is expected to return a list but got %s" %
4733 type(result.payload))
4735 for item, status in result.payload:
4736 if status not in constants.OOB_STATUSES:
4737 errs.append("health item '%s' has invalid status '%s'" %
4740 if self.op.command == constants.OOB_POWER_STATUS:
4741 if not isinstance(result.payload, dict):
4742 errs.append("power-status is expected to return a dict but got %s" %
4743 type(result.payload))
4745 if self.op.command in [
4746 constants.OOB_POWER_ON,
4747 constants.OOB_POWER_OFF,
4748 constants.OOB_POWER_CYCLE,
4750 if result.payload is not None:
4751 errs.append("%s is expected to not return payload but got '%s'" %
4752 (self.op.command, result.payload))
4755 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4756 utils.CommaJoin(errs))
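  # Informative summary of the payload contract enforced above:
  #   OOB_HEALTH               -> list of (item, status) pairs, with status
  #                               in constants.OOB_STATUSES
  #   OOB_POWER_STATUS         -> dict containing OOB_POWER_STATUS_POWERED
  #   OOB_POWER_ON/OFF/CYCLE   -> no payload (None)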
4759 class _OsQuery(_QueryBase):
4760 FIELDS = query.OS_FIELDS
4762 def ExpandNames(self, lu):
4763 # Lock all nodes in shared mode
4764 # Temporary removal of locks, should be reverted later
4765 # TODO: reintroduce locks when they are lighter-weight
4766 lu.needed_locks = {}
4767 #self.share_locks[locking.LEVEL_NODE] = 1
4768 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4770 # The following variables interact with _QueryBase._GetNames
4772 self.wanted = self.names
4774 self.wanted = locking.ALL_SET
4776 self.do_locking = self.use_locking
4778 def DeclareLocks(self, lu, level):
4782 def _DiagnoseByOS(rlist):
4783 """Remaps a per-node return list into an a per-os per-node dictionary
4785 @param rlist: a map with node names as keys and OS objects as values
4788 @return: a dictionary with osnames as keys and as value another
4789 map, with nodes as keys and tuples of (path, status, diagnose,
4790 variants, parameters, api_versions) as values, eg::
4792 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4793 (/srv/..., False, "invalid api")],
4794 "node2": [(/srv/..., True, "", [], [])]}
4799 # we build here the list of nodes that didn't fail the RPC (at RPC
4800 # level), so that nodes with a non-responding node daemon don't
4801 # make all OSes invalid
4802 good_nodes = [node_name for node_name in rlist
4803 if not rlist[node_name].fail_msg]
4804 for node_name, nr in rlist.items():
4805 if nr.fail_msg or not nr.payload:
4807 for (name, path, status, diagnose, variants,
4808 params, api_versions) in nr.payload:
4809 if name not in all_os:
4810 # build a list of nodes for this os containing empty lists
4811 # for each node in node_list
4813 for nname in good_nodes:
4814 all_os[name][nname] = []
4815 # convert params from [name, help] to (name, help)
4816 params = [tuple(v) for v in params]
4817 all_os[name][node_name].append((path, status, diagnose,
4818 variants, params, api_versions))
4821 def _GetQueryData(self, lu):
4822 """Computes the list of nodes and their attributes.
4825 # Locking is not used
4826 assert not (compat.any(lu.glm.is_owned(level)
4827 for level in locking.LEVELS
4828 if level != locking.LEVEL_CLUSTER) or
4829 self.do_locking or self.use_locking)
4831 valid_nodes = [node.name
4832 for node in lu.cfg.GetAllNodesInfo().values()
4833 if not node.offline and node.vm_capable]
4834 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4835 cluster = lu.cfg.GetClusterInfo()
4839 for (os_name, os_data) in pol.items():
4840 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4841 hidden=(os_name in cluster.hidden_os),
4842 blacklisted=(os_name in cluster.blacklisted_os))
4846 api_versions = set()
4848       for idx, osl in enumerate(os_data.values()):
4849         info.valid = bool(info.valid and osl and osl[0][1])
4850         if not info.valid:
4851           break
4853         (node_variants, node_params, node_api) = osl[0][3:6]
4854         if idx == 0:
4855           # First entry
4856           variants.update(node_variants)
4857           parameters.update(node_params)
4858           api_versions.update(node_api)
4859         else:
4860           # Filter out inconsistent values
4861           variants.intersection_update(node_variants)
4862           parameters.intersection_update(node_params)
4863           api_versions.intersection_update(node_api)
4865 info.variants = list(variants)
4866 info.parameters = list(parameters)
4867 info.api_versions = list(api_versions)
4869 data[os_name] = info
4871 # Prepare data in requested order
4872 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4876 class LUOsDiagnose(NoHooksLU):
4877 """Logical unit for OS diagnose/query.
4883 def _BuildFilter(fields, names):
4884 """Builds a filter for querying OSes.
4887 name_filter = qlang.MakeSimpleFilter("name", names)
4889 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4890 # respective field is not requested
4891 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4892 for fname in ["hidden", "blacklisted"]
4893 if fname not in fields]
4894 if "valid" not in fields:
4895 status_filter.append([qlang.OP_TRUE, "valid"])
4898 status_filter.insert(0, qlang.OP_AND)
4900 status_filter = None
4902 if name_filter and status_filter:
4903 return [qlang.OP_AND, name_filter, status_filter]
4904     elif name_filter:
4905       return name_filter
4906     else:
4907       return status_filter
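  # Illustrative (unverified) shape of the filter built above when querying
  # only the "name" field for ["debian-etch"]:
  #
  #   [qlang.OP_AND,
  #    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-etch"]],
  #    [qlang.OP_AND,
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #     [qlang.OP_TRUE, "valid"]]]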
4909 def CheckArguments(self):
4910 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4911 self.op.output_fields, False)
4913 def ExpandNames(self):
4914 self.oq.ExpandNames(self)
4916 def Exec(self, feedback_fn):
4917 return self.oq.OldStyleQuery(self)
4920 class LUNodeRemove(LogicalUnit):
4921 """Logical unit for removing a node.
4924 HPATH = "node-remove"
4925 HTYPE = constants.HTYPE_NODE
4927 def BuildHooksEnv(self):
4932 "OP_TARGET": self.op.node_name,
4933 "NODE_NAME": self.op.node_name,
4936 def BuildHooksNodes(self):
4937 """Build hooks nodes.
4939 This doesn't run on the target node in the pre phase as a failed
4940 node would then be impossible to remove.
4943 all_nodes = self.cfg.GetNodeList()
4945 all_nodes.remove(self.op.node_name)
4948 return (all_nodes, all_nodes)
4950 def CheckPrereq(self):
4951 """Check prerequisites.
4954 - the node exists in the configuration
4955 - it does not have primary or secondary instances
4956 - it's not the master
4958 Any errors are signaled by raising errors.OpPrereqError.
4961 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4962 node = self.cfg.GetNodeInfo(self.op.node_name)
4963 assert node is not None
4965 masternode = self.cfg.GetMasterNode()
4966 if node.name == masternode:
4967 raise errors.OpPrereqError("Node is the master node, failover to another"
4968 " node is required", errors.ECODE_INVAL)
4970 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4971 if node.name in instance.all_nodes:
4972 raise errors.OpPrereqError("Instance %s is still running on the node,"
4973 " please remove first" % instance_name,
4975 self.op.node_name = node.name
4978 def Exec(self, feedback_fn):
4979 """Removes the node from the cluster.
4983 logging.info("Stopping the node daemon and removing configs from node %s",
4986 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4988 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4991 # Promote nodes to master candidate as needed
4992 _AdjustCandidatePool(self, exceptions=[node.name])
4993 self.context.RemoveNode(node.name)
4995 # Run post hooks on the node before it's removed
4996 _RunPostHook(self, node.name)
4998 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4999 msg = result.fail_msg
5001 self.LogWarning("Errors encountered on the remote node while leaving"
5002 " the cluster: %s", msg)
5004 # Remove node from our /etc/hosts
5005 if self.cfg.GetClusterInfo().modify_etc_hosts:
5006 master_node = self.cfg.GetMasterNode()
5007 result = self.rpc.call_etc_hosts_modify(master_node,
5008 constants.ETC_HOSTS_REMOVE,
5010 result.Raise("Can't update hosts file with new host data")
5011 _RedistributeAncillaryFiles(self)
5014 class _NodeQuery(_QueryBase):
5015 FIELDS = query.NODE_FIELDS
5017 def ExpandNames(self, lu):
5018 lu.needed_locks = {}
5019 lu.share_locks = _ShareAll()
5022 self.wanted = _GetWantedNodes(lu, self.names)
5024 self.wanted = locking.ALL_SET
5026 self.do_locking = (self.use_locking and
5027 query.NQ_LIVE in self.requested_data)
5030 # If any non-static field is requested we need to lock the nodes
5031 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5033 def DeclareLocks(self, lu, level):
5036 def _GetQueryData(self, lu):
5037 """Computes the list of nodes and their attributes.
5040 all_info = lu.cfg.GetAllNodesInfo()
5042 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5044 # Gather data as requested
5045 if query.NQ_LIVE in self.requested_data:
5046 # filter out non-vm_capable nodes
5047 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5049 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5050 [lu.cfg.GetHypervisorType()])
5051 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5052 for (name, nresult) in node_data.items()
5053 if not nresult.fail_msg and nresult.payload)
5057 if query.NQ_INST in self.requested_data:
5058 node_to_primary = dict([(name, set()) for name in nodenames])
5059 node_to_secondary = dict([(name, set()) for name in nodenames])
5061 inst_data = lu.cfg.GetAllInstancesInfo()
5063 for inst in inst_data.values():
5064 if inst.primary_node in node_to_primary:
5065 node_to_primary[inst.primary_node].add(inst.name)
5066 for secnode in inst.secondary_nodes:
5067 if secnode in node_to_secondary:
5068 node_to_secondary[secnode].add(inst.name)
5070 node_to_primary = None
5071 node_to_secondary = None
5073 if query.NQ_OOB in self.requested_data:
5074 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5075 for name, node in all_info.iteritems())
5079 if query.NQ_GROUP in self.requested_data:
5080 groups = lu.cfg.GetAllNodeGroupsInfo()
5084 return query.NodeQueryData([all_info[name] for name in nodenames],
5085 live_data, lu.cfg.GetMasterNode(),
5086 node_to_primary, node_to_secondary, groups,
5087 oob_support, lu.cfg.GetClusterInfo())
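  # Informative summary (derived from the code above): the query.NQ_* flags
  # select the extra data gathered -- NQ_LIVE issues node RPCs, NQ_INST
  # builds the primary/secondary instance maps, NQ_OOB records out-of-band
  # support and NQ_GROUP resolves node group objects.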
5090 class LUNodeQuery(NoHooksLU):
5091 """Logical unit for querying nodes.
5094 # pylint: disable=W0142
5097 def CheckArguments(self):
5098 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5099 self.op.output_fields, self.op.use_locking)
5101 def ExpandNames(self):
5102 self.nq.ExpandNames(self)
5104 def DeclareLocks(self, level):
5105 self.nq.DeclareLocks(self, level)
5107 def Exec(self, feedback_fn):
5108 return self.nq.OldStyleQuery(self)
5111 class LUNodeQueryvols(NoHooksLU):
5112 """Logical unit for getting volumes on node(s).
5116 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5117 _FIELDS_STATIC = utils.FieldSet("node")
5119 def CheckArguments(self):
5120 _CheckOutputFields(static=self._FIELDS_STATIC,
5121 dynamic=self._FIELDS_DYNAMIC,
5122 selected=self.op.output_fields)
5124 def ExpandNames(self):
5125 self.share_locks = _ShareAll()
5126 self.needed_locks = {}
5128 if not self.op.nodes:
5129 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5131 self.needed_locks[locking.LEVEL_NODE] = \
5132 _GetWantedNodes(self, self.op.nodes)
5134 def Exec(self, feedback_fn):
5135 """Computes the list of nodes and their attributes.
5138 nodenames = self.owned_locks(locking.LEVEL_NODE)
5139 volumes = self.rpc.call_node_volumes(nodenames)
5141 ilist = self.cfg.GetAllInstancesInfo()
5142 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5145 for node in nodenames:
5146 nresult = volumes[node]
5149 msg = nresult.fail_msg
5151 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5154 node_vols = sorted(nresult.payload,
5155 key=operator.itemgetter("dev"))
5157 for vol in node_vols:
5159 for field in self.op.output_fields:
5162 elif field == "phys":
5166 elif field == "name":
5168 elif field == "size":
5169 val = int(float(vol["size"]))
5170 elif field == "instance":
5171 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5173 raise errors.ParameterError(field)
5174 node_output.append(str(val))
5176 output.append(node_output)
5181 class LUNodeQueryStorage(NoHooksLU):
5182 """Logical unit for getting information on storage units on node(s).
5185 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5188 def CheckArguments(self):
5189 _CheckOutputFields(static=self._FIELDS_STATIC,
5190 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5191 selected=self.op.output_fields)
5193 def ExpandNames(self):
5194 self.share_locks = _ShareAll()
5195 self.needed_locks = {}
5198 self.needed_locks[locking.LEVEL_NODE] = \
5199 _GetWantedNodes(self, self.op.nodes)
5201 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5203 def Exec(self, feedback_fn):
5204 """Computes the list of nodes and their attributes.
5207 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5209 # Always get name to sort by
5210 if constants.SF_NAME in self.op.output_fields:
5211 fields = self.op.output_fields[:]
5213 fields = [constants.SF_NAME] + self.op.output_fields
5215 # Never ask for node or type as it's only known to the LU
5216 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5217 while extra in fields:
5218 fields.remove(extra)
5220 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5221 name_idx = field_idx[constants.SF_NAME]
5223 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5224 data = self.rpc.call_storage_list(self.nodes,
5225 self.op.storage_type, st_args,
5226 self.op.name, fields)
5230 for node in utils.NiceSort(self.nodes):
5231 nresult = data[node]
5235 msg = nresult.fail_msg
5237 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5240 rows = dict([(row[name_idx], row) for row in nresult.payload])
5242 for name in utils.NiceSort(rows.keys()):
5247 for field in self.op.output_fields:
5248 if field == constants.SF_NODE:
5250 elif field == constants.SF_TYPE:
5251 val = self.op.storage_type
5252 elif field in field_idx:
5253 val = row[field_idx[field]]
5255 raise errors.ParameterError(field)
5264 class _InstanceQuery(_QueryBase):
5265 FIELDS = query.INSTANCE_FIELDS
5267 def ExpandNames(self, lu):
5268 lu.needed_locks = {}
5269 lu.share_locks = _ShareAll()
5272 self.wanted = _GetWantedInstances(lu, self.names)
5274 self.wanted = locking.ALL_SET
5276 self.do_locking = (self.use_locking and
5277 query.IQ_LIVE in self.requested_data)
5279 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5280 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5281 lu.needed_locks[locking.LEVEL_NODE] = []
5282 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5284 self.do_grouplocks = (self.do_locking and
5285 query.IQ_NODES in self.requested_data)
5287 def DeclareLocks(self, lu, level):
5289 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5290 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5292 # Lock all groups used by instances optimistically; this requires going
5293 # via the node before it's locked, requiring verification later on
5294 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5296 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5297 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5298 elif level == locking.LEVEL_NODE:
5299 lu._LockInstancesNodes() # pylint: disable=W0212
5302 def _CheckGroupLocks(lu):
5303 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5304 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5306 # Check if node groups for locked instances are still correct
5307 for instance_name in owned_instances:
5308 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5310 def _GetQueryData(self, lu):
5311 """Computes the list of instances and their attributes.
5314 if self.do_grouplocks:
5315 self._CheckGroupLocks(lu)
5317 cluster = lu.cfg.GetClusterInfo()
5318 all_info = lu.cfg.GetAllInstancesInfo()
5320 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5322 instance_list = [all_info[name] for name in instance_names]
5323 nodes = frozenset(itertools.chain(*(inst.all_nodes
5324 for inst in instance_list)))
5325 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5328 wrongnode_inst = set()
5330 # Gather data as requested
5331     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5332       live_data = {}
5333       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5334       for name in nodes:
5335         result = node_data[name]
5336         if result.offline:
5337           # offline nodes will be in both lists
5338           assert result.fail_msg
5339           offline_nodes.append(name)
5340         if result.fail_msg:
5341           bad_nodes.append(name)
5342         elif result.payload:
5343 for inst in result.payload:
5344 if inst in all_info:
5345             if all_info[inst].primary_node == name:
5346               live_data[inst] = result.payload[inst]
5347             else:
5348               wrongnode_inst.add(inst)
5350 # orphan instance; we don't list it here as we don't
5351 # handle this case yet in the output of instance listing
5352 logging.warning("Orphan instance '%s' found on node %s",
5354 # else no instance is alive
5358 if query.IQ_DISKUSAGE in self.requested_data:
5359 disk_usage = dict((inst.name,
5360 _ComputeDiskSize(inst.disk_template,
5361 [{constants.IDISK_SIZE: disk.size}
5362 for disk in inst.disks]))
5363 for inst in instance_list)
5367 if query.IQ_CONSOLE in self.requested_data:
5369 for inst in instance_list:
5370 if inst.name in live_data:
5371 # Instance is running
5372 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5374 consinfo[inst.name] = None
5375 assert set(consinfo.keys()) == set(instance_names)
5379 if query.IQ_NODES in self.requested_data:
5380 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5382 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5383 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5384 for uuid in set(map(operator.attrgetter("group"),
5390 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5391 disk_usage, offline_nodes, bad_nodes,
5392 live_data, wrongnode_inst, consinfo,
5396 class LUQuery(NoHooksLU):
5397 """Query for resources/items of a certain kind.
5400 # pylint: disable=W0142
5403 def CheckArguments(self):
5404 qcls = _GetQueryImplementation(self.op.what)
5406 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5408 def ExpandNames(self):
5409 self.impl.ExpandNames(self)
5411 def DeclareLocks(self, level):
5412 self.impl.DeclareLocks(self, level)
5414 def Exec(self, feedback_fn):
5415 return self.impl.NewStyleQuery(self)
5418 class LUQueryFields(NoHooksLU):
5419 """Query for resources/items of a certain kind.
5422 # pylint: disable=W0142
5425 def CheckArguments(self):
5426 self.qcls = _GetQueryImplementation(self.op.what)
5428 def ExpandNames(self):
5429 self.needed_locks = {}
5431 def Exec(self, feedback_fn):
5432 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5435 class LUNodeModifyStorage(NoHooksLU):
5436 """Logical unit for modifying a storage volume on a node.
5441 def CheckArguments(self):
5442 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5444 storage_type = self.op.storage_type
5447 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5449 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5450 " modified" % storage_type,
5453 diff = set(self.op.changes.keys()) - modifiable
5455 raise errors.OpPrereqError("The following fields can not be modified for"
5456 " storage units of type '%s': %r" %
5457 (storage_type, list(diff)),
5460 def ExpandNames(self):
5461 self.needed_locks = {
5462 locking.LEVEL_NODE: self.op.node_name,
5465 def Exec(self, feedback_fn):
5466 """Computes the list of nodes and their attributes.
5469 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5470 result = self.rpc.call_storage_modify(self.op.node_name,
5471 self.op.storage_type, st_args,
5472 self.op.name, self.op.changes)
5473 result.Raise("Failed to modify storage unit '%s' on %s" %
5474 (self.op.name, self.op.node_name))
5477 class LUNodeAdd(LogicalUnit):
5478 """Logical unit for adding node to the cluster.
5482 HTYPE = constants.HTYPE_NODE
5483 _NFLAGS = ["master_capable", "vm_capable"]
5485 def CheckArguments(self):
5486 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5487 # validate/normalize the node name
5488 self.hostname = netutils.GetHostname(name=self.op.node_name,
5489 family=self.primary_ip_family)
5490 self.op.node_name = self.hostname.name
5492 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5493 raise errors.OpPrereqError("Cannot readd the master node",
5496 if self.op.readd and self.op.group:
5497 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5498 " being readded", errors.ECODE_INVAL)
5500 def BuildHooksEnv(self):
5503 This will run on all nodes before, and on all nodes + the new node after.
5507 "OP_TARGET": self.op.node_name,
5508 "NODE_NAME": self.op.node_name,
5509 "NODE_PIP": self.op.primary_ip,
5510 "NODE_SIP": self.op.secondary_ip,
5511 "MASTER_CAPABLE": str(self.op.master_capable),
5512 "VM_CAPABLE": str(self.op.vm_capable),
5515 def BuildHooksNodes(self):
5516 """Build hooks nodes.
5519 # Exclude added node
5520 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5521 post_nodes = pre_nodes + [self.op.node_name, ]
5523 return (pre_nodes, post_nodes)
5525 def CheckPrereq(self):
5526 """Check prerequisites.
5529 - the new node is not already in the config
5531 - its parameters (single/dual homed) matches the cluster
5533 Any errors are signaled by raising errors.OpPrereqError.
5537 hostname = self.hostname
5538 node = hostname.name
5539 primary_ip = self.op.primary_ip = hostname.ip
5540 if self.op.secondary_ip is None:
5541 if self.primary_ip_family == netutils.IP6Address.family:
5542         raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5543 " IPv4 address must be given as secondary",
5545 self.op.secondary_ip = primary_ip
5547 secondary_ip = self.op.secondary_ip
5548 if not netutils.IP4Address.IsValid(secondary_ip):
5549 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5550 " address" % secondary_ip, errors.ECODE_INVAL)
5552 node_list = cfg.GetNodeList()
5553 if not self.op.readd and node in node_list:
5554 raise errors.OpPrereqError("Node %s is already in the configuration" %
5555 node, errors.ECODE_EXISTS)
5556 elif self.op.readd and node not in node_list:
5557 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5560 self.changed_primary_ip = False
5562 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5563 if self.op.readd and node == existing_node_name:
5564 if existing_node.secondary_ip != secondary_ip:
5565 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5566 " address configuration as before",
5568 if existing_node.primary_ip != primary_ip:
5569 self.changed_primary_ip = True
5573 if (existing_node.primary_ip == primary_ip or
5574 existing_node.secondary_ip == primary_ip or
5575 existing_node.primary_ip == secondary_ip or
5576 existing_node.secondary_ip == secondary_ip):
5577 raise errors.OpPrereqError("New node ip address(es) conflict with"
5578 " existing node %s" % existing_node.name,
5579 errors.ECODE_NOTUNIQUE)
5581 # After this 'if' block, None is no longer a valid value for the
5582 # _capable op attributes
5584 old_node = self.cfg.GetNodeInfo(node)
5585 assert old_node is not None, "Can't retrieve locked node %s" % node
5586 for attr in self._NFLAGS:
5587 if getattr(self.op, attr) is None:
5588 setattr(self.op, attr, getattr(old_node, attr))
5590 for attr in self._NFLAGS:
5591 if getattr(self.op, attr) is None:
5592 setattr(self.op, attr, True)
5594 if self.op.readd and not self.op.vm_capable:
5595 pri, sec = cfg.GetNodeInstances(node)
5597 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5598 " flag set to false, but it already holds"
5599 " instances" % node,
5602 # check that the type of the node (single versus dual homed) is the
5603 # same as for the master
5604 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5605 master_singlehomed = myself.secondary_ip == myself.primary_ip
5606 newbie_singlehomed = secondary_ip == primary_ip
5607 if master_singlehomed != newbie_singlehomed:
5608 if master_singlehomed:
5609 raise errors.OpPrereqError("The master has no secondary ip but the"
5610 " new node has one",
5613 raise errors.OpPrereqError("The master has a secondary ip but the"
5614 " new node doesn't have one",
5617 # checks reachability
5618 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5619 raise errors.OpPrereqError("Node not reachable by ping",
5620 errors.ECODE_ENVIRON)
5622 if not newbie_singlehomed:
5623 # check reachability from my secondary ip to newbie's secondary ip
5624 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5625 source=myself.secondary_ip):
5626 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5627 " based ping to node daemon port",
5628 errors.ECODE_ENVIRON)
5635 if self.op.master_capable:
5636 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5638 self.master_candidate = False
5641 self.new_node = old_node
5643 node_group = cfg.LookupNodeGroup(self.op.group)
5644 self.new_node = objects.Node(name=node,
5645 primary_ip=primary_ip,
5646 secondary_ip=secondary_ip,
5647 master_candidate=self.master_candidate,
5648 offline=False, drained=False,
5651 if self.op.ndparams:
5652 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5654 if self.op.hv_state:
5655 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5657 if self.op.disk_state:
5658 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5660 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5661 # it a property on the base class.
5662 result = rpc.DnsOnlyRunner().call_version([node])[node]
5663 result.Raise("Can't get version information from node %s" % node)
5664 if constants.PROTOCOL_VERSION == result.payload:
5665 logging.info("Communication to node %s fine, sw version %s match",
5666 node, result.payload)
5668 raise errors.OpPrereqError("Version mismatch master version %s,"
5669 " node version %s" %
5670 (constants.PROTOCOL_VERSION, result.payload),
5671 errors.ECODE_ENVIRON)
5673 def Exec(self, feedback_fn):
5674 """Adds the new node to the cluster.
5677 new_node = self.new_node
5678 node = new_node.name
5680 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5681 "Not owning BGL"
5683 # We are adding a new node, so we assume it's powered
5684 new_node.powered = True
5686 # for re-adds, reset the offline/drained/master-candidate flags;
5687 # we need to reset here, otherwise offline would prevent RPC calls
5688 # later in the procedure; this also means that if the re-add
5689 # fails, we are left with a non-offlined, broken node
5690 if self.op.readd:
5691 new_node.drained = new_node.offline = False # pylint: disable=W0201
5692 self.LogInfo("Readding a node, the offline/drained flags were reset")
5693 # if we demote the node, we do cleanup later in the procedure
5694 new_node.master_candidate = self.master_candidate
5695 if self.changed_primary_ip:
5696 new_node.primary_ip = self.op.primary_ip
5698 # copy the master/vm_capable flags
5699 for attr in self._NFLAGS:
5700 setattr(new_node, attr, getattr(self.op, attr))
5702 # notify the user about any possible mc promotion
5703 if new_node.master_candidate:
5704 self.LogInfo("Node will be a master candidate")
5706 if self.op.ndparams:
5707 new_node.ndparams = self.op.ndparams
5708 else:
5709 new_node.ndparams = {}
5711 if self.op.hv_state:
5712 new_node.hv_state_static = self.new_hv_state
5714 if self.op.disk_state:
5715 new_node.disk_state_static = self.new_disk_state
5717 # Add node to our /etc/hosts, and add key to known_hosts
5718 if self.cfg.GetClusterInfo().modify_etc_hosts:
5719 master_node = self.cfg.GetMasterNode()
5720 result = self.rpc.call_etc_hosts_modify(master_node,
5721 constants.ETC_HOSTS_ADD,
5722 self.hostname.name,
5723 self.hostname.ip)
5724 result.Raise("Can't update hosts file with new host data")
5726 if new_node.secondary_ip != new_node.primary_ip:
5727 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5728 False)
5730 node_verify_list = [self.cfg.GetMasterNode()]
5731 node_verify_param = {
5732 constants.NV_NODELIST: ([node], {}),
5733 # TODO: do a node-net-test as well?
5734 }
5736 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5737 self.cfg.GetClusterName())
5738 for verifier in node_verify_list:
5739 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5740 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5741 if nl_payload:
5742 for failed in nl_payload:
5743 feedback_fn("ssh/hostname verification failed"
5744 " (checking from %s): %s" %
5745 (verifier, nl_payload[failed]))
5746 raise errors.OpExecError("ssh/hostname verification failed")
5748 if self.op.readd:
5749 _RedistributeAncillaryFiles(self)
5750 self.context.ReaddNode(new_node)
5751 # make sure we redistribute the config
5752 self.cfg.Update(new_node, feedback_fn)
5753 # and make sure the new node will not have old files around
5754 if not new_node.master_candidate:
5755 result = self.rpc.call_node_demote_from_mc(new_node.name)
5756 msg = result.fail_msg
5757 if msg:
5758 self.LogWarning("Node failed to demote itself from master"
5759 " candidate status: %s" % msg)
5760 else:
5761 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5762 additional_vm=self.op.vm_capable)
5763 self.context.AddNode(new_node, self.proc.GetECId())
5766 class LUNodeSetParams(LogicalUnit):
5767 """Modifies the parameters of a node.
5769 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5770 to the node role (as _ROLE_*)
5771 @cvar _R2F: a dictionary from node role to tuples of flags
5772 @cvar _FLAGS: a list of attribute names corresponding to the flags
5773 """
5775 HPATH = "node-modify"
5776 HTYPE = constants.HTYPE_NODE
5777 REQ_BGL = False
5778 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5779 _F2R = {
5780 (True, False, False): _ROLE_CANDIDATE,
5781 (False, True, False): _ROLE_DRAINED,
5782 (False, False, True): _ROLE_OFFLINE,
5783 (False, False, False): _ROLE_REGULAR,
5784 }
5785 _R2F = dict((v, k) for k, v in _F2R.items())
5786 _FLAGS = ["master_candidate", "drained", "offline"]
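# Example (an illustrative sketch, not used anywhere in this module): the
# _F2R/_R2F tables form a bijection between flag tuples and roles:
#   flags = (False, True, False)        # not MC, drained, not offline
#   role = LUNodeSetParams._F2R[flags]  # -> _ROLE_DRAINED
#   assert LUNodeSetParams._R2F[role] == flags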
5788 def CheckArguments(self):
5789 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5790 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5791 self.op.master_capable, self.op.vm_capable,
5792 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5793 self.op.disk_state]
5794 if all_mods.count(None) == len(all_mods):
5795 raise errors.OpPrereqError("Please pass at least one modification",
5796 errors.ECODE_INVAL)
5797 if all_mods.count(True) > 1:
5798 raise errors.OpPrereqError("Can't set the node into more than one"
5799 " state at the same time",
5802 # Boolean value that tells us whether we might be demoting from MC
5803 self.might_demote = (self.op.master_candidate == False or
5804 self.op.offline == True or
5805 self.op.drained == True or
5806 self.op.master_capable == False)
5808 if self.op.secondary_ip:
5809 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5810 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5811 " address" % self.op.secondary_ip,
5814 self.lock_all = self.op.auto_promote and self.might_demote
5815 self.lock_instances = self.op.secondary_ip is not None
5817 def _InstanceFilter(self, instance):
5818 """Filter for getting affected instances.
5821 return (instance.disk_template in constants.DTS_INT_MIRROR and
5822 self.op.node_name in instance.all_nodes)
5824 def ExpandNames(self):
5825 if self.lock_all:
5826 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5827 else:
5828 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5830 # Since modifying a node can have severe effects on currently running
5831 # operations the resource lock is at least acquired in shared mode
5832 self.needed_locks[locking.LEVEL_NODE_RES] = \
5833 self.needed_locks[locking.LEVEL_NODE]
5835 # Get node resource and instance locks in shared mode; they are not used
5836 # for anything but read-only access
5837 self.share_locks[locking.LEVEL_NODE_RES] = 1
5838 self.share_locks[locking.LEVEL_INSTANCE] = 1
5840 if self.lock_instances:
5841 self.needed_locks[locking.LEVEL_INSTANCE] = \
5842 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5844 def BuildHooksEnv(self):
5845 """Build hooks env.
5847 This runs on the master node.
5849 """
5850 return {
5851 "OP_TARGET": self.op.node_name,
5852 "MASTER_CANDIDATE": str(self.op.master_candidate),
5853 "OFFLINE": str(self.op.offline),
5854 "DRAINED": str(self.op.drained),
5855 "MASTER_CAPABLE": str(self.op.master_capable),
5856 "VM_CAPABLE": str(self.op.vm_capable),
5859 def BuildHooksNodes(self):
5860 """Build hooks nodes.
5863 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5864 return (nl, nl)
5866 def CheckPrereq(self):
5867 """Check prerequisites.
5869 This only checks the instance list against the existing names.
5872 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5874 if self.lock_instances:
5875 affected_instances = \
5876 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5878 # Verify instance locks
5879 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5880 wanted_instances = frozenset(affected_instances.keys())
5881 if wanted_instances - owned_instances:
5882 raise errors.OpPrereqError("Instances affected by changing node %s's"
5883 " secondary IP address have changed since"
5884 " locks were acquired, wanted '%s', have"
5885 " '%s'; retry the operation" %
5886 (self.op.node_name,
5887 utils.CommaJoin(wanted_instances),
5888 utils.CommaJoin(owned_instances)),
5889 errors.ECODE_STATE)
5890 else:
5891 affected_instances = None
5893 if (self.op.master_candidate is not None or
5894 self.op.drained is not None or
5895 self.op.offline is not None):
5896 # we can't change the master's node flags
5897 if self.op.node_name == self.cfg.GetMasterNode():
5898 raise errors.OpPrereqError("The master role can be changed"
5899 " only via master-failover",
5902 if self.op.master_candidate and not node.master_capable:
5903 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5904 " it a master candidate" % node.name,
5907 if self.op.vm_capable == False:
5908 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5909 if ipri or isec:
5910 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5911 " the vm_capable flag" % node.name,
5912 errors.ECODE_STATE)
5914 if node.master_candidate and self.might_demote and not self.lock_all:
5915 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5916 # check if after removing the current node, we're missing master
5917 # candidates
5918 (mc_remaining, mc_should, _) = \
5919 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5920 if mc_remaining < mc_should:
5921 raise errors.OpPrereqError("Not enough master candidates, please"
5922 " pass auto promote option to allow"
5923 " promotion", errors.ECODE_STATE)
5925 self.old_flags = old_flags = (node.master_candidate,
5926 node.drained, node.offline)
5927 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5928 self.old_role = old_role = self._F2R[old_flags]
5930 # Check for ineffective changes
5931 for attr in self._FLAGS:
5932 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5933 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5934 setattr(self.op, attr, None)
5936 # Past this point, any flag change to False means a transition
5937 # away from the respective state, as only real changes are kept
5939 # TODO: We might query the real power state if it supports OOB
5940 if _SupportsOob(self.cfg, node):
5941 if self.op.offline is False and not (node.powered or
5942 self.op.powered == True):
5943 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5944 " offline status can be reset") %
5946 elif self.op.powered is not None:
5947 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5948 " as it does not support out-of-band"
5949 " handling") % self.op.node_name)
5951 # If we're being deofflined/drained, we'll MC ourself if needed
5952 if (self.op.drained == False or self.op.offline == False or
5953 (self.op.master_capable and not node.master_capable)):
5954 if _DecideSelfPromotion(self):
5955 self.op.master_candidate = True
5956 self.LogInfo("Auto-promoting node to master candidate")
5958 # If we're no longer master capable, we'll demote ourselves from MC
5959 if self.op.master_capable == False and node.master_candidate:
5960 self.LogInfo("Demoting from master candidate")
5961 self.op.master_candidate = False
5963 # Compute new role
5964 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5965 if self.op.master_candidate:
5966 new_role = self._ROLE_CANDIDATE
5967 elif self.op.drained:
5968 new_role = self._ROLE_DRAINED
5969 elif self.op.offline:
5970 new_role = self._ROLE_OFFLINE
5971 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5972 # False is still in new flags, which means we're un-setting (the
5973 # current) flags
5974 new_role = self._ROLE_REGULAR
5975 else: # no new flags, nothing, keep old role
5976 new_role = old_role
5978 self.new_role = new_role
5980 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5981 # Trying to transition out of offline status
5982 result = self.rpc.call_version([node.name])[node.name]
5983 if result.fail_msg:
5984 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5985 " to report its version: %s" %
5986 (node.name, result.fail_msg),
5987 errors.ECODE_ENVIRON)
5988 else:
5989 self.LogWarning("Transitioning node from offline to online state"
5990 " without using re-add. Please make sure the node"
5993 if self.op.secondary_ip:
5994 # Ok even without locking, because this can't be changed by any LU
5995 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5996 master_singlehomed = master.secondary_ip == master.primary_ip
5997 if master_singlehomed and self.op.secondary_ip:
5998 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5999 " homed cluster", errors.ECODE_INVAL)
6001 assert not (frozenset(affected_instances) -
6002 self.owned_locks(locking.LEVEL_INSTANCE))
6004 if node.offline:
6005 if affected_instances:
6006 raise errors.OpPrereqError("Cannot change secondary IP address:"
6007 " offline node has instances (%s)"
6008 " configured to use it" %
6009 utils.CommaJoin(affected_instances.keys()))
6010 else:
6011 # On online nodes, check that no instances are running, and that
6012 # the node has the new ip and we can reach it.
6013 for instance in affected_instances.values():
6014 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6015 msg="cannot change secondary ip")
6017 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6018 if master.name != node.name:
6019 # check reachability from master secondary ip to new secondary ip
6020 if not netutils.TcpPing(self.op.secondary_ip,
6021 constants.DEFAULT_NODED_PORT,
6022 source=master.secondary_ip):
6023 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6024 " based ping to node daemon port",
6025 errors.ECODE_ENVIRON)
6027 if self.op.ndparams:
6028 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6029 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6030 self.new_ndparams = new_ndparams
6032 if self.op.hv_state:
6033 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6034 self.node.hv_state_static)
6036 if self.op.disk_state:
6037 self.new_disk_state = \
6038 _MergeAndVerifyDiskState(self.op.disk_state,
6039 self.node.disk_state_static)
6041 def Exec(self, feedback_fn):
6042 """Modifies a node.
6044 """
6045 node = self.node
6046 old_role = self.old_role
6047 new_role = self.new_role
6049 result = []
6051 if self.op.ndparams:
6052 node.ndparams = self.new_ndparams
6054 if self.op.powered is not None:
6055 node.powered = self.op.powered
6057 if self.op.hv_state:
6058 node.hv_state_static = self.new_hv_state
6060 if self.op.disk_state:
6061 node.disk_state_static = self.new_disk_state
6063 for attr in ["master_capable", "vm_capable"]:
6064 val = getattr(self.op, attr)
6065 if val is not None:
6066 setattr(node, attr, val)
6067 result.append((attr, str(val)))
6069 if new_role != old_role:
6070 # Tell the node to demote itself, if no longer MC and not offline
6071 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6072 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6073 if msg:
6074 self.LogWarning("Node failed to demote itself: %s", msg)
6076 new_flags = self._R2F[new_role]
6077 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6078 if of != nf:
6079 result.append((desc, str(nf)))
6080 (node.master_candidate, node.drained, node.offline) = new_flags
6082 # we locked all nodes, we adjust the CP before updating this node
6083 if self.lock_all:
6084 _AdjustCandidatePool(self, [node.name])
6086 if self.op.secondary_ip:
6087 node.secondary_ip = self.op.secondary_ip
6088 result.append(("secondary_ip", self.op.secondary_ip))
6090 # this will trigger configuration file update, if needed
6091 self.cfg.Update(node, feedback_fn)
6093 # this will trigger job queue propagation or cleanup if the mc
6094 # flag changed
6095 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6096 self.context.ReaddNode(node)
6098 return result
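# For illustration (a sketch, not part of the LU itself): the list returned
# above pairs each modified attribute with its new value, e.g. draining a
# node would yield something like [("drained", "True")], which the
# command-line client can then print as feedback.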
6101 class LUNodePowercycle(NoHooksLU):
6102 """Powercycles a node.
6107 def CheckArguments(self):
6108 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6109 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6110 raise errors.OpPrereqError("The node is the master and the force"
6111 " parameter was not set",
6114 def ExpandNames(self):
6115 """Locking for PowercycleNode.
6117 This is a last-resort option and shouldn't block on other
6118 jobs. Therefore, we grab no locks.
6120 """
6121 self.needed_locks = {}
6123 def Exec(self, feedback_fn):
6124 """Reboots a node.
6126 """
6127 result = self.rpc.call_node_powercycle(self.op.node_name,
6128 self.cfg.GetHypervisorType())
6129 result.Raise("Failed to schedule the reboot")
6130 return result.payload
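# A powercycle is normally requested via an opcode rather than by calling
# this LU directly; a minimal client-side sketch (assuming the standard
# luxi socket and a running master daemon):
#   from ganeti import luxi, opcodes
#   cl = luxi.Client()
#   op = opcodes.OpNodePowercycle(node_name="node1.example.com", force=False)
#   job_id = cl.SubmitJob([op])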
6133 class LUClusterQuery(NoHooksLU):
6134 """Query cluster configuration.
6139 def ExpandNames(self):
6140 self.needed_locks = {}
6142 def Exec(self, feedback_fn):
6143 """Return cluster config.
6145 """
6146 cluster = self.cfg.GetClusterInfo()
6147 os_hvp = {}
6149 # Filter just for enabled hypervisors
6150 for os_name, hv_dict in cluster.os_hvp.items():
6151 os_hvp[os_name] = {}
6152 for hv_name, hv_params in hv_dict.items():
6153 if hv_name in cluster.enabled_hypervisors:
6154 os_hvp[os_name][hv_name] = hv_params
6156 # Convert ip_family to ip_version
6157 primary_ip_version = constants.IP4_VERSION
6158 if cluster.primary_ip_family == netutils.IP6Address.family:
6159 primary_ip_version = constants.IP6_VERSION
6162 "software_version": constants.RELEASE_VERSION,
6163 "protocol_version": constants.PROTOCOL_VERSION,
6164 "config_version": constants.CONFIG_VERSION,
6165 "os_api_version": max(constants.OS_API_VERSIONS),
6166 "export_version": constants.EXPORT_VERSION,
6167 "architecture": runtime.GetArchInfo(),
6168 "name": cluster.cluster_name,
6169 "master": cluster.master_node,
6170 "default_hypervisor": cluster.primary_hypervisor,
6171 "enabled_hypervisors": cluster.enabled_hypervisors,
6172 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6173 for hypervisor_name in cluster.enabled_hypervisors]),
6175 "beparams": cluster.beparams,
6176 "osparams": cluster.osparams,
6177 "ipolicy": cluster.ipolicy,
6178 "nicparams": cluster.nicparams,
6179 "ndparams": cluster.ndparams,
6180 "candidate_pool_size": cluster.candidate_pool_size,
6181 "master_netdev": cluster.master_netdev,
6182 "master_netmask": cluster.master_netmask,
6183 "use_external_mip_script": cluster.use_external_mip_script,
6184 "volume_group_name": cluster.volume_group_name,
6185 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6186 "file_storage_dir": cluster.file_storage_dir,
6187 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6188 "maintain_node_health": cluster.maintain_node_health,
6189 "ctime": cluster.ctime,
6190 "mtime": cluster.mtime,
6191 "uuid": cluster.uuid,
6192 "tags": list(cluster.GetTags()),
6193 "uid_pool": cluster.uid_pool,
6194 "default_iallocator": cluster.default_iallocator,
6195 "reserved_lvs": cluster.reserved_lvs,
6196 "primary_ip_version": primary_ip_version,
6197 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6198 "hidden_os": cluster.hidden_os,
6199 "blacklisted_os": cluster.blacklisted_os,
6205 class LUClusterConfigQuery(NoHooksLU):
6206 """Return configuration values.
6211 def CheckArguments(self):
6212 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6214 def ExpandNames(self):
6215 self.cq.ExpandNames(self)
6217 def DeclareLocks(self, level):
6218 self.cq.DeclareLocks(self, level)
6220 def Exec(self, feedback_fn):
6221 result = self.cq.OldStyleQuery(self)
6223 assert len(result) == 1
6225 return result[0]
6228 class _ClusterQuery(_QueryBase):
6229 FIELDS = query.CLUSTER_FIELDS
6231 #: Do not sort (there is only one item)
6232 SORT_FIELD = None
6234 def ExpandNames(self, lu):
6235 lu.needed_locks = {}
6237 # The following variables interact with _QueryBase._GetNames
6238 self.wanted = locking.ALL_SET
6239 self.do_locking = self.use_locking
6241 if self.do_locking:
6242 raise errors.OpPrereqError("Can not use locking for cluster queries",
6243 errors.ECODE_INVAL)
6245 def DeclareLocks(self, lu, level):
6246 pass
6248 def _GetQueryData(self, lu):
6249 """Computes the list of nodes and their attributes.
6252 # Locking is not used
6253 assert not (compat.any(lu.glm.is_owned(level)
6254 for level in locking.LEVELS
6255 if level != locking.LEVEL_CLUSTER) or
6256 self.do_locking or self.use_locking)
6258 if query.CQ_CONFIG in self.requested_data:
6259 cluster = lu.cfg.GetClusterInfo()
6260 else:
6261 cluster = NotImplemented
6263 if query.CQ_QUEUE_DRAINED in self.requested_data:
6264 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6265 else:
6266 drain_flag = NotImplemented
6268 if query.CQ_WATCHER_PAUSE in self.requested_data:
6269 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6270 else:
6271 watcher_pause = NotImplemented
6273 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
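# Note the pattern above, shared by the _QueryBase subclasses: field groups
# that were not requested are filled with the NotImplemented placeholder,
# so the query layer can distinguish "not requested" from "empty". As a
# sketch:
#   requested = frozenset([query.CQ_CONFIG])
#   cluster = (lu.cfg.GetClusterInfo() if query.CQ_CONFIG in requested
#              else NotImplemented)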
6276 class LUInstanceActivateDisks(NoHooksLU):
6277 """Bring up an instance's disks.
6282 def ExpandNames(self):
6283 self._ExpandAndLockInstance()
6284 self.needed_locks[locking.LEVEL_NODE] = []
6285 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6287 def DeclareLocks(self, level):
6288 if level == locking.LEVEL_NODE:
6289 self._LockInstancesNodes()
6291 def CheckPrereq(self):
6292 """Check prerequisites.
6294 This checks that the instance is in the cluster.
6297 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6298 assert self.instance is not None, \
6299 "Cannot retrieve locked instance %s" % self.op.instance_name
6300 _CheckNodeOnline(self, self.instance.primary_node)
6302 def Exec(self, feedback_fn):
6303 """Activate the disks.
6306 disks_ok, disks_info = \
6307 _AssembleInstanceDisks(self, self.instance,
6308 ignore_size=self.op.ignore_size)
6310 raise errors.OpExecError("Cannot activate block devices")
6315 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6317 """Prepare the block devices for an instance.
6319 This sets up the block devices on all nodes.
6321 @type lu: L{LogicalUnit}
6322 @param lu: the logical unit on whose behalf we execute
6323 @type instance: L{objects.Instance}
6324 @param instance: the instance for whose disks we assemble
6325 @type disks: list of L{objects.Disk} or None
6326 @param disks: which disks to assemble (or all, if None)
6327 @type ignore_secondaries: boolean
6328 @param ignore_secondaries: if true, errors on secondary nodes
6329 won't result in an error return from the function
6330 @type ignore_size: boolean
6331 @param ignore_size: if true, the current known size of the disk
6332 will not be used during the disk activation, useful for cases
6333 when the size is wrong
6334 @return: False if the operation failed, otherwise a list of
6335 (host, instance_visible_name, node_visible_name)
6336 with the mapping from node devices to instance devices
6338 """
6339 device_info = []
6340 disks_ok = True
6341 iname = instance.name
6342 disks = _ExpandCheckDisks(instance, disks)
6344 # With the two-pass mechanism we try to reduce the window of
6345 # opportunity for the race condition of switching DRBD to primary
6346 # before handshaking occurred, but we do not eliminate it
6348 # The proper fix would be to wait (with some limits) until the
6349 # connection has been made and drbd transitions from WFConnection
6350 # into any other network-connected state (Connected, SyncTarget,
6351 # SyncSource, etc.)
6353 # 1st pass, assemble on all nodes in secondary mode
6354 for idx, inst_disk in enumerate(disks):
6355 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6356 if ignore_size:
6357 node_disk = node_disk.Copy()
6358 node_disk.UnsetSize()
6359 lu.cfg.SetDiskID(node_disk, node)
6360 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6361 False, idx)
6362 msg = result.fail_msg
6363 if msg:
6364 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6365 " (is_primary=False, pass=1): %s",
6366 inst_disk.iv_name, node, msg)
6367 if not ignore_secondaries:
6368 disks_ok = False
6370 # FIXME: race condition on drbd migration to primary
6372 # 2nd pass, do only the primary node
6373 for idx, inst_disk in enumerate(disks):
6374 dev_path = None
6376 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6377 if node != instance.primary_node:
6378 continue
6379 if ignore_size:
6380 node_disk = node_disk.Copy()
6381 node_disk.UnsetSize()
6382 lu.cfg.SetDiskID(node_disk, node)
6383 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6384 True, idx)
6385 msg = result.fail_msg
6386 if msg:
6387 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6388 " (is_primary=True, pass=2): %s",
6389 inst_disk.iv_name, node, msg)
6390 disks_ok = False
6391 else:
6392 dev_path = result.payload
6394 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6396 # leave the disks configured for the primary node
6397 # this is a workaround that would be fixed better by
6398 # improving the logical/physical id handling
6399 for disk in disks:
6400 lu.cfg.SetDiskID(disk, instance.primary_node)
6402 return disks_ok, device_info
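# Typical use of the two-pass assembly above (a sketch assuming an
# already-locked instance "instance" and a logical unit "lu"):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     logging.info("%s: disk %s visible as %s", node, iv_name, dev_path)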
6405 def _StartInstanceDisks(lu, instance, force):
6406 """Start the disks of an instance.
6409 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6410 ignore_secondaries=force)
6411 if not disks_ok:
6412 _ShutdownInstanceDisks(lu, instance)
6413 if force is not None and not force:
6414 lu.proc.LogWarning("", hint="If the message above refers to a"
6416 " you can retry the operation using '--force'.")
6417 raise errors.OpExecError("Disk consistency error")
6420 class LUInstanceDeactivateDisks(NoHooksLU):
6421 """Shutdown an instance's disks.
6426 def ExpandNames(self):
6427 self._ExpandAndLockInstance()
6428 self.needed_locks[locking.LEVEL_NODE] = []
6429 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6431 def DeclareLocks(self, level):
6432 if level == locking.LEVEL_NODE:
6433 self._LockInstancesNodes()
6435 def CheckPrereq(self):
6436 """Check prerequisites.
6438 This checks that the instance is in the cluster.
6441 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6442 assert self.instance is not None, \
6443 "Cannot retrieve locked instance %s" % self.op.instance_name
6445 def Exec(self, feedback_fn):
6446 """Deactivate the disks
6448 """
6449 instance = self.instance
6450 if self.op.force:
6451 _ShutdownInstanceDisks(self, instance)
6452 else:
6453 _SafeShutdownInstanceDisks(self, instance)
6456 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6457 """Shutdown block devices of an instance.
6459 This function checks if an instance is running, before calling
6460 _ShutdownInstanceDisks.
6462 """
6463 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6464 _ShutdownInstanceDisks(lu, instance, disks=disks)
6467 def _ExpandCheckDisks(instance, disks):
6468 """Return the instance disks selected by the disks list
6470 @type disks: list of L{objects.Disk} or None
6471 @param disks: selected disks
6472 @rtype: list of L{objects.Disk}
6473 @return: selected instance disks to act on
6475 """
6476 if disks is None:
6477 return instance.disks
6479 if not set(disks).issubset(instance.disks):
6480 raise errors.ProgrammerError("Can only act on disks belonging to the"
6481 " target instance")
6483 return disks
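# Illustration of the contract above (sketch only): a subset of
# instance.disks is accepted, anything else is a programmer error:
#   _ExpandCheckDisks(instance, None)                # -> instance.disks
#   _ExpandCheckDisks(instance, instance.disks[:1])  # -> just the first disk
#   _ExpandCheckDisks(instance, [objects.Disk()])    # -> ProgrammerError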
6485 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6486 """Shutdown block devices of an instance.
6488 This does the shutdown on all nodes of the instance.
6490 If the ignore_primary is false, errors on the primary node are
6491 ignored.
6493 """
6494 all_result = True
6495 disks = _ExpandCheckDisks(instance, disks)
6497 for disk in disks:
6498 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6499 lu.cfg.SetDiskID(top_disk, node)
6500 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6501 msg = result.fail_msg
6502 if msg:
6503 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6504 disk.iv_name, node, msg)
6505 if ((node == instance.primary_node and not ignore_primary) or
6506 (node != instance.primary_node and not result.offline)):
6507 all_result = False
6509 return all_result
6511 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6512 """Checks if a node has enough free memory.
6514 This function checks if a given node has the needed amount of free
6515 memory. In case the node has less memory or we cannot get the
6516 information from the node, this function raises an OpPrereqError
6517 exception.
6519 @type lu: C{LogicalUnit}
6520 @param lu: a logical unit from which we get configuration data
6521 @type node: C{str}
6522 @param node: the node to check
6523 @type reason: C{str}
6524 @param reason: string to use in the error message
6525 @type requested: C{int}
6526 @param requested: the amount of memory in MiB to check for
6527 @type hypervisor_name: C{str}
6528 @param hypervisor_name: the hypervisor to ask for memory stats
6529 @rtype: C{int}
6530 @return: node current free memory
6531 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6532 we cannot check the node
6534 """
6535 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6536 nodeinfo[node].Raise("Can't get data from node %s" % node,
6537 prereq=True, ecode=errors.ECODE_ENVIRON)
6538 (_, _, (hv_info, )) = nodeinfo[node].payload
6540 free_mem = hv_info.get("memory_free", None)
6541 if not isinstance(free_mem, int):
6542 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6543 " was '%s'" % (node, free_mem),
6544 errors.ECODE_ENVIRON)
6545 if requested > free_mem:
6546 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6547 " needed %s MiB, available %s MiB" %
6548 (node, reason, requested, free_mem),
6549 errors.ECODE_NORES)
6550 return free_mem
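# Schematically (a sketch; the exact payload layout is defined by the
# node-info RPC), the check above unpacks a triple and inspects the
# hypervisor part:
#   (_, _, (hv_info, )) = nodeinfo[node].payload
#   hv_info = {"memory_free": 2048, ...}
# so requesting e.g. 4096 MiB against 2048 MiB free raises OpPrereqError
# with errors.ECODE_NORES.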
6553 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6554 """Checks if nodes have enough free disk space in the all VGs.
6556 This function checks if all given nodes have the needed amount of
6557 free disk. In case any node has less disk or we cannot get the
6558 information from the node, this function raises an OpPrereqError
6559 exception.
6561 @type lu: C{LogicalUnit}
6562 @param lu: a logical unit from which we get configuration data
6563 @type nodenames: C{list}
6564 @param nodenames: the list of node names to check
6565 @type req_sizes: C{dict}
6566 @param req_sizes: the hash of vg and corresponding amount of disk in
6567 MiB to check for
6568 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6569 or we cannot check the node
6571 """
6572 for vg, req_size in req_sizes.items():
6573 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
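# Example argument shape (illustrative): req_sizes maps each volume group
# to the amount that must be free on every node, e.g.
#   _CheckNodesFreeDiskPerVG(lu, ["node1", "node2"],
#                            {"xenvg": 10240, "metavg": 128})
# requires 10 GiB free in "xenvg" and 128 MiB free in "metavg" on both
# nodes.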
6576 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6577 """Checks if nodes have enough free disk space in the specified VG.
6579 This function checks if all given nodes have the needed amount of
6580 free disk. In case any node has less disk or we cannot get the
6581 information from the node, this function raises an OpPrereqError
6582 exception.
6584 @type lu: C{LogicalUnit}
6585 @param lu: a logical unit from which we get configuration data
6586 @type nodenames: C{list}
6587 @param nodenames: the list of node names to check
6588 @type vg: C{str}
6589 @param vg: the volume group to check
6590 @type requested: C{int}
6591 @param requested: the amount of disk in MiB to check for
6592 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6593 or we cannot check the node
6595 """
6596 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6597 for node in nodenames:
6598 info = nodeinfo[node]
6599 info.Raise("Cannot get current information from node %s" % node,
6600 prereq=True, ecode=errors.ECODE_ENVIRON)
6601 (_, (vg_info, ), _) = info.payload
6602 vg_free = vg_info.get("vg_free", None)
6603 if not isinstance(vg_free, int):
6604 raise errors.OpPrereqError("Can't compute free disk space on node"
6605 " %s for vg %s, result was '%s'" %
6606 (node, vg, vg_free), errors.ECODE_ENVIRON)
6607 if requested > vg_free:
6608 raise errors.OpPrereqError("Not enough disk space on target node %s"
6609 " vg %s: required %d MiB, available %d MiB" %
6610 (node, vg, requested, vg_free),
6611 errors.ECODE_NORES)
6614 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6615 """Checks if nodes have enough physical CPUs
6617 This function checks if all given nodes have the needed number of
6618 physical CPUs. In case any node has less CPUs or we cannot get the
6619 information from the node, this function raises an OpPrereqError
6620 exception.
6622 @type lu: C{LogicalUnit}
6623 @param lu: a logical unit from which we get configuration data
6624 @type nodenames: C{list}
6625 @param nodenames: the list of node names to check
6626 @type requested: C{int}
6627 @param requested: the minimum acceptable number of physical CPUs
6628 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6629 or we cannot check the node
6631 """
6632 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6633 for node in nodenames:
6634 info = nodeinfo[node]
6635 info.Raise("Cannot get current information from node %s" % node,
6636 prereq=True, ecode=errors.ECODE_ENVIRON)
6637 (_, _, (hv_info, )) = info.payload
6638 num_cpus = hv_info.get("cpu_total", None)
6639 if not isinstance(num_cpus, int):
6640 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6641 " on node %s, result was '%s'" %
6642 (node, num_cpus), errors.ECODE_ENVIRON)
6643 if requested > num_cpus:
6644 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6645 "required" % (node, num_cpus, requested),
6649 class LUInstanceStartup(LogicalUnit):
6650 """Starts an instance.
6653 HPATH = "instance-start"
6654 HTYPE = constants.HTYPE_INSTANCE
6655 REQ_BGL = False
6657 def CheckArguments(self):
6658 # extra beparams
6659 if self.op.beparams:
6660 # fill the beparams dict
6661 objects.UpgradeBeParams(self.op.beparams)
6662 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6664 def ExpandNames(self):
6665 self._ExpandAndLockInstance()
6666 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6668 def DeclareLocks(self, level):
6669 if level == locking.LEVEL_NODE_RES:
6670 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6672 def BuildHooksEnv(self):
6673 """Build hooks env.
6675 This runs on master, primary and secondary nodes of the instance.
6677 """
6679 "FORCE": self.op.force,
6682 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6684 return env
6686 def BuildHooksNodes(self):
6687 """Build hooks nodes.
6690 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6691 return (nl, nl)
6693 def CheckPrereq(self):
6694 """Check prerequisites.
6696 This checks that the instance is in the cluster.
6699 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6700 assert self.instance is not None, \
6701 "Cannot retrieve locked instance %s" % self.op.instance_name
6703 # extra hvparams
6704 if self.op.hvparams:
6705 # check hypervisor parameter syntax (locally)
6706 cluster = self.cfg.GetClusterInfo()
6707 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6708 filled_hvp = cluster.FillHV(instance)
6709 filled_hvp.update(self.op.hvparams)
6710 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6711 hv_type.CheckParameterSyntax(filled_hvp)
6712 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6714 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6716 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6718 if self.primary_offline and self.op.ignore_offline_nodes:
6719 self.proc.LogWarning("Ignoring offline primary node")
6721 if self.op.hvparams or self.op.beparams:
6722 self.proc.LogWarning("Overridden parameters are ignored")
6723 else:
6724 _CheckNodeOnline(self, instance.primary_node)
6726 bep = self.cfg.GetClusterInfo().FillBE(instance)
6727 bep.update(self.op.beparams)
6729 # check bridges existence
6730 _CheckInstanceBridgesExist(self, instance)
6732 remote_info = self.rpc.call_instance_info(instance.primary_node,
6734 instance.hypervisor)
6735 remote_info.Raise("Error checking node %s" % instance.primary_node,
6736 prereq=True, ecode=errors.ECODE_ENVIRON)
6737 if not remote_info.payload: # not running already
6738 _CheckNodeFreeMemory(self, instance.primary_node,
6739 "starting instance %s" % instance.name,
6740 bep[constants.BE_MINMEM], instance.hypervisor)
6742 def Exec(self, feedback_fn):
6743 """Start the instance.
6746 instance = self.instance
6747 force = self.op.force
6749 if not self.op.no_remember:
6750 self.cfg.MarkInstanceUp(instance.name)
6752 if self.primary_offline:
6753 assert self.op.ignore_offline_nodes
6754 self.proc.LogInfo("Primary node offline, marked instance as started")
6755 else:
6756 node_current = instance.primary_node
6758 _StartInstanceDisks(self, instance, force)
6760 result = \
6761 self.rpc.call_instance_start(node_current,
6762 (instance, self.op.hvparams,
6763 self.op.beparams),
6764 self.op.startup_paused)
6765 msg = result.fail_msg
6766 if msg:
6767 _ShutdownInstanceDisks(self, instance)
6768 raise errors.OpExecError("Could not start instance: %s" % msg)
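# Start requests normally arrive as opcodes; a client-side sketch (the
# parameters shown are assumptions, see opcodes.OpInstanceStartup for the
# authoritative field list):
#   op = opcodes.OpInstanceStartup(instance_name="web1.example.com",
#                                  force=False, ignore_offline_nodes=False)
#   job_id = luxi.Client().SubmitJob([op])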
6771 class LUInstanceReboot(LogicalUnit):
6772 """Reboot an instance.
6775 HPATH = "instance-reboot"
6776 HTYPE = constants.HTYPE_INSTANCE
6777 REQ_BGL = False
6779 def ExpandNames(self):
6780 self._ExpandAndLockInstance()
6782 def BuildHooksEnv(self):
6783 """Build hooks env.
6785 This runs on master, primary and secondary nodes of the instance.
6787 """
6788 env = {
6789 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6790 "REBOOT_TYPE": self.op.reboot_type,
6791 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6794 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6796 return env
6798 def BuildHooksNodes(self):
6799 """Build hooks nodes.
6802 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6803 return (nl, nl)
6805 def CheckPrereq(self):
6806 """Check prerequisites.
6808 This checks that the instance is in the cluster.
6811 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6812 assert self.instance is not None, \
6813 "Cannot retrieve locked instance %s" % self.op.instance_name
6814 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6815 _CheckNodeOnline(self, instance.primary_node)
6817 # check bridges existence
6818 _CheckInstanceBridgesExist(self, instance)
6820 def Exec(self, feedback_fn):
6821 """Reboot the instance.
6824 instance = self.instance
6825 ignore_secondaries = self.op.ignore_secondaries
6826 reboot_type = self.op.reboot_type
6828 remote_info = self.rpc.call_instance_info(instance.primary_node,
6830 instance.hypervisor)
6831 remote_info.Raise("Error checking node %s" % instance.primary_node)
6832 instance_running = bool(remote_info.payload)
6834 node_current = instance.primary_node
6836 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6837 constants.INSTANCE_REBOOT_HARD]:
6838 for disk in instance.disks:
6839 self.cfg.SetDiskID(disk, node_current)
6840 result = self.rpc.call_instance_reboot(node_current, instance,
6841 reboot_type,
6842 self.op.shutdown_timeout)
6843 result.Raise("Could not reboot instance")
6844 else:
6845 if instance_running:
6846 result = self.rpc.call_instance_shutdown(node_current, instance,
6847 self.op.shutdown_timeout)
6848 result.Raise("Could not shutdown instance for full reboot")
6849 _ShutdownInstanceDisks(self, instance)
6850 else:
6851 self.LogInfo("Instance %s was already stopped, starting now",
6852 instance.name)
6853 _StartInstanceDisks(self, instance, ignore_secondaries)
6854 result = self.rpc.call_instance_start(node_current,
6855 (instance, None, None), False)
6856 msg = result.fail_msg
6857 if msg:
6858 _ShutdownInstanceDisks(self, instance)
6859 raise errors.OpExecError("Could not start instance for"
6860 " full reboot: %s" % msg)
6862 self.cfg.MarkInstanceUp(instance.name)
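# Summary of the branches above (illustrative):
#   INSTANCE_REBOOT_SOFT/HARD -> delegated to the hypervisor through
#                                call_instance_reboot (instance must run)
#   INSTANCE_REBOOT_FULL      -> emulated by Ganeti as shutdown, disk
#                                deactivation/activation and a fresh start
# which is why the full path also re-marks the instance as up at the end.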
6865 class LUInstanceShutdown(LogicalUnit):
6866 """Shutdown an instance.
6869 HPATH = "instance-stop"
6870 HTYPE = constants.HTYPE_INSTANCE
6871 REQ_BGL = False
6873 def ExpandNames(self):
6874 self._ExpandAndLockInstance()
6876 def BuildHooksEnv(self):
6877 """Build hooks env.
6879 This runs on master, primary and secondary nodes of the instance.
6881 """
6882 env = _BuildInstanceHookEnvByObject(self, self.instance)
6883 env["TIMEOUT"] = self.op.timeout
6886 def BuildHooksNodes(self):
6887 """Build hooks nodes.
6890 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6891 return (nl, nl)
6893 def CheckPrereq(self):
6894 """Check prerequisites.
6896 This checks that the instance is in the cluster.
6899 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6900 assert self.instance is not None, \
6901 "Cannot retrieve locked instance %s" % self.op.instance_name
6903 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6905 self.primary_offline = \
6906 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6908 if self.primary_offline and self.op.ignore_offline_nodes:
6909 self.proc.LogWarning("Ignoring offline primary node")
6910 else:
6911 _CheckNodeOnline(self, self.instance.primary_node)
6913 def Exec(self, feedback_fn):
6914 """Shutdown the instance.
6917 instance = self.instance
6918 node_current = instance.primary_node
6919 timeout = self.op.timeout
6921 if not self.op.no_remember:
6922 self.cfg.MarkInstanceDown(instance.name)
6924 if self.primary_offline:
6925 assert self.op.ignore_offline_nodes
6926 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6927 else:
6928 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6929 msg = result.fail_msg
6930 if msg:
6931 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6933 _ShutdownInstanceDisks(self, instance)
6936 class LUInstanceReinstall(LogicalUnit):
6937 """Reinstall an instance.
6940 HPATH = "instance-reinstall"
6941 HTYPE = constants.HTYPE_INSTANCE
6942 REQ_BGL = False
6944 def ExpandNames(self):
6945 self._ExpandAndLockInstance()
6947 def BuildHooksEnv(self):
6948 """Build hooks env.
6950 This runs on master, primary and secondary nodes of the instance.
6952 """
6953 return _BuildInstanceHookEnvByObject(self, self.instance)
6955 def BuildHooksNodes(self):
6956 """Build hooks nodes.
6959 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6960 return (nl, nl)
6962 def CheckPrereq(self):
6963 """Check prerequisites.
6965 This checks that the instance is in the cluster and is not running.
6968 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6969 assert instance is not None, \
6970 "Cannot retrieve locked instance %s" % self.op.instance_name
6971 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6972 " offline, cannot reinstall")
6973 for node in instance.secondary_nodes:
6974 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6975 " cannot reinstall")
6977 if instance.disk_template == constants.DT_DISKLESS:
6978 raise errors.OpPrereqError("Instance '%s' has no disks" %
6979 self.op.instance_name,
6980 errors.ECODE_INVAL)
6981 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6983 if self.op.os_type is not None:
6984 # OS verification
6985 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6986 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6987 instance_os = self.op.os_type
6988 else:
6989 instance_os = instance.os
6991 nodelist = list(instance.all_nodes)
6993 if self.op.osparams:
6994 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6995 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6996 self.os_inst = i_osdict # the new dict (without defaults)
6997 else:
6998 self.os_inst = {}
7000 self.instance = instance
7002 def Exec(self, feedback_fn):
7003 """Reinstall the instance.
7006 inst = self.instance
7008 if self.op.os_type is not None:
7009 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7010 inst.os = self.op.os_type
7011 # Write to configuration
7012 self.cfg.Update(inst, feedback_fn)
7014 _StartInstanceDisks(self, inst, None)
7015 try:
7016 feedback_fn("Running the instance OS create scripts...")
7017 # FIXME: pass debug option from opcode to backend
7018 result = self.rpc.call_instance_os_add(inst.primary_node,
7019 (inst, self.os_inst), True,
7020 self.op.debug_level)
7021 result.Raise("Could not install OS for instance %s on node %s" %
7022 (inst.name, inst.primary_node))
7023 finally:
7024 _ShutdownInstanceDisks(self, inst)
7027 class LUInstanceRecreateDisks(LogicalUnit):
7028 """Recreate an instance's missing disks.
7031 HPATH = "instance-recreate-disks"
7032 HTYPE = constants.HTYPE_INSTANCE
7033 REQ_BGL = False
7035 _MODIFYABLE = frozenset([
7036 constants.IDISK_SIZE,
7037 constants.IDISK_MODE,
7038 ])
7040 # New or changed disk parameters may have different semantics
7041 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7042 constants.IDISK_ADOPT,
7044 # TODO: Implement support changing VG while recreating
7045 constants.IDISK_VG,
7046 constants.IDISK_METAVG,
7047 ]))
7049 def CheckArguments(self):
7050 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7051 # Normalize and convert deprecated list of disk indices
7052 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7054 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7055 if duplicates:
7056 raise errors.OpPrereqError("Some disks have been specified more than"
7057 " once: %s" % utils.CommaJoin(duplicates),
7060 for (idx, params) in self.op.disks:
7061 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7062 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7063 if unsupported:
7064 raise errors.OpPrereqError("Parameters for disk %s try to change"
7065 " unmodifyable parameter(s): %s" %
7066 (idx, utils.CommaJoin(unsupported)),
7067 errors.ECODE_INVAL)
7069 def ExpandNames(self):
7070 self._ExpandAndLockInstance()
7071 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7072 if self.op.nodes:
7073 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7074 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7075 else:
7076 self.needed_locks[locking.LEVEL_NODE] = []
7077 self.needed_locks[locking.LEVEL_NODE_RES] = []
7079 def DeclareLocks(self, level):
7080 if level == locking.LEVEL_NODE:
7081 # if we replace the nodes, we only need to lock the old primary,
7082 # otherwise we need to lock all nodes for disk re-creation
7083 primary_only = bool(self.op.nodes)
7084 self._LockInstancesNodes(primary_only=primary_only)
7085 elif level == locking.LEVEL_NODE_RES:
7087 self.needed_locks[locking.LEVEL_NODE_RES] = \
7088 self.needed_locks[locking.LEVEL_NODE][:]
7090 def BuildHooksEnv(self):
7091 """Build hooks env.
7093 This runs on master, primary and secondary nodes of the instance.
7095 """
7096 return _BuildInstanceHookEnvByObject(self, self.instance)
7098 def BuildHooksNodes(self):
7099 """Build hooks nodes.
7102 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7103 return (nl, nl)
7105 def CheckPrereq(self):
7106 """Check prerequisites.
7108 This checks that the instance is in the cluster and is not running.
7111 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7112 assert instance is not None, \
7113 "Cannot retrieve locked instance %s" % self.op.instance_name
7114 if self.op.nodes:
7115 if len(self.op.nodes) != len(instance.all_nodes):
7116 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7117 " %d replacement nodes were specified" %
7118 (instance.name, len(instance.all_nodes),
7119 len(self.op.nodes)),
7120 errors.ECODE_INVAL)
7121 assert instance.disk_template != constants.DT_DRBD8 or \
7122 len(self.op.nodes) == 2
7123 assert instance.disk_template != constants.DT_PLAIN or \
7124 len(self.op.nodes) == 1
7125 primary_node = self.op.nodes[0]
7126 else:
7127 primary_node = instance.primary_node
7128 _CheckNodeOnline(self, primary_node)
7130 if instance.disk_template == constants.DT_DISKLESS:
7131 raise errors.OpPrereqError("Instance '%s' has no disks" %
7132 self.op.instance_name, errors.ECODE_INVAL)
7134 # if we replace nodes *and* the old primary is offline, we don't
7135 # check
7136 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7137 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7138 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7139 if not (self.op.nodes and old_pnode.offline):
7140 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7141 msg="cannot recreate disks")
7143 if self.op.disks:
7144 self.disks = dict(self.op.disks)
7145 else:
7146 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7148 maxidx = max(self.disks.keys())
7149 if maxidx >= len(instance.disks):
7150 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7151 errors.ECODE_INVAL)
7153 if (self.op.nodes and
7154 sorted(self.disks.keys()) != range(len(instance.disks))):
7155 raise errors.OpPrereqError("Can't recreate disks partially and"
7156 " change the nodes at the same time",
7159 self.instance = instance
7161 def Exec(self, feedback_fn):
7162 """Recreate the disks.
7165 instance = self.instance
7167 assert (self.owned_locks(locking.LEVEL_NODE) ==
7168 self.owned_locks(locking.LEVEL_NODE_RES))
7170 to_skip = []
7171 mods = [] # keeps track of needed changes
7173 for idx, disk in enumerate(instance.disks):
7174 try:
7175 changes = self.disks[idx]
7176 except KeyError:
7177 # Disk should not be recreated
7178 to_skip.append(idx)
7179 continue
7181 # update secondaries for disks, if needed
7182 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7183 # need to update the nodes and minors
7184 assert len(self.op.nodes) == 2
7185 assert len(disk.logical_id) == 6 # otherwise disk internals
7187 (_, _, old_port, _, _, old_secret) = disk.logical_id
7188 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7189 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7190 new_minors[0], new_minors[1], old_secret)
7191 assert len(disk.logical_id) == len(new_id)
7192 else:
7193 new_id = None
7195 mods.append((idx, new_id, changes))
7197 # now that we have passed all asserts above, we can apply the mods
7198 # in a single run (to avoid partial changes)
7199 for idx, new_id, changes in mods:
7200 disk = instance.disks[idx]
7201 if new_id is not None:
7202 assert disk.dev_type == constants.LD_DRBD8
7203 disk.logical_id = new_id
7204 if changes:
7205 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7206 mode=changes.get(constants.IDISK_MODE, None))
7208 # change primary node, if needed
7209 if self.op.nodes:
7210 instance.primary_node = self.op.nodes[0]
7211 self.LogWarning("Changing the instance's nodes, you will have to"
7212 " remove any disks left on the older nodes manually")
7215 self.cfg.Update(instance, feedback_fn)
7217 _CreateDisks(self, instance, to_skip=to_skip)
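# Bookkeeping recap (illustrative): disk indices absent from self.disks are
# collected in to_skip and left untouched by _CreateDisks, while each
# (idx, new_id, changes) entry in mods is applied before the configuration
# update, so config and physical disk creation see a consistent view.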
7220 class LUInstanceRename(LogicalUnit):
7221 """Rename an instance.
7224 HPATH = "instance-rename"
7225 HTYPE = constants.HTYPE_INSTANCE
7227 def CheckArguments(self):
7228 """Check arguments.
7230 """
7231 if self.op.ip_check and not self.op.name_check:
7232 # TODO: make the ip check more flexible and not depend on the name check
7233 raise errors.OpPrereqError("IP address check requires a name check",
7234 errors.ECODE_INVAL)
7236 def BuildHooksEnv(self):
7237 """Build hooks env.
7239 This runs on master, primary and secondary nodes of the instance.
7241 """
7242 env = _BuildInstanceHookEnvByObject(self, self.instance)
7243 env["INSTANCE_NEW_NAME"] = self.op.new_name
7246 def BuildHooksNodes(self):
7247 """Build hooks nodes.
7250 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7251 return (nl, nl)
7253 def CheckPrereq(self):
7254 """Check prerequisites.
7256 This checks that the instance is in the cluster and is not running.
7259 self.op.instance_name = _ExpandInstanceName(self.cfg,
7260 self.op.instance_name)
7261 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7262 assert instance is not None
7263 _CheckNodeOnline(self, instance.primary_node)
7264 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7265 msg="cannot rename")
7266 self.instance = instance
7268 new_name = self.op.new_name
7269 if self.op.name_check:
7270 hostname = netutils.GetHostname(name=new_name)
7271 if hostname.name != new_name:
7272 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7273 hostname.name)
7274 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7275 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7276 " same as given hostname '%s'") %
7277 (hostname.name, self.op.new_name),
7278 errors.ECODE_INVAL)
7279 new_name = self.op.new_name = hostname.name
7280 if (self.op.ip_check and
7281 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7282 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7283 (hostname.ip, new_name),
7284 errors.ECODE_NOTUNIQUE)
7286 instance_list = self.cfg.GetInstanceList()
7287 if new_name in instance_list and new_name != instance.name:
7288 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7289 new_name, errors.ECODE_EXISTS)
7291 def Exec(self, feedback_fn):
7292 """Rename the instance.
7295 inst = self.instance
7296 old_name = inst.name
7298 rename_file_storage = False
7299 if (inst.disk_template in constants.DTS_FILEBASED and
7300 self.op.new_name != inst.name):
7301 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7302 rename_file_storage = True
7304 self.cfg.RenameInstance(inst.name, self.op.new_name)
7305 # Change the instance lock. This is definitely safe while we hold the BGL.
7306 # Otherwise the new lock would have to be added in acquired mode.
7307 assert self.REQ_BGL
7308 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7309 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7311 # re-read the instance from the configuration after rename
7312 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7314 if rename_file_storage:
7315 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7316 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7317 old_file_storage_dir,
7318 new_file_storage_dir)
7319 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7320 " (but the instance has been renamed in Ganeti)" %
7321 (inst.primary_node, old_file_storage_dir,
7322 new_file_storage_dir))
7324 _StartInstanceDisks(self, inst, None)
7325 try:
7326 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7327 old_name, self.op.debug_level)
7328 msg = result.fail_msg
7330 msg = ("Could not run OS rename script for instance %s on node %s"
7331 " (but the instance has been renamed in Ganeti): %s" %
7332 (inst.name, inst.primary_node, msg))
7333 self.proc.LogWarning(msg)
7334 finally:
7335 _ShutdownInstanceDisks(self, inst)
7337 return inst.name
7340 class LUInstanceRemove(LogicalUnit):
7341 """Remove an instance.
7344 HPATH = "instance-remove"
7345 HTYPE = constants.HTYPE_INSTANCE
7346 REQ_BGL = False
7348 def ExpandNames(self):
7349 self._ExpandAndLockInstance()
7350 self.needed_locks[locking.LEVEL_NODE] = []
7351 self.needed_locks[locking.LEVEL_NODE_RES] = []
7352 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7354 def DeclareLocks(self, level):
7355 if level == locking.LEVEL_NODE:
7356 self._LockInstancesNodes()
7357 elif level == locking.LEVEL_NODE_RES:
7359 self.needed_locks[locking.LEVEL_NODE_RES] = \
7360 self.needed_locks[locking.LEVEL_NODE][:]
7362 def BuildHooksEnv(self):
7363 """Build hooks env.
7365 This runs on master, primary and secondary nodes of the instance.
7367 """
7368 env = _BuildInstanceHookEnvByObject(self, self.instance)
7369 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7372 def BuildHooksNodes(self):
7373 """Build hooks nodes.
7376 nl = [self.cfg.GetMasterNode()]
7377 nl_post = list(self.instance.all_nodes) + nl
7378 return (nl, nl_post)
7380 def CheckPrereq(self):
7381 """Check prerequisites.
7383 This checks that the instance is in the cluster.
7386 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7387 assert self.instance is not None, \
7388 "Cannot retrieve locked instance %s" % self.op.instance_name
7390 def Exec(self, feedback_fn):
7391 """Remove the instance.
7394 instance = self.instance
7395 logging.info("Shutting down instance %s on node %s",
7396 instance.name, instance.primary_node)
7398 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7399 self.op.shutdown_timeout)
7400 msg = result.fail_msg
7401 if msg:
7402 if self.op.ignore_failures:
7403 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7404 else:
7405 raise errors.OpExecError("Could not shutdown instance %s on"
7406 " node %s: %s" %
7407 (instance.name, instance.primary_node, msg))
7409 assert (self.owned_locks(locking.LEVEL_NODE) ==
7410 self.owned_locks(locking.LEVEL_NODE_RES))
7411 assert not (set(instance.all_nodes) -
7412 self.owned_locks(locking.LEVEL_NODE)), \
7413 "Not owning correct locks"
7415 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7418 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7419 """Utility function to remove an instance.
7422 logging.info("Removing block devices for instance %s", instance.name)
7424 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7425 if not ignore_failures:
7426 raise errors.OpExecError("Can't remove instance's disks")
7427 feedback_fn("Warning: can't remove instance's disks")
7429 logging.info("Removing instance %s out of cluster config", instance.name)
7431 lu.cfg.RemoveInstance(instance.name)
7433 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7434 "Instance lock removal conflict"
7436 # Remove lock for the instance
7437 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7440 class LUInstanceQuery(NoHooksLU):
7441 """Logical unit for querying instances.
7444 # pylint: disable=W0142
7445 REQ_BGL = False
7447 def CheckArguments(self):
7448 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7449 self.op.output_fields, self.op.use_locking)
7451 def ExpandNames(self):
7452 self.iq.ExpandNames(self)
7454 def DeclareLocks(self, level):
7455 self.iq.DeclareLocks(self, level)
7457 def Exec(self, feedback_fn):
7458 return self.iq.OldStyleQuery(self)
7461 class LUInstanceFailover(LogicalUnit):
7462 """Failover an instance.
7465 HPATH = "instance-failover"
7466 HTYPE = constants.HTYPE_INSTANCE
7467 REQ_BGL = False
7469 def CheckArguments(self):
7470 """Check the arguments.
7472 """
7473 self.iallocator = getattr(self.op, "iallocator", None)
7474 self.target_node = getattr(self.op, "target_node", None)
7476 def ExpandNames(self):
7477 self._ExpandAndLockInstance()
7479 if self.op.target_node is not None:
7480 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7482 self.needed_locks[locking.LEVEL_NODE] = []
7483 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7485 self.needed_locks[locking.LEVEL_NODE_RES] = []
7486 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7488 ignore_consistency = self.op.ignore_consistency
7489 shutdown_timeout = self.op.shutdown_timeout
7490 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7491 cleanup=False,
7492 failover=True,
7493 ignore_consistency=ignore_consistency,
7494 shutdown_timeout=shutdown_timeout,
7495 ignore_ipolicy=self.op.ignore_ipolicy)
7496 self.tasklets = [self._migrater]
7498 def DeclareLocks(self, level):
7499 if level == locking.LEVEL_NODE:
7500 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7501 if instance.disk_template in constants.DTS_EXT_MIRROR:
7502 if self.op.target_node is None:
7503 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7505 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7506 self.op.target_node]
7507 del self.recalculate_locks[locking.LEVEL_NODE]
7508 else:
7509 self._LockInstancesNodes()
7510 elif level == locking.LEVEL_NODE_RES:
7512 self.needed_locks[locking.LEVEL_NODE_RES] = \
7513 self.needed_locks[locking.LEVEL_NODE][:]
7515 def BuildHooksEnv(self):
7516 """Build hooks env.
7518 This runs on master, primary and secondary nodes of the instance.
7520 """
7521 instance = self._migrater.instance
7522 source_node = instance.primary_node
7523 target_node = self.op.target_node
7524 env = {
7525 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7526 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7527 "OLD_PRIMARY": source_node,
7528 "NEW_PRIMARY": target_node,
7529 }
7531 if instance.disk_template in constants.DTS_INT_MIRROR:
7532 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7533 env["NEW_SECONDARY"] = source_node
7534 else:
7535 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7537 env.update(_BuildInstanceHookEnvByObject(self, instance))
7539 return env
7541 def BuildHooksNodes(self):
7542 """Build hooks nodes.
7544 """
7545 instance = self._migrater.instance
7546 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7547 return (nl, nl + [instance.primary_node])
7550 class LUInstanceMigrate(LogicalUnit):
7551 """Migrate an instance.
7553 This is migration without shutting the instance down, in contrast to
7554 failover, which is done with a shutdown.
7556 """
7557 HPATH = "instance-migrate"
7558 HTYPE = constants.HTYPE_INSTANCE
7561 def ExpandNames(self):
7562 self._ExpandAndLockInstance()
7564 if self.op.target_node is not None:
7565 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7567 self.needed_locks[locking.LEVEL_NODE] = []
7568 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7570 self.needed_locks[locking.LEVEL_NODE_RES] = []
7571 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7573 self._migrater = \
7574 TLMigrateInstance(self, self.op.instance_name,
7575 cleanup=self.op.cleanup,
7576 failover=False,
7577 fallback=self.op.allow_failover,
7578 allow_runtime_changes=self.op.allow_runtime_changes,
7579 ignore_ipolicy=self.op.ignore_ipolicy)
7580 self.tasklets = [self._migrater]
7582 def DeclareLocks(self, level):
7583 if level == locking.LEVEL_NODE:
7584 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7585 if instance.disk_template in constants.DTS_EXT_MIRROR:
7586 if self.op.target_node is None:
7587 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7588 else:
7589 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7590 self.op.target_node]
7591 del self.recalculate_locks[locking.LEVEL_NODE]
7592 else:
7593 self._LockInstancesNodes()
7594 elif level == locking.LEVEL_NODE_RES:
7596 self.needed_locks[locking.LEVEL_NODE_RES] = \
7597 self.needed_locks[locking.LEVEL_NODE][:]
7599 def BuildHooksEnv(self):
7600 """Build hooks env.
7602 This runs on master, primary and secondary nodes of the instance.
7604 """
7605 instance = self._migrater.instance
7606 source_node = instance.primary_node
7607 target_node = self.op.target_node
7608 env = _BuildInstanceHookEnvByObject(self, instance)
7609 env.update({
7610 "MIGRATE_LIVE": self._migrater.live,
7611 "MIGRATE_CLEANUP": self.op.cleanup,
7612 "OLD_PRIMARY": source_node,
7613 "NEW_PRIMARY": target_node,
7614 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7615 })
7617 if instance.disk_template in constants.DTS_INT_MIRROR:
7618 env["OLD_SECONDARY"] = target_node
7619 env["NEW_SECONDARY"] = source_node
7620 else:
7621 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7623 return env
7625 def BuildHooksNodes(self):
7626 """Build hooks nodes.
7628 """
7629 instance = self._migrater.instance
7630 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7631 return (nl, nl + [instance.primary_node])
7634 class LUInstanceMove(LogicalUnit):
7635 """Move an instance by data-copying.
7637 """
7638 HPATH = "instance-move"
7639 HTYPE = constants.HTYPE_INSTANCE
7642 def ExpandNames(self):
7643 self._ExpandAndLockInstance()
7644 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7645 self.op.target_node = target_node
7646 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7647 self.needed_locks[locking.LEVEL_NODE_RES] = []
7648 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7650 def DeclareLocks(self, level):
7651 if level == locking.LEVEL_NODE:
7652 self._LockInstancesNodes(primary_only=True)
7653 elif level == locking.LEVEL_NODE_RES:
7655 self.needed_locks[locking.LEVEL_NODE_RES] = \
7656 self.needed_locks[locking.LEVEL_NODE][:]
7658 def BuildHooksEnv(self):
7659 """Build hooks env.
7661 This runs on master, primary and secondary nodes of the instance.
7663 """
7664 env = {
7665 "TARGET_NODE": self.op.target_node,
7666 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7667 }
7668 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7669 return env
7671 def BuildHooksNodes(self):
7672 """Build hooks nodes.
7674 """
7675 nl = [
7676 self.cfg.GetMasterNode(),
7677 self.instance.primary_node,
7678 self.op.target_node,
7679 ]
7680 return (nl, nl)
7682 def CheckPrereq(self):
7683 """Check prerequisites.
7685 This checks that the instance is in the cluster.
7687 """
7688 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7689 assert self.instance is not None, \
7690 "Cannot retrieve locked instance %s" % self.op.instance_name
7692 node = self.cfg.GetNodeInfo(self.op.target_node)
7693 assert node is not None, \
7694 "Cannot retrieve locked node %s" % self.op.target_node
7696 self.target_node = target_node = node.name
7698 if target_node == instance.primary_node:
7699 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7700 (instance.name, target_node),
7701 errors.ECODE_STATE)
7703 bep = self.cfg.GetClusterInfo().FillBE(instance)
7705 for idx, dsk in enumerate(instance.disks):
7706 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7707 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7708 " cannot copy" % idx, errors.ECODE_STATE)
7710 _CheckNodeOnline(self, target_node)
7711 _CheckNodeNotDrained(self, target_node)
7712 _CheckNodeVmCapable(self, target_node)
7713 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7714 self.cfg.GetNodeGroup(node.group))
7715 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7716 ignore=self.op.ignore_ipolicy)
7718 if instance.admin_state == constants.ADMINST_UP:
7719 # check memory requirements on the secondary node
7720 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7721 instance.name, bep[constants.BE_MAXMEM],
7722 instance.hypervisor)
7723 else:
7724 self.LogInfo("Not checking memory on the secondary node as"
7725 " instance will not be started")
7727 # check bridge existence
7728 _CheckInstanceBridgesExist(self, instance, node=target_node)
7730 def Exec(self, feedback_fn):
7731 """Move an instance.
7733 The move is done by shutting it down on its present node, copying
7734 the data over (slow) and starting it on the new node.
7736 """
7737 instance = self.instance
7739 source_node = instance.primary_node
7740 target_node = self.target_node
7742 self.LogInfo("Shutting down instance %s on source node %s",
7743 instance.name, source_node)
7745 assert (self.owned_locks(locking.LEVEL_NODE) ==
7746 self.owned_locks(locking.LEVEL_NODE_RES))
7748 result = self.rpc.call_instance_shutdown(source_node, instance,
7749 self.op.shutdown_timeout)
7750 msg = result.fail_msg
7751 if msg:
7752 if self.op.ignore_consistency:
7753 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7754 " Proceeding anyway. Please make sure node"
7755 " %s is down. Error details: %s",
7756 instance.name, source_node, source_node, msg)
7757 else:
7758 raise errors.OpExecError("Could not shutdown instance %s on"
7759 " node %s: %s" %
7760 (instance.name, source_node, msg))
7762 # create the target disks
7763 try:
7764 _CreateDisks(self, instance, target_node=target_node)
7765 except errors.OpExecError:
7766 self.LogWarning("Device creation failed, reverting...")
7767 try:
7768 _RemoveDisks(self, instance, target_node=target_node)
7769 finally:
7770 self.cfg.ReleaseDRBDMinors(instance.name)
7771 raise
7773 cluster_name = self.cfg.GetClusterInfo().cluster_name
7775 errs = []
7776 # activate, get path, copy the data over
7777 for idx, disk in enumerate(instance.disks):
7778 self.LogInfo("Copying data for disk %d", idx)
7779 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7780 instance.name, True, idx)
7781 if result.fail_msg:
7782 self.LogWarning("Can't assemble newly created disk %d: %s",
7783 idx, result.fail_msg)
7784 errs.append(result.fail_msg)
7785 continue
7786 dev_path = result.payload
7787 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7788 target_node, dev_path,
7789 cluster_name)
7790 if result.fail_msg:
7791 self.LogWarning("Can't copy data over for disk %d: %s",
7792 idx, result.fail_msg)
7793 errs.append(result.fail_msg)
7796 if errs:
7797 self.LogWarning("Some disks failed to copy, aborting")
7798 try:
7799 _RemoveDisks(self, instance, target_node=target_node)
7800 finally:
7801 self.cfg.ReleaseDRBDMinors(instance.name)
7802 raise errors.OpExecError("Errors during disk copy: %s" %
7803 (",".join(errs),))
7805 instance.primary_node = target_node
7806 self.cfg.Update(instance, feedback_fn)
7808 self.LogInfo("Removing the disks on the original node")
7809 _RemoveDisks(self, instance, target_node=source_node)
7811 # Only start the instance if it's marked as up
7812 if instance.admin_state == constants.ADMINST_UP:
7813 self.LogInfo("Starting instance %s on node %s",
7814 instance.name, target_node)
7816 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7817 ignore_secondaries=True)
7818 if not disks_ok:
7819 _ShutdownInstanceDisks(self, instance)
7820 raise errors.OpExecError("Can't activate the instance's disks")
7822 result = self.rpc.call_instance_start(target_node,
7823 (instance, None, None), False)
7824 msg = result.fail_msg
7825 if msg:
7826 _ShutdownInstanceDisks(self, instance)
7827 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7828 (instance.name, target_node, msg))
7831 class LUNodeMigrate(LogicalUnit):
7832 """Migrate all instances from a node.
7834 """
7835 HPATH = "node-migrate"
7836 HTYPE = constants.HTYPE_NODE
7839 def CheckArguments(self):
7840 pass
7842 def ExpandNames(self):
7843 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7845 self.share_locks = _ShareAll()
7846 self.needed_locks = {
7847 locking.LEVEL_NODE: [self.op.node_name],
7848 }
7850 def BuildHooksEnv(self):
7851 """Build hooks env.
7853 This runs on the master, the primary and all the secondaries.
7855 """
7856 return {
7857 "NODE_NAME": self.op.node_name,
7858 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7859 }
7861 def BuildHooksNodes(self):
7862 """Build hooks nodes.
7864 """
7865 nl = [self.cfg.GetMasterNode()]
7867 return (nl, nl)
7868 def CheckPrereq(self):
7869 pass
7871 def Exec(self, feedback_fn):
7872 # Prepare jobs for migration instances
7873 allow_runtime_changes = self.op.allow_runtime_changes
7874 jobs = [
7875 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7876 mode=self.op.mode,
7877 live=self.op.live,
7878 iallocator=self.op.iallocator,
7879 target_node=self.op.target_node,
7880 allow_runtime_changes=allow_runtime_changes,
7881 ignore_ipolicy=self.op.ignore_ipolicy)]
7882 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7883 ]
7885 # TODO: Run iallocator in this opcode and pass correct placement options to
7886 # OpInstanceMigrate. Since other jobs can modify the cluster between
7887 # running the iallocator and the actual migration, a good consistency model
7888 # will have to be found.
7890 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7891 frozenset([self.op.node_name]))
7893 return ResultWithJobs(jobs)
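# Illustrative sketch, not part of the original module: for a node whose
# primary instances are "inst1" and "inst2", the submitted structure is a
# list of single-opcode jobs, roughly
#   [[OpInstanceMigrate(instance_name="inst1", ...)],
#    [OpInstanceMigrate(instance_name="inst2", ...)]]
# so every instance is migrated by an independent job that can succeed or
# fail on its own.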
7896 class TLMigrateInstance(Tasklet):
7897 """Tasklet class for instance migration.
7900 @ivar live: whether the migration will be done live or non-live;
7901 this variable is initialized only after CheckPrereq has run
7902 @type cleanup: boolean
7903 @ivar cleanup: Whether we clean up after a failed migration
7904 @type iallocator: string
7905 @ivar iallocator: The iallocator used to determine target_node
7906 @type target_node: string
7907 @ivar target_node: If given, the target_node to reallocate the instance to
7908 @type failover: boolean
7909 @ivar failover: Whether operation results in failover or migration
7910 @type fallback: boolean
7911 @ivar fallback: Whether fallback to failover is allowed if migration is not
7912 possible
7913 @type ignore_consistency: boolean
7914 @ivar ignore_consistency: Whether we should ignore consistency between source
7915 and target node
7916 @type shutdown_timeout: int
7917 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7918 @type ignore_ipolicy: bool
7919 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7921 """
7924 _MIGRATION_POLL_INTERVAL = 1 # seconds
7925 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7927 def __init__(self, lu, instance_name, cleanup=False,
7928 failover=False, fallback=False,
7929 ignore_consistency=False,
7930 allow_runtime_changes=True,
7931 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7932 ignore_ipolicy=False):
7933 """Initializes this class.
7935 """
7936 Tasklet.__init__(self, lu)
7939 self.instance_name = instance_name
7940 self.cleanup = cleanup
7941 self.live = False # will be overridden later
7942 self.failover = failover
7943 self.fallback = fallback
7944 self.ignore_consistency = ignore_consistency
7945 self.shutdown_timeout = shutdown_timeout
7946 self.ignore_ipolicy = ignore_ipolicy
7947 self.allow_runtime_changes = allow_runtime_changes
7949 def CheckPrereq(self):
7950 """Check prerequisites.
7952 This checks that the instance is in the cluster.
7954 """
7955 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7956 instance = self.cfg.GetInstanceInfo(instance_name)
7957 assert instance is not None
7958 self.instance = instance
7959 cluster = self.cfg.GetClusterInfo()
7961 if (not self.cleanup and
7962 not instance.admin_state == constants.ADMINST_UP and
7963 not self.failover and self.fallback):
7964 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7965 " switching to failover")
7966 self.failover = True
7968 if instance.disk_template not in constants.DTS_MIRRORED:
7969 if self.failover:
7970 text = "failovers"
7971 else:
7972 text = "migrations"
7973 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7974 " %s" % (instance.disk_template, text),
7975 errors.ECODE_STATE)
7977 if instance.disk_template in constants.DTS_EXT_MIRROR:
7978 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7980 if self.lu.op.iallocator:
7981 self._RunAllocator()
7982 else:
7983 # We set self.target_node, as the ipolicy checks below require it
7985 self.target_node = self.lu.op.target_node
7987 # Check that the target node is correct in terms of instance policy
7988 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7989 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7990 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7991 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7992 ignore=self.ignore_ipolicy)
7994 # self.target_node is already populated, either directly or by the
7995 # iallocator run
7996 target_node = self.target_node
7997 if self.target_node == instance.primary_node:
7998 raise errors.OpPrereqError("Cannot migrate instance %s"
7999 " to its primary (%s)" %
8000 (instance.name, instance.primary_node))
8002 if len(self.lu.tasklets) == 1:
8003 # It is safe to release locks only when we're the only tasklet
8004 # in the LU
8005 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8006 keep=[instance.primary_node, self.target_node])
8008 else:
8009 secondary_nodes = instance.secondary_nodes
8010 if not secondary_nodes:
8011 raise errors.ConfigurationError("No secondary node but using"
8012 " %s disk template" %
8013 instance.disk_template)
8014 target_node = secondary_nodes[0]
8015 if self.lu.op.iallocator or (self.lu.op.target_node and
8016 self.lu.op.target_node != target_node):
8017 if self.failover:
8018 text = "failed over"
8019 else:
8020 text = "migrated"
8021 raise errors.OpPrereqError("Instances with disk template %s cannot"
8022 " be %s to arbitrary nodes"
8023 " (neither an iallocator nor a target"
8024 " node can be passed)" %
8025 (instance.disk_template, text),
8026 errors.ECODE_INVAL)
8027 nodeinfo = self.cfg.GetNodeInfo(target_node)
8028 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8029 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8030 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8031 ignore=self.ignore_ipolicy)
8033 i_be = cluster.FillBE(instance)
8035 # check memory requirements on the secondary node
8036 if (not self.cleanup and
8037 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8038 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8039 "migrating instance %s" %
8040 instance.name,
8041 i_be[constants.BE_MINMEM],
8042 instance.hypervisor)
8043 else:
8044 self.lu.LogInfo("Not checking memory on the secondary node as"
8045 " instance will not be started")
8047 # check if failover must be forced instead of migration
8048 if (not self.cleanup and not self.failover and
8049 i_be[constants.BE_ALWAYS_FAILOVER]):
8050 if self.fallback:
8051 self.lu.LogInfo("Instance configured to always failover; fallback"
8052 " to failover")
8053 self.failover = True
8054 else:
8055 raise errors.OpPrereqError("This instance has been configured to"
8056 " always failover, please allow failover",
8057 errors.ECODE_STATE)
8059 # check bridge existence
8060 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8062 if not self.cleanup:
8063 _CheckNodeNotDrained(self.lu, target_node)
8064 if not self.failover:
8065 result = self.rpc.call_instance_migratable(instance.primary_node,
8066 instance)
8067 if result.fail_msg and self.fallback:
8068 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8069 " failover")
8070 self.failover = True
8071 else:
8072 result.Raise("Can't migrate, please use failover",
8073 prereq=True, ecode=errors.ECODE_STATE)
8075 assert not (self.failover and self.cleanup)
8077 if not self.failover:
8078 if self.lu.op.live is not None and self.lu.op.mode is not None:
8079 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8080 " parameters are accepted",
8081 errors.ECODE_INVAL)
8082 if self.lu.op.live is not None:
8083 if self.lu.op.live:
8084 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8085 else:
8086 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8087 # reset the 'live' parameter to None so that repeated
8088 # invocations of CheckPrereq do not raise an exception
8089 self.lu.op.live = None
8090 elif self.lu.op.mode is None:
8091 # read the default value from the hypervisor
8092 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8093 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8095 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8096 else:
8097 # Failover is never live
8098 self.live = False
8100 if not (self.failover or self.cleanup):
8101 remote_info = self.rpc.call_instance_info(instance.primary_node,
8102 instance.name,
8103 instance.hypervisor)
8104 remote_info.Raise("Error checking instance on node %s" %
8105 instance.primary_node)
8106 instance_running = bool(remote_info.payload)
8107 if instance_running:
8108 self.current_mem = int(remote_info.payload["memory"])
8110 def _RunAllocator(self):
8111 """Run the allocator based on input opcode.
8113 """
8114 # FIXME: add a self.ignore_ipolicy option
8115 ial = IAllocator(self.cfg, self.rpc,
8116 mode=constants.IALLOCATOR_MODE_RELOC,
8117 name=self.instance_name,
8118 relocate_from=[self.instance.primary_node],
8119 )
8121 ial.Run(self.lu.op.iallocator)
8123 if not ial.success:
8124 raise errors.OpPrereqError("Can't compute nodes using"
8125 " iallocator '%s': %s" %
8126 (self.lu.op.iallocator, ial.info),
8127 errors.ECODE_NORES)
8128 if len(ial.result) != ial.required_nodes:
8129 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8130 " of nodes (%s), required %s" %
8131 (self.lu.op.iallocator, len(ial.result),
8132 ial.required_nodes), errors.ECODE_FAULT)
8133 self.target_node = ial.result[0]
8134 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8135 self.instance_name, self.lu.op.iallocator,
8136 utils.CommaJoin(ial.result))
8138 def _WaitUntilSync(self):
8139 """Poll with custom rpc for disk sync.
8141 This uses our own step-based rpc call.
8143 """
8144 self.feedback_fn("* wait until resync is done")
8145 all_done = False
8146 while not all_done:
8147 all_done = True
8148 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8149 self.nodes_ip,
8150 (self.instance.disks,
8151 self.instance))
8152 min_percent = 100
8153 for node, nres in result.items():
8154 nres.Raise("Cannot resync disks on node %s" % node)
8155 node_done, node_percent = nres.payload
8156 all_done = all_done and node_done
8157 if node_percent is not None:
8158 min_percent = min(min_percent, node_percent)
8159 if not all_done:
8160 if min_percent < 100:
8161 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8162 time.sleep(2)
8164 def _EnsureSecondary(self, node):
8165 """Demote a node to secondary.
8167 """
8168 self.feedback_fn("* switching node %s to secondary mode" % node)
8170 for dev in self.instance.disks:
8171 self.cfg.SetDiskID(dev, node)
8173 result = self.rpc.call_blockdev_close(node, self.instance.name,
8174 self.instance.disks)
8175 result.Raise("Cannot change disk to secondary on node %s" % node)
8177 def _GoStandalone(self):
8178 """Disconnect from the network.
8180 """
8181 self.feedback_fn("* changing into standalone mode")
8182 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8183 self.instance.disks)
8184 for node, nres in result.items():
8185 nres.Raise("Cannot disconnect disks node %s" % node)
8187 def _GoReconnect(self, multimaster):
8188 """Reconnect to the network.
8190 """
8191 if multimaster:
8192 msg = "dual-master"
8193 else:
8194 msg = "single-master"
8195 self.feedback_fn("* changing disks into %s mode" % msg)
8196 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8197 (self.instance.disks, self.instance),
8198 self.instance.name, multimaster)
8199 for node, nres in result.items():
8200 nres.Raise("Cannot change disks config on node %s" % node)
8202 def _ExecCleanup(self):
8203 """Try to cleanup after a failed migration.
8205 The cleanup is done by:
8206 - check that the instance is running only on one node
8207 (and update the config if needed)
8208 - change disks on its secondary node to secondary
8209 - wait until disks are fully synchronized
8210 - disconnect from the network
8211 - change disks into single-master mode
8212 - wait again until disks are fully synchronized
8214 """
8215 instance = self.instance
8216 target_node = self.target_node
8217 source_node = self.source_node
8219 # check running on only one node
8220 self.feedback_fn("* checking where the instance actually runs"
8221 " (if this hangs, the hypervisor might be in"
8222 " a bad state)")
8223 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8224 for node, result in ins_l.items():
8225 result.Raise("Can't contact node %s" % node)
8227 runningon_source = instance.name in ins_l[source_node].payload
8228 runningon_target = instance.name in ins_l[target_node].payload
8230 if runningon_source and runningon_target:
8231 raise errors.OpExecError("Instance seems to be running on two nodes,"
8232 " or the hypervisor is confused; you will have"
8233 " to ensure manually that it runs only on one"
8234 " and restart this operation")
8236 if not (runningon_source or runningon_target):
8237 raise errors.OpExecError("Instance does not seem to be running at all;"
8238 " in this case it's safer to repair by"
8239 " running 'gnt-instance stop' to ensure disk"
8240 " shutdown, and then restarting it")
8242 if runningon_target:
8243 # the migration has actually succeeded, we need to update the config
8244 self.feedback_fn("* instance running on secondary node (%s),"
8245 " updating config" % target_node)
8246 instance.primary_node = target_node
8247 self.cfg.Update(instance, self.feedback_fn)
8248 demoted_node = source_node
8249 else:
8250 self.feedback_fn("* instance confirmed to be running on its"
8251 " primary node (%s)" % source_node)
8252 demoted_node = target_node
8254 if instance.disk_template in constants.DTS_INT_MIRROR:
8255 self._EnsureSecondary(demoted_node)
8256 try:
8257 self._WaitUntilSync()
8258 except errors.OpExecError:
8259 # we ignore errors here, since if the device is standalone, it
8260 # won't be able to sync
8261 pass
8262 self._GoStandalone()
8263 self._GoReconnect(False)
8264 self._WaitUntilSync()
8266 self.feedback_fn("* done")
8268 def _RevertDiskStatus(self):
8269 """Try to revert the disk status after a failed migration.
8271 """
8272 target_node = self.target_node
8273 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8274 return
8276 try:
8277 self._EnsureSecondary(target_node)
8278 self._GoStandalone()
8279 self._GoReconnect(False)
8280 self._WaitUntilSync()
8281 except errors.OpExecError, err:
8282 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8283 " please try to recover the instance manually;"
8284 " error '%s'" % str(err))
8286 def _AbortMigration(self):
8287 """Call the hypervisor code to abort a started migration.
8289 """
8290 instance = self.instance
8291 target_node = self.target_node
8292 source_node = self.source_node
8293 migration_info = self.migration_info
8295 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8296 instance,
8297 migration_info,
8298 False)
8299 abort_msg = abort_result.fail_msg
8300 if abort_msg:
8301 logging.error("Aborting migration failed on target node %s: %s",
8302 target_node, abort_msg)
8303 # Don't raise an exception here, as we still have to try to revert the
8304 # disk status, even if this step failed.
8306 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8307 instance, False, self.live)
8308 abort_msg = abort_result.fail_msg
8309 if abort_msg:
8310 logging.error("Aborting migration failed on source node %s: %s",
8311 source_node, abort_msg)
8313 def _ExecMigration(self):
8314 """Migrate an instance.
8316 The migration is done by:
8317 - change the disks into dual-master mode
8318 - wait until disks are fully synchronized again
8319 - migrate the instance
8320 - change disks on the new secondary node (the old primary) to secondary
8321 - wait until disks are fully synchronized
8322 - change disks into single-master mode
8324 """
8325 instance = self.instance
8326 target_node = self.target_node
8327 source_node = self.source_node
8329 # Check for hypervisor version mismatch and warn the user.
8330 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8331 None, [self.instance.hypervisor])
8332 for ninfo in nodeinfo.values():
8333 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8334 ninfo.node)
8335 (_, _, (src_info, )) = nodeinfo[source_node].payload
8336 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8338 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8339 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8340 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8341 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8342 if src_version != dst_version:
8343 self.feedback_fn("* warning: hypervisor version mismatch between"
8344 " source (%s) and target (%s) node" %
8345 (src_version, dst_version))
8347 self.feedback_fn("* checking disk consistency between source and target")
8348 for (idx, dev) in enumerate(instance.disks):
8349 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8350 raise errors.OpExecError("Disk %s is degraded or not fully"
8351 " synchronized on target node,"
8352 " aborting migration" % idx)
8354 if self.current_mem > self.tgt_free_mem:
8355 if not self.allow_runtime_changes:
8356 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8357 " free memory to fit instance %s on target"
8358 " node %s (have %dMB, need %dMB)" %
8359 (instance.name, target_node,
8360 self.tgt_free_mem, self.current_mem))
8361 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8362 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8363 instance,
8364 self.tgt_free_mem)
8365 rpcres.Raise("Cannot modify instance runtime memory")
8367 # First get the migration information from the remote node
8368 result = self.rpc.call_migration_info(source_node, instance)
8369 msg = result.fail_msg
8370 if msg:
8371 log_err = ("Failed fetching source migration information from %s: %s" %
8372 (source_node, msg))
8373 logging.error(log_err)
8374 raise errors.OpExecError(log_err)
8376 self.migration_info = migration_info = result.payload
8378 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8379 # Then switch the disks to master/master mode
8380 self._EnsureSecondary(target_node)
8381 self._GoStandalone()
8382 self._GoReconnect(True)
8383 self._WaitUntilSync()
8385 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8386 result = self.rpc.call_accept_instance(target_node,
8387 instance,
8388 migration_info,
8389 self.nodes_ip[target_node])
8391 msg = result.fail_msg
8392 if msg:
8393 logging.error("Instance pre-migration failed, trying to revert"
8394 " disk status: %s", msg)
8395 self.feedback_fn("Pre-migration failed, aborting")
8396 self._AbortMigration()
8397 self._RevertDiskStatus()
8398 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8399 (instance.name, msg))
8401 self.feedback_fn("* migrating instance to %s" % target_node)
8402 result = self.rpc.call_instance_migrate(source_node, instance,
8403 self.nodes_ip[target_node],
8404 self.live)
8405 msg = result.fail_msg
8406 if msg:
8407 logging.error("Instance migration failed, trying to revert"
8408 " disk status: %s", msg)
8409 self.feedback_fn("Migration failed, aborting")
8410 self._AbortMigration()
8411 self._RevertDiskStatus()
8412 raise errors.OpExecError("Could not migrate instance %s: %s" %
8413 (instance.name, msg))
8415 self.feedback_fn("* starting memory transfer")
8416 last_feedback = time.time()
8417 while True:
8418 result = self.rpc.call_instance_get_migration_status(source_node,
8419 instance)
8420 msg = result.fail_msg
8421 ms = result.payload # MigrationStatus instance
8422 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8423 logging.error("Instance migration failed, trying to revert"
8424 " disk status: %s", msg)
8425 self.feedback_fn("Migration failed, aborting")
8426 self._AbortMigration()
8427 self._RevertDiskStatus()
8428 raise errors.OpExecError("Could not migrate instance %s: %s" %
8429 (instance.name, msg))
8431 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8432 self.feedback_fn("* memory transfer complete")
8433 break
8435 if (utils.TimeoutExpired(last_feedback,
8436 self._MIGRATION_FEEDBACK_INTERVAL) and
8437 ms.transferred_ram is not None):
8438 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8439 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8440 last_feedback = time.time()
8442 time.sleep(self._MIGRATION_POLL_INTERVAL)
8444 result = self.rpc.call_instance_finalize_migration_src(source_node,
8445 instance,
8446 True,
8447 self.live)
8448 msg = result.fail_msg
8449 if msg:
8450 logging.error("Instance migration succeeded, but finalization failed"
8451 " on the source node: %s", msg)
8452 raise errors.OpExecError("Could not finalize instance migration: %s" %
8453 msg)
8455 instance.primary_node = target_node
8457 # distribute new instance config to the other nodes
8458 self.cfg.Update(instance, self.feedback_fn)
8460 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8461 instance,
8462 migration_info,
8463 True)
8464 msg = result.fail_msg
8465 if msg:
8466 logging.error("Instance migration succeeded, but finalization failed"
8467 " on the target node: %s", msg)
8468 raise errors.OpExecError("Could not finalize instance migration: %s" %
8469 msg)
8471 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8472 self._EnsureSecondary(source_node)
8473 self._WaitUntilSync()
8474 self._GoStandalone()
8475 self._GoReconnect(False)
8476 self._WaitUntilSync()
8478 # If the instance's disk template is `rbd' and there was a successful
8479 # migration, unmap the device from the source node.
8480 if self.instance.disk_template == constants.DT_RBD:
8481 disks = _ExpandCheckDisks(instance, instance.disks)
8482 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8483 for disk in disks:
8484 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8485 msg = result.fail_msg
8486 if msg:
8487 logging.error("Migration was successful, but couldn't unmap the"
8488 " block device %s on source node %s: %s",
8489 disk.iv_name, source_node, msg)
8490 logging.error("You need to unmap the device %s manually on %s",
8491 disk.iv_name, source_node)
8493 self.feedback_fn("* done")
8495 def _ExecFailover(self):
8496 """Failover an instance.
8498 The failover is done by shutting it down on its present node and
8499 starting it on the secondary.
8501 """
8502 instance = self.instance
8503 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8505 source_node = instance.primary_node
8506 target_node = self.target_node
8508 if instance.admin_state == constants.ADMINST_UP:
8509 self.feedback_fn("* checking disk consistency between source and target")
8510 for (idx, dev) in enumerate(instance.disks):
8511 # for drbd, these are drbd over lvm
8512 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8513 False):
8514 if primary_node.offline:
8515 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8516 " target node %s" %
8517 (primary_node.name, idx, target_node))
8518 elif not self.ignore_consistency:
8519 raise errors.OpExecError("Disk %s is degraded on target node,"
8520 " aborting failover" % idx)
8521 else:
8522 self.feedback_fn("* not checking disk consistency as instance is not"
8523 " running")
8525 self.feedback_fn("* shutting down instance on source node")
8526 logging.info("Shutting down instance %s on node %s",
8527 instance.name, source_node)
8529 result = self.rpc.call_instance_shutdown(source_node, instance,
8530 self.shutdown_timeout)
8531 msg = result.fail_msg
8532 if msg:
8533 if self.ignore_consistency or primary_node.offline:
8534 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8535 " proceeding anyway; please make sure node"
8536 " %s is down; error details: %s",
8537 instance.name, source_node, source_node, msg)
8538 else:
8539 raise errors.OpExecError("Could not shutdown instance %s on"
8540 " node %s: %s" %
8541 (instance.name, source_node, msg))
8543 self.feedback_fn("* deactivating the instance's disks on source node")
8544 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8545 raise errors.OpExecError("Can't shut down the instance's disks")
8547 instance.primary_node = target_node
8548 # distribute new instance config to the other nodes
8549 self.cfg.Update(instance, self.feedback_fn)
8551 # Only start the instance if it's marked as up
8552 if instance.admin_state == constants.ADMINST_UP:
8553 self.feedback_fn("* activating the instance's disks on target node %s" %
8554 target_node)
8555 logging.info("Starting instance %s on node %s",
8556 instance.name, target_node)
8558 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8559 ignore_secondaries=True)
8560 if not disks_ok:
8561 _ShutdownInstanceDisks(self.lu, instance)
8562 raise errors.OpExecError("Can't activate the instance's disks")
8564 self.feedback_fn("* starting the instance on the target node %s" %
8565 target_node)
8566 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8567 False)
8568 msg = result.fail_msg
8569 if msg:
8570 _ShutdownInstanceDisks(self.lu, instance)
8571 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8572 (instance.name, target_node, msg))
8574 def Exec(self, feedback_fn):
8575 """Perform the migration.
8577 """
8578 self.feedback_fn = feedback_fn
8579 self.source_node = self.instance.primary_node
8581 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8582 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8583 self.target_node = self.instance.secondary_nodes[0]
8584 # Otherwise self.target_node has been populated either
8585 # directly, or through an iallocator.
8587 self.all_nodes = [self.source_node, self.target_node]
8588 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8589 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8591 if self.failover:
8592 feedback_fn("Failover instance %s" % self.instance.name)
8593 self._ExecFailover()
8594 else:
8595 feedback_fn("Migrating instance %s" % self.instance.name)
8597 if self.cleanup:
8598 return self._ExecCleanup()
8599 else:
8600 return self._ExecMigration()
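# Dispatch summary (editorial note, not in the original source): failover=True
# takes the _ExecFailover path; otherwise cleanup=True resumes a half-finished
# migration via _ExecCleanup, and the default is a fresh _ExecMigration.
# failover and cleanup are mutually exclusive, as asserted in CheckPrereq.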
8603 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8604 force_open):
8605 """Wrapper around L{_CreateBlockDevInner}.
8607 This method annotates the root device first.
8609 """
8610 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8611 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8612 force_open)
8615 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8616 info, force_open):
8617 """Create a tree of block devices on a given node.
8619 If this device type has to be created on secondaries, create it and
8620 all its children.
8622 If not, just recurse to children keeping the same 'force' value.
8624 @attention: The device has to be annotated already.
8626 @param lu: the lu on whose behalf we execute
8627 @param node: the node on which to create the device
8628 @type instance: L{objects.Instance}
8629 @param instance: the instance which owns the device
8630 @type device: L{objects.Disk}
8631 @param device: the device to create
8632 @type force_create: boolean
8633 @param force_create: whether to force creation of this device; this
8634 will be changed to True whenever we find a device which has
8635 CreateOnSecondary() attribute
8636 @param info: the extra 'metadata' we should attach to the device
8637 (this will be represented as a LVM tag)
8638 @type force_open: boolean
8639 @param force_open: this parameter will be passed to the
8640 L{backend.BlockdevCreate} function where it specifies
8641 whether we run on primary or not, and it affects both
8642 the child assembly and the device's own Open() execution
8644 """
8645 if device.CreateOnSecondary():
8646 force_create = True
8648 if device.children:
8649 for child in device.children:
8650 _CreateBlockDevInner(lu, node, instance, child, force_create,
8651 info, force_open)
8653 if not force_create:
8654 return
8656 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
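# Recursion sketch (editorial note, not in the original source): a device
# whose CreateOnSecondary() is true flips force_create to True, so all of its
# children and then the device itself are created on the given node; for
# other devices the children are visited with the caller's force_create, and
# the device itself is only created when that flag was already set.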
8659 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8660 """Create a single block device on a given node.
8662 This will not recurse over children of the device, so they must be
8665 @param lu: the lu on whose behalf we execute
8666 @param node: the node on which to create the device
8667 @type instance: L{objects.Instance}
8668 @param instance: the instance which owns the device
8669 @type device: L{objects.Disk}
8670 @param device: the device to create
8671 @param info: the extra 'metadata' we should attach to the device
8672 (this will be represented as a LVM tag)
8673 @type force_open: boolean
8674 @param force_open: this parameter will be passed to the
8675 L{backend.BlockdevCreate} function where it specifies
8676 whether we run on primary or not, and it affects both
8677 the child assembly and the device's own Open() execution
8679 """
8680 lu.cfg.SetDiskID(device, node)
8681 result = lu.rpc.call_blockdev_create(node, device, device.size,
8682 instance.name, force_open, info)
8683 result.Raise("Can't create block device %s on"
8684 " node %s for instance %s" % (device, node, instance.name))
8685 if device.physical_id is None:
8686 device.physical_id = result.payload
8689 def _GenerateUniqueNames(lu, exts):
8690 """Generate a suitable LV name.
8692 This will generate a logical volume name for the given instance.
8694 """
8695 results = []
8696 for val in exts:
8697 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8698 results.append("%s%s" % (new_id, val))
8699 return results
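# Illustrative sketch (IDs abbreviated, not in the original source):
# _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"]) yields names such
# as ["4ac0e6b8-....disk0_data", "93f27c1e-....disk0_meta"]; each entry gets
# its own ID from lu.cfg.GenerateUniqueID, the suffix only describes its role.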
8702 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8703 iv_name, p_minor, s_minor):
8704 """Generate a drbd8 device complete with its children.
8706 """
8707 assert len(vgnames) == len(names) == 2
8708 port = lu.cfg.AllocatePort()
8709 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8711 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8712 logical_id=(vgnames[0], names[0]),
8713 params={})
8714 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8715 logical_id=(vgnames[1], names[1]),
8716 params={})
8717 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8718 logical_id=(primary, secondary, port,
8719 p_minor, s_minor,
8720 shared_secret),
8721 children=[dev_data, dev_meta],
8722 iv_name=iv_name, params={})
8723 return drbd_dev
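# The returned device tree, sketched with placeholder values (editorial
# illustration, not in the original source):
#
#   LD_DRBD8 (logical_id: primary, secondary, port, p_minor, s_minor, secret)
#     +- LD_LV names[0] (size MiB on vgnames[0], the data volume)
#     +- LD_LV names[1] (DRBD_META_SIZE MiB on vgnames[1], the metadata volume)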
8726 _DISK_TEMPLATE_NAME_PREFIX = {
8727 constants.DT_PLAIN: "",
8728 constants.DT_RBD: ".rbd",
8729 }
8732 _DISK_TEMPLATE_DEVICE_TYPE = {
8733 constants.DT_PLAIN: constants.LD_LV,
8734 constants.DT_FILE: constants.LD_FILE,
8735 constants.DT_SHARED_FILE: constants.LD_FILE,
8736 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8737 constants.DT_RBD: constants.LD_RBD,
8738 }
8741 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8742 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8743 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8744 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8745 """Generate the entire disk layout for a given template type.
8747 """
8748 #TODO: compute space requirements
8750 vgname = lu.cfg.GetVGName()
8751 disk_count = len(disk_info)
8752 disks = []
8754 if template_name == constants.DT_DISKLESS:
8755 pass
8756 elif template_name == constants.DT_DRBD8:
8757 if len(secondary_nodes) != 1:
8758 raise errors.ProgrammerError("Wrong template configuration")
8759 remote_node = secondary_nodes[0]
8760 minors = lu.cfg.AllocateDRBDMinor(
8761 [primary_node, remote_node] * len(disk_info), instance_name)
8763 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8764 full_disk_params)
8765 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8767 names = []
8768 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8769 for i in range(disk_count)]):
8770 names.append(lv_prefix + "_data")
8771 names.append(lv_prefix + "_meta")
8772 for idx, disk in enumerate(disk_info):
8773 disk_index = idx + base_index
8774 data_vg = disk.get(constants.IDISK_VG, vgname)
8775 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8776 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8777 disk[constants.IDISK_SIZE],
8778 [data_vg, meta_vg],
8779 names[idx * 2:idx * 2 + 2],
8780 "disk/%d" % disk_index,
8781 minors[idx * 2], minors[idx * 2 + 1])
8782 disk_dev.mode = disk[constants.IDISK_MODE]
8783 disks.append(disk_dev)
8784 else:
8785 if secondary_nodes:
8786 raise errors.ProgrammerError("Wrong template configuration")
8788 if template_name == constants.DT_FILE:
8789 _req_file_storage()
8790 elif template_name == constants.DT_SHARED_FILE:
8791 _req_shr_file_storage()
8793 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8794 if name_prefix is None:
8795 names = None
8796 else:
8797 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8798 (name_prefix, base_index + i)
8799 for i in range(disk_count)])
8801 if template_name == constants.DT_PLAIN:
8802 def logical_id_fn(idx, _, disk):
8803 vg = disk.get(constants.IDISK_VG, vgname)
8804 return (vg, names[idx])
8805 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8806 logical_id_fn = \
8807 lambda _, disk_index, disk: (file_driver,
8808 "%s/disk%d" % (file_storage_dir,
8809 disk_index))
8810 elif template_name == constants.DT_BLOCK:
8811 logical_id_fn = \
8812 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8813 disk[constants.IDISK_ADOPT])
8814 elif template_name == constants.DT_RBD:
8815 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8817 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8819 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8821 for idx, disk in enumerate(disk_info):
8822 disk_index = idx + base_index
8823 size = disk[constants.IDISK_SIZE]
8824 feedback_fn("* disk %s, size %s" %
8825 (disk_index, utils.FormatUnit(size, "h")))
8826 disks.append(objects.Disk(dev_type=dev_type, size=size,
8827 logical_id=logical_id_fn(idx, disk_index, disk),
8828 iv_name="disk/%d" % disk_index,
8829 mode=disk[constants.IDISK_MODE],
8830 params={}))
8832 return disks
8835 def _GetInstanceInfoText(instance):
8836 """Compute the text that should be added to the disk's metadata.
8838 """
8839 return "originstname+%s" % instance.name
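# Example (editorial illustration): for an instance named "inst1.example.com"
# this returns "originstname+inst1.example.com", which ends up as an LVM tag
# on the instance's volumes via the 'info' argument of _CreateBlockDev.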
8842 def _CalcEta(time_taken, written, total_size):
8843 """Calculates the ETA based on size written and total size.
8845 @param time_taken: The time taken so far
8846 @param written: amount written so far
8847 @param total_size: The total size of data to be written
8848 @return: The remaining time in seconds
8850 """
8851 avg_time = time_taken / float(written)
8852 return (total_size - written) * avg_time
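# Worked example (hypothetical numbers, not in the original source): after
# writing 2048 MiB of 8192 MiB in 60 seconds, avg_time is 60 / 2048.0 seconds
# per MiB, so _CalcEta(60, 2048, 8192) == (8192 - 2048) * (60 / 2048.0)
# == 180.0, i.e. roughly three minutes remain.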
8855 def _WipeDisks(lu, instance):
8856 """Wipes instance disks.
8858 @type lu: L{LogicalUnit}
8859 @param lu: the logical unit on whose behalf we execute
8860 @type instance: L{objects.Instance}
8861 @param instance: the instance whose disks we should create
8862 @return: the success of the wipe
8864 """
8865 node = instance.primary_node
8867 for device in instance.disks:
8868 lu.cfg.SetDiskID(device, node)
8870 logging.info("Pause sync of instance %s disks", instance.name)
8871 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8872 (instance.disks, instance),
8873 True)
8875 for idx, success in enumerate(result.payload):
8876 if not success:
8877 logging.warn("pause-sync of instance %s for disk %d failed",
8878 instance.name, idx)
8880 try:
8881 for idx, device in enumerate(instance.disks):
8882 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8883 # MAX_WIPE_CHUNK at max
8884 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8885 constants.MIN_WIPE_CHUNK_PERCENT)
8886 # we _must_ make this an int, otherwise rounding errors will
8887 # occur
8888 wipe_chunk_size = int(wipe_chunk_size)
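# Worked example, assuming the usual values of constants.MAX_WIPE_CHUNK
# (1024 MiB) and constants.MIN_WIPE_CHUNK_PERCENT (10): a 4096 MiB disk gets
# chunks of int(min(1024, 4096 / 100.0 * 10)) == 409 MiB, while any disk
# larger than 10240 MiB is capped at the 1024 MiB maximum.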
8890 lu.LogInfo("* Wiping disk %d", idx)
8891 logging.info("Wiping disk %d for instance %s, node %s using"
8892 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8894 offset = 0
8895 size = device.size
8896 last_output = 0
8897 start_time = time.time()
8899 while offset < size:
8900 wipe_size = min(wipe_chunk_size, size - offset)
8901 logging.debug("Wiping disk %d, offset %s, chunk %s",
8902 idx, offset, wipe_size)
8903 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8904 wipe_size)
8905 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8906 (idx, offset, wipe_size))
8907 offset += wipe_size
8908 now = time.time()
8909 if now - last_output >= 60:
8910 eta = _CalcEta(now - start_time, offset, size)
8911 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8912 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8913 last_output = now
8914 finally:
8915 logging.info("Resume sync of instance %s disks", instance.name)
8917 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8918 (instance.disks, instance),
8919 False)
8921 for idx, success in enumerate(result.payload):
8922 if not success:
8923 lu.LogWarning("Resume sync of disk %d failed, please have a"
8924 " look at the status and troubleshoot the issue", idx)
8925 logging.warn("resume-sync of instance %s for disk %d failed",
8926 instance.name, idx)
8929 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8930 """Create all disks for an instance.
8932 This abstracts away some work from AddInstance.
8934 @type lu: L{LogicalUnit}
8935 @param lu: the logical unit on whose behalf we execute
8936 @type instance: L{objects.Instance}
8937 @param instance: the instance whose disks we should create
8938 @type to_skip: list
8939 @param to_skip: list of indices to skip
8940 @type target_node: string
8941 @param target_node: if passed, overrides the target node for creation
8943 @return: the success of the creation
8945 """
8946 info = _GetInstanceInfoText(instance)
8947 if target_node is None:
8948 pnode = instance.primary_node
8949 all_nodes = instance.all_nodes
8950 else:
8951 pnode = target_node
8952 all_nodes = [pnode]
8954 if instance.disk_template in constants.DTS_FILEBASED:
8955 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8956 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8958 result.Raise("Failed to create directory '%s' on"
8959 " node %s" % (file_storage_dir, pnode))
8961 # Note: this needs to be kept in sync with adding of disks in
8962 # LUInstanceSetParams
8963 for idx, device in enumerate(instance.disks):
8964 if to_skip and idx in to_skip:
8965 continue
8966 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8968 for node in all_nodes:
8969 f_create = node == pnode
8970 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8973 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8974 """Remove all disks for an instance.
8976 This abstracts away some work from `AddInstance()` and
8977 `RemoveInstance()`. Note that in case some of the devices couldn't
8978 be removed, the removal will continue with the other ones (compare
8979 with `_CreateDisks()`).
8981 @type lu: L{LogicalUnit}
8982 @param lu: the logical unit on whose behalf we execute
8983 @type instance: L{objects.Instance}
8984 @param instance: the instance whose disks we should remove
8985 @type target_node: string
8986 @param target_node: used to override the node on which to remove the disks
8988 @return: the success of the removal
8990 """
8991 logging.info("Removing block devices for instance %s", instance.name)
8993 all_result = True
8994 ports_to_release = set()
8995 for (idx, device) in enumerate(instance.disks):
8996 if target_node:
8997 edata = [(target_node, device)]
8998 else:
8999 edata = device.ComputeNodeTree(instance.primary_node)
9000 for node, disk in edata:
9001 lu.cfg.SetDiskID(disk, node)
9002 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
9003 if msg:
9004 lu.LogWarning("Could not remove disk %s on node %s,"
9005 " continuing anyway: %s", idx, node, msg)
9006 all_result = False
9008 # if this is a DRBD disk, return its port to the pool
9009 if device.dev_type in constants.LDS_DRBD:
9010 ports_to_release.add(device.logical_id[2])
9012 if all_result or ignore_failures:
9013 for port in ports_to_release:
9014 lu.cfg.AddTcpUdpPort(port)
9016 if instance.disk_template == constants.DT_FILE:
9017 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9018 if target_node:
9019 tgt = target_node
9020 else:
9021 tgt = instance.primary_node
9022 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9023 if result.fail_msg:
9024 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9025 file_storage_dir, instance.primary_node, result.fail_msg)
9026 all_result = False
9028 return all_result
9031 def _ComputeDiskSizePerVG(disk_template, disks):
9032 """Compute disk size requirements in the volume group.
9034 """
9035 def _compute(disks, payload):
9036 """Universal algorithm.
9038 """
9039 vgs = {}
9040 for disk in disks:
9041 vgs[disk[constants.IDISK_VG]] = \
9042 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9044 return vgs
9046 # Required free disk space as a function of disk and swap space
9047 req_size_dict = {
9048 constants.DT_DISKLESS: {},
9049 constants.DT_PLAIN: _compute(disks, 0),
9050 # 128 MB are added for drbd metadata for each disk
9051 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9052 constants.DT_FILE: {},
9053 constants.DT_SHARED_FILE: {},
9054 }
9056 if disk_template not in req_size_dict:
9057 raise errors.ProgrammerError("Disk template '%s' size requirement"
9058 " is unknown" % disk_template)
9060 return req_size_dict[disk_template]
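# Illustrative sketch (hypothetical input, not in the original source): for
# two DRBD8 disks of 1024 and 2048 MiB in volume group "xenvg", _compute adds
# DRBD_META_SIZE to each disk, so _ComputeDiskSizePerVG(constants.DT_DRBD8,
# disks) maps "xenvg" to (1024 + 128) + (2048 + 128) == 3328 MiB.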
9063 def _ComputeDiskSize(disk_template, disks):
9064 """Compute disk size requirements in the volume group.
9066 """
9067 # Required free disk space as a function of disk and swap space
9068 req_size_dict = {
9069 constants.DT_DISKLESS: None,
9070 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9071 # 128 MB are added for drbd metadata for each disk
9072 constants.DT_DRBD8:
9073 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9074 constants.DT_FILE: None,
9075 constants.DT_SHARED_FILE: 0,
9076 constants.DT_BLOCK: 0,
9077 constants.DT_RBD: 0,
9078 }
9080 if disk_template not in req_size_dict:
9081 raise errors.ProgrammerError("Disk template '%s' size requirement"
9082 " is unknown" % disk_template)
9084 return req_size_dict[disk_template]
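# Illustrative sketch (hypothetical input, not in the original source): for
# disks of 1024 and 2048 MiB, _ComputeDiskSize returns 3072 for the plain
# template and (1024 + 128) + (2048 + 128) == 3328 for DRBD8, the extra
# 128 MiB per disk being DRBD_META_SIZE; file-based and externally mirrored
# templates need no volume group space here.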
9087 def _FilterVmNodes(lu, nodenames):
9088 """Filters out non-vm_capable nodes from a list.
9090 @type lu: L{LogicalUnit}
9091 @param lu: the logical unit for which we check
9092 @type nodenames: list
9093 @param nodenames: the list of nodes on which we should check
9095 @return: the list of vm-capable nodes
9097 """
9098 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9099 return [name for name in nodenames if name not in non_vm_nodes]
9102 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9103 """Hypervisor parameter validation.
9105 This function abstracts the hypervisor parameter validation to be
9106 used in both instance create and instance modify.
9108 @type lu: L{LogicalUnit}
9109 @param lu: the logical unit for which we check
9110 @type nodenames: list
9111 @param nodenames: the list of nodes on which we should check
9112 @type hvname: string
9113 @param hvname: the name of the hypervisor we should use
9114 @type hvparams: dict
9115 @param hvparams: the parameters which we need to check
9116 @raise errors.OpPrereqError: if the parameters are not valid
9118 """
9119 nodenames = _FilterVmNodes(lu, nodenames)
9121 cluster = lu.cfg.GetClusterInfo()
9122 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9124 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9125 for node in nodenames:
9126 info = hvinfo[node]
9127 if info.offline:
9128 continue
9129 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9132 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9133 """OS parameters validation.
9135 @type lu: L{LogicalUnit}
9136 @param lu: the logical unit for which we check
9137 @type required: boolean
9138 @param required: whether the validation should fail if the OS is not
9139 found
9140 @type nodenames: list
9141 @param nodenames: the list of nodes on which we should check
9142 @type osname: string
9143 @param osname: the name of the OS we should use
9144 @type osparams: dict
9145 @param osparams: the parameters which we need to check
9146 @raise errors.OpPrereqError: if the parameters are not valid
9148 """
9149 nodenames = _FilterVmNodes(lu, nodenames)
9150 result = lu.rpc.call_os_validate(nodenames, required, osname,
9151 [constants.OS_VALIDATE_PARAMETERS],
9152 osparams)
9153 for node, nres in result.items():
9154 # we don't check for offline cases since this should be run only
9155 # against the master node and/or an instance's nodes
9156 nres.Raise("OS Parameters validation failed on node %s" % node)
9157 if not nres.payload:
9158 lu.LogInfo("OS %s not found on node %s, validation skipped",
9159 osname, node)
9162 class LUInstanceCreate(LogicalUnit):
9163 """Create an instance.
9165 """
9166 HPATH = "instance-add"
9167 HTYPE = constants.HTYPE_INSTANCE
9170 def CheckArguments(self):
9171 """Check arguments.
9173 """
9174 # do not require name_check to ease forward/backward compatibility
9176 if self.op.no_install and self.op.start:
9177 self.LogInfo("No-installation mode selected, disabling startup")
9178 self.op.start = False
9179 # validate/normalize the instance name
9180 self.op.instance_name = \
9181 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9183 if self.op.ip_check and not self.op.name_check:
9184 # TODO: make the ip check more flexible and not depend on the name check
9185 raise errors.OpPrereqError("Cannot do IP address check without a name"
9186 " check", errors.ECODE_INVAL)
9188 # check nics' parameter names
9189 for nic in self.op.nics:
9190 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9192 # check disks. parameter names and consistent adopt/no-adopt strategy
9193 has_adopt = has_no_adopt = False
9194 for disk in self.op.disks:
9195 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9196 if constants.IDISK_ADOPT in disk:
9197 has_adopt = True
9198 else:
9199 has_no_adopt = True
9200 if has_adopt and has_no_adopt:
9201 raise errors.OpPrereqError("Either all disks are adopted or none is",
9202 errors.ECODE_INVAL)
9203 if has_adopt:
9204 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9205 raise errors.OpPrereqError("Disk adoption is not supported for the"
9206 " '%s' disk template" %
9207 self.op.disk_template,
9208 errors.ECODE_INVAL)
9209 if self.op.iallocator is not None:
9210 raise errors.OpPrereqError("Disk adoption not allowed with an"
9211 " iallocator script", errors.ECODE_INVAL)
9212 if self.op.mode == constants.INSTANCE_IMPORT:
9213 raise errors.OpPrereqError("Disk adoption not allowed for"
9214 " instance import", errors.ECODE_INVAL)
9215 else:
9216 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9217 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9218 " but no 'adopt' parameter given" %
9219 self.op.disk_template,
9220 errors.ECODE_INVAL)
9222 self.adopt_disks = has_adopt
9224 # instance name verification
9225 if self.op.name_check:
9226 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9227 self.op.instance_name = self.hostname1.name
9228 # used in CheckPrereq for ip ping check
9229 self.check_ip = self.hostname1.ip
9231 self.check_ip = None
9233 # file storage checks
9234 if (self.op.file_driver and
9235 self.op.file_driver not in constants.FILE_DRIVER):
9236 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9237 self.op.file_driver, errors.ECODE_INVAL)
9239 if self.op.disk_template == constants.DT_FILE:
9240 opcodes.RequireFileStorage()
9241 elif self.op.disk_template == constants.DT_SHARED_FILE:
9242 opcodes.RequireSharedFileStorage()
9244 ### Node/iallocator related checks
9245 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9247 if self.op.pnode is not None:
9248 if self.op.disk_template in constants.DTS_INT_MIRROR:
9249 if self.op.snode is None:
9250 raise errors.OpPrereqError("The networked disk templates need"
9251 " a mirror node", errors.ECODE_INVAL)
9252 elif self.op.snode:
9253 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9254 " template")
9255 self.op.snode = None
9257 self._cds = _GetClusterDomainSecret()
9259 if self.op.mode == constants.INSTANCE_IMPORT:
9260 # On import force_variant must be True, because if we forced it at
9261 # initial install, our only chance when importing it back is that it
9262 # works again!
9263 self.op.force_variant = True
9265 if self.op.no_install:
9266 self.LogInfo("No-installation mode has no effect during import")
9268 elif self.op.mode == constants.INSTANCE_CREATE:
9269 if self.op.os_type is None:
9270 raise errors.OpPrereqError("No guest OS specified",
9271 errors.ECODE_INVAL)
9272 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9273 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9274 " installation" % self.op.os_type,
9275 errors.ECODE_INVAL)
9276 if self.op.disk_template is None:
9277 raise errors.OpPrereqError("No disk template specified",
9278 errors.ECODE_INVAL)
9280 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9281 # Check handshake to ensure both clusters have the same domain secret
9282 src_handshake = self.op.source_handshake
9283 if not src_handshake:
9284 raise errors.OpPrereqError("Missing source handshake",
9285 errors.ECODE_INVAL)
9287 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9288 src_handshake)
9289 if errmsg:
9290 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9291 errors.ECODE_INVAL)
9293 # Load and check source CA
9294 self.source_x509_ca_pem = self.op.source_x509_ca
9295 if not self.source_x509_ca_pem:
9296 raise errors.OpPrereqError("Missing source X509 CA",
9297 errors.ECODE_INVAL)
9299 try:
9300 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9301 self._cds)
9302 except OpenSSL.crypto.Error, err:
9303 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9304 (err, ), errors.ECODE_INVAL)
9306 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9307 if errcode is not None:
9308 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9309 errors.ECODE_INVAL)
9311 self.source_x509_ca = cert
9313 src_instance_name = self.op.source_instance_name
9314 if not src_instance_name:
9315 raise errors.OpPrereqError("Missing source instance name",
9316 errors.ECODE_INVAL)
9318 self.source_instance_name = \
9319 netutils.GetHostname(name=src_instance_name).name
9321 else:
9322 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9323 self.op.mode, errors.ECODE_INVAL)
9325 def ExpandNames(self):
9326 """ExpandNames for CreateInstance.
9328 Figure out the right locks for instance creation.
9330 """
9331 self.needed_locks = {}
9333 instance_name = self.op.instance_name
9334 # this is just a preventive check, but someone might still add this
9335 # instance in the meantime, and creation will fail at lock-add time
9336 if instance_name in self.cfg.GetInstanceList():
9337 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9338 instance_name, errors.ECODE_EXISTS)
9340 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9342 if self.op.iallocator:
9343 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9344 # specifying a group on instance creation and then selecting nodes from
9345 # that group
9346 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9347 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9349 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9350 nodelist = [self.op.pnode]
9351 if self.op.snode is not None:
9352 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9353 nodelist.append(self.op.snode)
9354 self.needed_locks[locking.LEVEL_NODE] = nodelist
9355 # Lock resources of instance's primary and secondary nodes (copy to
9356 # prevent accidental modification)
9357 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9359 # in case of import lock the source node too
9360 if self.op.mode == constants.INSTANCE_IMPORT:
9361 src_node = self.op.src_node
9362 src_path = self.op.src_path
9364 if src_path is None:
9365 self.op.src_path = src_path = self.op.instance_name
9367 if src_node is None:
9368 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9369 self.op.src_node = None
9370 if os.path.isabs(src_path):
9371 raise errors.OpPrereqError("Importing an instance from a path"
9372 " requires a source node option",
9375 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9376 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9377 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9378 if not os.path.isabs(src_path):
9379 self.op.src_path = src_path = \
9380 utils.PathJoin(constants.EXPORT_DIR, src_path)
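# A relative source path is anchored below the cluster-wide export
# directory; as a sketch (path hypothetical), src_path "inst1.example.com"
# becomes utils.PathJoin(constants.EXPORT_DIR, "inst1.example.com"), i.e.
# typically something like "/srv/ganeti/export/inst1.example.com".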
9382 def _RunAllocator(self):
9383 """Run the allocator based on input opcode.
9386 nics = [n.ToDict() for n in self.nics]
9387 ial = IAllocator(self.cfg, self.rpc,
9388 mode=constants.IALLOCATOR_MODE_ALLOC,
9389 name=self.op.instance_name,
9390 disk_template=self.op.disk_template,
9391 tags=self.op.tags,
9392 os=self.op.os_type,
9393 vcpus=self.be_full[constants.BE_VCPUS],
9394 memory=self.be_full[constants.BE_MAXMEM],
9395 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9396 disks=self.disks,
9397 nics=nics,
9398 hypervisor=self.op.hypervisor,
9399 )
9401 ial.Run(self.op.iallocator)
9403 if not ial.success:
9404 raise errors.OpPrereqError("Can't compute nodes using"
9405 " iallocator '%s': %s" %
9406 (self.op.iallocator, ial.info),
9407 errors.ECODE_NORES)
9408 if len(ial.result) != ial.required_nodes:
9409 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9410 " of nodes (%s), required %s" %
9411 (self.op.iallocator, len(ial.result),
9412 ial.required_nodes), errors.ECODE_FAULT)
9413 self.op.pnode = ial.result[0]
9414 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9415 self.op.instance_name, self.op.iallocator,
9416 utils.CommaJoin(ial.result))
9417 if ial.required_nodes == 2:
9418 self.op.snode = ial.result[1]
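# Contract with the iallocator, as checked above: ial.result is a list of
# node names whose length must equal ial.required_nodes; the first entry
# becomes the primary node and, for mirrored templates needing two nodes,
# the second entry becomes the secondary.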
9420 def BuildHooksEnv(self):
9421 """Build hooks env.
9423 This runs on master, primary and secondary nodes of the instance.
9425 """
9426 env = {
9427 "ADD_MODE": self.op.mode,
9428 }
9429 if self.op.mode == constants.INSTANCE_IMPORT:
9430 env["SRC_NODE"] = self.op.src_node
9431 env["SRC_PATH"] = self.op.src_path
9432 env["SRC_IMAGES"] = self.src_images
9434 env.update(_BuildInstanceHookEnv(
9435 name=self.op.instance_name,
9436 primary_node=self.op.pnode,
9437 secondary_nodes=self.secondaries,
9438 status=self.op.start,
9439 os_type=self.op.os_type,
9440 minmem=self.be_full[constants.BE_MINMEM],
9441 maxmem=self.be_full[constants.BE_MAXMEM],
9442 vcpus=self.be_full[constants.BE_VCPUS],
9443 nics=_NICListToTuple(self, self.nics),
9444 disk_template=self.op.disk_template,
9445 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9446 for d in self.disks],
9447 bep=self.be_full,
9448 hvp=self.hv_full,
9449 hypervisor_name=self.op.hypervisor,
9450 tags=self.op.tags,
9451 ))
9453 return env
9455 def BuildHooksNodes(self):
9456 """Build hooks nodes.
9459 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9462 def _ReadExportInfo(self):
9463 """Reads the export information from disk.
9465 It will override the opcode source node and path with the actual
9466 information, if these two were not specified before.
9468 @return: the export information
9470 """
9471 assert self.op.mode == constants.INSTANCE_IMPORT
9473 src_node = self.op.src_node
9474 src_path = self.op.src_path
9476 if src_node is None:
9477 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9478 exp_list = self.rpc.call_export_list(locked_nodes)
9479 found = False
9480 for node in exp_list:
9481 if exp_list[node].fail_msg:
9482 continue
9483 if src_path in exp_list[node].payload:
9484 found = True
9485 self.op.src_node = src_node = node
9486 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9487 src_path)
9488 break
9489 if not found:
9490 raise errors.OpPrereqError("No export found for relative path %s" %
9491 src_path, errors.ECODE_INVAL)
9493 _CheckNodeOnline(self, src_node)
9494 result = self.rpc.call_export_info(src_node, src_path)
9495 result.Raise("No export or invalid export found in dir %s" % src_path)
9497 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9498 if not export_info.has_section(constants.INISECT_EXP):
9499 raise errors.ProgrammerError("Corrupted export config",
9500 errors.ECODE_ENVIRON)
9502 ei_version = export_info.get(constants.INISECT_EXP, "version")
9503 if int(ei_version) != constants.EXPORT_VERSION:
9504 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9505 (ei_version, constants.EXPORT_VERSION),
9506 errors.ECODE_ENVIRON)
9508 return export_info
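# The export info is a ConfigParser-style file; a rough sketch of the
# sections and options consulted here and in _ReadExportParams (values
# hypothetical, assuming the usual "export"/"instance" section names behind
# constants.INISECT_EXP and constants.INISECT_INS):
#   [export]
#   version = 0
#   os = debootstrap
#   [instance]
#   disk_template = drbd
#   disk0_size = 1024
#   nic0_mac = aa:00:00:dd:11:e1
# Only the version is validated at this point.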
9509 def _ReadExportParams(self, einfo):
9510 """Use export parameters as defaults.
9512 In case the opcode doesn't specify (as in override) some instance
9513 parameters, then try to use them from the export information, if
9514 that declares them.
9516 """
9517 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9519 if self.op.disk_template is None:
9520 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9521 self.op.disk_template = einfo.get(constants.INISECT_INS,
9522 "disk_template")
9523 if self.op.disk_template not in constants.DISK_TEMPLATES:
9524 raise errors.OpPrereqError("Disk template specified in configuration"
9525 " file is not one of the allowed values:"
9526 " %s" % " ".join(constants.DISK_TEMPLATES))
9528 raise errors.OpPrereqError("No disk template specified and the export"
9529 " is missing the disk_template information",
9532 if not self.op.disks:
9534 # TODO: import the disk iv_name too
9535 for idx in range(constants.MAX_DISKS):
9536 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9537 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9538 disks.append({constants.IDISK_SIZE: disk_sz})
9539 self.op.disks = disks
9540 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9541 raise errors.OpPrereqError("No disk info specified and the export"
9542 " is missing the disk information",
9545 if not self.op.nics:
9546 nics = []
9547 for idx in range(constants.MAX_NICS):
9548 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9549 ndict = {}
9550 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9551 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9552 ndict[name] = v
9553 nics.append(ndict)
9554 else:
9555 break
9556 self.op.nics = nics
9558 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9559 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9561 if (self.op.hypervisor is None and
9562 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9563 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9565 if einfo.has_section(constants.INISECT_HYP):
9566 # use the export parameters but do not override the ones
9567 # specified by the user
9568 for name, value in einfo.items(constants.INISECT_HYP):
9569 if name not in self.op.hvparams:
9570 self.op.hvparams[name] = value
9572 if einfo.has_section(constants.INISECT_BEP):
9573 # use the parameters, without overriding
9574 for name, value in einfo.items(constants.INISECT_BEP):
9575 if name not in self.op.beparams:
9576 self.op.beparams[name] = value
9577 # Compatibility for the old "memory" be param
9578 if name == constants.BE_MEMORY:
9579 if constants.BE_MAXMEM not in self.op.beparams:
9580 self.op.beparams[constants.BE_MAXMEM] = value
9581 if constants.BE_MINMEM not in self.op.beparams:
9582 self.op.beparams[constants.BE_MINMEM] = value
9584 # try to read the parameters old style, from the main section
9585 for name in constants.BES_PARAMETERS:
9586 if (name not in self.op.beparams and
9587 einfo.has_option(constants.INISECT_INS, name)):
9588 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9590 if einfo.has_section(constants.INISECT_OSP):
9591 # use the parameters, without overriding
9592 for name, value in einfo.items(constants.INISECT_OSP):
9593 if name not in self.op.osparams:
9594 self.op.osparams[name] = value
9596 def _RevertToDefaults(self, cluster):
9597 """Revert the instance parameters to the default values.
9601 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9602 for name in self.op.hvparams.keys():
9603 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9604 del self.op.hvparams[name]
9606 be_defs = cluster.SimpleFillBE({})
9607 for name in self.op.beparams.keys():
9608 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9609 del self.op.beparams[name]
9611 nic_defs = cluster.SimpleFillNIC({})
9612 for nic in self.op.nics:
9613 for name in constants.NICS_PARAMETERS:
9614 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9615 del nic[name]
9617 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9618 for name in self.op.osparams.keys():
9619 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9620 del self.op.osparams[name]
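# The effect of identify_defaults: parameter values that merely restate the
# current cluster defaults are dropped from the opcode, so the new instance
# keeps tracking future default changes instead of pinning today's values.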
9622 def _CalculateFileStorageDir(self):
9623 """Calculate final instance file storage dir.
9626 # file storage dir calculation/check
9627 self.instance_file_storage_dir = None
9628 if self.op.disk_template in constants.DTS_FILEBASED:
9629 # build the full file storage dir path
9630 joinargs = []
9632 if self.op.disk_template == constants.DT_SHARED_FILE:
9633 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9634 else:
9635 get_fsd_fn = self.cfg.GetFileStorageDir
9637 cfg_storagedir = get_fsd_fn()
9638 if not cfg_storagedir:
9639 raise errors.OpPrereqError("Cluster file storage dir not defined")
9640 joinargs.append(cfg_storagedir)
9642 if self.op.file_storage_dir is not None:
9643 joinargs.append(self.op.file_storage_dir)
9645 joinargs.append(self.op.instance_name)
9647 # pylint: disable=W0142
9648 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
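# As a sketch (directories hypothetical): with a cluster file storage dir of
# "/srv/ganeti/file-storage", an opcode file_storage_dir of "mydir" and
# instance "inst1", the resulting path is
# "/srv/ganeti/file-storage/mydir/inst1".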
9650 def CheckPrereq(self): # pylint: disable=R0914
9651 """Check prerequisites.
9654 self._CalculateFileStorageDir()
9656 if self.op.mode == constants.INSTANCE_IMPORT:
9657 export_info = self._ReadExportInfo()
9658 self._ReadExportParams(export_info)
9660 if (not self.cfg.GetVGName() and
9661 self.op.disk_template not in constants.DTS_NOT_LVM):
9662 raise errors.OpPrereqError("Cluster does not support lvm-based"
9663 " instances", errors.ECODE_STATE)
9665 if (self.op.hypervisor is None or
9666 self.op.hypervisor == constants.VALUE_AUTO):
9667 self.op.hypervisor = self.cfg.GetHypervisorType()
9669 cluster = self.cfg.GetClusterInfo()
9670 enabled_hvs = cluster.enabled_hypervisors
9671 if self.op.hypervisor not in enabled_hvs:
9672 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9673 " cluster (%s)" % (self.op.hypervisor,
9674 ",".join(enabled_hvs)),
9677 # Check tag validity
9678 for tag in self.op.tags:
9679 objects.TaggableObject.ValidateTag(tag)
9681 # check hypervisor parameter syntax (locally)
9682 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9683 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9684 self.op.hvparams)
9685 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9686 hv_type.CheckParameterSyntax(filled_hvp)
9687 self.hv_full = filled_hvp
9688 # check that we don't specify global parameters on an instance
9689 _CheckGlobalHvParams(self.op.hvparams)
9691 # fill and remember the beparams dict
9692 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9693 for param, value in self.op.beparams.iteritems():
9694 if value == constants.VALUE_AUTO:
9695 self.op.beparams[param] = default_beparams[param]
9696 objects.UpgradeBeParams(self.op.beparams)
9697 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9698 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9700 # build os parameters
9701 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9703 # now that hvp/bep are in final format, let's reset to defaults,
9704 # if told to do so
9705 if self.op.identify_defaults:
9706 self._RevertToDefaults(cluster)
9708 # NIC buildup
9709 self.nics = []
9710 for idx, nic in enumerate(self.op.nics):
9711 nic_mode_req = nic.get(constants.INIC_MODE, None)
9712 nic_mode = nic_mode_req
9713 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9714 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9716 # in routed mode, for the first nic, the default ip is 'auto'
9717 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9718 default_ip_mode = constants.VALUE_AUTO
9720 default_ip_mode = constants.VALUE_NONE
9722 # ip validity checks
9723 ip = nic.get(constants.INIC_IP, default_ip_mode)
9724 if ip is None or ip.lower() == constants.VALUE_NONE:
9725 nic_ip = None
9726 elif ip.lower() == constants.VALUE_AUTO:
9727 if not self.op.name_check:
9728 raise errors.OpPrereqError("IP address set to auto but name checks"
9729 " have been skipped",
9731 nic_ip = self.hostname1.ip
9733 if not netutils.IPAddress.IsValid(ip):
9734 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9735 errors.ECODE_INVAL)
9736 nic_ip = ip
9738 # TODO: check the ip address for uniqueness
9739 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9740 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9741 errors.ECODE_INVAL)
9743 # MAC address verification
9744 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9745 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9746 mac = utils.NormalizeAndValidateMac(mac)
9748 try:
9749 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9750 except errors.ReservationError:
9751 raise errors.OpPrereqError("MAC address %s already in use"
9752 " in cluster" % mac,
9753 errors.ECODE_NOTUNIQUE)
9755 # Build nic parameters
9756 link = nic.get(constants.INIC_LINK, None)
9757 if link == constants.VALUE_AUTO:
9758 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9759 nicparams = {}
9760 if nic_mode_req:
9761 nicparams[constants.NIC_MODE] = nic_mode
9762 if link:
9763 nicparams[constants.NIC_LINK] = link
9765 check_params = cluster.SimpleFillNIC(nicparams)
9766 objects.NIC.CheckParameterSyntax(check_params)
9767 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
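# At this point every NIC carries its final mode/link parameters (only the
# deviations from the cluster defaults are stored) and a validated IP; MACs
# may still hold the 'auto'/'generate' placeholders, which are resolved to
# real addresses further down so that hooks and the allocator see the final
# values.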
9769 # disk checks/pre-build
9770 default_vg = self.cfg.GetVGName()
9771 self.disks = []
9772 for disk in self.op.disks:
9773 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9774 if mode not in constants.DISK_ACCESS_SET:
9775 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9776 mode, errors.ECODE_INVAL)
9777 size = disk.get(constants.IDISK_SIZE, None)
9778 if size is None:
9779 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9780 try:
9781 size = int(size)
9782 except (TypeError, ValueError):
9783 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9784 errors.ECODE_INVAL)
9786 data_vg = disk.get(constants.IDISK_VG, default_vg)
9787 new_disk = {
9788 constants.IDISK_SIZE: size,
9789 constants.IDISK_MODE: mode,
9790 constants.IDISK_VG: data_vg,
9791 }
9792 if constants.IDISK_METAVG in disk:
9793 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9794 if constants.IDISK_ADOPT in disk:
9795 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9796 self.disks.append(new_disk)
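# A fully built entry in self.disks is a plain dict; as a sketch (size
# hypothetical):
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# plus the optional IDISK_METAVG/IDISK_ADOPT keys copied through verbatim.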
9798 if self.op.mode == constants.INSTANCE_IMPORT:
9799 disk_images = []
9800 for idx in range(len(self.disks)):
9801 option = "disk%d_dump" % idx
9802 if export_info.has_option(constants.INISECT_INS, option):
9803 # FIXME: are the old os-es, disk sizes, etc. useful?
9804 export_name = export_info.get(constants.INISECT_INS, option)
9805 image = utils.PathJoin(self.op.src_path, export_name)
9806 disk_images.append(image)
9807 else:
9808 disk_images.append(False)
9810 self.src_images = disk_images
9812 old_name = export_info.get(constants.INISECT_INS, "name")
9813 if self.op.instance_name == old_name:
9814 for idx, nic in enumerate(self.nics):
9815 if nic.mac == constants.VALUE_AUTO:
9816 nic_mac_ini = "nic%d_mac" % idx
9817 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9819 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9821 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9822 if self.op.ip_check:
9823 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9824 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9825 (self.check_ip, self.op.instance_name),
9826 errors.ECODE_NOTUNIQUE)
9828 #### mac address generation
9829 # By generating here the mac address both the allocator and the hooks get
9830 # the real final mac address rather than the 'auto' or 'generate' value.
9831 # There is a race condition between the generation and the instance object
9832 # creation, which means that we know the mac is valid now, but we're not
9833 # sure it will be when we actually add the instance. If things go bad
9834 # adding the instance will abort because of a duplicate mac, and the
9835 # creation job will fail.
9836 for nic in self.nics:
9837 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9838 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9840 #### allocator run
9842 if self.op.iallocator is not None:
9843 self._RunAllocator()
9845 # Release all unneeded node locks
9846 _ReleaseLocks(self, locking.LEVEL_NODE,
9847 keep=filter(None, [self.op.pnode, self.op.snode,
9848 self.op.src_node]))
9849 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9850 keep=filter(None, [self.op.pnode, self.op.snode,
9851 self.op.src_node]))
9853 #### node related checks
9855 # check primary node
9856 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9857 assert self.pnode is not None, \
9858 "Cannot retrieve locked node %s" % self.op.pnode
9859 if pnode.offline:
9860 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9861 pnode.name, errors.ECODE_STATE)
9862 if pnode.drained:
9863 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9864 pnode.name, errors.ECODE_STATE)
9865 if not pnode.vm_capable:
9866 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9867 " '%s'" % pnode.name, errors.ECODE_STATE)
9869 self.secondaries = []
9871 # mirror node verification
9872 if self.op.disk_template in constants.DTS_INT_MIRROR:
9873 if self.op.snode == pnode.name:
9874 raise errors.OpPrereqError("The secondary node cannot be the"
9875 " primary node", errors.ECODE_INVAL)
9876 _CheckNodeOnline(self, self.op.snode)
9877 _CheckNodeNotDrained(self, self.op.snode)
9878 _CheckNodeVmCapable(self, self.op.snode)
9879 self.secondaries.append(self.op.snode)
9881 snode = self.cfg.GetNodeInfo(self.op.snode)
9882 if pnode.group != snode.group:
9883 self.LogWarning("The primary and secondary nodes are in two"
9884 " different node groups; the disk parameters"
9885 " from the first disk's node group will be"
9888 nodenames = [pnode.name] + self.secondaries
9890 # Verify instance specs
9891 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9892 ispec = {
9893 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9894 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9895 constants.ISPEC_DISK_COUNT: len(self.disks),
9896 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9897 constants.ISPEC_NIC_COUNT: len(self.nics),
9898 constants.ISPEC_SPINDLE_USE: spindle_use,
9899 }
9901 group_info = self.cfg.GetNodeGroup(pnode.group)
9902 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9903 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9904 if not self.op.ignore_ipolicy and res:
9905 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9906 " policy: %s") % (pnode.group,
9907 utils.CommaJoin(res)),
9908 errors.ECODE_INVAL)
9910 if not self.adopt_disks:
9911 if self.op.disk_template == constants.DT_RBD:
9912 # _CheckRADOSFreeSpace() is just a placeholder.
9913 # Any function that checks prerequisites can be placed here.
9914 # Check if there is enough space on the RADOS cluster.
9915 _CheckRADOSFreeSpace()
9917 # Check lv size requirements, if not adopting
9918 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9919 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9921 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9922 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9923 disk[constants.IDISK_ADOPT])
9924 for disk in self.disks])
9925 if len(all_lvs) != len(self.disks):
9926 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9927 errors.ECODE_INVAL)
9928 for lv_name in all_lvs:
9929 try:
9930 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9931 # to ReserveLV use the same syntax
9932 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9933 except errors.ReservationError:
9934 raise errors.OpPrereqError("LV named %s used by another instance" %
9935 lv_name, errors.ECODE_NOTUNIQUE)
9937 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9938 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9940 node_lvs = self.rpc.call_lv_list([pnode.name],
9941 vg_names.payload.keys())[pnode.name]
9942 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9943 node_lvs = node_lvs.payload
9945 delta = all_lvs.difference(node_lvs.keys())
9946 if delta:
9947 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9948 utils.CommaJoin(delta),
9949 errors.ECODE_INVAL)
9950 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9951 if online_lvs:
9952 raise errors.OpPrereqError("Online logical volumes found, cannot"
9953 " adopt: %s" % utils.CommaJoin(online_lvs),
9954 errors.ECODE_STATE)
9955 # update the size of disk based on what is found
9956 for dsk in self.disks:
9957 dsk[constants.IDISK_SIZE] = \
9958 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9959 dsk[constants.IDISK_ADOPT])][0]))
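# The lv_list RPC payload maps "vg/lv" names to attribute tuples; judging by
# the index-2 "online" test above and the index-0 size lookup here, each
# value is a (size-in-MiB, inactive-flag, online-flag) triple, hence the
# int(float(...)) conversion of the reported size.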
9961 elif self.op.disk_template == constants.DT_BLOCK:
9962 # Normalize and de-duplicate device paths
9963 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9964 for disk in self.disks])
9965 if len(all_disks) != len(self.disks):
9966 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9967 errors.ECODE_INVAL)
9968 baddisks = [d for d in all_disks
9969 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9970 if baddisks:
9971 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9972 " cannot be adopted" %
9973 (", ".join(baddisks),
9974 constants.ADOPTABLE_BLOCKDEV_ROOT),
9975 errors.ECODE_INVAL)
9977 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9978 list(all_disks))[pnode.name]
9979 node_disks.Raise("Cannot get block device information from node %s" %
9980 pnode.name)
9981 node_disks = node_disks.payload
9982 delta = all_disks.difference(node_disks.keys())
9983 if delta:
9984 raise errors.OpPrereqError("Missing block device(s): %s" %
9985 utils.CommaJoin(delta),
9986 errors.ECODE_INVAL)
9987 for dsk in self.disks:
9988 dsk[constants.IDISK_SIZE] = \
9989 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9991 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9993 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9994 # check OS parameters (remotely)
9995 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9997 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9999 # memory check on primary node
10000 # TODO(dynmem): use MINMEM for checking
10001 if self.op.start:
10002 _CheckNodeFreeMemory(self, self.pnode.name,
10003 "creating instance %s" % self.op.instance_name,
10004 self.be_full[constants.BE_MAXMEM],
10005 self.op.hypervisor)
10007 self.dry_run_result = list(nodenames)
10009 def Exec(self, feedback_fn):
10010 """Create and add the instance to the cluster.
10013 instance = self.op.instance_name
10014 pnode_name = self.pnode.name
10016 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10017 self.owned_locks(locking.LEVEL_NODE)), \
10018 "Node locks differ from node resource locks"
10020 ht_kind = self.op.hypervisor
10021 if ht_kind in constants.HTS_REQ_PORT:
10022 network_port = self.cfg.AllocatePort()
10024 network_port = None
10026 # This is ugly but we got a chicken-egg problem here
10027 # We can only take the group disk parameters, as the instance
10028 # has no disks yet (we are generating them right here).
10029 node = self.cfg.GetNodeInfo(pnode_name)
10030 nodegroup = self.cfg.GetNodeGroup(node.group)
10031 disks = _GenerateDiskTemplate(self,
10032 self.op.disk_template,
10033 instance, pnode_name,
10034 self.secondaries,
10035 self.disks,
10036 self.instance_file_storage_dir,
10037 self.op.file_driver,
10038 0,
10039 feedback_fn,
10040 self.cfg.GetGroupDiskParams(nodegroup))
10042 iobj = objects.Instance(name=instance, os=self.op.os_type,
10043 primary_node=pnode_name,
10044 nics=self.nics, disks=disks,
10045 disk_template=self.op.disk_template,
10046 admin_state=constants.ADMINST_DOWN,
10047 network_port=network_port,
10048 beparams=self.op.beparams,
10049 hvparams=self.op.hvparams,
10050 hypervisor=self.op.hypervisor,
10051 osparams=self.op.osparams,
10052 )
10054 if self.op.tags:
10055 for tag in self.op.tags:
10056 iobj.AddTag(tag)
10058 if self.adopt_disks:
10059 if self.op.disk_template == constants.DT_PLAIN:
10060 # rename LVs to the newly-generated names; we need to construct
10061 # 'fake' LV disks with the old data, plus the new unique_id
10062 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10063 rename_to = []
10064 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10065 rename_to.append(t_dsk.logical_id)
10066 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10067 self.cfg.SetDiskID(t_dsk, pnode_name)
10068 result = self.rpc.call_blockdev_rename(pnode_name,
10069 zip(tmp_disks, rename_to))
10070 result.Raise("Failed to rename adopted LVs")
10072 feedback_fn("* creating instance disks...")
10074 _CreateDisks(self, iobj)
10075 except errors.OpExecError:
10076 self.LogWarning("Device creation failed, reverting...")
10078 _RemoveDisks(self, iobj)
10080 self.cfg.ReleaseDRBDMinors(instance)
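# Note the ordering in the error path above: the half-created disks are
# removed first, and the finally clause returns the reserved DRBD minors to
# the pool before the original OpExecError is re-raised to the caller.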
10083 feedback_fn("adding instance %s to cluster config" % instance)
10085 self.cfg.AddInstance(iobj, self.proc.GetECId())
10087 # Declare that we don't want to remove the instance lock anymore, as we've
10088 # added the instance to the config
10089 del self.remove_locks[locking.LEVEL_INSTANCE]
10091 if self.op.mode == constants.INSTANCE_IMPORT:
10092 # Release unused nodes
10093 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10094 else:
10095 # Release all nodes
10096 _ReleaseLocks(self, locking.LEVEL_NODE)
10098 disk_abort = False
10099 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10100 feedback_fn("* wiping instance disks...")
10101 try:
10102 _WipeDisks(self, iobj)
10103 except errors.OpExecError, err:
10104 logging.exception("Wiping disks failed")
10105 self.LogWarning("Wiping instance disks failed (%s)", err)
10109 # Something is already wrong with the disks, don't do anything else
10111 elif self.op.wait_for_sync:
10112 disk_abort = not _WaitForSync(self, iobj)
10113 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10114 # make sure the disks are not degraded (still sync-ing is ok)
10115 feedback_fn("* checking mirrors status")
10116 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10117 else:
10118 disk_abort = False
10120 if disk_abort:
10121 _RemoveDisks(self, iobj)
10122 self.cfg.RemoveInstance(iobj.name)
10123 # Make sure the instance lock gets removed
10124 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10125 raise errors.OpExecError("There are some degraded disks for"
10128 # Release all node resource locks
10129 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10131 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10132 if self.op.mode == constants.INSTANCE_CREATE:
10133 if not self.op.no_install:
10134 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10135 not self.op.wait_for_sync)
10136 if pause_sync:
10137 feedback_fn("* pausing disk sync to install instance OS")
10138 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10139 (iobj.disks,
10140 iobj), True)
10141 for idx, success in enumerate(result.payload):
10142 if not success:
10143 logging.warn("pause-sync of instance %s for disk %d failed",
10144 instance, idx)
10146 feedback_fn("* running the instance OS create scripts...")
10147 # FIXME: pass debug option from opcode to backend
10148 os_add_result = \
10149 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10150 self.op.debug_level)
10151 if pause_sync:
10152 feedback_fn("* resuming disk sync")
10153 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10154 (iobj.disks,
10155 iobj), False)
10156 for idx, success in enumerate(result.payload):
10157 if not success:
10158 logging.warn("resume-sync of instance %s for disk %d failed",
10159 instance, idx)
10161 os_add_result.Raise("Could not add os for instance %s"
10162 " on node %s" % (instance, pnode_name))
10164 elif self.op.mode == constants.INSTANCE_IMPORT:
10165 feedback_fn("* running the instance OS import scripts...")
10169 for idx, image in enumerate(self.src_images):
10173 # FIXME: pass debug option from opcode to backend
10174 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10175 constants.IEIO_FILE, (image, ),
10176 constants.IEIO_SCRIPT,
10177 (iobj.disks[idx], idx),
10178 None)
10179 transfers.append(dt)
10181 import_result = \
10182 masterd.instance.TransferInstanceData(self, feedback_fn,
10183 self.op.src_node, pnode_name,
10184 self.pnode.secondary_ip,
10185 iobj, transfers)
10186 if not compat.all(import_result):
10187 self.LogWarning("Some disks for instance %s on node %s were not"
10188 " imported successfully" % (instance, pnode_name))
10190 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10191 feedback_fn("* preparing remote import...")
10192 # The source cluster will stop the instance before attempting to make a
10193 # connection. In some cases stopping an instance can take a long time,
10194 # hence the shutdown timeout is added to the connection timeout.
10195 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10196 self.op.source_shutdown_timeout)
10197 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10199 assert iobj.primary_node == self.pnode.name
10200 disk_results = \
10201 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10202 self.source_x509_ca,
10203 self._cds, timeouts)
10204 if not compat.all(disk_results):
10205 # TODO: Should the instance still be started, even if some disks
10206 # failed to import (valid for local imports, too)?
10207 self.LogWarning("Some disks for instance %s on node %s were not"
10208 " imported successfully" % (instance, pnode_name))
10210 # Run rename script on newly imported instance
10211 assert iobj.name == instance
10212 feedback_fn("Running rename script for %s" % instance)
10213 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10214 self.source_instance_name,
10215 self.op.debug_level)
10216 if result.fail_msg:
10217 self.LogWarning("Failed to run rename script for %s on node"
10218 " %s: %s" % (instance, pnode_name, result.fail_msg))
10220 else:
10221 # also checked in the prereq part
10222 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10223 % self.op.mode)
10225 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10227 if self.op.start:
10228 iobj.admin_state = constants.ADMINST_UP
10229 self.cfg.Update(iobj, feedback_fn)
10230 logging.info("Starting instance %s on node %s", instance, pnode_name)
10231 feedback_fn("* starting instance...")
10232 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10233 False)
10234 result.Raise("Could not start instance")
10236 return list(iobj.all_nodes)
10239 def _CheckRADOSFreeSpace():
10240 """Compute disk size requirements inside the RADOS cluster.
10243 # For the RADOS cluster we assume there is always enough space.
10247 class LUInstanceConsole(NoHooksLU):
10248 """Connect to an instance's console.
10250 This is somewhat special in that it returns the command line that
10251 you need to run on the master node in order to connect to the
10252 console.
10254 """
10255 REQ_BGL = False
10257 def ExpandNames(self):
10258 self.share_locks = _ShareAll()
10259 self._ExpandAndLockInstance()
10261 def CheckPrereq(self):
10262 """Check prerequisites.
10264 This checks that the instance is in the cluster.
10266 """
10267 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10268 assert self.instance is not None, \
10269 "Cannot retrieve locked instance %s" % self.op.instance_name
10270 _CheckNodeOnline(self, self.instance.primary_node)
10272 def Exec(self, feedback_fn):
10273 """Connect to the console of an instance
10276 instance = self.instance
10277 node = instance.primary_node
10279 node_insts = self.rpc.call_instance_list([node],
10280 [instance.hypervisor])[node]
10281 node_insts.Raise("Can't get node information from %s" % node)
10283 if instance.name not in node_insts.payload:
10284 if instance.admin_state == constants.ADMINST_UP:
10285 state = constants.INSTST_ERRORDOWN
10286 elif instance.admin_state == constants.ADMINST_DOWN:
10287 state = constants.INSTST_ADMINDOWN
10288 else:
10289 state = constants.INSTST_ADMINOFFLINE
10290 raise errors.OpExecError("Instance %s is not running (state %s)" %
10291 (instance.name, state))
10293 logging.debug("Connecting to console of %s on %s", instance.name, node)
10295 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10298 def _GetInstanceConsole(cluster, instance):
10299 """Returns console information for an instance.
10301 @type cluster: L{objects.Cluster}
10302 @type instance: L{objects.Instance}
10304 """
10306 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10307 # beparams and hvparams are passed separately, to avoid editing the
10308 # instance and then saving the defaults in the instance itself.
10309 hvparams = cluster.FillHV(instance)
10310 beparams = cluster.FillBE(instance)
10311 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10313 assert console.instance == instance.name
10314 assert console.Validate()
10316 return console.ToDict()
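# The returned dict is the serialized console object; as a rough sketch
# (exact fields depend on the console kind, values hypothetical) an
# SSH-based console could look like:
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root",
#    "command": ["xm", "console", "inst1.example.com"]}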
10319 class LUInstanceReplaceDisks(LogicalUnit):
10320 """Replace the disks of an instance.
10323 HPATH = "mirrors-replace"
10324 HTYPE = constants.HTYPE_INSTANCE
10325 REQ_BGL = False
10327 def CheckArguments(self):
10328 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10329 self.op.iallocator)
10331 def ExpandNames(self):
10332 self._ExpandAndLockInstance()
10334 assert locking.LEVEL_NODE not in self.needed_locks
10335 assert locking.LEVEL_NODE_RES not in self.needed_locks
10336 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10338 assert self.op.iallocator is None or self.op.remote_node is None, \
10339 "Conflicting options"
10341 if self.op.remote_node is not None:
10342 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10344 # Warning: do not remove the locking of the new secondary here
10345 # unless DRBD8.AddChildren is changed to work in parallel;
10346 # currently it doesn't since parallel invocations of
10347 # FindUnusedMinor will conflict
10348 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10349 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10350 else:
10351 self.needed_locks[locking.LEVEL_NODE] = []
10352 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10354 if self.op.iallocator is not None:
10355 # iallocator will select a new node in the same group
10356 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10358 self.needed_locks[locking.LEVEL_NODE_RES] = []
10360 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10361 self.op.iallocator, self.op.remote_node,
10362 self.op.disks, False, self.op.early_release,
10363 self.op.ignore_ipolicy)
10365 self.tasklets = [self.replacer]
10367 def DeclareLocks(self, level):
10368 if level == locking.LEVEL_NODEGROUP:
10369 assert self.op.remote_node is None
10370 assert self.op.iallocator is not None
10371 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10373 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10374 # Lock all groups used by instance optimistically; this requires going
10375 # via the node before it's locked, requiring verification later on
10376 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10377 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10379 elif level == locking.LEVEL_NODE:
10380 if self.op.iallocator is not None:
10381 assert self.op.remote_node is None
10382 assert not self.needed_locks[locking.LEVEL_NODE]
10384 # Lock member nodes of all locked groups
10385 self.needed_locks[locking.LEVEL_NODE] = [node_name
10386 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10387 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10388 else:
10389 self._LockInstancesNodes()
10390 elif level == locking.LEVEL_NODE_RES:
10391 # Reuse node locks
10392 self.needed_locks[locking.LEVEL_NODE_RES] = \
10393 self.needed_locks[locking.LEVEL_NODE]
10395 def BuildHooksEnv(self):
10396 """Build hooks env.
10398 This runs on the master, the primary and all the secondaries.
10400 """
10401 instance = self.replacer.instance
10402 env = {
10403 "MODE": self.op.mode,
10404 "NEW_SECONDARY": self.op.remote_node,
10405 "OLD_SECONDARY": instance.secondary_nodes[0],
10406 }
10407 env.update(_BuildInstanceHookEnvByObject(self, instance))
10408 return env
10410 def BuildHooksNodes(self):
10411 """Build hooks nodes.
10414 instance = self.replacer.instance
10416 self.cfg.GetMasterNode(),
10417 instance.primary_node,
10419 if self.op.remote_node is not None:
10420 nl.append(self.op.remote_node)
10423 def CheckPrereq(self):
10424 """Check prerequisites.
10427 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10428 self.op.iallocator is None)
10430 # Verify if node group locks are still correct
10431 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10432 if owned_groups:
10433 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10435 return LogicalUnit.CheckPrereq(self)
10438 class TLReplaceDisks(Tasklet):
10439 """Replaces disks for an instance.
10441 Note: Locking is not within the scope of this class.
10443 """
10444 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10445 disks, delay_iallocator, early_release, ignore_ipolicy):
10446 """Initializes this class.
10449 Tasklet.__init__(self, lu)
10451 # Parameters
10452 self.instance_name = instance_name
10453 self.mode = mode
10454 self.iallocator_name = iallocator_name
10455 self.remote_node = remote_node
10456 self.disks = disks
10457 self.delay_iallocator = delay_iallocator
10458 self.early_release = early_release
10459 self.ignore_ipolicy = ignore_ipolicy
10461 # Runtime data
10462 self.instance = None
10463 self.new_node = None
10464 self.target_node = None
10465 self.other_node = None
10466 self.remote_node_info = None
10467 self.node_secondary_ip = None
10469 @staticmethod
10470 def CheckArguments(mode, remote_node, iallocator):
10471 """Helper function for users of this class.
10473 """
10474 # check for valid parameter combination
10475 if mode == constants.REPLACE_DISK_CHG:
10476 if remote_node is None and iallocator is None:
10477 raise errors.OpPrereqError("When changing the secondary either an"
10478 " iallocator script must be used or the"
10479 " new node given", errors.ECODE_INVAL)
10481 if remote_node is not None and iallocator is not None:
10482 raise errors.OpPrereqError("Give either the iallocator or the new"
10483 " secondary, not both", errors.ECODE_INVAL)
10485 elif remote_node is not None or iallocator is not None:
10486 # Not replacing the secondary
10487 raise errors.OpPrereqError("The iallocator and new node options can"
10488 " only be used when changing the"
10489 " secondary node", errors.ECODE_INVAL)
10491 @staticmethod
10492 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10493 """Compute a new secondary node using an IAllocator.
10495 """
10496 ial = IAllocator(lu.cfg, lu.rpc,
10497 mode=constants.IALLOCATOR_MODE_RELOC,
10498 name=instance_name,
10499 relocate_from=list(relocate_from))
10501 ial.Run(iallocator_name)
10503 if not ial.success:
10504 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10505 " %s" % (iallocator_name, ial.info),
10506 errors.ECODE_NORES)
10508 if len(ial.result) != ial.required_nodes:
10509 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10510 " of nodes (%s), required %s" %
10511 (iallocator_name,
10512 len(ial.result), ial.required_nodes),
10513 errors.ECODE_FAULT)
10515 remote_node_name = ial.result[0]
10517 lu.LogInfo("Selected new secondary for instance '%s': %s",
10518 instance_name, remote_node_name)
10520 return remote_node_name
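# In relocate mode the allocator is asked for a replacement for exactly the
# node(s) in relocate_from, so the single name returned here becomes the new
# DRBD secondary for the instance.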
10522 def _FindFaultyDisks(self, node_name):
10523 """Wrapper for L{_FindFaultyInstanceDisks}.
10526 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10529 def _CheckDisksActivated(self, instance):
10530 """Checks if the instance disks are activated.
10532 @param instance: The instance to check disks
10533 @return: True if they are activated, False otherwise
10535 """
10536 nodes = instance.all_nodes
10538 for idx, dev in enumerate(instance.disks):
10539 for node in nodes:
10540 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10541 self.cfg.SetDiskID(dev, node)
10543 result = _BlockdevFind(self, node, dev, instance)
10545 if result.offline:
10546 continue
10547 elif result.fail_msg or not result.payload:
10548 return False
10550 return True
10552 def CheckPrereq(self):
10553 """Check prerequisites.
10555 This checks that the instance is in the cluster.
10557 """
10558 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10559 assert instance is not None, \
10560 "Cannot retrieve locked instance %s" % self.instance_name
10562 if instance.disk_template != constants.DT_DRBD8:
10563 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10564 " instances", errors.ECODE_INVAL)
10566 if len(instance.secondary_nodes) != 1:
10567 raise errors.OpPrereqError("The instance has a strange layout,"
10568 " expected one secondary but found %d" %
10569 len(instance.secondary_nodes),
10570 errors.ECODE_FAULT)
10572 if not self.delay_iallocator:
10573 self._CheckPrereq2()
10575 def _CheckPrereq2(self):
10576 """Check prerequisites, second part.
10578 This function should always be part of CheckPrereq. It was separated and is
10579 now called from Exec because during node evacuation iallocator was only
10580 called with an unmodified cluster model, not taking planned changes into
10581 account.
10583 """
10584 instance = self.instance
10585 secondary_node = instance.secondary_nodes[0]
10587 if self.iallocator_name is None:
10588 remote_node = self.remote_node
10589 else:
10590 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10591 instance.name, instance.secondary_nodes)
10593 if remote_node is None:
10594 self.remote_node_info = None
10595 else:
10596 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10597 "Remote node '%s' is not locked" % remote_node
10599 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10600 assert self.remote_node_info is not None, \
10601 "Cannot retrieve locked node %s" % remote_node
10603 if remote_node == self.instance.primary_node:
10604 raise errors.OpPrereqError("The specified node is the primary node of"
10605 " the instance", errors.ECODE_INVAL)
10607 if remote_node == secondary_node:
10608 raise errors.OpPrereqError("The specified node is already the"
10609 " secondary node of the instance",
10610 errors.ECODE_INVAL)
10612 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10613 constants.REPLACE_DISK_CHG):
10614 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10615 errors.ECODE_INVAL)
10617 if self.mode == constants.REPLACE_DISK_AUTO:
10618 if not self._CheckDisksActivated(instance):
10619 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10620 " first" % self.instance_name,
10621 errors.ECODE_STATE)
10622 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10623 faulty_secondary = self._FindFaultyDisks(secondary_node)
10625 if faulty_primary and faulty_secondary:
10626 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10627 " one node and can not be repaired"
10628 " automatically" % self.instance_name,
10629 errors.ECODE_STATE)
10631 if faulty_primary:
10632 self.disks = faulty_primary
10633 self.target_node = instance.primary_node
10634 self.other_node = secondary_node
10635 check_nodes = [self.target_node, self.other_node]
10636 elif faulty_secondary:
10637 self.disks = faulty_secondary
10638 self.target_node = secondary_node
10639 self.other_node = instance.primary_node
10640 check_nodes = [self.target_node, self.other_node]
10641 else:
10642 self.disks = []
10643 check_nodes = []
10645 else:
10646 # Non-automatic modes
10647 if self.mode == constants.REPLACE_DISK_PRI:
10648 self.target_node = instance.primary_node
10649 self.other_node = secondary_node
10650 check_nodes = [self.target_node, self.other_node]
10652 elif self.mode == constants.REPLACE_DISK_SEC:
10653 self.target_node = secondary_node
10654 self.other_node = instance.primary_node
10655 check_nodes = [self.target_node, self.other_node]
10657 elif self.mode == constants.REPLACE_DISK_CHG:
10658 self.new_node = remote_node
10659 self.other_node = instance.primary_node
10660 self.target_node = secondary_node
10661 check_nodes = [self.new_node, self.other_node]
10663 _CheckNodeNotDrained(self.lu, remote_node)
10664 _CheckNodeVmCapable(self.lu, remote_node)
10666 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10667 assert old_node_info is not None
10668 if old_node_info.offline and not self.early_release:
10669 # doesn't make sense to delay the release
10670 self.early_release = True
10671 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10672 " early-release mode", secondary_node)
10675 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10678 # If not specified all disks should be replaced
10680 self.disks = range(len(self.instance.disks))
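# An empty disk list therefore means "replace everything": the indices are
# expanded to cover every disk of the instance.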
10682 # TODO: This is ugly, but right now we can't distinguish between internal
10683 # submitted opcode and external one. We should fix that.
10684 if self.remote_node_info:
10685 # We change the node, lets verify it still meets instance policy
10686 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10687 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10688 new_group_info)
10689 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10690 ignore=self.ignore_ipolicy)
10692 for node in check_nodes:
10693 _CheckNodeOnline(self.lu, node)
10695 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10696 self.other_node,
10697 self.target_node]
10698 if node_name is not None)
10700 # Release unneeded node and node resource locks
10701 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10702 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10704 # Release any owned node group
10705 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10706 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10708 # Check whether disks are valid
10709 for disk_idx in self.disks:
10710 instance.FindDisk(disk_idx)
10712 # Get secondary node IP addresses
10713 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10714 in self.cfg.GetMultiNodeInfo(touched_nodes))
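# node_secondary_ip maps every node still involved in the operation to its
# secondary (replication network) IP, which is what the DRBD devices are
# (re)configured against later on.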
10716 def Exec(self, feedback_fn):
10717 """Execute disk replacement.
10719 This dispatches the disk replacement to the appropriate handler.
10722 if self.delay_iallocator:
10723 self._CheckPrereq2()
10725 if __debug__:
10726 # Verify owned locks before starting operation
10727 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10728 assert set(owned_nodes) == set(self.node_secondary_ip), \
10729 ("Incorrect node locks, owning %s, expected %s" %
10730 (owned_nodes, self.node_secondary_ip.keys()))
10731 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10732 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10734 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10735 assert list(owned_instances) == [self.instance_name], \
10736 "Instance '%s' not locked" % self.instance_name
10738 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10739 "Should not own any node group lock at this point"
10742 feedback_fn("No disks need replacement")
10745 feedback_fn("Replacing disk(s) %s for %s" %
10746 (utils.CommaJoin(self.disks), self.instance.name))
10748 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10750 # Activate the instance disks if we're replacing them on a down instance
10751 if activate_disks:
10752 _StartInstanceDisks(self.lu, self.instance, True)
10754 try:
10755 # Should we replace the secondary node?
10756 if self.new_node is not None:
10757 fn = self._ExecDrbd8Secondary
10758 else:
10759 fn = self._ExecDrbd8DiskOnly
10761 result = fn(feedback_fn)
10762 finally:
10763 # Deactivate the instance disks if we're replacing them on a
10764 # down instance
10765 if activate_disks:
10766 _SafeShutdownInstanceDisks(self.lu, self.instance)
10768 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10770 if __debug__:
10771 # Verify owned locks
10772 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10773 nodes = frozenset(self.node_secondary_ip)
10774 assert ((self.early_release and not owned_nodes) or
10775 (not self.early_release and not (set(owned_nodes) - nodes))), \
10776 ("Not owning the correct locks, early_release=%s, owned=%r,"
10777 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10781 def _CheckVolumeGroup(self, nodes):
10782 self.lu.LogInfo("Checking volume groups")
10784 vgname = self.cfg.GetVGName()
10786 # Make sure volume group exists on all involved nodes
10787 results = self.rpc.call_vg_list(nodes)
10789 raise errors.OpExecError("Can't list volume groups on the nodes")
10792 res = results[node]
10793 res.Raise("Error checking node %s" % node)
10794 if vgname not in res.payload:
10795 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10798 def _CheckDisksExistence(self, nodes):
10799 # Check disk existence
10800 for idx, dev in enumerate(self.instance.disks):
10801 if idx not in self.disks:
10802 continue
10804 for node in nodes:
10805 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10806 self.cfg.SetDiskID(dev, node)
10808 result = _BlockdevFind(self, node, dev, self.instance)
10810 msg = result.fail_msg
10811 if msg or not result.payload:
10812 if not msg:
10813 msg = "disk not found"
10814 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10815 (idx, node, msg))
10817 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10818 for idx, dev in enumerate(self.instance.disks):
10819 if idx not in self.disks:
10820 continue
10822 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10823 (idx, node_name))
10825 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10826 on_primary, ldisk=ldisk):
10827 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10828 " replace disks for instance %s" %
10829 (node_name, self.instance.name))
10831 def _CreateNewStorage(self, node_name):
10832 """Create new storage on the primary or secondary node.
10834 This is only used for same-node replaces, not for changing the
10835 secondary node, hence we don't want to modify the existing disk.
10837 """
10838 iv_names = {}
10840 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10841 for idx, dev in enumerate(disks):
10842 if idx not in self.disks:
10843 continue
10845 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10847 self.cfg.SetDiskID(dev, node_name)
10849 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10850 names = _GenerateUniqueNames(self.lu, lv_names)
10852 (data_disk, meta_disk) = dev.children
10853 vg_data = data_disk.logical_id[0]
10854 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10855 logical_id=(vg_data, names[0]),
10856 params=data_disk.params)
10857 vg_meta = meta_disk.logical_id[0]
10858 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10859 logical_id=(vg_meta, names[1]),
10860 params=meta_disk.params)
10862 new_lvs = [lv_data, lv_meta]
10863 old_lvs = [child.Copy() for child in dev.children]
10864 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10866 # we pass force_create=True to force the LVM creation
10867 for new_lv in new_lvs:
10868 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10869 _GetInstanceInfoText(self.instance), False)
10871 return iv_names
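# iv_names maps a DRBD device's iv_name (e.g. "disk/0") to a
# (drbd_dev, old_lvs, new_lvs) triple; this drives the later
# detach/rename/attach sequence and the final removal of the old storage.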
10873 def _CheckDevices(self, node_name, iv_names):
10874 for name, (dev, _, _) in iv_names.iteritems():
10875 self.cfg.SetDiskID(dev, node_name)
10877 result = _BlockdevFind(self, node_name, dev, self.instance)
10879 msg = result.fail_msg
10880 if msg or not result.payload:
10881 if not msg:
10882 msg = "disk not found"
10883 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10884 (name, msg))
10886 if result.payload.is_degraded:
10887 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10889 def _RemoveOldStorage(self, node_name, iv_names):
10890 for name, (_, old_lvs, _) in iv_names.iteritems():
10891 self.lu.LogInfo("Remove logical volumes for %s" % name)
10894 self.cfg.SetDiskID(lv, node_name)
10896 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10898 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10899 hint="remove unused LVs manually")
10901 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10902 """Replace a disk on the primary or secondary for DRBD 8.
10904 The algorithm for replace is quite complicated:
10906 1. for each disk to be replaced:
10908 1. create new LVs on the target node with unique names
10909 1. detach old LVs from the drbd device
10910 1. rename old LVs to name_replaced.<time_t>
10911 1. rename new LVs to old LVs
10912 1. attach the new LVs (with the old names now) to the drbd device
10914 1. wait for sync across all devices
10916 1. for each modified disk:
10918 1. remove old LVs (which have the name name_replaces.<time_t>)
10920 Failures are not very well handled.
10922 """
10923 steps_total = 6
10925 # Step: check device activation
10926 self.lu.LogStep(1, steps_total, "Check device existence")
10927 self._CheckDisksExistence([self.other_node, self.target_node])
10928 self._CheckVolumeGroup([self.target_node, self.other_node])
10930 # Step: check other node consistency
10931 self.lu.LogStep(2, steps_total, "Check peer consistency")
10932 self._CheckDisksConsistency(self.other_node,
10933 self.other_node == self.instance.primary_node,
10934 False)
10936 # Step: create new storage
10937 self.lu.LogStep(3, steps_total, "Allocate new storage")
10938 iv_names = self._CreateNewStorage(self.target_node)
10940 # Step: for each lv, detach+rename*2+attach
10941 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10942 for dev, old_lvs, new_lvs in iv_names.itervalues():
10943 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10945 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10946 old_lvs)
10947 result.Raise("Can't detach drbd from local storage on node"
10948 " %s for device %s" % (self.target_node, dev.iv_name))
10950 #cfg.Update(instance)
10952 # ok, we created the new LVs, so now we know we have the needed
10953 # storage; as such, we proceed on the target node to rename
10954 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10955 # using the assumption that logical_id == physical_id (which in
10956 # turn is the unique_id on that node)
10958 # FIXME(iustin): use a better name for the replaced LVs
10959 temp_suffix = int(time.time())
10960 ren_fn = lambda d, suff: (d.physical_id[0],
10961 d.physical_id[1] + "_replaced-%s" % suff)
10963 # Build the rename list based on what LVs exist on the node
10964 rename_old_to_new = []
10965 for to_ren in old_lvs:
10966 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10967 if not result.fail_msg and result.payload:
10968 # device exists
10969 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10971 self.lu.LogInfo("Renaming the old LVs on the target node")
10972 result = self.rpc.call_blockdev_rename(self.target_node,
10973 rename_old_to_new)
10974 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10976 # Now we rename the new LVs to the old LVs
10977 self.lu.LogInfo("Renaming the new LVs on the target node")
10978 rename_new_to_old = [(new, old.physical_id)
10979 for old, new in zip(old_lvs, new_lvs)]
10980 result = self.rpc.call_blockdev_rename(self.target_node,
10981 rename_new_to_old)
10982 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10984 # Intermediate steps of in memory modifications
10985 for old, new in zip(old_lvs, new_lvs):
10986 new.logical_id = old.logical_id
10987 self.cfg.SetDiskID(new, self.target_node)
10989 # We need to modify old_lvs so that removal later removes the
10990 # right LVs, not the newly added ones; note that old_lvs is a
10991 # copy here
10992 for disk in old_lvs:
10993 disk.logical_id = ren_fn(disk, temp_suffix)
10994 self.cfg.SetDiskID(disk, self.target_node)
10996 # Now that the new lvs have the old name, we can add them to the device
10997 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10998 result = self.rpc.call_blockdev_addchildren(self.target_node,
10999 (dev, self.instance), new_lvs)
11000 msg = result.fail_msg
11001 if msg:
11002 for new_lv in new_lvs:
11003 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11004 new_lv).fail_msg
11005 if msg2:
11006 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11007 hint=("cleanup manually the unused logical"
11008 "volumes"))
11009 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11011 cstep = itertools.count(5)
11013 if self.early_release:
11014 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11015 self._RemoveOldStorage(self.target_node, iv_names)
11016 # TODO: Check if releasing locks early still makes sense
11017 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11019 # Release all resource locks except those used by the instance
11020 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11021 keep=self.node_secondary_ip.keys())
11023 # Release all node locks while waiting for sync
11024 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11026 # TODO: Can the instance lock be downgraded here? Take the optional disk
11027 # shutdown in the caller into consideration.
11030 # This can fail as the old devices are degraded and _WaitForSync
11031 # does a combined result over all disks, so we don't check its return value
11032 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11033 _WaitForSync(self.lu, self.instance)
11035 # Check all devices manually
11036 self._CheckDevices(self.instance.primary_node, iv_names)
11038 # Step: remove old storage
11039 if not self.early_release:
11040 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11041 self._RemoveOldStorage(self.target_node, iv_names)
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
                             True, _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
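
      # Hedged example of the two IDs with hypothetical values: for a disk
      # whose logical_id is ("node1", "node2", 11000, 0, 1, "secret"), with
      # primary "node1", new node "node3" and new_minor 3, this computes
      #   new_alone_id = ("node1", "node3", None,  0, 3, "secret")
      #   new_net_id   = ("node1", "node3", 11000, 0, 3, "secret")
      # The None port keeps the new drbd standalone until the attach phase.
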
      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size,
                              params={})
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
                                             self.cfg)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
                              anno_new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync; this can fail as the old devices are degraded and
    # _WaitForSync does a combined result over all disks, so we don't check
    # its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)
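
  # The three evacuation modes map one-to-one onto the iallocator's
  # node-evacuate request modes. As a hedged usage sketch (hypothetical
  # command line, not verified against the CLI): evacuating only DRBD
  # secondaries off a node ("gnt-node evacuate --secondary-only node1")
  # would arrive here as NODE_EVAC_SEC and be passed to the iallocator
  # as IALLOCATOR_NEVAC_SEC.
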
  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation
      # mode per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
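  # Hedged example of the payload shape (hypothetical values, not taken from
  # a live iallocator run): a successful single-instance evacuation could be
  #   alloc_result = ([("inst1", "group1", ["node2"])],  # moved
  #                   [],                                # failed
  #                   [[serialized_opcode_dict]])        # jobs
  # where every entry in "jobs" is a list of serialized opcodes forming one
  # job to be submitted.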
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if self.op.absolute:
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than"
                                   " current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)
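
    # Worked example with hypothetical numbers: for a 10240 MiB disk, an
    # absolute request (op.absolute=True, op.amount=12288) yields
    # target=12288 and delta=2048, while a relative request
    # (op.absolute=False, op.amount=2048) yields delta=2048 and the same
    # target; in both cases a negative delta is rejected above.
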
    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE,
                                      constants.DT_RBD):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.delta))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False)
      result.Raise("Grow request failed to node %s" % node)

    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]
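
    # Hedged example with hypothetical node names: for a drbd disk whose
    # logical_id starts with ("nodeA", "nodeB", ...) on an instance whose
    # primary is "nodeA", the secondary used for status queries becomes
    # "nodeB", and vice versa; for non-drbd disks the caller-provided
    # snode is kept unchanged.
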
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification

  @rtype: list

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]


#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))


def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
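  # Hedged usage sketch with hypothetical parameters (not from a real
  # opcode): for a NIC container,
  #   mods = [(constants.DDM_ADD, -1, {"mac": "auto"}, private),
  #           (constants.DDM_REMOVE, 0, {}, private)]
  # would append a new NIC and then delete NIC 0, extending chgdesc with
  # human-readable (path, value) pairs such as ("nic/0", "remove").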
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)


def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )


class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
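
  # Hedged example of the upgrade performed above (hypothetical parameters):
  # the legacy two-tuple format [("add", {"size": 1024}), (0, {"mode": "rw"})]
  # becomes [("add", -1, {"size": 1024}), ("modify", 0, {"mode": "rw"})],
  # i.e. add/remove entries gain an implicit index of -1 (append) and bare
  # index entries are rewritten as explicit modify operations.
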
  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
      raise errors.OpPrereqError("Disk size change not possible, use"
                                 " grow-disk", errors.ECODE_INVAL)

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      if ip is None:
        pass
      elif ip.lower() == constants.VALUE_NONE:
        params[constants.INIC_IP] = None
      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      bridge = params.get("bridge", None)
      link = params.get(constants.INIC_LINK, None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and bridge.lower() == constants.VALUE_NONE:
        params["bridge"] = None
      elif link and link.lower() == constants.VALUE_NONE:
        params[constants.INIC_LINK] = None

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = \
      self._UpgradeDiskNicMods("disk", self.op.disks,
                               opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = \
      self._UpgradeDiskNicMods("NIC", self.op.nics,
                               opcodes.OpInstanceSetParams.TestNicModifications)

    # Check disk modifications
    self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                    self._VerifyDiskModification)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # TODO: Acquire group lock in shared mode (disk parameters)
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        nics.append((nic.ip, nic.mac, mode, link))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_params,
                              cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    if "bridge" in params:
      update_params_dict[constants.NIC_LINK] = params["bridge"]

    new_params = _GetUpdatedParams(old_params, update_params_dict)
    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

    private.params = new_params
    private.filled = new_filled_params

    return (None, None)
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        snode_group = self.cfg.GetNodeGroup(snode_info.group)
        ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"
                          " used")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)
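
    # Hedged example with hypothetical values: a cpu_mask of "1-2:3:0-1"
    # parses into three per-vCPU masks ([1, 2], [3], [0, 1]), so BE_VCPUS
    # must equal 3; a single-entry mask such as "0-3" applies to every vCPU
    # and skips the length check. The largest CPU named (here 3) also
    # requires at least 4 physical CPUs on every node of the instance.
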
    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      if self.op.runtime_mem > current_memory:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name,
                             self.op.runtime_mem - current_memory,
                             instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      return self._PrepareNicModification(params, private, None, {},
                                          cluster, pnode)

    def _PrepareNicMod(_, nic, params, private):
      return self._PrepareNicModification(params, private, nic.ip,
                                          nic.nicparams, cluster, pnode)

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, None)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    # Verify disk changes (operating on a copy)
    disks = instance.disks[:]
    ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)

    if self.op.offline is not None:
      if self.op.offline:
        msg = "can't change to offline"
      else:
        msg = "can't change to online"
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      self._new_nics = nics
    else:
      self._new_nics = None

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)
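
    # Hedged example with hypothetical values: a drbd8 logical_id of the
    # form ("node1", "node2", 11013, 0, 0, "secret") stores the network
    # port at index 2, so the loop above returns port 11013 to the
    # cluster's TCP/UDP port pool for reuse by future drbd disks.
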
12683 # update instance structure
12684 instance.disks = new_disks
12685 instance.disk_template = constants.DT_PLAIN
12686 self.cfg.Update(instance, feedback_fn)
12688 # Release locks in case removing disks takes a while
12689 _ReleaseLocks(self, locking.LEVEL_NODE)
12691 feedback_fn("Removing volumes on the secondary node...")
12692 for disk in old_disks:
12693 self.cfg.SetDiskID(disk, snode)
12694 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12696 self.LogWarning("Could not remove block device %s on node %s,"
12697 " continuing anyway: %s", disk.iv_name, snode, msg)
12699 feedback_fn("Removing unneeded volumes on the primary node...")
12700 for idx, disk in enumerate(old_disks):
12701 meta = disk.children[1]
12702 self.cfg.SetDiskID(meta, pnode)
12703 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12705 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12706 " continuing anyway: %s", idx, pnode, msg)
12708 def _CreateNewDisk(self, idx, params, _):
12709 """Creates a new disk.
12712 instance = self.instance
12715 if instance.disk_template in constants.DTS_FILEBASED:
12716 (file_driver, file_path) = instance.disks[0].logical_id
12717 file_path = os.path.dirname(file_path)
12718 else:
12719 file_driver = file_path = None
12721 disk = \
12722 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12723 instance.primary_node, instance.secondary_nodes,
12724 [params], file_path, file_driver, idx,
12725 self.Log, self.diskparams)[0]
12727 info = _GetInstanceInfoText(instance)
12729 logging.info("Creating volume %s for instance %s",
12730 disk.iv_name, instance.name)
12731 # Note: this needs to be kept in sync with _CreateDisks
12733 for node in instance.all_nodes:
12734 f_create = (node == instance.primary_node)
12735 try:
12736 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12737 except errors.OpExecError, err:
12738 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12739 disk.iv_name, disk, node, err)
12741 return (disk, [
12742 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12743 ])
12745 @staticmethod
12746 def _ModifyDisk(idx, disk, params, _):
12747 """Modifies a disk.
12749 """
12750 disk.mode = params[constants.IDISK_MODE]
12752 return [
12753 ("disk.mode/%d" % idx, disk.mode),
12754 ]
12756 def _RemoveDisk(self, idx, root, _):
12757 """Removes a disk.
12759 """
12760 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12761 self.cfg.SetDiskID(disk, node)
12762 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12763 if msg:
12764 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12765 " continuing anyway", idx, node, msg)
12767 # if this is a DRBD disk, return its port to the pool
12768 if root.dev_type in constants.LDS_DRBD:
12769 self.cfg.AddTcpUdpPort(root.logical_id[2])
12771 @staticmethod
12772 def _CreateNewNic(idx, params, private):
12773 """Creates data structure for a new network interface.
12775 """
12776 mac = params[constants.INIC_MAC]
12777 ip = params.get(constants.INIC_IP, None)
12778 nicparams = private.params
12780 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12781 ("nic.%d" % idx,
12782 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12783 (mac, ip, private.filled[constants.NIC_MODE],
12784 private.filled[constants.NIC_LINK])),
12785 ])
12787 @staticmethod
12788 def _ApplyNicMods(idx, nic, params, private):
12789 """Modifies a network interface.
12791 """
12792 changes = []
12794 for key in [constants.INIC_MAC, constants.INIC_IP]:
12795 if key in params:
12796 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12797 setattr(nic, key, params[key])
12799 if private.params:
12800 nic.nicparams = private.params
12802 for (key, val) in params.items():
12803 changes.append(("nic.%s/%d" % (key, idx), val))
12805 return changes
12807 def Exec(self, feedback_fn):
12808 """Modifies an instance.
12810 All parameters take effect only at the next restart of the instance.
12812 """
12813 # Process here the warnings from CheckPrereq, as we don't have a
12814 # feedback_fn there.
12815 # TODO: Replace with self.LogWarning
12816 for warn in self.warn:
12817 feedback_fn("WARNING: %s" % warn)
12819 assert ((self.op.disk_template is None) ^
12820 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12821 "Not owning any node resource locks"
12824 instance = self.instance
12827 if self.op.runtime_mem:
12828 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12830 self.op.runtime_mem)
12831 rpcres.Raise("Cannot modify instance runtime memory")
12832 result.append(("runtime_memory", self.op.runtime_mem))
12834 # Apply disk changes
12835 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12836 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12837 _UpdateIvNames(0, instance.disks)
12839 if self.op.disk_template:
12840 if __debug__:
12841 check_nodes = set(instance.all_nodes)
12842 if self.op.remote_node:
12843 check_nodes.add(self.op.remote_node)
12844 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12845 owned = self.owned_locks(level)
12846 assert not (check_nodes - owned), \
12847 ("Not owning the correct locks, owning %r, expected at least %r" %
12848 (owned, check_nodes))
12850 r_shut = _ShutdownInstanceDisks(self, instance)
12851 if not r_shut:
12852 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12853 " proceed with disk template conversion")
12854 mode = (instance.disk_template, self.op.disk_template)
12855 try:
12856 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12857 except:
12858 self.cfg.ReleaseDRBDMinors(instance.name)
12859 raise
12860 result.append(("disk_template", self.op.disk_template))
12862 assert instance.disk_template == self.op.disk_template, \
12863 ("Expected disk template '%s', found '%s'" %
12864 (self.op.disk_template, instance.disk_template))
12866 # Release node and resource locks if there are any (they might already have
12867 # been released during disk conversion)
12868 _ReleaseLocks(self, locking.LEVEL_NODE)
12869 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12871 # Apply NIC changes
12872 if self._new_nics is not None:
12873 instance.nics = self._new_nics
12874 result.extend(self._nic_chgdesc)
12876 # hvparams changes
12877 if self.op.hvparams:
12878 instance.hvparams = self.hv_inst
12879 for key, val in self.op.hvparams.iteritems():
12880 result.append(("hv/%s" % key, val))
12882 # beparams changes
12883 if self.op.beparams:
12884 instance.beparams = self.be_inst
12885 for key, val in self.op.beparams.iteritems():
12886 result.append(("be/%s" % key, val))
12888 # OS change
12889 if self.op.os_name:
12890 instance.os = self.op.os_name
12892 # osparams changes
12893 if self.op.osparams:
12894 instance.osparams = self.os_inst
12895 for key, val in self.op.osparams.iteritems():
12896 result.append(("os/%s" % key, val))
12898 if self.op.offline is None:
12899 # Ignore
12900 pass
12901 elif self.op.offline:
12902 # Mark instance as offline
12903 self.cfg.MarkInstanceOffline(instance.name)
12904 result.append(("admin_state", constants.ADMINST_OFFLINE))
12905 else:
12906 # Mark instance as online, but stopped
12907 self.cfg.MarkInstanceDown(instance.name)
12908 result.append(("admin_state", constants.ADMINST_DOWN))
12910 self.cfg.Update(instance, feedback_fn)
12912 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12913 self.owned_locks(locking.LEVEL_NODE)), \
12914 "All node locks should have been released by now"
12918 _DISK_CONVERSIONS = {
12919 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12920 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
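# Editor's note: _DISK_CONVERSIONS maps (old_template, new_template) pairs
# to conversion helpers; the lookup in Exec is simply, e.g.:
#
#   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]
#
# so supporting a new conversion only requires a new entry here.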
12924 class LUInstanceChangeGroup(LogicalUnit):
12925 HPATH = "instance-change-group"
12926 HTYPE = constants.HTYPE_INSTANCE
12927 REQ_BGL = False
12929 def ExpandNames(self):
12930 self.share_locks = _ShareAll()
12931 self.needed_locks = {
12932 locking.LEVEL_NODEGROUP: [],
12933 locking.LEVEL_NODE: [],
12934 }
12936 self._ExpandAndLockInstance()
12938 if self.op.target_groups:
12939 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12940 self.op.target_groups)
12941 else:
12942 self.req_target_uuids = None
12944 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12946 def DeclareLocks(self, level):
12947 if level == locking.LEVEL_NODEGROUP:
12948 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12950 if self.req_target_uuids:
12951 lock_groups = set(self.req_target_uuids)
12952 else:
12953 # Lock all groups used by instance optimistically; this requires going
12954 # via the node before it's locked, requiring verification later on
12955 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12956 lock_groups.update(instance_groups)
12957 else:
12958 # No target groups, need to lock all of them
12959 lock_groups = locking.ALL_SET
12961 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12963 elif level == locking.LEVEL_NODE:
12964 if self.req_target_uuids:
12965 # Lock all nodes used by instances
12966 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12967 self._LockInstancesNodes()
12968 else:
12969 # Lock all nodes in all potential target groups
12970 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12971 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12972 member_nodes = [node_name
12973 for group in lock_groups
12974 for node_name in self.cfg.GetNodeGroup(group).members]
12975 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12976 else:
12977 # Lock all nodes as all groups are potential targets
12978 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12980 def CheckPrereq(self):
12981 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12982 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12983 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12985 assert (self.req_target_uuids is None or
12986 owned_groups.issuperset(self.req_target_uuids))
12987 assert owned_instances == set([self.op.instance_name])
12989 # Get instance information
12990 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12992 # Check if node groups for locked instance are still correct
12993 assert owned_nodes.issuperset(self.instance.all_nodes), \
12994 ("Instance %s's nodes changed while we kept the lock" %
12995 self.op.instance_name)
12997 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12998 owned_groups)
13000 if self.req_target_uuids:
13001 # User requested specific target groups
13002 self.target_uuids = frozenset(self.req_target_uuids)
13003 else:
13004 # All groups except those used by the instance are potential targets
13005 self.target_uuids = owned_groups - inst_groups
13007 conflicting_groups = self.target_uuids & inst_groups
13008 if conflicting_groups:
13009 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13010 " used by the instance '%s'" %
13011 (utils.CommaJoin(conflicting_groups),
13012 self.op.instance_name),
13013 errors.ECODE_INVAL)
13015 if not self.target_uuids:
13016 raise errors.OpPrereqError("There are no possible target groups",
13017 errors.ECODE_INVAL)
13019 def BuildHooksEnv(self):
13020 """Build hooks env.
13023 assert self.target_uuids
13026 "TARGET_GROUPS": " ".join(self.target_uuids),
13029 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13033 def BuildHooksNodes(self):
13034 """Build hooks nodes.
13037 mn = self.cfg.GetMasterNode()
13038 return ([mn], [mn])
13040 def Exec(self, feedback_fn):
13041 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13043 assert instances == [self.op.instance_name], "Instance not locked"
13045 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13046 instances=instances, target_groups=list(self.target_uuids))
13048 ial.Run(self.op.iallocator)
13050 if not ial.success:
13051 raise errors.OpPrereqError("Can't compute solution for changing group of"
13052 " instance '%s' using iallocator '%s': %s" %
13053 (self.op.instance_name, self.op.iallocator,
13054 ial.info),
13055 errors.ECODE_NORES)
13057 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13059 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13060 " instance '%s'", len(jobs), self.op.instance_name)
13062 return ResultWithJobs(jobs)
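# Editor's note: ResultWithJobs wraps a list of job definitions, each of
# which is itself a list of opcodes that will run as a single job; e.g.
# (illustrative) [[op1, op2], [op3]] submits two jobs, the first executing
# two opcodes in sequence.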
13065 class LUBackupQuery(NoHooksLU):
13066 """Query the exports list
13071 def CheckArguments(self):
13072 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13073 ["node", "export"], self.op.use_locking)
13075 def ExpandNames(self):
13076 self.expq.ExpandNames(self)
13078 def DeclareLocks(self, level):
13079 self.expq.DeclareLocks(self, level)
13081 def Exec(self, feedback_fn):
13082 result = {}
13084 for (node, expname) in self.expq.OldStyleQuery(self):
13085 if expname is None:
13086 result[node] = False
13087 else:
13088 result.setdefault(node, []).append(expname)
13090 return result
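# Editor's note (illustrative values): the old-style result maps node names
# to either a list of export names or False on per-node RPC failure, e.g.
#   {"node1.example.com": ["inst1.export"], "node2.example.com": False}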
13093 class _ExportQuery(_QueryBase):
13094 FIELDS = query.EXPORT_FIELDS
13096 #: The node name is not a unique key for this query
13097 SORT_FIELD = "node"
13099 def ExpandNames(self, lu):
13100 lu.needed_locks = {}
13102 # The following variables interact with _QueryBase._GetNames
13103 if self.names:
13104 self.wanted = _GetWantedNodes(lu, self.names)
13105 else:
13106 self.wanted = locking.ALL_SET
13108 self.do_locking = self.use_locking
13110 if self.do_locking:
13111 lu.share_locks = _ShareAll()
13112 lu.needed_locks = {
13113 locking.LEVEL_NODE: self.wanted,
13114 }
13116 def DeclareLocks(self, lu, level):
13117 pass
13119 def _GetQueryData(self, lu):
13120 """Computes the list of nodes and their attributes.
13122 """
13123 # Locking is not used
13125 assert not (compat.any(lu.glm.is_owned(level)
13126 for level in locking.LEVELS
13127 if level != locking.LEVEL_CLUSTER) or
13128 self.do_locking or self.use_locking)
13130 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13132 result = []
13134 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13135 if nres.fail_msg:
13136 result.append((node, None))
13137 else:
13138 result.extend((node, expname) for expname in nres.payload)
13140 return result
13143 class LUBackupPrepare(NoHooksLU):
13144 """Prepares an instance for an export and returns useful information.
13149 def ExpandNames(self):
13150 self._ExpandAndLockInstance()
13152 def CheckPrereq(self):
13153 """Check prerequisites.
13156 instance_name = self.op.instance_name
13158 self.instance = self.cfg.GetInstanceInfo(instance_name)
13159 assert self.instance is not None, \
13160 "Cannot retrieve locked instance %s" % self.op.instance_name
13161 _CheckNodeOnline(self, self.instance.primary_node)
13163 self._cds = _GetClusterDomainSecret()
13165 def Exec(self, feedback_fn):
13166 """Prepares an instance for an export.
13169 instance = self.instance
13171 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13172 salt = utils.GenerateSecret(8)
13174 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13175 result = self.rpc.call_x509_cert_create(instance.primary_node,
13176 constants.RIE_CERT_VALIDITY)
13177 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13179 (name, cert_pem) = result.payload
13181 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13182 cert_pem)
13184 return {
13185 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13186 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13187 salt),
13188 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13189 }
13191 return None
13194 class LUBackupExport(LogicalUnit):
13195 """Export an instance to an image in the cluster.
13198 HPATH = "instance-export"
13199 HTYPE = constants.HTYPE_INSTANCE
13202 def CheckArguments(self):
13203 """Check the arguments.
13206 self.x509_key_name = self.op.x509_key_name
13207 self.dest_x509_ca_pem = self.op.destination_x509_ca
13209 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13210 if not self.x509_key_name:
13211 raise errors.OpPrereqError("Missing X509 key name for encryption",
13212 errors.ECODE_INVAL)
13214 if not self.dest_x509_ca_pem:
13215 raise errors.OpPrereqError("Missing destination X509 CA",
13216 errors.ECODE_INVAL)
13218 def ExpandNames(self):
13219 self._ExpandAndLockInstance()
13221 # Lock all nodes for local exports
13222 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13223 # FIXME: lock only instance primary and destination node
13225 # Sad but true, for now we have to lock all nodes, as we don't know where
13226 # the previous export might be, and in this LU we search for it and
13227 # remove it from its current node. In the future we could fix this by:
13228 # - making a tasklet to search (share-lock all), then create the
13229 # new one, then one to remove, after
13230 # - removing the removal operation altogether
13231 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13233 def DeclareLocks(self, level):
13234 """Last minute lock declaration."""
13235 # All nodes are locked anyway, so nothing to do here.
13237 def BuildHooksEnv(self):
13238 """Build hooks env.
13240 This will run on the master, primary node and target node.
13242 """
13243 env = {
13244 "EXPORT_MODE": self.op.mode,
13245 "EXPORT_NODE": self.op.target_node,
13246 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13247 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13248 # TODO: Generic function for boolean env variables
13249 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13250 }
13252 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13254 return env
13256 def BuildHooksNodes(self):
13257 """Build hooks nodes.
13260 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13262 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13263 nl.append(self.op.target_node)
13267 def CheckPrereq(self):
13268 """Check prerequisites.
13270 This checks that the instance and node names are valid.
13272 """
13273 instance_name = self.op.instance_name
13275 self.instance = self.cfg.GetInstanceInfo(instance_name)
13276 assert self.instance is not None, \
13277 "Cannot retrieve locked instance %s" % self.op.instance_name
13278 _CheckNodeOnline(self, self.instance.primary_node)
13280 if (self.op.remove_instance and
13281 self.instance.admin_state == constants.ADMINST_UP and
13282 not self.op.shutdown):
13283 raise errors.OpPrereqError("Can not remove instance without shutting it"
13286 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13287 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13288 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13289 assert self.dst_node is not None
13291 _CheckNodeOnline(self, self.dst_node.name)
13292 _CheckNodeNotDrained(self, self.dst_node.name)
13294 self._cds = None
13295 self.dest_disk_info = None
13296 self.dest_x509_ca = None
13298 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13299 self.dst_node = None
13301 if len(self.op.target_node) != len(self.instance.disks):
13302 raise errors.OpPrereqError(("Received destination information for %s"
13303 " disks, but instance %s has %s disks") %
13304 (len(self.op.target_node), instance_name,
13305 len(self.instance.disks)),
13306 errors.ECODE_INVAL)
13308 cds = _GetClusterDomainSecret()
13310 # Check X509 key name
13311 try:
13312 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13313 except (TypeError, ValueError), err:
13314 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13316 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13317 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13318 errors.ECODE_INVAL)
13320 # Load and verify CA
13321 try:
13322 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13323 except OpenSSL.crypto.Error, err:
13324 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13325 (err, ), errors.ECODE_INVAL)
13327 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13328 if errcode is not None:
13329 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13330 (msg, ), errors.ECODE_INVAL)
13332 self.dest_x509_ca = cert
13334 # Verify target information
13335 disk_info = []
13336 for idx, disk_data in enumerate(self.op.target_node):
13337 try:
13338 (host, port, magic) = \
13339 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13340 except errors.GenericError, err:
13341 raise errors.OpPrereqError("Target info for disk %s: %s" %
13342 (idx, err), errors.ECODE_INVAL)
13344 disk_info.append((host, port, magic))
13346 assert len(disk_info) == len(self.op.target_node)
13347 self.dest_disk_info = disk_info
13349 else:
13350 raise errors.ProgrammerError("Unhandled export mode %r" %
13351 self.op.mode)
13353 # instance disk type verification
13354 # TODO: Implement export support for file-based disks
13355 for disk in self.instance.disks:
13356 if disk.dev_type == constants.LD_FILE:
13357 raise errors.OpPrereqError("Export not supported for instances with"
13358 " file-based disks", errors.ECODE_INVAL)
13360 def _CleanupExports(self, feedback_fn):
13361 """Removes exports of current instance from all other nodes.
13363 If an instance in a cluster with nodes A..D was exported to node C, its
13364 exports will be removed from the nodes A, B and D.
13366 """
13367 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13369 nodelist = self.cfg.GetNodeList()
13370 nodelist.remove(self.dst_node.name)
13372 # on one-node clusters nodelist will be empty after the removal
13373 # if we proceed the backup would be removed because OpBackupQuery
13374 # substitutes an empty list with the full cluster node list.
13375 iname = self.instance.name
13376 if nodelist:
13377 feedback_fn("Removing old exports for instance %s" % iname)
13378 exportlist = self.rpc.call_export_list(nodelist)
13379 for node in exportlist:
13380 if exportlist[node].fail_msg:
13381 continue
13382 if iname in exportlist[node].payload:
13383 msg = self.rpc.call_export_remove(node, iname).fail_msg
13384 if msg:
13385 self.LogWarning("Could not remove older export for instance %s"
13386 " on node %s: %s", iname, node, msg)
13388 def Exec(self, feedback_fn):
13389 """Export an instance to an image in the cluster.
13392 assert self.op.mode in constants.EXPORT_MODES
13394 instance = self.instance
13395 src_node = instance.primary_node
13397 if self.op.shutdown:
13398 # shutdown the instance, but not the disks
13399 feedback_fn("Shutting down instance %s" % instance.name)
13400 result = self.rpc.call_instance_shutdown(src_node, instance,
13401 self.op.shutdown_timeout)
13402 # TODO: Maybe ignore failures if ignore_remove_failures is set
13403 result.Raise("Could not shutdown instance %s on"
13404 " node %s" % (instance.name, src_node))
13406 # set the disks ID correctly since call_instance_start needs the
13407 # correct drbd minor to create the symlinks
13408 for disk in instance.disks:
13409 self.cfg.SetDiskID(disk, src_node)
13411 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13413 if activate_disks:
13414 # Activate the instance disks if we're exporting a stopped instance
13415 feedback_fn("Activating disks for %s" % instance.name)
13416 _StartInstanceDisks(self, instance, None)
13418 try:
13419 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13420 instance)
13422 helper.CreateSnapshots()
13423 try:
13424 if (self.op.shutdown and
13425 instance.admin_state == constants.ADMINST_UP and
13426 not self.op.remove_instance):
13427 assert not activate_disks
13428 feedback_fn("Starting instance %s" % instance.name)
13429 result = self.rpc.call_instance_start(src_node,
13430 (instance, None, None), False)
13431 msg = result.fail_msg
13432 if msg:
13433 feedback_fn("Failed to start instance: %s" % msg)
13434 _ShutdownInstanceDisks(self, instance)
13435 raise errors.OpExecError("Could not start instance: %s" % msg)
13437 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13438 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13439 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13440 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13441 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13443 (key_name, _, _) = self.x509_key_name
13445 dest_ca_pem = \
13446 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13447 self.dest_x509_ca)
13449 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13450 key_name, dest_ca_pem,
13451 timeouts)
13452 finally:
13453 helper.Cleanup()
13455 # Check for backwards compatibility
13456 assert len(dresults) == len(instance.disks)
13457 assert compat.all(isinstance(i, bool) for i in dresults), \
13458 "Not all results are boolean: %r" % dresults
13462 feedback_fn("Deactivating disks for %s" % instance.name)
13463 _ShutdownInstanceDisks(self, instance)
13465 if not (compat.all(dresults) and fin_resu):
13468 failures.append("export finalization")
13469 if not compat.all(dresults):
13470 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13471 if not dsk)
13472 failures.append("disk export: disk(s) %s" % fdsk)
13474 raise errors.OpExecError("Export failed, errors in %s" %
13475 utils.CommaJoin(failures))
13477 # At this point, the export was successful, we can cleanup/finish
13479 # Remove instance if requested
13480 if self.op.remove_instance:
13481 feedback_fn("Removing instance %s" % instance.name)
13482 _RemoveInstance(self, feedback_fn, instance,
13483 self.op.ignore_remove_failures)
13485 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13486 self._CleanupExports(feedback_fn)
13488 return fin_resu, dresults
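# Editor's note (illustrative): the return value pairs the finalization
# status with one boolean per disk, e.g. (True, [True, True]) for a fully
# successful two-disk export.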
13491 class LUBackupRemove(NoHooksLU):
13492 """Remove exports related to the named instance.
13497 def ExpandNames(self):
13498 self.needed_locks = {}
13499 # We need all nodes to be locked in order for RemoveExport to work, but we
13500 # don't need to lock the instance itself, as nothing will happen to it (and
13501 # we can remove exports also for a removed instance)
13502 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13504 def Exec(self, feedback_fn):
13505 """Remove any export.
13508 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13509 # If the instance was not found we'll try with the name that was passed in.
13510 # This will only work if it was an FQDN, though.
13512 if not instance_name:
13514 instance_name = self.op.instance_name
13516 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13517 exportlist = self.rpc.call_export_list(locked_nodes)
13518 found = False
13519 for node in exportlist:
13520 msg = exportlist[node].fail_msg
13521 if msg:
13522 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13523 continue
13524 if instance_name in exportlist[node].payload:
13525 found = True
13526 result = self.rpc.call_export_remove(node, instance_name)
13527 msg = result.fail_msg
13528 if msg:
13529 logging.error("Could not remove export for instance %s"
13530 " on node %s: %s", instance_name, node, msg)
13532 if fqdn_warn and not found:
13533 feedback_fn("Export not found. If trying to remove an export belonging"
13534 " to a deleted instance please use its Fully Qualified"
13538 class LUGroupAdd(LogicalUnit):
13539 """Logical unit for creating node groups.
13542 HPATH = "group-add"
13543 HTYPE = constants.HTYPE_GROUP
13544 REQ_BGL = False
13546 def ExpandNames(self):
13547 # We need the new group's UUID here so that we can create and acquire the
13548 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13549 # that it should not check whether the UUID exists in the configuration.
13550 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13551 self.needed_locks = {}
13552 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13554 def CheckPrereq(self):
13555 """Check prerequisites.
13557 This checks that the given group name is not an existing node group
13562 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13563 except errors.OpPrereqError:
13566 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13567 " node group (UUID: %s)" %
13568 (self.op.group_name, existing_uuid),
13569 errors.ECODE_EXISTS)
13571 if self.op.ndparams:
13572 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13574 if self.op.hv_state:
13575 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13576 else:
13577 self.new_hv_state = None
13579 if self.op.disk_state:
13580 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13581 else:
13582 self.new_disk_state = None
13584 if self.op.diskparams:
13585 for templ in constants.DISK_TEMPLATES:
13586 if templ in self.op.diskparams:
13587 utils.ForceDictType(self.op.diskparams[templ],
13588 constants.DISK_DT_TYPES)
13589 self.new_diskparams = self.op.diskparams
13590 else:
13591 self.new_diskparams = None
13593 if self.op.ipolicy:
13594 cluster = self.cfg.GetClusterInfo()
13595 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13596 try:
13597 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13598 except errors.ConfigurationError, err:
13599 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13600 errors.ECODE_INVAL)
13602 def BuildHooksEnv(self):
13603 """Build hooks env.
13607 "GROUP_NAME": self.op.group_name,
13610 def BuildHooksNodes(self):
13611 """Build hooks nodes.
13614 mn = self.cfg.GetMasterNode()
13615 return ([mn], [mn])
13617 def Exec(self, feedback_fn):
13618 """Add the node group to the cluster.
13621 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13622 uuid=self.group_uuid,
13623 alloc_policy=self.op.alloc_policy,
13624 ndparams=self.op.ndparams,
13625 diskparams=self.new_diskparams,
13626 ipolicy=self.op.ipolicy,
13627 hv_state_static=self.new_hv_state,
13628 disk_state_static=self.new_disk_state)
13630 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13631 del self.remove_locks[locking.LEVEL_NODEGROUP]
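# Editor's note: as far as the locking pattern goes, the nodegroup lock was
# pre-acquired in ExpandNames via add_locks; dropping the remove_locks
# entry here means the freshly added lock is kept (not deleted from the
# lock manager) now that the group really exists in the configuration.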
13634 class LUGroupAssignNodes(NoHooksLU):
13635 """Logical unit for assigning nodes to groups.
13640 def ExpandNames(self):
13641 # These raise errors.OpPrereqError on their own:
13642 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13643 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13645 # We want to lock all the affected nodes and groups. We have readily
13646 # available the list of nodes, and the *destination* group. To gather the
13647 # list of "source" groups, we need to fetch node information later on.
13648 self.needed_locks = {
13649 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13650 locking.LEVEL_NODE: self.op.nodes,
13651 }
13653 def DeclareLocks(self, level):
13654 if level == locking.LEVEL_NODEGROUP:
13655 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13657 # Try to get all affected nodes' groups without having the group or node
13658 # lock yet. Needs verification later in the code flow.
13659 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13661 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13663 def CheckPrereq(self):
13664 """Check prerequisites.
13667 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13668 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13669 frozenset(self.op.nodes))
13671 expected_locks = (set([self.group_uuid]) |
13672 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13673 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13674 if actual_locks != expected_locks:
13675 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13676 " current groups are '%s', used to be '%s'" %
13677 (utils.CommaJoin(expected_locks),
13678 utils.CommaJoin(actual_locks)))
13680 self.node_data = self.cfg.GetAllNodesInfo()
13681 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13682 instance_data = self.cfg.GetAllInstancesInfo()
13684 if self.group is None:
13685 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13686 (self.op.group_name, self.group_uuid))
13688 (new_splits, previous_splits) = \
13689 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13690 for node in self.op.nodes],
13691 self.node_data, instance_data)
13693 if new_splits:
13694 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13696 if not self.op.force:
13697 raise errors.OpExecError("The following instances get split by this"
13698 " change and --force was not given: %s" %
13699 fmt_new_splits)
13700 else:
13701 self.LogWarning("This operation will split the following instances: %s",
13702 fmt_new_splits)
13704 if previous_splits:
13705 self.LogWarning("In addition, these already-split instances continue"
13706 " to be split across groups: %s",
13707 utils.CommaJoin(utils.NiceSort(previous_splits)))
13709 def Exec(self, feedback_fn):
13710 """Assign nodes to a new group.
13713 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13715 self.cfg.AssignGroupNodes(mods)
13718 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13719 """Check for split instances after a node assignment.
13721 This method considers a series of node assignments as an atomic operation,
13722 and returns information about split instances after applying the set of
13723 changes.
13725 In particular, it returns information about newly split instances, and
13726 instances that were already split, and remain so after the change.
13728 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13729 considered.
13731 @type changes: list of (node_name, new_group_uuid) pairs.
13732 @param changes: list of node assignments to consider.
13733 @param node_data: a dict with data for all nodes
13734 @param instance_data: a dict with all instances to consider
13735 @rtype: a two-tuple
13736 @return: a list of instances that were previously okay and end up split as a
13737 consequence of this change, and a list of instances that were previously
13738 split and this change does not fix.
13740 """
13741 changed_nodes = dict((node, group) for node, group in changes
13742 if node_data[node].group != group)
13744 all_split_instances = set()
13745 previously_split_instances = set()
13747 def InstanceNodes(instance):
13748 return [instance.primary_node] + list(instance.secondary_nodes)
13750 for inst in instance_data.values():
13751 if inst.disk_template not in constants.DTS_INT_MIRROR:
13752 continue
13754 instance_nodes = InstanceNodes(inst)
13756 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13757 previously_split_instances.add(inst.name)
13759 if len(set(changed_nodes.get(node, node_data[node].group)
13760 for node in instance_nodes)) > 1:
13761 all_split_instances.add(inst.name)
13763 return (list(all_split_instances - previously_split_instances),
13764 list(previously_split_instances & all_split_instances))
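# Worked example (editor's addition): with nodes A and B in group g1 and a
# DRBD instance mirrored on (A, B), the change [(B, g2)] makes {g1, g2} the
# set of groups covering the instance's nodes, so the instance shows up in
# the first ("newly split") list of the returned pair.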
13767 class _GroupQuery(_QueryBase):
13768 FIELDS = query.GROUP_FIELDS
13770 def ExpandNames(self, lu):
13771 lu.needed_locks = {}
13773 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13774 self._cluster = lu.cfg.GetClusterInfo()
13775 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13777 if not self.names:
13778 self.wanted = [name_to_uuid[name]
13779 for name in utils.NiceSort(name_to_uuid.keys())]
13780 else:
13781 # Accept names to be either names or UUIDs.
13782 missing = []
13783 self.wanted = []
13784 all_uuid = frozenset(self._all_groups.keys())
13786 for name in self.names:
13787 if name in all_uuid:
13788 self.wanted.append(name)
13789 elif name in name_to_uuid:
13790 self.wanted.append(name_to_uuid[name])
13791 else:
13792 missing.append(name)
13794 if missing:
13795 raise errors.OpPrereqError("Some groups do not exist: %s" %
13796 utils.CommaJoin(missing),
13797 errors.ECODE_NOENT)
13799 def DeclareLocks(self, lu, level):
13800 pass
13802 def _GetQueryData(self, lu):
13803 """Computes the list of node groups and their attributes.
13805 """
13806 do_nodes = query.GQ_NODE in self.requested_data
13807 do_instances = query.GQ_INST in self.requested_data
13809 group_to_nodes = None
13810 group_to_instances = None
13812 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13813 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13814 # latter GetAllInstancesInfo() is not enough, for we have to go through
13815 # instance->node. Hence, we will need to process nodes even if we only need
13816 # instance information.
13817 if do_nodes or do_instances:
13818 all_nodes = lu.cfg.GetAllNodesInfo()
13819 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13820 node_to_group = {}
13822 for node in all_nodes.values():
13823 if node.group in group_to_nodes:
13824 group_to_nodes[node.group].append(node.name)
13825 node_to_group[node.name] = node.group
13827 if do_instances:
13828 all_instances = lu.cfg.GetAllInstancesInfo()
13829 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13831 for instance in all_instances.values():
13832 node = instance.primary_node
13833 if node in node_to_group:
13834 group_to_instances[node_to_group[node]].append(instance.name)
13836 if not do_nodes:
13837 # Do not pass on node information if it was not requested.
13838 group_to_nodes = None
13840 return query.GroupQueryData(self._cluster,
13841 [self._all_groups[uuid]
13842 for uuid in self.wanted],
13843 group_to_nodes, group_to_instances)
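# Editor's note (illustrative): with GQ_NODE and GQ_INST requested, the two
# mappings might look like
#   group_to_nodes     = {"uuid-g1": ["node1", "node2"]}
#   group_to_instances = {"uuid-g1": ["inst1"]}
# keyed by group UUID and filled only for the wanted groups.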
13846 class LUGroupQuery(NoHooksLU):
13847 """Logical unit for querying node groups.
13852 def CheckArguments(self):
13853 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13854 self.op.output_fields, False)
13856 def ExpandNames(self):
13857 self.gq.ExpandNames(self)
13859 def DeclareLocks(self, level):
13860 self.gq.DeclareLocks(self, level)
13862 def Exec(self, feedback_fn):
13863 return self.gq.OldStyleQuery(self)
13866 class LUGroupSetParams(LogicalUnit):
13867 """Modifies the parameters of a node group.
13870 HPATH = "group-modify"
13871 HTYPE = constants.HTYPE_GROUP
13874 def CheckArguments(self):
13877 self.op.diskparams,
13878 self.op.alloc_policy,
13880 self.op.disk_state,
13884 if all_changes.count(None) == len(all_changes):
13885 raise errors.OpPrereqError("Please pass at least one modification",
13886 errors.ECODE_INVAL)
13888 def ExpandNames(self):
13889 # This raises errors.OpPrereqError on its own:
13890 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13892 self.needed_locks = {
13893 locking.LEVEL_INSTANCE: [],
13894 locking.LEVEL_NODEGROUP: [self.group_uuid],
13895 }
13897 self.share_locks[locking.LEVEL_INSTANCE] = 1
13899 def DeclareLocks(self, level):
13900 if level == locking.LEVEL_INSTANCE:
13901 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13903 # Lock instances optimistically, needs verification once group lock has
13904 # been acquired
13905 self.needed_locks[locking.LEVEL_INSTANCE] = \
13906 self.cfg.GetNodeGroupInstances(self.group_uuid)
13908 @staticmethod
13909 def _UpdateAndVerifyDiskParams(old, new):
13910 """Updates and verifies disk parameters.
13912 """
13913 new_params = _GetUpdatedParams(old, new)
13914 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13915 return new_params
13917 def CheckPrereq(self):
13918 """Check prerequisites.
13921 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13923 # Check if locked instances are still correct
13924 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13926 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13927 cluster = self.cfg.GetClusterInfo()
13929 if self.group is None:
13930 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13931 (self.op.group_name, self.group_uuid))
13933 if self.op.ndparams:
13934 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13935 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13936 self.new_ndparams = new_ndparams
13938 if self.op.diskparams:
13939 diskparams = self.group.diskparams
13940 new_diskparams = dict((dt,
13941 self._UpdateAndVerifyDiskParams(diskparams[dt],
13942 self.op.diskparams[dt]))
13943 for dt in constants.DISK_TEMPLATES
13944 if dt in self.op.diskparams)
13945 self.new_diskparams = objects.FillDiskParams(diskparams, new_diskparams)
13947 if self.op.hv_state:
13948 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13949 self.group.hv_state_static)
13951 if self.op.disk_state:
13952 self.new_disk_state = \
13953 _MergeAndVerifyDiskState(self.op.disk_state,
13954 self.group.disk_state_static)
13956 if self.op.ipolicy:
13957 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13958 self.op.ipolicy,
13959 group_policy=True)
13961 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13962 inst_filter = lambda inst: inst.name in owned_instances
13963 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13964 violations = \
13965 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13966 self.group),
13967 new_ipolicy, instances)
13969 if violations:
13970 self.LogWarning("After the ipolicy change the following instances"
13971 " violate them: %s",
13972 utils.CommaJoin(violations))
13974 def BuildHooksEnv(self):
13975 """Build hooks env.
13979 "GROUP_NAME": self.op.group_name,
13980 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13983 def BuildHooksNodes(self):
13984 """Build hooks nodes.
13987 mn = self.cfg.GetMasterNode()
13988 return ([mn], [mn])
13990 def Exec(self, feedback_fn):
13991 """Modifies the node group.
13996 if self.op.ndparams:
13997 self.group.ndparams = self.new_ndparams
13998 result.append(("ndparams", str(self.group.ndparams)))
14000 if self.op.diskparams:
14001 self.group.diskparams = self.new_diskparams
14002 result.append(("diskparams", str(self.group.diskparams)))
14004 if self.op.alloc_policy:
14005 self.group.alloc_policy = self.op.alloc_policy
14007 if self.op.hv_state:
14008 self.group.hv_state_static = self.new_hv_state
14010 if self.op.disk_state:
14011 self.group.disk_state_static = self.new_disk_state
14013 if self.op.ipolicy:
14014 self.group.ipolicy = self.new_ipolicy
14016 self.cfg.Update(self.group, feedback_fn)
14018 return result
14020 class LUGroupRemove(LogicalUnit):
14021 HPATH = "group-remove"
14022 HTYPE = constants.HTYPE_GROUP
14023 REQ_BGL = False
14025 def ExpandNames(self):
14026 # This will raise errors.OpPrereqError on its own:
14027 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14028 self.needed_locks = {
14029 locking.LEVEL_NODEGROUP: [self.group_uuid],
14030 }
14032 def CheckPrereq(self):
14033 """Check prerequisites.
14035 This checks that the given group name exists as a node group, that it is
14036 empty (i.e., contains no nodes), and that it is not the last group of the
14037 cluster.
14039 """
14040 # Verify that the group is empty.
14041 group_nodes = [node.name
14042 for node in self.cfg.GetAllNodesInfo().values()
14043 if node.group == self.group_uuid]
14045 if group_nodes:
14046 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14047 " nodes: %s" %
14048 (self.op.group_name,
14049 utils.CommaJoin(utils.NiceSort(group_nodes))),
14050 errors.ECODE_STATE)
14052 # Verify the cluster would not be left group-less.
14053 if len(self.cfg.GetNodeGroupList()) == 1:
14054 raise errors.OpPrereqError("Group '%s' is the only group,"
14055 " cannot be removed" %
14056 self.op.group_name,
14057 errors.ECODE_STATE)
14059 def BuildHooksEnv(self):
14060 """Build hooks env.
14064 "GROUP_NAME": self.op.group_name,
14067 def BuildHooksNodes(self):
14068 """Build hooks nodes.
14071 mn = self.cfg.GetMasterNode()
14072 return ([mn], [mn])
14074 def Exec(self, feedback_fn):
14075 """Remove the node group.
14079 self.cfg.RemoveNodeGroup(self.group_uuid)
14080 except errors.ConfigurationError:
14081 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14082 (self.op.group_name, self.group_uuid))
14084 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14087 class LUGroupRename(LogicalUnit):
14088 HPATH = "group-rename"
14089 HTYPE = constants.HTYPE_GROUP
14090 REQ_BGL = False
14092 def ExpandNames(self):
14093 # This raises errors.OpPrereqError on its own:
14094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14096 self.needed_locks = {
14097 locking.LEVEL_NODEGROUP: [self.group_uuid],
14098 }
14100 def CheckPrereq(self):
14101 """Check prerequisites.
14103 Ensures requested new name is not yet used.
14105 """
14106 try:
14107 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14108 except errors.OpPrereqError:
14109 pass
14110 else:
14111 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14112 " node group (UUID: %s)" %
14113 (self.op.new_name, new_name_uuid),
14114 errors.ECODE_EXISTS)
14116 def BuildHooksEnv(self):
14117 """Build hooks env.
14121 "OLD_NAME": self.op.group_name,
14122 "NEW_NAME": self.op.new_name,
14125 def BuildHooksNodes(self):
14126 """Build hooks nodes.
14129 mn = self.cfg.GetMasterNode()
14131 all_nodes = self.cfg.GetAllNodesInfo()
14132 all_nodes.pop(mn, None)
14134 run_nodes = [mn]
14135 run_nodes.extend(node.name for node in all_nodes.values()
14136 if node.group == self.group_uuid)
14138 return (run_nodes, run_nodes)
14140 def Exec(self, feedback_fn):
14141 """Rename the node group.
14144 group = self.cfg.GetNodeGroup(self.group_uuid)
14147 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14148 (self.op.group_name, self.group_uuid))
14150 group.name = self.op.new_name
14151 self.cfg.Update(group, feedback_fn)
14153 return self.op.new_name
14156 class LUGroupEvacuate(LogicalUnit):
14157 HPATH = "group-evacuate"
14158 HTYPE = constants.HTYPE_GROUP
14161 def ExpandNames(self):
14162 # This raises errors.OpPrereqError on its own:
14163 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14165 if self.op.target_groups:
14166 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14167 self.op.target_groups)
14168 else:
14169 self.req_target_uuids = []
14171 if self.group_uuid in self.req_target_uuids:
14172 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14173 " as a target group (targets are %s)" %
14175 utils.CommaJoin(self.req_target_uuids)),
14176 errors.ECODE_INVAL)
14178 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14180 self.share_locks = _ShareAll()
14181 self.needed_locks = {
14182 locking.LEVEL_INSTANCE: [],
14183 locking.LEVEL_NODEGROUP: [],
14184 locking.LEVEL_NODE: [],
14185 }
14187 def DeclareLocks(self, level):
14188 if level == locking.LEVEL_INSTANCE:
14189 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14191 # Lock instances optimistically, needs verification once node and group
14192 # locks have been acquired
14193 self.needed_locks[locking.LEVEL_INSTANCE] = \
14194 self.cfg.GetNodeGroupInstances(self.group_uuid)
14196 elif level == locking.LEVEL_NODEGROUP:
14197 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14199 if self.req_target_uuids:
14200 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14202 # Lock all groups used by instances optimistically; this requires going
14203 # via the node before it's locked, requiring verification later on
14204 lock_groups.update(group_uuid
14205 for instance_name in
14206 self.owned_locks(locking.LEVEL_INSTANCE)
14207 for group_uuid in
14208 self.cfg.GetInstanceNodeGroups(instance_name))
14209 else:
14210 # No target groups, need to lock all of them
14211 lock_groups = locking.ALL_SET
14213 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14215 elif level == locking.LEVEL_NODE:
14216 # This will only lock the nodes in the group to be evacuated which
14217 # contain actual instances
14218 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14219 self._LockInstancesNodes()
14221 # Lock all nodes in group to be evacuated and target groups
14222 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14223 assert self.group_uuid in owned_groups
14224 member_nodes = [node_name
14225 for group in owned_groups
14226 for node_name in self.cfg.GetNodeGroup(group).members]
14227 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14229 def CheckPrereq(self):
14230 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14231 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14232 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14234 assert owned_groups.issuperset(self.req_target_uuids)
14235 assert self.group_uuid in owned_groups
14237 # Check if locked instances are still correct
14238 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14240 # Get instance information
14241 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14243 # Check if node groups for locked instances are still correct
14244 _CheckInstancesNodeGroups(self.cfg, self.instances,
14245 owned_groups, owned_nodes, self.group_uuid)
14247 if self.req_target_uuids:
14248 # User requested specific target groups
14249 self.target_uuids = self.req_target_uuids
14250 else:
14251 # All groups except the one to be evacuated are potential targets
14252 self.target_uuids = [group_uuid for group_uuid in owned_groups
14253 if group_uuid != self.group_uuid]
14255 if not self.target_uuids:
14256 raise errors.OpPrereqError("There are no possible target groups",
14257 errors.ECODE_INVAL)
14259 def BuildHooksEnv(self):
14260 """Build hooks env.
14264 "GROUP_NAME": self.op.group_name,
14265 "TARGET_GROUPS": " ".join(self.target_uuids),
14268 def BuildHooksNodes(self):
14269 """Build hooks nodes.
14272 mn = self.cfg.GetMasterNode()
14274 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14276 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14278 return (run_nodes, run_nodes)
14280 def Exec(self, feedback_fn):
14281 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14283 assert self.group_uuid not in self.target_uuids
14285 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14286 instances=instances, target_groups=self.target_uuids)
14288 ial.Run(self.op.iallocator)
14290 if not ial.success:
14291 raise errors.OpPrereqError("Can't compute group evacuation using"
14292 " iallocator '%s': %s" %
14293 (self.op.iallocator, ial.info),
14294 errors.ECODE_NORES)
14296 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14298 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14299 len(jobs), self.op.group_name)
14301 return ResultWithJobs(jobs)
14304 class TagsLU(NoHooksLU): # pylint: disable=W0223
14305 """Generic tags LU.
14307 This is an abstract class which is the parent of all the other tags LUs.
14309 """
14310 def ExpandNames(self):
14311 self.group_uuid = None
14312 self.needed_locks = {}
14314 if self.op.kind == constants.TAG_NODE:
14315 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14316 lock_level = locking.LEVEL_NODE
14317 lock_name = self.op.name
14318 elif self.op.kind == constants.TAG_INSTANCE:
14319 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14320 lock_level = locking.LEVEL_INSTANCE
14321 lock_name = self.op.name
14322 elif self.op.kind == constants.TAG_NODEGROUP:
14323 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14324 lock_level = locking.LEVEL_NODEGROUP
14325 lock_name = self.group_uuid
14326 else:
14327 lock_level = None
14328 lock_name = None
14330 if lock_level and getattr(self.op, "use_locking", True):
14331 self.needed_locks[lock_level] = lock_name
14333 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14334 # not possible to acquire the BGL based on opcode parameters)
14336 def CheckPrereq(self):
14337 """Check prerequisites.
14340 if self.op.kind == constants.TAG_CLUSTER:
14341 self.target = self.cfg.GetClusterInfo()
14342 elif self.op.kind == constants.TAG_NODE:
14343 self.target = self.cfg.GetNodeInfo(self.op.name)
14344 elif self.op.kind == constants.TAG_INSTANCE:
14345 self.target = self.cfg.GetInstanceInfo(self.op.name)
14346 elif self.op.kind == constants.TAG_NODEGROUP:
14347 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14348 else:
14349 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14350 str(self.op.kind), errors.ECODE_INVAL)
14353 class LUTagsGet(TagsLU):
14354 """Returns the tags of a given object.
14359 def ExpandNames(self):
14360 TagsLU.ExpandNames(self)
14362 # Share locks as this is only a read operation
14363 self.share_locks = _ShareAll()
14365 def Exec(self, feedback_fn):
14366 """Returns the tag list.
14369 return list(self.target.GetTags())
14372 class LUTagsSearch(NoHooksLU):
14373 """Searches the tags for a given pattern.
14378 def ExpandNames(self):
14379 self.needed_locks = {}
14381 def CheckPrereq(self):
14382 """Check prerequisites.
14384 This checks the pattern passed for validity by compiling it.
14386 """
14387 try:
14388 self.re = re.compile(self.op.pattern)
14389 except re.error, err:
14390 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14391 (self.op.pattern, err), errors.ECODE_INVAL)
14393 def Exec(self, feedback_fn):
14394 """Returns the tag list.
14398 tgts = [("/cluster", cfg.GetClusterInfo())]
14399 ilist = cfg.GetAllInstancesInfo().values()
14400 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14401 nlist = cfg.GetAllNodesInfo().values()
14402 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14403 tgts.extend(("/nodegroup/%s" % n.name, n)
14404 for n in cfg.GetAllNodeGroupsInfo().values())
14405 results = []
14406 for path, target in tgts:
14407 for tag in target.GetTags():
14408 if self.re.search(tag):
14409 results.append((path, tag))
14411 return results
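# Editor's note: tags from all object levels are matched against the
# (unanchored) regular expression, so e.g. the pattern "^db" would return
# entries such as ("/instances/inst1.example.com", "dbserver")
# (illustrative values).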
14413 class LUTagsSet(TagsLU):
14414 """Sets a tag on a given object.
14419 def CheckPrereq(self):
14420 """Check prerequisites.
14422 This checks the type and length of the tag name and value.
14424 """
14425 TagsLU.CheckPrereq(self)
14426 for tag in self.op.tags:
14427 objects.TaggableObject.ValidateTag(tag)
14429 def Exec(self, feedback_fn):
14430 """Sets the tag.
14432 """
14433 try:
14434 for tag in self.op.tags:
14435 self.target.AddTag(tag)
14436 except errors.TagError, err:
14437 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14438 self.cfg.Update(self.target, feedback_fn)
14441 class LUTagsDel(TagsLU):
14442 """Delete a list of tags from a given object.
14447 def CheckPrereq(self):
14448 """Check prerequisites.
14450 This checks that we have the given tag.
14452 """
14453 TagsLU.CheckPrereq(self)
14454 for tag in self.op.tags:
14455 objects.TaggableObject.ValidateTag(tag)
14456 del_tags = frozenset(self.op.tags)
14457 cur_tags = self.target.GetTags()
14459 diff_tags = del_tags - cur_tags
14460 if diff_tags:
14461 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14462 raise errors.OpPrereqError("Tag(s) %s not found" %
14463 (utils.CommaJoin(diff_names), ),
14464 errors.ECODE_NOENT)
14466 def Exec(self, feedback_fn):
14467 """Remove the tag from the object.
14470 for tag in self.op.tags:
14471 self.target.RemoveTag(tag)
14472 self.cfg.Update(self.target, feedback_fn)
14475 class LUTestDelay(NoHooksLU):
14476 """Sleep for a specified amount of time.
14478 This LU sleeps on the master and/or nodes for a specified amount of
14479 time.
14481 """
14483 REQ_BGL = False
14484 def ExpandNames(self):
14485 """Expand names and set required locks.
14487 This expands the node list, if any.
14489 """
14490 self.needed_locks = {}
14491 if self.op.on_nodes:
14492 # _GetWantedNodes can be used here, but is not always appropriate to use
14493 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14494 # more information.
14495 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14496 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14498 def _TestDelay(self):
14499 """Do the actual sleep.
14502 if self.op.on_master:
14503 if not utils.TestDelay(self.op.duration):
14504 raise errors.OpExecError("Error during master delay test")
14505 if self.op.on_nodes:
14506 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14507 for node, node_result in result.items():
14508 node_result.Raise("Failure during rpc call to node %s" % node)
14510 def Exec(self, feedback_fn):
14511 """Execute the test delay opcode, with the wanted repetitions.
14514 if self.op.repeat == 0:
14517 top_value = self.op.repeat - 1
14518 for i in range(self.op.repeat):
14519 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14523 class LUTestJqueue(NoHooksLU):
14524 """Utility LU to test some aspects of the job queue.
14529 # Must be lower than default timeout for WaitForJobChange to see whether it
14530 # notices changed jobs
14531 _CLIENT_CONNECT_TIMEOUT = 20.0
14532 _CLIENT_CONFIRM_TIMEOUT = 60.0
14534 @classmethod
14535 def _NotifyUsingSocket(cls, cb, errcls):
14536 """Opens a Unix socket and waits for another program to connect.
14539 @param cb: Callback to send socket name to client
14540 @type errcls: class
14541 @param errcls: Exception class to use for errors
14543 """
14544 # Using a temporary directory as there's no easy way to create temporary
14545 # sockets without writing a custom loop around tempfile.mktemp and
14546 # socket.bind
14547 tmpdir = tempfile.mkdtemp()
14548 try:
14549 tmpsock = utils.PathJoin(tmpdir, "sock")
14551 logging.debug("Creating temporary socket at %s", tmpsock)
14552 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14553 try:
14554 sock.bind(tmpsock)
14555 sock.listen(1)
14557 # Send details to client
14558 cb(tmpsock)
14560 # Wait for client to connect before continuing
14561 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14562 try:
14563 (conn, _) = sock.accept()
14564 except socket.error, err:
14565 raise errcls("Client didn't connect in time (%s)" % err)
14566 finally:
14567 sock.close()
14568 finally:
14569 # Remove as soon as client is connected
14570 shutil.rmtree(tmpdir)
14572 # Wait for client to close
14573 try:
14574 try:
14575 # pylint: disable=E1101
14576 # Instance of '_socketobject' has no ... member
14577 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14578 conn.recv(1)
14579 except socket.error, err:
14580 raise errcls("Client failed to confirm notification (%s)" % err)
14581 finally:
14582 conn.close()
14584 def _SendNotification(self, test, arg, sockname):
14585 """Sends a notification to the client.
14588 @param test: Test name
14589 @param arg: Test argument (depends on test)
14590 @type sockname: string
14591 @param sockname: Socket path
14594 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14596 def _Notify(self, prereq, test, arg):
14597 """Notifies the client of a test.
14600 @param prereq: Whether this is a prereq-phase test
14602 @param test: Test name
14603 @param arg: Test argument (depends on test)
14607 errcls = errors.OpPrereqError
14609 errcls = errors.OpExecError
14611 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14615 def CheckArguments(self):
14616 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14617 self.expandnames_calls = 0
14619 def ExpandNames(self):
14620 checkargs_calls = getattr(self, "checkargs_calls", 0)
14621 if checkargs_calls < 1:
14622 raise errors.ProgrammerError("CheckArguments was not called")
14624 self.expandnames_calls += 1
14626 if self.op.notify_waitlock:
14627 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14629 self.LogInfo("Expanding names")
14631 # Get lock on master node (just to get a lock, not for a particular reason)
14632 self.needed_locks = {
14633 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14634 }
14636 def Exec(self, feedback_fn):
14637 if self.expandnames_calls < 1:
14638 raise errors.ProgrammerError("ExpandNames was not called")
14640 if self.op.notify_exec:
14641 self._Notify(False, constants.JQT_EXEC, None)
14643 self.LogInfo("Executing")
14645 if self.op.log_messages:
14646 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14647 for idx, msg in enumerate(self.op.log_messages):
14648 self.LogInfo("Sending log message %s", idx + 1)
14649 feedback_fn(constants.JQT_MSGPREFIX + msg)
14650 # Report how many test messages have been sent
14651 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14653 if self.op.fail:
14654 raise errors.OpExecError("Opcode failure was requested")
14656 return True
14659 class IAllocator(object):
14660 """IAllocator framework.
14662 An IAllocator instance has three sets of attributes:
14663 - cfg that is needed to query the cluster
14664 - input data (all members of the _KEYS class attribute are required)
14665 - four buffer attributes (in_text, out_text, in_data, out_data), that
14666 represent the input (to the external script) in text and data structure
14667 format, and the output from it, again in two formats
14668 - the result variables from the script (success, info, nodes) for
14669 easy usage
14671 """
14672 # pylint: disable=R0902
14673 # lots of instance attributes
14675 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14676 self.cfg = cfg
14677 self.rpc = rpc_runner
14678 # init buffer variables
14679 self.in_text = self.out_text = self.in_data = self.out_data = None
14680 # init all input fields so that pylint is happy
14681 self.mode = mode
14682 self.memory = self.disks = self.disk_template = self.spindle_use = None
14683 self.os = self.tags = self.nics = self.vcpus = None
14684 self.hypervisor = None
14685 self.relocate_from = None
14686 self.name = None
14687 self.instances = None
14688 self.evac_mode = None
14689 self.target_groups = []
14690 # computed fields
14691 self.required_nodes = None
14692 # init result fields
14693 self.success = self.info = self.result = None
14695 try:
14696 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14697 except KeyError:
14698 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14699 " IAllocator" % self.mode)
14701 keyset = [n for (n, _) in keydata]
14703 for key in kwargs:
14704 if key not in keyset:
14705 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14706 " IAllocator" % key)
14707 setattr(self, key, kwargs[key])
14709 for key in keyset:
14710 if key not in kwargs:
14711 raise errors.ProgrammerError("Missing input parameter '%s' to"
14712 " IAllocator" % key)
14713 self._BuildInputData(compat.partial(fn, self), keydata)
14715 def _ComputeClusterData(self):
14716 """Compute the generic allocator input data.
14718 This is the data that is independent of the actual operation.
14721 cfg = self.cfg
14722 cluster_info = cfg.GetClusterInfo()
14723 # cluster data
14724 data = {
14725 "version": constants.IALLOCATOR_VERSION,
14726 "cluster_name": cfg.GetClusterName(),
14727 "cluster_tags": list(cluster_info.GetTags()),
14728 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14729 "ipolicy": cluster_info.ipolicy,
14731 ninfo = cfg.GetAllNodesInfo()
14732 iinfo = cfg.GetAllInstancesInfo().values()
14733 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14736 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14738 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14739 hypervisor_name = self.hypervisor
14740 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14741 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14742 else:
14743 hypervisor_name = cluster_info.primary_hypervisor
14745 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14746 [hypervisor_name])
14747 node_iinfo = \
14748 self.rpc.call_all_instances_info(node_list,
14749 cluster_info.enabled_hypervisors)
14751 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14753 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14754 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14755 i_list, config_ndata)
14756 assert len(data["nodes"]) == len(ninfo), \
14757 "Incomplete node data computed"
14759 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14761 self.in_data = data
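# Editor's sketch: after this method, self.in_data has roughly the shape
# below (keys as built above and by the helpers that follow; values are
# illustrative only):
#
#   {
#     "version": 2,                      # constants.IALLOCATOR_VERSION
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [],
#     "enabled_hypervisors": ["xen-pvm"],
#     "ipolicy": {...},
#     "nodegroups": {"<group-uuid>": {"name": ..., "alloc_policy": ...}},
#     "nodes": {"<node-name>": {"total_memory": ..., "free_disk": ...}},
#     "instances": {"<instance-name>": {"memory": ..., "disks": [...]}},
#   }
#
# The "request" key is added later by _BuildInputData.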
14763 @staticmethod
14764 def _ComputeNodeGroupData(cfg):
14765 """Compute node groups data.
14768 cluster = cfg.GetClusterInfo()
14769 ng = dict((guuid, {
14770 "name": gdata.name,
14771 "alloc_policy": gdata.alloc_policy,
14772 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14773 })
14774 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14776 return ng
14778 @staticmethod
14779 def _ComputeBasicNodeData(cfg, node_cfg):
14780 """Compute global node data.
14783 @returns: a dict of name: (node dict, node config)
14786 # fill in static (config-based) values
14787 node_results = dict((ninfo.name, {
14788 "tags": list(ninfo.GetTags()),
14789 "primary_ip": ninfo.primary_ip,
14790 "secondary_ip": ninfo.secondary_ip,
14791 "offline": ninfo.offline,
14792 "drained": ninfo.drained,
14793 "master_candidate": ninfo.master_candidate,
14794 "group": ninfo.group,
14795 "master_capable": ninfo.master_capable,
14796 "vm_capable": ninfo.vm_capable,
14797 "ndparams": cfg.GetNdParams(ninfo),
14798 })
14799 for ninfo in node_cfg.values())
14801 return node_results
14803 @staticmethod
14804 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14805 node_results):
14806 """Compute dynamic node data.
14808 @param node_results: the basic node structures as filled from the config
14811 #TODO(dynmem): compute the right data on MAX and MIN memory
14812 # make a copy of the current dict
14813 node_results = dict(node_results)
14814 for nname, nresult in node_data.items():
14815 assert nname in node_results, "Missing basic data for node %s" % nname
14816 ninfo = node_cfg[nname]
14818 if not (ninfo.offline or ninfo.drained):
14819 nresult.Raise("Can't get data for node %s" % nname)
14820 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14821 nname)
14822 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14824 for attr in ["memory_total", "memory_free", "memory_dom0",
14825 "vg_size", "vg_free", "cpu_total"]:
14826 if attr not in remote_info:
14827 raise errors.OpExecError("Node '%s' didn't return attribute"
14828 " '%s'" % (nname, attr))
14829 if not isinstance(remote_info[attr], int):
14830 raise errors.OpExecError("Node '%s' returned invalid value"
14832 (nname, attr, remote_info[attr]))
14833 # compute memory used by primary instances
14834 i_p_mem = i_p_up_mem = 0
14835 for iinfo, beinfo in i_list:
14836 if iinfo.primary_node == nname:
14837 i_p_mem += beinfo[constants.BE_MAXMEM]
14838 if iinfo.name not in node_iinfo[nname].payload:
14839 i_used_mem = 0
14840 else:
14841 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14842 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14843 remote_info["memory_free"] -= max(0, i_mem_diff)
14845 if iinfo.admin_state == constants.ADMINST_UP:
14846 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14848 # compute memory used by instances
14849 pnr_dyn = {
14850 "total_memory": remote_info["memory_total"],
14851 "reserved_memory": remote_info["memory_dom0"],
14852 "free_memory": remote_info["memory_free"],
14853 "total_disk": remote_info["vg_size"],
14854 "free_disk": remote_info["vg_free"],
14855 "total_cpus": remote_info["cpu_total"],
14856 "i_pri_memory": i_p_mem,
14857 "i_pri_up_memory": i_p_up_mem,
14858 }
14859 pnr_dyn.update(node_results[nname])
14860 node_results[nname] = pnr_dyn
14862 return node_results
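# Editor's note, a worked example of the memory_free adjustment above
# (hypothetical numbers): a primary instance with BE_MAXMEM of 1024 MiB
# that is currently using 768 MiB gives i_mem_diff = 1024 - 768 = 256, so
# 256 MiB is subtracted from the node's reported memory_free, reserving
# headroom for the instance to grow back to its maximum.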
14864 @staticmethod
14865 def _ComputeInstanceData(cluster_info, i_list):
14866 """Compute global instance data.
14869 instance_data = {}
14870 for iinfo, beinfo in i_list:
14871 nic_data = []
14872 for nic in iinfo.nics:
14873 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14874 nic_dict = {
14875 "mac": nic.mac,
14876 "ip": nic.ip,
14877 "mode": filled_params[constants.NIC_MODE],
14878 "link": filled_params[constants.NIC_LINK],
14879 }
14880 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14881 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14882 nic_data.append(nic_dict)
14884 "tags": list(iinfo.GetTags()),
14885 "admin_state": iinfo.admin_state,
14886 "vcpus": beinfo[constants.BE_VCPUS],
14887 "memory": beinfo[constants.BE_MAXMEM],
14888 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14890 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14892 "disks": [{constants.IDISK_SIZE: dsk.size,
14893 constants.IDISK_MODE: dsk.mode}
14894 for dsk in iinfo.disks],
14895 "disk_template": iinfo.disk_template,
14896 "hypervisor": iinfo.hypervisor,
14898 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14900 instance_data[iinfo.name] = pir
14902 return instance_data
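# Editor's sketch of one resulting instance entry (illustrative values; a
# single 10240 MiB DRBD disk adds 128 MiB of DRBD metadata, hence the
# disk_space_total of 10368 below):
#
#   instance_data["inst1.example.com"] = {
#     "tags": [], "admin_state": "up", "vcpus": 2, "memory": 1024,
#     "spindle_use": 1, "os": "debootstrap+default",
#     "nodes": ["node1", "node2"], "nics": [...],
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd", "hypervisor": "xen-pvm",
#     "disk_space_total": 10368,
#   }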
14904 def _AddNewInstance(self):
14905 """Add new instance data to allocator structure.
14907 This in combination with _ComputeClusterData will create the
14908 correct structure needed as input for the allocator.
14910 The checks for the completeness of the opcode must have already been
14911 done.
14914 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14916 if self.disk_template in constants.DTS_INT_MIRROR:
14917 self.required_nodes = 2
14918 else:
14919 self.required_nodes = 1
14921 request = {
14922 "name": self.name,
14923 "disk_template": self.disk_template,
14924 "tags": self.tags,
14925 "os": self.os,
14926 "vcpus": self.vcpus,
14927 "memory": self.memory,
14928 "spindle_use": self.spindle_use,
14929 "disks": self.disks,
14930 "disk_space_total": disk_space,
14932 "required_nodes": self.required_nodes,
14933 "hypervisor": self.hypervisor,
14938 def _AddRelocateInstance(self):
14939 """Add relocate instance data to allocator structure.
14941 This in combination with _ComputeClusterData will create the
14942 correct structure needed as input for the allocator.
14944 The checks for the completeness of the opcode must have already been
14945 done.
14948 instance = self.cfg.GetInstanceInfo(self.name)
14949 if instance is None:
14950 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14951 " IAllocator" % self.name)
14953 if instance.disk_template not in constants.DTS_MIRRORED:
14954 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14955 errors.ECODE_INVAL)
14957 if instance.disk_template in constants.DTS_INT_MIRROR and \
14958 len(instance.secondary_nodes) != 1:
14959 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14960 errors.ECODE_STATE)
14962 self.required_nodes = 1
14963 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14964 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14968 "disk_space_total": disk_space,
14969 "required_nodes": self.required_nodes,
14970 "relocate_from": self.relocate_from,
14974 def _AddNodeEvacuate(self):
14975 """Get data for node-evacuate requests.
14979 "instances": self.instances,
14980 "evac_mode": self.evac_mode,
14983 def _AddChangeGroup(self):
14984 """Get data for node-evacuate requests.
14988 "instances": self.instances,
14989 "target_groups": self.target_groups,
14992 def _BuildInputData(self, fn, keydata):
14993 """Build input data structures.
14996 self._ComputeClusterData()
14998 request = fn()
14999 request["type"] = self.mode
15000 for keyname, keytype in keydata:
15001 if keyname not in request:
15002 raise errors.ProgrammerError("Request parameter %s is missing" %
15004 val = request[keyname]
15005 if not keytype(val):
15006 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15007 " validation, value %s, expected"
15008 " type %s" % (keyname, val, keytype))
15009 self.in_data["request"] = request
15011 self.in_text = serializer.Dump(self.in_data)
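# Editor's note: the keytype entries in _MODE_DATA below are ganeti.ht
# check functions, i.e. plain callables returning a boolean, which is why
# the validation above is simply keytype(val). A sketch (illustrative):
#
#   >>> from ganeti import ht
#   >>> ht.TInt(1024)
#   True
#   >>> ht.TListOf(ht.TString)(["tag1", "tag2"])
#   True
#   >>> ht.TInt("1024")
#   False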
15013 _STRING_LIST = ht.TListOf(ht.TString)
15014 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15015 # pylint: disable=E1101
15016 # Class '...' has no 'OP_ID' member
15017 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15018 opcodes.OpInstanceMigrate.OP_ID,
15019 opcodes.OpInstanceReplaceDisks.OP_ID])
15020 })))
15022 _NEVAC_MOVED = \
15023 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15024 ht.TItems([ht.TNonEmptyString,
15025 ht.TNonEmptyString,
15026 ht.TListOf(ht.TNonEmptyString),
15027 ])))
15028 _NEVAC_FAILED = \
15029 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15030 ht.TItems([ht.TNonEmptyString,
15031 ht.TMaybeString,
15032 ])))
15033 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15034 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15036 _MODE_DATA = {
15037 constants.IALLOCATOR_MODE_ALLOC:
15039 (_AddNewInstance, [
15040 ("name", ht.TString),
15041 ("memory", ht.TInt),
15042 ("spindle_use", ht.TInt),
15043 ("disks", ht.TListOf(ht.TDict)),
15044 ("disk_template", ht.TString),
15045 ("os", ht.TString),
15046 ("tags", _STRING_LIST),
15047 ("nics", ht.TListOf(ht.TDict)),
15048 ("vcpus", ht.TInt),
15049 ("hypervisor", ht.TString),
15051 constants.IALLOCATOR_MODE_RELOC:
15052 (_AddRelocateInstance,
15053 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15055 constants.IALLOCATOR_MODE_NODE_EVAC:
15056 (_AddNodeEvacuate, [
15057 ("instances", _STRING_LIST),
15058 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15060 constants.IALLOCATOR_MODE_CHG_GROUP:
15061 (_AddChangeGroup, [
15062 ("instances", _STRING_LIST),
15063 ("target_groups", _STRING_LIST),
15067 def Run(self, name, validate=True, call_fn=None):
15068 """Run an instance allocator and return the results.
15071 if call_fn is None:
15072 call_fn = self.rpc.call_iallocator_runner
15074 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15075 result.Raise("Failure while running the iallocator script")
15077 self.out_text = result.payload
15078 if validate:
15079 self._ValidateResult()
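# Editor's sketch of typical use ("hail" is the htools-based allocator
# shipped with Ganeti; setup and values are illustrative):
#
#   >>> ial = IAllocator(cfg, rpc_runner,
#   ...                  mode=constants.IALLOCATOR_MODE_RELOC,
#   ...                  name="inst1.example.com",
#   ...                  relocate_from=["node2.example.com"])
#   >>> ial.Run("hail")
#   >>> ial.success, ial.result
#   (True, ['node3.example.com'])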
15081 def _ValidateResult(self):
15082 """Process the allocator results.
15084 This will process and, if successful, save the result in
15085 self.out_data and the other parameters.
15088 try:
15089 rdict = serializer.Load(self.out_text)
15090 except Exception, err:
15091 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15093 if not isinstance(rdict, dict):
15094 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15096 # TODO: remove backwards compatibility in later versions
15097 if "nodes" in rdict and "result" not in rdict:
15098 rdict["result"] = rdict["nodes"]
15099 del rdict["nodes"]
15101 for key in "success", "info", "result":
15102 if key not in rdict:
15103 raise errors.OpExecError("Can't parse iallocator results:"
15104 " missing key '%s'" % key)
15105 setattr(self, key, rdict[key])
15107 if not self._result_check(self.result):
15108 raise errors.OpExecError("Iallocator returned invalid result,"
15109 " expected %s, got %s" %
15110 (self._result_check, self.result),
15111 errors.ECODE_INVAL)
15113 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15114 assert self.relocate_from is not None
15115 assert self.required_nodes == 1
15117 node2group = dict((name, ndata["group"])
15118 for (name, ndata) in self.in_data["nodes"].items())
15120 fn = compat.partial(self._NodesToGroups, node2group,
15121 self.in_data["nodegroups"])
15123 instance = self.cfg.GetInstanceInfo(self.name)
15124 request_groups = fn(self.relocate_from + [instance.primary_node])
15125 result_groups = fn(rdict["result"] + [instance.primary_node])
15127 if self.success and not set(result_groups).issubset(request_groups):
15128 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15129 " differ from original groups (%s)" %
15130 (utils.CommaJoin(result_groups),
15131 utils.CommaJoin(request_groups)))
15133 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15134 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15136 self.out_data = rdict
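# Editor's sketch: a well-formed allocator response, as parsed above; for
# an allocation or relocation, "result" is a list of node names
# (illustrative):
#
#   {"success": true, "info": "request successful",
#    "result": ["node3.example.com"]}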
15138 @staticmethod
15139 def _NodesToGroups(node2group, groups, nodes):
15140 """Returns a list of unique group names for a list of nodes.
15142 @type node2group: dict
15143 @param node2group: Map from node name to group UUID
15144 @type groups: dict
15145 @param groups: Group information
15146 @type nodes: list
15147 @param nodes: Node names
15150 result = set()
15152 for node in nodes:
15153 try:
15154 group_uuid = node2group[node]
15155 except KeyError:
15156 # Ignore unknown node
15157 pass
15158 else:
15159 try:
15160 group = groups[group_uuid]
15161 except KeyError:
15162 # Can't find group, let's use UUID
15163 group_name = group_uuid
15164 else:
15165 group_name = group["name"]
15167 result.add(group_name)
15169 return sorted(result)
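# Editor's sketch of the mapping behavior (hypothetical data):
#
#   >>> node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   >>> groups = {"uuid-a": {"name": "default"}}
#   >>> IAllocator._NodesToGroups(node2group, groups,
#   ...                           ["node1", "node2", "node3"])
#   ['default', 'uuid-b']
#
# node3 is silently ignored and uuid-b falls back to the raw UUID, since
# it has no entry in groups.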
15172 class LUTestAllocator(NoHooksLU):
15173 """Run allocator tests.
15175 This LU runs the allocator tests.
15178 def CheckPrereq(self):
15179 """Check prerequisites.
15181 This checks the opcode parameters depending on the test direction and mode.
15184 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15185 for attr in ["memory", "disks", "disk_template",
15186 "os", "tags", "nics", "vcpus"]:
15187 if not hasattr(self.op, attr):
15188 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15189 attr, errors.ECODE_INVAL)
15190 iname = self.cfg.ExpandInstanceName(self.op.name)
15191 if iname is not None:
15192 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15193 iname, errors.ECODE_EXISTS)
15194 if not isinstance(self.op.nics, list):
15195 raise errors.OpPrereqError("Invalid parameter 'nics'",
15196 errors.ECODE_INVAL)
15197 if not isinstance(self.op.disks, list):
15198 raise errors.OpPrereqError("Invalid parameter 'disks'",
15199 errors.ECODE_INVAL)
15200 for row in self.op.disks:
15201 if (not isinstance(row, dict) or
15202 constants.IDISK_SIZE not in row or
15203 not isinstance(row[constants.IDISK_SIZE], int) or
15204 constants.IDISK_MODE not in row or
15205 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15206 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15207 " parameter", errors.ECODE_INVAL)
15208 if self.op.hypervisor is None:
15209 self.op.hypervisor = self.cfg.GetHypervisorType()
15210 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15211 fname = _ExpandInstanceName(self.cfg, self.op.name)
15212 self.op.name = fname
15213 self.relocate_from = \
15214 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15215 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15216 constants.IALLOCATOR_MODE_NODE_EVAC):
15217 if not self.op.instances:
15218 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15219 self.op.instances = _GetWantedInstances(self, self.op.instances)
15221 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15222 self.op.mode, errors.ECODE_INVAL)
15224 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15225 if self.op.allocator is None:
15226 raise errors.OpPrereqError("Missing allocator name",
15227 errors.ECODE_INVAL)
15228 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15229 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15230 self.op.direction, errors.ECODE_INVAL)
15232 def Exec(self, feedback_fn):
15233 """Run the allocator test.
15236 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15237 ial = IAllocator(self.cfg, self.rpc,
15238 mode=self.op.mode,
15239 name=self.op.name,
15240 memory=self.op.memory,
15241 disks=self.op.disks,
15242 disk_template=self.op.disk_template,
15243 os=self.op.os,
15244 tags=self.op.tags,
15245 nics=self.op.nics,
15246 vcpus=self.op.vcpus,
15247 hypervisor=self.op.hypervisor,
15248 spindle_use=self.op.spindle_use)
15249 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15250 ial = IAllocator(self.cfg, self.rpc,
15251 mode=self.op.mode,
15252 name=self.op.name,
15253 relocate_from=list(self.relocate_from))
15255 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15256 ial = IAllocator(self.cfg, self.rpc,
15257 mode=self.op.mode,
15258 instances=self.op.instances,
15259 target_groups=self.op.target_groups)
15260 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15261 ial = IAllocator(self.cfg, self.rpc,
15262 mode=self.op.mode,
15263 instances=self.op.instances,
15264 evac_mode=self.op.evac_mode)
15266 raise errors.ProgrammerError("Uncatched mode %s in"
15267 " LUTestAllocator.Exec", self.op.mode)
15269 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15270 result = ial.in_text
15271 else:
15272 ial.Run(self.op.allocator, validate=False)
15273 result = ial.out_text
15274 return result
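# Editor's sketch: driving this LU through its opcode; with direction
# "in" only the generated input text is returned and no allocator is run
# (field values are illustrative):
#
#   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
#                                mode=constants.IALLOCATOR_MODE_RELOC,
#                                name="inst1.example.com",
#                                allocator="hail")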
15277 #: Query type implementations
15278 _QUERY_IMPL = {
15279 constants.QR_CLUSTER: _ClusterQuery,
15280 constants.QR_INSTANCE: _InstanceQuery,
15281 constants.QR_NODE: _NodeQuery,
15282 constants.QR_GROUP: _GroupQuery,
15283 constants.QR_OS: _OsQuery,
15284 constants.QR_EXPORT: _ExportQuery,
15285 }
15287 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15290 def _GetQueryImplementation(name):
15291 """Returns the implemtnation for a query type.
15293 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15296 try:
15297 return _QUERY_IMPL[name]
15298 except KeyError:
15299 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15300 errors.ECODE_INVAL)
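# Editor's sketch (illustrative): the lookup simply maps a query resource
# to its implementation class, raising OpPrereqError for unknown names:
#
#   >>> _GetQueryImplementation(constants.QR_NODE)
#   <class '..._NodeQuery'>
#   >>> _GetQueryImplementation("no-such-resource")
#   Traceback (most recent call last):
#     ...
#   OpPrereqError: Unknown query resource 'no-such-resource'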