# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
"""Module implementing the master-side code."""
# pylint: disable=W0201,C0302
# W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module
from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
import ganeti.masterd.instance # pylint: disable=W0611
#: Size of DRBD meta block device
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.
  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  Note that all commands require root permissions.
  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.
    This needs to be overridden in derived classes in order to check op
    self.proc = processor
    self.cfg = context.cfg
    self.glm = context.glm
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0
    # Validate opcode parameters and set defaults
    self.op.Validate(True)
    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.
    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:
      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.
  def ExpandNames(self):
    """Expand names for this LU.
    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.
    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value
    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.
    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.
      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      self.needed_locks = {} # No, you can't leave it to the default value None
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
      self.needed_locks = {} # Exclusive LUs don't need locks.
      raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level
    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.
    This function is only called if you have something already set in
    self.needed_locks for the level.
    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}
  def CheckPrereq(self):
    """Check prerequisites for this LU.
    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.
    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
  def Exec(self, feedback_fn):
    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
    raise NotImplementedError
  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.
    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.
    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
      L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result
    # API must be kept, thus we ignore the unused argument and the
    # "could be a function" warnings
    # pylint: disable=W0613,R0201
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.
    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    if self.needed_locks is None:
      self.needed_locks = {}
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.
    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].
    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.
    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()
    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we've really been called with the instance locks held
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
        wanted_nodes.extend(instance.secondary_nodes)
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
      raise errors.ProgrammerError("Unknown recalculation mode")
    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.
  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.
  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.
    This just raises an error.
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
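# Illustrative usage note (not part of the original module): the helper above
# is what an LU assigns in ExpandNames when every lock level may be shared,
# e.g. LUClusterVerifyConfig below does "self.share_locks = _ShareAll()".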
def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.
  (bootid, (vg_info, ), (hv_info, )) = data
  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.
  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
  """Checks if node groups for locked instances are still correct.
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name
    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.
  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.
  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.
  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _CopyLockList(names):
  """Makes a copy of a list of lock names.
  Handles L{locking.ALL_SET} correctly.
  if names == locking.ALL_SET:
    return locking.ALL_SET
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.
  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
    constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
    values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
  @return: the new parameter dictionary
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      params_copy[key] = val
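  # Illustrative example (not part of the original module): with
  # use_default=True, merging {"a": 1, "b": 2} with
  # {"a": constants.VALUE_DEFAULT, "c": 3} drops "a" (reset to its default)
  # and yields {"b": 2, "c": 3}; with use_none=True a None value behaves the
  # same way.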
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.
  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_default=use_default)
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
      if key in constants.IPOLICY_PARAMETERS:
        # FIXME: we assume all such values are float
          ipolicy[key] = float(value)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid value for attribute"
                                     " '%s': '%s', error: %s" %
                                     (key, value, err), errors.ECODE_INVAL)
        # FIXME: we assume all others are lists; this should be redone
        ipolicy[key] = list(value)
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.
  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.
  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
    if obj_input is None:
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.
  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                for key, value in op_input.items())
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.
  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"
  if names is not None:
    should_release = names.__contains__
    should_release = lambda name: name not in keep
    should_release = None
  owned = lu.owned_locks(level)
    # Not owning any lock at this level, do nothing
    # Determine which locks to release
      if should_release(name):
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
    # Release just some locks
    lu.glm.release(level, names=release)
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
    lu.glm.release(level)
    assert not lu.glm.is_owned(level), "No locks should be owned"
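# Illustrative usage sketch (not from the original source): once an LU has
# narrowed its work down to a single node it can drop the other node locks
# with something like
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[the_node])
# whereas passing names=[...] instead releases exactly the named locks.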
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.
  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.
  hm = lu.proc.BuildHooksManager(lu)
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.
  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  delta = f.NonMatching(selected)
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.
  This will ensure that instances don't get customised versions of
  used_globals = constants.HVC_GLOBALS.intersection(params)
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.
  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.
  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
      lu.LogWarning("Primary node offline, ignoring check that instance"
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.
  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria
  if value in [None, constants.VALUE_AUTO]:
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
      fqn = "%s/%s" % (name, qualifier)
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
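# Illustrative example (not from the original source): with an ipolicy whose
# memory-size bounds are min=128 and max=4096, a value of 512 passes (None is
# returned) while 8192 produces a message along the lines of
# "memory-size value 8192 is not in range [128, 4096]".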
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.
  @param ipolicy: The ipolicy
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found
  assert disk_count == len(disk_sizes)
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)
  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}
  if current_group == target_group:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.
  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violate the new ipolicy but
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
def _ExpandItemName(fn, name, kind):
  """Expand an item name.
  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks
  This builds the hook environment from individual variables.
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
    the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @param tags: list of instance tags as strings
  @return: the hook environment for this instance
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  env["INSTANCE_NIC_COUNT"] = nic_count
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  env["INSTANCE_DISK_COUNT"] = disk_count
  env["INSTANCE_TAGS"] = " ".join(tags)
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  cluster = lu.cfg.GetClusterInfo()
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
  @type override: dict
  @param override: dictionary with key/values that will override
  @return: the hook environment dictionary
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
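# Illustrative example (not from the original source): if
# GetMasterCandidateStats() reports mc_now=4 and mc_should=4 while the
# cluster's candidate_pool_size is 10, the target becomes min(4 + 1, 10) = 5,
# so 4 < 5 and the new node decides to promote itself.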
def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.
  return cluster.SimpleFillIPolicy(group.ipolicy)
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances that violate the given ipolicy.
  @param ipolicy: The ipolicy to verify
  @type instances: objects.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.
  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @param name: OS name passed by the user, to check for validity
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
    raise errors.OpPrereqError("OS name must include a variant",
  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.
  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)
  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @return: Iallocator name
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),
  def BuildHooksNodes(self):
    """Build hooks nodes.
    return ([], [self.cfg.GetMasterNode()])
  def Exec(self, feedback_fn):
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),
  def BuildHooksNodes(self):
    """Build hooks nodes.
  def CheckPrereq(self):
    """Check prerequisites.
    This checks whether the cluster is empty.
    Any errors are signaled by raising errors.OpPrereqError.
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
  def Exec(self, feedback_fn):
    """Destroys the cluster.
    master_params = self.cfg.GetMasterNetworkParameters()
    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
      self.LogWarning("Error disabling the master IP address: %s",
    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.
  @type filename: string
  @param filename: Path to PEM file
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)
    fnamemsg = "While verifying %s: %s" % (filename, msg)
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.
  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
    apply to, and the origin (can be "cluster", "os X", or "instance Y")
  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
      hvp_data.append(("os %s" % os_name, hv_name, full_params))
  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.
    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.
    This must be called only from Exec and functions called from Exec.
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    # If the error code is in the list of ignored errors, demote the error to a
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
      self._Error(ecode, *args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.
  def ExpandNames(self):
    self.needed_locks = {}
  def Exec(self, feedback_fn):
    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
      groups = self.cfg.GetNodeGroupList()
      # Verify global configuration
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)
    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
    return ResultWithJobs(jobs)
1952 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1953 """Verifies the cluster config.
1958 def _VerifyHVP(self, hvp_data):
1959 """Verifies locally the syntax of the hypervisor parameters.
1962 for item, hv_name, hv_params in hvp_data:
1963 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1966 hv_class = hypervisor.GetHypervisor(hv_name)
1967 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1968 hv_class.CheckParameterSyntax(hv_params)
1969 except errors.GenericError, err:
1970 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1972 def ExpandNames(self):
1973 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1974 self.share_locks = _ShareAll()
1976 def CheckPrereq(self):
1977 """Check prerequisites.
1980 # Retrieve all information
1981 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1982 self.all_node_info = self.cfg.GetAllNodesInfo()
1983 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1985 def Exec(self, feedback_fn):
1986 """Verify integrity of the cluster, performing various tests on nodes.
1990 self._feedback_fn = feedback_fn
1992 feedback_fn("* Verifying cluster config")
1994 for msg in self.cfg.VerifyConfig():
1995 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1997 feedback_fn("* Verifying cluster certificate files")
1999 for cert_filename in constants.ALL_CERT_FILES:
2000 (errcode, msg) = _VerifyCertificate(cert_filename)
2001 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2003 feedback_fn("* Verifying hypervisor parameters")
2005 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2006 self.all_inst_info.values()))
2008 feedback_fn("* Verifying all nodes belong to an existing group")
2010 # We do this verification here because, should this bogus circumstance
2011 # occur, it would never be caught by VerifyGroup, which only acts on
2012 # nodes/instances reachable from existing node groups.
2014 dangling_nodes = set(node.name for node in self.all_node_info.values()
2015 if node.group not in self.all_group_info)
2017 dangling_instances = {}
2018 no_node_instances = []
2020 for inst in self.all_inst_info.values():
2021 if inst.primary_node in dangling_nodes:
2022 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2023 elif inst.primary_node not in self.all_node_info:
2024 no_node_instances.append(inst.name)
2029 utils.CommaJoin(dangling_instances.get(node.name,
2031 for node in dangling_nodes]
2033 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2035 "the following nodes (and their instances) belong to a non"
2036 " existing group: %s", utils.CommaJoin(pretty_dangling))
2038 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2040 "the following instances have a non-existing primary-node:"
2041 " %s", utils.CommaJoin(no_node_instances))
2046 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2047 """Verifies the status of a node group.
2050 HPATH = "cluster-verify"
2051 HTYPE = constants.HTYPE_CLUSTER
2054 _HOOKS_INDENT_RE = re.compile("^", re.M)
2056 class NodeImage(object):
2057 """A class representing the logical and physical status of a node.
2060 @ivar name: the node name to which this object refers
2061 @ivar volumes: a structure as returned from
2062 L{ganeti.backend.GetVolumeList} (runtime)
2063 @ivar instances: a list of running instances (runtime)
2064 @ivar pinst: list of configured primary instances (config)
2065 @ivar sinst: list of configured secondary instances (config)
2066 @ivar sbp: dictionary of {primary-node: list of instances} for all
2067 instances for which this node is secondary (config)
2068 @ivar mfree: free memory, as reported by hypervisor (runtime)
2069 @ivar dfree: free disk, as reported by the node (runtime)
2070 @ivar offline: the offline status (config)
2071 @type rpc_fail: boolean
2072 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2073 not whether the individual keys were correct) (runtime)
2074 @type lvm_fail: boolean
2075 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2076 @type hyp_fail: boolean
2077 @ivar hyp_fail: whether the RPC call didn't return the instance list
2078 @type ghost: boolean
2079 @ivar ghost: whether this is a known node or not (config)
2080 @type os_fail: boolean
2081 @ivar os_fail: whether the RPC call didn't return valid OS data
2083 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2084 @type vm_capable: boolean
2085 @ivar vm_capable: whether the node can host instances
2088 def __init__(self, offline=False, name=None, vm_capable=True):
2097 self.offline = offline
2098 self.vm_capable = vm_capable
2099 self.rpc_fail = False
2100 self.lvm_fail = False
2101 self.hyp_fail = False
2103 self.os_fail = False
2106 def ExpandNames(self):
2107 # This raises errors.OpPrereqError on its own:
2108 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2110 # Get instances in node group; this is unsafe and needs verification later
2112 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2114 self.needed_locks = {
2115 locking.LEVEL_INSTANCE: inst_names,
2116 locking.LEVEL_NODEGROUP: [self.group_uuid],
2117 locking.LEVEL_NODE: [],
2120 self.share_locks = _ShareAll()
2122 def DeclareLocks(self, level):
2123 if level == locking.LEVEL_NODE:
2124 # Get members of node group; this is unsafe and needs verification later
2125 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2127 all_inst_info = self.cfg.GetAllInstancesInfo()
2129 # In Exec(), we warn about mirrored instances that have primary and
2130 # secondary living in separate node groups. To fully verify that
2131 # volumes for these instances are healthy, we will need to do an
2132 # extra call to their secondaries. We ensure here those nodes will be locked.
2134 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2135 # Important: access only the instances whose lock is owned
2136 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2137 nodes.update(all_inst_info[inst].secondary_nodes)
2139 self.needed_locks[locking.LEVEL_NODE] = nodes
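# Descriptive note (added): the nodes gathered above are the secondaries of
# mirrored instances whose primary lives in this group but whose secondary
# lives elsewhere; e.g. if all_inst_info["inst1"].secondary_nodes (a
# hypothetical instance) contains a node of another group, that node is
# still added to needed_locks[locking.LEVEL_NODE] so its volumes can be
# queried during Exec().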
2141 def CheckPrereq(self):
2142 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2143 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2145 group_nodes = set(self.group_info.members)
2147 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2150 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2152 unlocked_instances = \
2153 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2156 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2157 utils.CommaJoin(unlocked_nodes),
2160 if unlocked_instances:
2161 raise errors.OpPrereqError("Missing lock for instances: %s" %
2162 utils.CommaJoin(unlocked_instances),
2165 self.all_node_info = self.cfg.GetAllNodesInfo()
2166 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2168 self.my_node_names = utils.NiceSort(group_nodes)
2169 self.my_inst_names = utils.NiceSort(group_instances)
2171 self.my_node_info = dict((name, self.all_node_info[name])
2172 for name in self.my_node_names)
2174 self.my_inst_info = dict((name, self.all_inst_info[name])
2175 for name in self.my_inst_names)
2177 # We detect here the nodes that will need the extra RPC calls for verifying
2178 # split LV volumes; they should be locked.
2179 extra_lv_nodes = set()
2181 for inst in self.my_inst_info.values():
2182 if inst.disk_template in constants.DTS_INT_MIRROR:
2183 for nname in inst.all_nodes:
2184 if self.all_node_info[nname].group != self.group_uuid:
2185 extra_lv_nodes.add(nname)
2187 unlocked_lv_nodes = \
2188 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2190 if unlocked_lv_nodes:
2191 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2192 utils.CommaJoin(unlocked_lv_nodes),
2194 self.extra_lv_nodes = list(extra_lv_nodes)
2196 def _VerifyNode(self, ninfo, nresult):
2197 """Perform some basic validation on data returned from a node.
2199 - check the result data structure is well formed and has all the required fields
2201 - check ganeti version
2203 @type ninfo: L{objects.Node}
2204 @param ninfo: the node to check
2205 @param nresult: the results from the node
2207 @return: whether overall this call was successful (and we can expect
2208 reasonable values in the response)
2212 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2214 # main result, nresult should be a non-empty dict
2215 test = not nresult or not isinstance(nresult, dict)
2216 _ErrorIf(test, constants.CV_ENODERPC, node,
2217 "unable to verify node: no data returned")
2221 # compares ganeti version
2222 local_version = constants.PROTOCOL_VERSION
2223 remote_version = nresult.get("version", None)
2224 test = not (remote_version and
2225 isinstance(remote_version, (list, tuple)) and
2226 len(remote_version) == 2)
2227 _ErrorIf(test, constants.CV_ENODERPC, node,
2228 "connection to node returned invalid data")
2232 test = local_version != remote_version[0]
2233 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2234 "incompatible protocol versions: master %s,"
2235 " node %s", local_version, remote_version[0])
2239 # node seems compatible, we can actually try to look into its results
2241 # full package version
2242 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2243 constants.CV_ENODEVERSION, node,
2244 "software version mismatch: master %s, node %s",
2245 constants.RELEASE_VERSION, remote_version[1],
2246 code=self.ETYPE_WARNING)
2248 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2249 if ninfo.vm_capable and isinstance(hyp_result, dict):
2250 for hv_name, hv_result in hyp_result.iteritems():
2251 test = hv_result is not None
2252 _ErrorIf(test, constants.CV_ENODEHV, node,
2253 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2255 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2256 if ninfo.vm_capable and isinstance(hvp_result, list):
2257 for item, hv_name, hv_result in hvp_result:
2258 _ErrorIf(True, constants.CV_ENODEHV, node,
2259 "hypervisor %s parameter verify failure (source %s): %s",
2260 hv_name, item, hv_result)
2262 test = nresult.get(constants.NV_NODESETUP,
2263 ["Missing NODESETUP results"])
2264 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2269 def _VerifyNodeTime(self, ninfo, nresult,
2270 nvinfo_starttime, nvinfo_endtime):
2271 """Check the node time.
2273 @type ninfo: L{objects.Node}
2274 @param ninfo: the node to check
2275 @param nresult: the remote results for the node
2276 @param nvinfo_starttime: the start time of the RPC call
2277 @param nvinfo_endtime: the end time of the RPC call
2281 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2283 ntime = nresult.get(constants.NV_TIME, None)
2285 ntime_merged = utils.MergeTime(ntime)
2286 except (ValueError, TypeError):
2287 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2290 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2291 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2292 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2293 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2297 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2298 "Node time diverges by at least %s from master node time",
2301 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2302 """Check the node LVM results.
2304 @type ninfo: L{objects.Node}
2305 @param ninfo: the node to check
2306 @param nresult: the remote results for the node
2307 @param vg_name: the configured VG name
2314 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2316 # checks vg existence and size > 20G
2317 vglist = nresult.get(constants.NV_VGLIST, None)
2319 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2321 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2322 constants.MIN_VG_SIZE)
2323 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2326 pvlist = nresult.get(constants.NV_PVLIST, None)
2327 test = pvlist is None
2328 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2330 # check that ':' is not present in PV names, since it's a
2331 # special character for lvcreate (denotes the range of PEs to allocate on)
2333 for _, pvname, owner_vg in pvlist:
2334 test = ":" in pvname
2335 _ErrorIf(test, constants.CV_ENODELVM, node,
2336 "Invalid character ':' in PV '%s' of VG '%s'",
2339 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2340 """Check the node bridges.
2342 @type ninfo: L{objects.Node}
2343 @param ninfo: the node to check
2344 @param nresult: the remote results for the node
2345 @param bridges: the expected list of bridges
2352 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2354 missing = nresult.get(constants.NV_BRIDGES, None)
2355 test = not isinstance(missing, list)
2356 _ErrorIf(test, constants.CV_ENODENET, node,
2357 "did not return valid bridge information")
2359 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2360 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2362 def _VerifyNodeUserScripts(self, ninfo, nresult):
2363 """Check the results of user script presence and executability on the node.
2365 @type ninfo: L{objects.Node}
2366 @param ninfo: the node to check
2367 @param nresult: the remote results for the node
2372 test = not constants.NV_USERSCRIPTS in nresult
2373 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2374 "did not return user scripts information")
2376 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2378 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2379 "user scripts not present or not executable: %s" %
2380 utils.CommaJoin(sorted(broken_scripts)))
2382 def _VerifyNodeNetwork(self, ninfo, nresult):
2383 """Check the node network connectivity results.
2385 @type ninfo: L{objects.Node}
2386 @param ninfo: the node to check
2387 @param nresult: the remote results for the node
2391 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2393 test = constants.NV_NODELIST not in nresult
2394 _ErrorIf(test, constants.CV_ENODESSH, node,
2395 "node hasn't returned node ssh connectivity data")
2397 if nresult[constants.NV_NODELIST]:
2398 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2399 _ErrorIf(True, constants.CV_ENODESSH, node,
2400 "ssh communication with node '%s': %s", a_node, a_msg)
2402 test = constants.NV_NODENETTEST not in nresult
2403 _ErrorIf(test, constants.CV_ENODENET, node,
2404 "node hasn't returned node tcp connectivity data")
2406 if nresult[constants.NV_NODENETTEST]:
2407 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2409 _ErrorIf(True, constants.CV_ENODENET, node,
2410 "tcp communication with node '%s': %s",
2411 anode, nresult[constants.NV_NODENETTEST][anode])
2413 test = constants.NV_MASTERIP not in nresult
2414 _ErrorIf(test, constants.CV_ENODENET, node,
2415 "node hasn't returned node master IP reachability data")
2417 if not nresult[constants.NV_MASTERIP]:
2418 if node == self.master_node:
2419 msg = "the master node cannot reach the master IP (not configured?)"
2421 msg = "cannot reach the master IP"
2422 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2424 def _VerifyInstance(self, instance, instanceconfig, node_image,
2426 """Verify an instance.
2428 This function checks to see if the required block devices are
2429 available on the instance's node.
2432 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2433 node_current = instanceconfig.primary_node
2435 node_vol_should = {}
2436 instanceconfig.MapLVsByNode(node_vol_should)
2438 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2439 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2440 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2442 for node in node_vol_should:
2443 n_img = node_image[node]
2444 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2445 # ignore missing volumes on offline or broken nodes
2447 for volume in node_vol_should[node]:
2448 test = volume not in n_img.volumes
2449 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2450 "volume %s missing on node %s", volume, node)
2452 if instanceconfig.admin_state == constants.ADMINST_UP:
2453 pri_img = node_image[node_current]
2454 test = instance not in pri_img.instances and not pri_img.offline
2455 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2456 "instance not running on its primary node %s",
2459 diskdata = [(nname, success, status, idx)
2460 for (nname, disks) in diskstatus.items()
2461 for idx, (success, status) in enumerate(disks)]
2463 for nname, success, bdev_status, idx in diskdata:
2464 # the 'ghost node' construction in Exec() ensures that we have a
2466 snode = node_image[nname]
2467 bad_snode = snode.ghost or snode.offline
2468 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2469 not success and not bad_snode,
2470 constants.CV_EINSTANCEFAULTYDISK, instance,
2471 "couldn't retrieve status for disk/%s on %s: %s",
2472 idx, nname, bdev_status)
2473 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2474 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2475 constants.CV_EINSTANCEFAULTYDISK, instance,
2476 "disk/%s on %s is faulty", idx, nname)
2478 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2479 """Verify if there are any unknown volumes in the cluster.
2481 The .os, .swap and backup volumes are ignored. All other volumes are
2482 reported as unknown.
2484 @type reserved: L{ganeti.utils.FieldSet}
2485 @param reserved: a FieldSet of reserved volume names
2488 for node, n_img in node_image.items():
2489 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2490 self.all_node_info[node].group != self.group_uuid):
2491 # skip non-healthy nodes
2493 for volume in n_img.volumes:
2494 test = ((node not in node_vol_should or
2495 volume not in node_vol_should[node]) and
2496 not reserved.Matches(volume))
2497 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2498 "volume %s is unknown", volume)
2500 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2501 """Verify N+1 Memory Resilience.
2503 Check that if one single node dies we can still start all the
2504 instances it was primary for.
2507 cluster_info = self.cfg.GetClusterInfo()
2508 for node, n_img in node_image.items():
2509 # This code checks that every node which is now listed as
2510 # secondary has enough memory to host all instances it is
2511 # supposed to, should a single other node in the cluster fail.
2512 # FIXME: not ready for failover to an arbitrary node
2513 # FIXME: does not support file-backed instances
2514 # WARNING: we currently take into account down instances as well
2515 # as up ones, considering that even if they're down someone
2516 # might want to start them even in the event of a node failure.
2517 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2518 # we're skipping nodes marked offline and nodes in other groups from
2519 # the N+1 warning, since most likely we don't have good memory
2520 # information from them; we already list instances living on such
2521 # nodes, and that's enough warning
2523 #TODO(dynmem): also consider ballooning out other instances
2524 for prinode, instances in n_img.sbp.items():
2526 for instance in instances:
2527 bep = cluster_info.FillBE(instance_cfg[instance])
2528 if bep[constants.BE_AUTO_BALANCE]:
2529 needed_mem += bep[constants.BE_MINMEM]
2530 test = n_img.mfree < needed_mem
2531 self._ErrorIf(test, constants.CV_ENODEN1, node,
2532 "not enough memory to accomodate instance failovers"
2533 " should node %s fail (%dMiB needed, %dMiB available)",
2534 prinode, needed_mem, n_img.mfree)
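# Worked example (hypothetical numbers): if this node is secondary for two
# auto-balanced instances whose primary is "nodeA", with BE_MINMEM of 512 and
# 1024 MiB, then needed_mem == 1536; with n_img.mfree == 1024 the check fails
# and an ENODEN1 error is reported, meaning the instances of "nodeA" could
# not all be failed over to this node.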
2537 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2538 (files_all, files_opt, files_mc, files_vm)):
2539 """Verifies file checksums collected from all nodes.
2541 @param errorif: Callback for reporting errors
2542 @param nodeinfo: List of L{objects.Node} objects
2543 @param master_node: Name of master node
2544 @param all_nvinfo: RPC results
2547 # Define functions determining which nodes to consider for a file
2550 (files_mc, lambda node: (node.master_candidate or
2551 node.name == master_node)),
2552 (files_vm, lambda node: node.vm_capable),
2555 # Build mapping from filename to list of nodes which should have the file
2557 for (files, fn) in files2nodefn:
2559 filenodes = nodeinfo
2561 filenodes = filter(fn, nodeinfo)
2562 nodefiles.update((filename,
2563 frozenset(map(operator.attrgetter("name"), filenodes)))
2564 for filename in files)
2566 assert set(nodefiles) == (files_all | files_mc | files_vm)
2568 fileinfo = dict((filename, {}) for filename in nodefiles)
2569 ignore_nodes = set()
2571 for node in nodeinfo:
2573 ignore_nodes.add(node.name)
2576 nresult = all_nvinfo[node.name]
2578 if nresult.fail_msg or not nresult.payload:
2581 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2583 test = not (node_files and isinstance(node_files, dict))
2584 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2585 "Node did not return file checksum data")
2587 ignore_nodes.add(node.name)
2590 # Build per-checksum mapping from filename to nodes having it
2591 for (filename, checksum) in node_files.items():
2592 assert filename in nodefiles
2593 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2595 for (filename, checksums) in fileinfo.items():
2596 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2598 # Nodes having the file
2599 with_file = frozenset(node_name
2600 for nodes in fileinfo[filename].values()
2601 for node_name in nodes) - ignore_nodes
2603 expected_nodes = nodefiles[filename] - ignore_nodes
2605 # Nodes missing file
2606 missing_file = expected_nodes - with_file
2608 if filename in files_opt:
2610 errorif(missing_file and missing_file != expected_nodes,
2611 constants.CV_ECLUSTERFILECHECK, None,
2612 "File %s is optional, but it must exist on all or no"
2613 " nodes (not found on %s)",
2614 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2616 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2617 "File %s is missing from node(s) %s", filename,
2618 utils.CommaJoin(utils.NiceSort(missing_file)))
2620 # Warn if a node has a file it shouldn't
2621 unexpected = with_file - expected_nodes
2623 constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s should not exist on node(s) %s",
2625 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2627 # See if there are multiple versions of the file
2628 test = len(checksums) > 1
2630 variants = ["variant %s on %s" %
2631 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2632 for (idx, (checksum, nodes)) in
2633 enumerate(sorted(checksums.items()))]
2637 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2638 "File %s found with %s different checksums (%s)",
2639 filename, len(checksums), "; ".join(variants))
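# Illustrative note on the data shapes used above (hypothetical file name and
# checksums):
#   nodefiles == {"/example/file": frozenset(["node1", "node2"]), ...}
#   fileinfo["/example/file"] == {"0123abcdef...": set(["node1"]),
#                                 "89efcdab01...": set(["node2"])}
# Two checksums for the same non-optional file would be reported as
# "File /example/file found with 2 different checksums (variant 1 on node1;
# variant 2 on node2)".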
2641 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2643 """Verifies the node DRBD status.
2645 @type ninfo: L{objects.Node}
2646 @param ninfo: the node to check
2647 @param nresult: the remote results for the node
2648 @param instanceinfo: the dict of instances
2649 @param drbd_helper: the configured DRBD usermode helper
2650 @param drbd_map: the DRBD map as returned by
2651 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2655 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2658 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2659 test = (helper_result is None)
2660 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2661 "no drbd usermode helper returned")
2663 status, payload = helper_result
2665 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2666 "drbd usermode helper check unsuccessful: %s", payload)
2667 test = status and (payload != drbd_helper)
2668 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2669 "wrong drbd usermode helper: %s", payload)
2671 # compute the DRBD minors
2673 for minor, instance in drbd_map[node].items():
2674 test = instance not in instanceinfo
2675 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2676 "ghost instance '%s' in temporary DRBD map", instance)
2677 # ghost instance should not be running, but otherwise we
2678 # don't give double warnings (both ghost instance and
2679 # unallocated minor in use)
2681 node_drbd[minor] = (instance, False)
2683 instance = instanceinfo[instance]
2684 node_drbd[minor] = (instance.name,
2685 instance.admin_state == constants.ADMINST_UP)
2687 # and now check them
2688 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2689 test = not isinstance(used_minors, (tuple, list))
2690 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2691 "cannot parse drbd status file: %s", str(used_minors))
2693 # we cannot check drbd status
2696 for minor, (iname, must_exist) in node_drbd.items():
2697 test = minor not in used_minors and must_exist
2698 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2699 "drbd minor %d of instance %s is not active", minor, iname)
2700 for minor in used_minors:
2701 test = minor not in node_drbd
2702 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2703 "unallocated drbd minor %d is in use", minor)
2705 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2706 """Builds the node OS structures.
2708 @type ninfo: L{objects.Node}
2709 @param ninfo: the node to check
2710 @param nresult: the remote results for the node
2711 @param nimg: the node image object
2715 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2717 remote_os = nresult.get(constants.NV_OSLIST, None)
2718 test = (not isinstance(remote_os, list) or
2719 not compat.all(isinstance(v, list) and len(v) == 7
2720 for v in remote_os))
2722 _ErrorIf(test, constants.CV_ENODEOS, node,
2723 "node hasn't returned valid OS data")
2732 for (name, os_path, status, diagnose,
2733 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2735 if name not in os_dict:
2738 # parameters is a list of lists instead of list of tuples due to
2739 # JSON lacking a real tuple type, fix it:
2740 parameters = [tuple(v) for v in parameters]
2741 os_dict[name].append((os_path, status, diagnose,
2742 set(variants), set(parameters), set(api_ver)))
2744 nimg.oslist = os_dict
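# Illustrative note (hypothetical OS name and path): a single valid
# "debootstrap" entry would leave
#   nimg.oslist == {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                                    set(["default"]), set(), set([20]))]}
# i.e. one (path, status, diagnose, variants, parameters, api_versions) tuple
# per OS name; more than one tuple means the OS is defined in several search
# paths and only the first one is effective.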
2746 def _VerifyNodeOS(self, ninfo, nimg, base):
2747 """Verifies the node OS list.
2749 @type ninfo: L{objects.Node}
2750 @param ninfo: the node to check
2751 @param nimg: the node image object
2752 @param base: the 'template' node we match against (e.g. from the master)
2756 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2758 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2760 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2761 for os_name, os_data in nimg.oslist.items():
2762 assert os_data, "Empty OS status for OS %s?!" % os_name
2763 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2764 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2765 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2766 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2767 "OS '%s' has multiple entries (first one shadows the rest): %s",
2768 os_name, utils.CommaJoin([v[0] for v in os_data]))
2769 # comparisons with the 'base' image
2770 test = os_name not in base.oslist
2771 _ErrorIf(test, constants.CV_ENODEOS, node,
2772 "Extra OS %s not present on reference node (%s)",
2776 assert base.oslist[os_name], "Base node has empty OS status?"
2777 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2779 # base OS is invalid, skipping
2781 for kind, a, b in [("API version", f_api, b_api),
2782 ("variants list", f_var, b_var),
2783 ("parameters", beautify_params(f_param),
2784 beautify_params(b_param))]:
2785 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2786 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2787 kind, os_name, base.name,
2788 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2790 # check any missing OSes
2791 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2792 _ErrorIf(missing, constants.CV_ENODEOS, node,
2793 "OSes present on reference node %s but missing on this node: %s",
2794 base.name, utils.CommaJoin(missing))
2796 def _VerifyOob(self, ninfo, nresult):
2797 """Verifies out of band functionality of a node.
2799 @type ninfo: L{objects.Node}
2800 @param ninfo: the node to check
2801 @param nresult: the remote results for the node
2805 # We just have to verify the paths on master and/or master candidates
2806 # as the oob helper is invoked on the master
2807 if ((ninfo.master_candidate or ninfo.master_capable) and
2808 constants.NV_OOB_PATHS in nresult):
2809 for path_result in nresult[constants.NV_OOB_PATHS]:
2810 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2812 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2813 """Verifies and updates the node volume data.
2815 This function will update a L{NodeImage}'s internal structures
2816 with data from the remote call.
2818 @type ninfo: L{objects.Node}
2819 @param ninfo: the node to check
2820 @param nresult: the remote results for the node
2821 @param nimg: the node image object
2822 @param vg_name: the configured VG name
2826 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2828 nimg.lvm_fail = True
2829 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2832 elif isinstance(lvdata, basestring):
2833 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2834 utils.SafeEncode(lvdata))
2835 elif not isinstance(lvdata, dict):
2836 _ErrorIf(True, constants.CV_ENODELVM, node,
2837 "rpc call to node failed (lvlist)")
2839 nimg.volumes = lvdata
2840 nimg.lvm_fail = False
2842 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2843 """Verifies and updates the node instance list.
2845 If the listing was successful, this updates the node's instance
2846 list; otherwise, it marks the RPC call as failed for the instance list.
2849 @type ninfo: L{objects.Node}
2850 @param ninfo: the node to check
2851 @param nresult: the remote results for the node
2852 @param nimg: the node image object
2855 idata = nresult.get(constants.NV_INSTANCELIST, None)
2856 test = not isinstance(idata, list)
2857 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2858 "rpc call to node failed (instancelist): %s",
2859 utils.SafeEncode(str(idata)))
2861 nimg.hyp_fail = True
2863 nimg.instances = idata
2865 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2866 """Verifies and computes a node information map
2868 @type ninfo: L{objects.Node}
2869 @param ninfo: the node to check
2870 @param nresult: the remote results for the node
2871 @param nimg: the node image object
2872 @param vg_name: the configured VG name
2876 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2878 # try to read free memory (from the hypervisor)
2879 hv_info = nresult.get(constants.NV_HVINFO, None)
2880 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2881 _ErrorIf(test, constants.CV_ENODEHV, node,
2882 "rpc call to node failed (hvinfo)")
2885 nimg.mfree = int(hv_info["memory_free"])
2886 except (ValueError, TypeError):
2887 _ErrorIf(True, constants.CV_ENODERPC, node,
2888 "node returned invalid nodeinfo, check hypervisor")
2890 # FIXME: devise a free space model for file based instances as well
2891 if vg_name is not None:
2892 test = (constants.NV_VGLIST not in nresult or
2893 vg_name not in nresult[constants.NV_VGLIST])
2894 _ErrorIf(test, constants.CV_ENODELVM, node,
2895 "node didn't return data for the volume group '%s'"
2896 " - it is either missing or broken", vg_name)
2899 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2900 except (ValueError, TypeError):
2901 _ErrorIf(True, constants.CV_ENODERPC, node,
2902 "node returned invalid LVM info, check LVM status")
2904 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2905 """Gets per-disk status information for all instances.
2907 @type nodelist: list of strings
2908 @param nodelist: Node names
2909 @type node_image: dict of (name, L{objects.Node})
2910 @param node_image: Node objects
2911 @type instanceinfo: dict of (name, L{objects.Instance})
2912 @param instanceinfo: Instance objects
2913 @rtype: {instance: {node: [(success, payload)]}}
2914 @return: a dictionary of per-instance dictionaries with nodes as
2915 keys and disk information as values; the disk information is a
2916 list of tuples (success, payload)
2919 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2922 node_disks_devonly = {}
2923 diskless_instances = set()
2924 diskless = constants.DT_DISKLESS
2926 for nname in nodelist:
2927 node_instances = list(itertools.chain(node_image[nname].pinst,
2928 node_image[nname].sinst))
2929 diskless_instances.update(inst for inst in node_instances
2930 if instanceinfo[inst].disk_template == diskless)
2931 disks = [(inst, disk)
2932 for inst in node_instances
2933 for disk in instanceinfo[inst].disks]
2936 # No need to collect data
2939 node_disks[nname] = disks
2941 # _AnnotateDiskParams already makes copies of the disks
2943 for (inst, dev) in disks:
2944 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2945 self.cfg.SetDiskID(anno_disk, nname)
2946 devonly.append(anno_disk)
2948 node_disks_devonly[nname] = devonly
2950 assert len(node_disks) == len(node_disks_devonly)
2952 # Collect data from all nodes with disks
2953 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2956 assert len(result) == len(node_disks)
2960 for (nname, nres) in result.items():
2961 disks = node_disks[nname]
2964 # No data from this node
2965 data = len(disks) * [(False, "node offline")]
2968 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2969 "while getting disk information: %s", msg)
2971 # No data from this node
2972 data = len(disks) * [(False, msg)]
2975 for idx, i in enumerate(nres.payload):
2976 if isinstance(i, (tuple, list)) and len(i) == 2:
2979 logging.warning("Invalid result from node %s, entry %d: %s",
2981 data.append((False, "Invalid result from the remote node"))
2983 for ((inst, _), status) in zip(disks, data):
2984 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2986 # Add empty entries for diskless instances.
2987 for inst in diskless_instances:
2988 assert inst not in instdisk
2991 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2992 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2993 compat.all(isinstance(s, (tuple, list)) and
2994 len(s) == 2 for s in statuses)
2995 for inst, nnames in instdisk.items()
2996 for nname, statuses in nnames.items())
2997 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3002 def _SshNodeSelector(group_uuid, all_nodes):
3003 """Create endless iterators for all potential SSH check hosts.
3006 nodes = [node for node in all_nodes
3007 if (node.group != group_uuid and
3009 keyfunc = operator.attrgetter("group")
3011 return map(itertools.cycle,
3012 [sorted(map(operator.attrgetter("name"), names))
3013 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3017 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3018 """Choose which nodes should talk to which other nodes.
3020 We will make nodes contact all nodes in their group, and one node from every other group.
3023 @warning: This algorithm has a known issue if one node group is much
3024 smaller than others (e.g. just one node). In such a case all other
3025 nodes will talk to the single node.
3028 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3029 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3031 return (online_nodes,
3032 dict((name, sorted([i.next() for i in sel]))
3033 for name in online_nodes))
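# Illustrative note (hypothetical node names): for a cluster with groups g1
# (being verified), g2 and g3, the second element of the returned tuple maps
# every online g1 node to one node of each foreign group, e.g.
#   {"n1.g1": ["n1.g2", "n1.g3"], "n2.g1": ["n2.g2", "n2.g3"], ...}
# with the per-group iterators cycling so that the SSH checks are spread over
# the foreign groups' members.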
3035 def BuildHooksEnv(self):
3038 Cluster-Verify hooks are run only in the post phase; their failure is
3039 logged in the verify output and makes the verification fail.
3043 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3046 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3047 for node in self.my_node_info.values())
3051 def BuildHooksNodes(self):
3052 """Build hooks nodes.
3055 return ([], self.my_node_names)
3057 def Exec(self, feedback_fn):
3058 """Verify integrity of the node group, performing various test on nodes.
3061 # This method has too many local variables. pylint: disable=R0914
3062 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3064 if not self.my_node_names:
3066 feedback_fn("* Empty node group, skipping verification")
3070 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3071 verbose = self.op.verbose
3072 self._feedback_fn = feedback_fn
3074 vg_name = self.cfg.GetVGName()
3075 drbd_helper = self.cfg.GetDRBDHelper()
3076 cluster = self.cfg.GetClusterInfo()
3077 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3078 hypervisors = cluster.enabled_hypervisors
3079 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3081 i_non_redundant = [] # Non redundant instances
3082 i_non_a_balanced = [] # Non auto-balanced instances
3083 i_offline = 0 # Count of offline instances
3084 n_offline = 0 # Count of offline nodes
3085 n_drained = 0 # Count of nodes being drained
3086 node_vol_should = {}
3088 # FIXME: verify OS list
3091 filemap = _ComputeAncillaryFiles(cluster, False)
3093 # do local checksums
3094 master_node = self.master_node = self.cfg.GetMasterNode()
3095 master_ip = self.cfg.GetMasterIP()
3097 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3100 if self.cfg.GetUseExternalMipScript():
3101 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3103 node_verify_param = {
3104 constants.NV_FILELIST:
3105 utils.UniqueSequence(filename
3106 for files in filemap
3107 for filename in files),
3108 constants.NV_NODELIST:
3109 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3110 self.all_node_info.values()),
3111 constants.NV_HYPERVISOR: hypervisors,
3112 constants.NV_HVPARAMS:
3113 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3114 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3115 for node in node_data_list
3116 if not node.offline],
3117 constants.NV_INSTANCELIST: hypervisors,
3118 constants.NV_VERSION: None,
3119 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3120 constants.NV_NODESETUP: None,
3121 constants.NV_TIME: None,
3122 constants.NV_MASTERIP: (master_node, master_ip),
3123 constants.NV_OSLIST: None,
3124 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3125 constants.NV_USERSCRIPTS: user_scripts,
3128 if vg_name is not None:
3129 node_verify_param[constants.NV_VGLIST] = None
3130 node_verify_param[constants.NV_LVLIST] = vg_name
3131 node_verify_param[constants.NV_PVLIST] = [vg_name]
3134 node_verify_param[constants.NV_DRBDLIST] = None
3135 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3138 # FIXME: this needs to be changed per node-group, not cluster-wide
3140 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3141 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3142 bridges.add(default_nicpp[constants.NIC_LINK])
3143 for instance in self.my_inst_info.values():
3144 for nic in instance.nics:
3145 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3146 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3147 bridges.add(full_nic[constants.NIC_LINK])
3150 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3152 # Build our expected cluster state
3153 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3155 vm_capable=node.vm_capable))
3156 for node in node_data_list)
3160 for node in self.all_node_info.values():
3161 path = _SupportsOob(self.cfg, node)
3162 if path and path not in oob_paths:
3163 oob_paths.append(path)
3166 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3168 for instance in self.my_inst_names:
3169 inst_config = self.my_inst_info[instance]
3170 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3173 for nname in inst_config.all_nodes:
3174 if nname not in node_image:
3175 gnode = self.NodeImage(name=nname)
3176 gnode.ghost = (nname not in self.all_node_info)
3177 node_image[nname] = gnode
3179 inst_config.MapLVsByNode(node_vol_should)
3181 pnode = inst_config.primary_node
3182 node_image[pnode].pinst.append(instance)
3184 for snode in inst_config.secondary_nodes:
3185 nimg = node_image[snode]
3186 nimg.sinst.append(instance)
3187 if pnode not in nimg.sbp:
3188 nimg.sbp[pnode] = []
3189 nimg.sbp[pnode].append(instance)
3191 # At this point, we have the in-memory data structures complete,
3192 # except for the runtime information, which we'll gather next
3194 # Due to the way our RPC system works, exact response times cannot be
3195 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3196 # time before and after executing the request, we can at least have a time window.
3198 nvinfo_starttime = time.time()
3199 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3201 self.cfg.GetClusterName())
3202 nvinfo_endtime = time.time()
3204 if self.extra_lv_nodes and vg_name is not None:
3206 self.rpc.call_node_verify(self.extra_lv_nodes,
3207 {constants.NV_LVLIST: vg_name},
3208 self.cfg.GetClusterName())
3210 extra_lv_nvinfo = {}
3212 all_drbd_map = self.cfg.ComputeDRBDMap()
3214 feedback_fn("* Gathering disk information (%s nodes)" %
3215 len(self.my_node_names))
3216 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3219 feedback_fn("* Verifying configuration file consistency")
3221 # If not all nodes are being checked, we need to make sure the master node
3222 # and a non-checked vm_capable node are in the list.
3223 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3225 vf_nvinfo = all_nvinfo.copy()
3226 vf_node_info = list(self.my_node_info.values())
3227 additional_nodes = []
3228 if master_node not in self.my_node_info:
3229 additional_nodes.append(master_node)
3230 vf_node_info.append(self.all_node_info[master_node])
3231 # Add the first vm_capable node we find which is not included,
3232 # excluding the master node (which we already have)
3233 for node in absent_nodes:
3234 nodeinfo = self.all_node_info[node]
3235 if (nodeinfo.vm_capable and not nodeinfo.offline and
3236 node != master_node):
3237 additional_nodes.append(node)
3238 vf_node_info.append(self.all_node_info[node])
3240 key = constants.NV_FILELIST
3241 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3242 {key: node_verify_param[key]},
3243 self.cfg.GetClusterName()))
3245 vf_nvinfo = all_nvinfo
3246 vf_node_info = self.my_node_info.values()
3248 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3250 feedback_fn("* Verifying node status")
3254 for node_i in node_data_list:
3256 nimg = node_image[node]
3260 feedback_fn("* Skipping offline node %s" % (node,))
3264 if node == master_node:
3266 elif node_i.master_candidate:
3267 ntype = "master candidate"
3268 elif node_i.drained:
3274 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3276 msg = all_nvinfo[node].fail_msg
3277 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3280 nimg.rpc_fail = True
3283 nresult = all_nvinfo[node].payload
3285 nimg.call_ok = self._VerifyNode(node_i, nresult)
3286 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3287 self._VerifyNodeNetwork(node_i, nresult)
3288 self._VerifyNodeUserScripts(node_i, nresult)
3289 self._VerifyOob(node_i, nresult)
3292 self._VerifyNodeLVM(node_i, nresult, vg_name)
3293 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3296 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3297 self._UpdateNodeInstances(node_i, nresult, nimg)
3298 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3299 self._UpdateNodeOS(node_i, nresult, nimg)
3301 if not nimg.os_fail:
3302 if refos_img is None:
3304 self._VerifyNodeOS(node_i, nimg, refos_img)
3305 self._VerifyNodeBridges(node_i, nresult, bridges)
3307 # Check whether all running instances are primary for the node. (This
3308 # can no longer be done from _VerifyInstance below, since some of the
3309 # wrong instances could be from other node groups.)
3310 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3312 for inst in non_primary_inst:
3313 test = inst in self.all_inst_info
3314 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3315 "instance should not run on node %s", node_i.name)
3316 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3317 "node is running unknown instance %s", inst)
3319 for node, result in extra_lv_nvinfo.items():
3320 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3321 node_image[node], vg_name)
3323 feedback_fn("* Verifying instance status")
3324 for instance in self.my_inst_names:
3326 feedback_fn("* Verifying instance %s" % instance)
3327 inst_config = self.my_inst_info[instance]
3328 self._VerifyInstance(instance, inst_config, node_image,
3330 inst_nodes_offline = []
3332 pnode = inst_config.primary_node
3333 pnode_img = node_image[pnode]
3334 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3335 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3336 " primary node failed", instance)
3338 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3340 constants.CV_EINSTANCEBADNODE, instance,
3341 "instance is marked as running and lives on offline node %s",
3342 inst_config.primary_node)
3344 # If the instance is non-redundant we cannot survive losing its primary
3345 # node, so we are not N+1 compliant. On the other hand we have no disk
3346 # templates with more than one secondary, so that situation is not well supported either.
3348 # FIXME: does not support file-backed instances
3349 if not inst_config.secondary_nodes:
3350 i_non_redundant.append(instance)
3352 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3353 constants.CV_EINSTANCELAYOUT,
3354 instance, "instance has multiple secondary nodes: %s",
3355 utils.CommaJoin(inst_config.secondary_nodes),
3356 code=self.ETYPE_WARNING)
3358 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3359 pnode = inst_config.primary_node
3360 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3361 instance_groups = {}
3363 for node in instance_nodes:
3364 instance_groups.setdefault(self.all_node_info[node].group,
3368 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3369 # Sort so that we always list the primary node first.
3370 for group, nodes in sorted(instance_groups.items(),
3371 key=lambda (_, nodes): pnode in nodes,
3374 self._ErrorIf(len(instance_groups) > 1,
3375 constants.CV_EINSTANCESPLITGROUPS,
3376 instance, "instance has primary and secondary nodes in"
3377 " different groups: %s", utils.CommaJoin(pretty_list),
3378 code=self.ETYPE_WARNING)
3380 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3381 i_non_a_balanced.append(instance)
3383 for snode in inst_config.secondary_nodes:
3384 s_img = node_image[snode]
3385 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3386 snode, "instance %s, connection to secondary node failed",
3390 inst_nodes_offline.append(snode)
3392 # warn that the instance lives on offline nodes
3393 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3394 "instance has offline secondary node(s) %s",
3395 utils.CommaJoin(inst_nodes_offline))
3396 # ... or ghost/non-vm_capable nodes
3397 for node in inst_config.all_nodes:
3398 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3399 instance, "instance lives on ghost node %s", node)
3400 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3401 instance, "instance lives on non-vm_capable node %s", node)
3403 feedback_fn("* Verifying orphan volumes")
3404 reserved = utils.FieldSet(*cluster.reserved_lvs)
3406 # We will get spurious "unknown volume" warnings if any node of this group
3407 # is secondary for an instance whose primary is in another group. To avoid
3408 # them, we find these instances and add their volumes to node_vol_should.
3409 for inst in self.all_inst_info.values():
3410 for secondary in inst.secondary_nodes:
3411 if (secondary in self.my_node_info
3412 and inst.name not in self.my_inst_info):
3413 inst.MapLVsByNode(node_vol_should)
3416 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3418 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3419 feedback_fn("* Verifying N+1 Memory redundancy")
3420 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3422 feedback_fn("* Other Notes")
3424 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3425 % len(i_non_redundant))
3427 if i_non_a_balanced:
3428 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3429 % len(i_non_a_balanced))
3432 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3435 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3438 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3442 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3443 """Analyze the post-hooks' result
3445 This method analyses the hook result, handles it, and sends some
3446 nicely-formatted feedback back to the user.
3448 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3449 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3450 @param hooks_results: the results of the multi-node hooks rpc call
3451 @param feedback_fn: function used to send feedback back to the caller
3452 @param lu_result: previous Exec result
3453 @return: the new Exec result, based on the previous result
3457 # We only really run POST phase hooks, only for non-empty groups,
3458 # and are only interested in their results
3459 if not self.my_node_names:
3462 elif phase == constants.HOOKS_PHASE_POST:
3463 # Used to change hooks' output to proper indentation
3464 feedback_fn("* Hooks Results")
3465 assert hooks_results, "invalid result from hooks"
3467 for node_name in hooks_results:
3468 res = hooks_results[node_name]
3470 test = msg and not res.offline
3471 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3472 "Communication failure in hooks execution: %s", msg)
3473 if res.offline or msg:
3474 # No need to investigate payload if node is offline or gave an error.
3477 for script, hkr, output in res.payload:
3478 test = hkr == constants.HKR_FAIL
3479 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3480 "Script %s failed, output:", script)
3482 output = self._HOOKS_INDENT_RE.sub(" ", output)
3483 feedback_fn("%s" % output)
3489 class LUClusterVerifyDisks(NoHooksLU):
3490 """Verifies the cluster disks status.
3495 def ExpandNames(self):
3496 self.share_locks = _ShareAll()
3497 self.needed_locks = {
3498 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3501 def Exec(self, feedback_fn):
3502 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3504 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3505 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3506 for group in group_names])
3509 class LUGroupVerifyDisks(NoHooksLU):
3510 """Verifies the status of all disks in a node group.
3515 def ExpandNames(self):
3516 # Raises errors.OpPrereqError on its own if group can't be found
3517 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3519 self.share_locks = _ShareAll()
3520 self.needed_locks = {
3521 locking.LEVEL_INSTANCE: [],
3522 locking.LEVEL_NODEGROUP: [],
3523 locking.LEVEL_NODE: [],
3526 def DeclareLocks(self, level):
3527 if level == locking.LEVEL_INSTANCE:
3528 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3530 # Lock instances optimistically, needs verification once node and group
3531 # locks have been acquired
3532 self.needed_locks[locking.LEVEL_INSTANCE] = \
3533 self.cfg.GetNodeGroupInstances(self.group_uuid)
3535 elif level == locking.LEVEL_NODEGROUP:
3536 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3538 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3539 set([self.group_uuid] +
3540 # Lock all groups used by instances optimistically; this requires
3541 # going via the node before it's locked, requiring verification
3544 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3545 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3547 elif level == locking.LEVEL_NODE:
3548 # This will only lock the nodes in the group to be verified which contain actual instances.
3550 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3551 self._LockInstancesNodes()
3553 # Lock all nodes in group to be verified
3554 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3555 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3556 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3558 def CheckPrereq(self):
3559 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3560 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3561 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3563 assert self.group_uuid in owned_groups
3565 # Check if locked instances are still correct
3566 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3568 # Get instance information
3569 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3571 # Check if node groups for locked instances are still correct
3572 _CheckInstancesNodeGroups(self.cfg, self.instances,
3573 owned_groups, owned_nodes, self.group_uuid)
3575 def Exec(self, feedback_fn):
3576 """Verify integrity of cluster disks.
3578 @rtype: tuple of three items
3579 @return: a tuple of (dict of node-to-node_error, list of instances
3580 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3585 res_instances = set()
3588 nv_dict = _MapInstanceDisksToNodes([inst
3589 for inst in self.instances.values()
3590 if inst.admin_state == constants.ADMINST_UP])
3593 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3594 set(self.cfg.GetVmCapableNodeList()))
3596 node_lvs = self.rpc.call_lv_list(nodes, [])
3598 for (node, node_res) in node_lvs.items():
3599 if node_res.offline:
3602 msg = node_res.fail_msg
3604 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3605 res_nodes[node] = msg
3608 for lv_name, (_, _, lv_online) in node_res.payload.items():
3609 inst = nv_dict.pop((node, lv_name), None)
3610 if not (lv_online or inst is None):
3611 res_instances.add(inst)
3613 # any leftover items in nv_dict are missing LVs; group them by instance
3615 for key, inst in nv_dict.iteritems():
3616 res_missing.setdefault(inst, []).append(list(key))
3618 return (res_nodes, list(res_instances), res_missing)
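# Illustrative note (hypothetical names) on the return value documented above:
#   ({"node3": "Error while listing LVs: ..."},  # per-node enumeration errors
#    ["inst2"],                                  # instances with offline LVs,
#                                                # i.e. needing activate-disks
#    {"inst5": [["node1", "xenvg/disk0"]]})      # missing (node, volume) pairs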
3621 class LUClusterRepairDiskSizes(NoHooksLU):
3622 """Verifies the cluster disks sizes.
3627 def ExpandNames(self):
3628 if self.op.instances:
3629 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3630 self.needed_locks = {
3631 locking.LEVEL_NODE_RES: [],
3632 locking.LEVEL_INSTANCE: self.wanted_names,
3634 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3636 self.wanted_names = None
3637 self.needed_locks = {
3638 locking.LEVEL_NODE_RES: locking.ALL_SET,
3639 locking.LEVEL_INSTANCE: locking.ALL_SET,
3641 self.share_locks = {
3642 locking.LEVEL_NODE_RES: 1,
3643 locking.LEVEL_INSTANCE: 0,
3646 def DeclareLocks(self, level):
3647 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3648 self._LockInstancesNodes(primary_only=True, level=level)
3650 def CheckPrereq(self):
3651 """Check prerequisites.
3653 This only checks the optional instance list against the existing names.
3656 if self.wanted_names is None:
3657 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3659 self.wanted_instances = \
3660 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3662 def _EnsureChildSizes(self, disk):
3663 """Ensure children of the disk have the needed disk size.
3665 This is valid mainly for DRBD8 and fixes an issue where the
3666 children have a smaller disk size than the parent.
3668 @param disk: an L{ganeti.objects.Disk} object
3671 if disk.dev_type == constants.LD_DRBD8:
3672 assert disk.children, "Empty children for DRBD8?"
3673 fchild = disk.children[0]
3674 mismatch = fchild.size < disk.size
3676 self.LogInfo("Child disk has size %d, parent %d, fixing",
3677 fchild.size, disk.size)
3678 fchild.size = disk.size
3680 # and we recurse on this child only, not on the metadev
3681 return self._EnsureChildSizes(fchild) or mismatch
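# Illustrative note (hypothetical sizes): for a DRBD8 disk recorded with size
# 10240 MiB whose first child is recorded with only 10112 MiB, the child's
# recorded size is bumped to 10240 and True is returned, telling the caller
# that the configuration needs to be written back; the metadata child is
# intentionally left alone.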
3685 def Exec(self, feedback_fn):
3686 """Verify the size of cluster disks.
3689 # TODO: check child disks too
3690 # TODO: check differences in size between primary/secondary nodes
3692 for instance in self.wanted_instances:
3693 pnode = instance.primary_node
3694 if pnode not in per_node_disks:
3695 per_node_disks[pnode] = []
3696 for idx, disk in enumerate(instance.disks):
3697 per_node_disks[pnode].append((instance, idx, disk))
3699 assert not (frozenset(per_node_disks.keys()) -
3700 self.owned_locks(locking.LEVEL_NODE_RES)), \
3701 "Not owning correct locks"
3702 assert not self.owned_locks(locking.LEVEL_NODE)
3705 for node, dskl in per_node_disks.items():
3706 newl = [v[2].Copy() for v in dskl]
3708 self.cfg.SetDiskID(dsk, node)
3709 result = self.rpc.call_blockdev_getsize(node, newl)
3711 self.LogWarning("Failure in blockdev_getsize call to node"
3712 " %s, ignoring", node)
3714 if len(result.payload) != len(dskl):
3715 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3716 " result.payload=%s", node, len(dskl), result.payload)
3717 self.LogWarning("Invalid result from node %s, ignoring node results",
3720 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3722 self.LogWarning("Disk %d of instance %s did not return size"
3723 " information, ignoring", idx, instance.name)
3725 if not isinstance(size, (int, long)):
3726 self.LogWarning("Disk %d of instance %s did not return valid"
3727 " size information, ignoring", idx, instance.name)
3730 if size != disk.size:
3731 self.LogInfo("Disk %d of instance %s has mismatched size,"
3732 " correcting: recorded %d, actual %d", idx,
3733 instance.name, disk.size, size)
3735 self.cfg.Update(instance, feedback_fn)
3736 changed.append((instance.name, idx, size))
3737 if self._EnsureChildSizes(disk):
3738 self.cfg.Update(instance, feedback_fn)
3739 changed.append((instance.name, idx, disk.size))
3743 class LUClusterRename(LogicalUnit):
3744 """Rename the cluster.
3747 HPATH = "cluster-rename"
3748 HTYPE = constants.HTYPE_CLUSTER
3750 def BuildHooksEnv(self):
3755 "OP_TARGET": self.cfg.GetClusterName(),
3756 "NEW_NAME": self.op.name,
3759 def BuildHooksNodes(self):
3760 """Build hooks nodes.
3763 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3765 def CheckPrereq(self):
3766 """Verify that the passed name is a valid one.
3769 hostname = netutils.GetHostname(name=self.op.name,
3770 family=self.cfg.GetPrimaryIPFamily())
3772 new_name = hostname.name
3773 self.ip = new_ip = hostname.ip
3774 old_name = self.cfg.GetClusterName()
3775 old_ip = self.cfg.GetMasterIP()
3776 if new_name == old_name and new_ip == old_ip:
3777 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3778 " cluster has changed",
3780 if new_ip != old_ip:
3781 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3782 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3783 " reachable on the network" %
3784 new_ip, errors.ECODE_NOTUNIQUE)
3786 self.op.name = new_name
3788 def Exec(self, feedback_fn):
3789 """Rename the cluster.
3792 clustername = self.op.name
3795 # shutdown the master IP
3796 master_params = self.cfg.GetMasterNetworkParameters()
3797 ems = self.cfg.GetUseExternalMipScript()
3798 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3800 result.Raise("Could not disable the master role")
3803 cluster = self.cfg.GetClusterInfo()
3804 cluster.cluster_name = clustername
3805 cluster.master_ip = new_ip
3806 self.cfg.Update(cluster, feedback_fn)
3808 # update the known hosts file
3809 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3810 node_list = self.cfg.GetOnlineNodeList()
3812 node_list.remove(master_params.name)
3815 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3817 master_params.ip = new_ip
3818 result = self.rpc.call_node_activate_master_ip(master_params.name,
3820 msg = result.fail_msg
3822 self.LogWarning("Could not re-enable the master role on"
3823 " the master, please restart manually: %s", msg)
3828 def _ValidateNetmask(cfg, netmask):
3829 """Checks if a netmask is valid.
3831 @type cfg: L{config.ConfigWriter}
3832 @param cfg: The cluster configuration
3834 @param netmask: the netmask to be verified
3835 @raise errors.OpPrereqError: if the validation fails
3838 ip_family = cfg.GetPrimaryIPFamily()
3840 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3841 except errors.ProgrammerError:
3842 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3844 if not ipcls.ValidateNetmask(netmask):
3845 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
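# Illustrative sketch (not part of the original code): _ValidateNetmask
# expects the netmask as a CIDR prefix length and checks it against the
# cluster's primary IP family.  The configuration stub below is hypothetical
# and only provides the single method the helper actually needs.
def _ExampleValidateNetmask():
  class _StubConfig(object):
    def GetPrimaryIPFamily(self):
      return netutils.IP4Address.family

  for netmask in (24, 99):
    try:
      _ValidateNetmask(_StubConfig(), netmask)
      logging.debug("Netmask /%s accepted", netmask)
    except errors.OpPrereqError, err:
      logging.debug("Netmask /%s rejected: %s", netmask, err)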
3849 class LUClusterSetParams(LogicalUnit):
3850 """Change the parameters of the cluster.
3853 HPATH = "cluster-modify"
3854 HTYPE = constants.HTYPE_CLUSTER
3857 def CheckArguments(self):
3861 if self.op.uid_pool:
3862 uidpool.CheckUidPool(self.op.uid_pool)
3864 if self.op.add_uids:
3865 uidpool.CheckUidPool(self.op.add_uids)
3867 if self.op.remove_uids:
3868 uidpool.CheckUidPool(self.op.remove_uids)
3870 if self.op.master_netmask is not None:
3871 _ValidateNetmask(self.cfg, self.op.master_netmask)
3873 if self.op.diskparams:
3874 for dt_params in self.op.diskparams.values():
3875 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3877 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3878 except errors.OpPrereqError, err:
3879 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3882 def ExpandNames(self):
3883 # FIXME: in the future maybe other cluster params won't require checking on
3884 # all nodes to be modified.
3885 self.needed_locks = {
3886 locking.LEVEL_NODE: locking.ALL_SET,
3887 locking.LEVEL_INSTANCE: locking.ALL_SET,
3888 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3890 self.share_locks = {
3891 locking.LEVEL_NODE: 1,
3892 locking.LEVEL_INSTANCE: 1,
3893 locking.LEVEL_NODEGROUP: 1,
3896 def BuildHooksEnv(self):
3901 "OP_TARGET": self.cfg.GetClusterName(),
3902 "NEW_VG_NAME": self.op.vg_name,
3905 def BuildHooksNodes(self):
3906 """Build hooks nodes.
3909 mn = self.cfg.GetMasterNode()
3912 def CheckPrereq(self):
3913 """Check prerequisites.
3915 This checks that the given parameters do not conflict and
3916 that the given volume group is valid.
3919 if self.op.vg_name is not None and not self.op.vg_name:
3920 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3921 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3922 " instances exist", errors.ECODE_INVAL)
3924 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3925 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3926 raise errors.OpPrereqError("Cannot disable drbd helper while"
3927 " drbd-based instances exist",
3930 node_list = self.owned_locks(locking.LEVEL_NODE)
3932 # if vg_name is not None, check the given volume group on all nodes
3934 vglist = self.rpc.call_vg_list(node_list)
3935 for node in node_list:
3936 msg = vglist[node].fail_msg
3938 # ignoring down node
3939 self.LogWarning("Error while gathering data on node %s"
3940 " (ignoring node): %s", node, msg)
3942 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3944 constants.MIN_VG_SIZE)
3946 raise errors.OpPrereqError("Error on node '%s': %s" %
3947 (node, vgstatus), errors.ECODE_ENVIRON)
3949 if self.op.drbd_helper:
3950 # checks given drbd helper on all nodes
3951 helpers = self.rpc.call_drbd_helper(node_list)
3952 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3954 self.LogInfo("Not checking drbd helper on offline node %s", node)
3956 msg = helpers[node].fail_msg
3958 raise errors.OpPrereqError("Error checking drbd helper on node"
3959 " '%s': %s" % (node, msg),
3960 errors.ECODE_ENVIRON)
3961 node_helper = helpers[node].payload
3962 if node_helper != self.op.drbd_helper:
3963 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3964 (node, node_helper), errors.ECODE_ENVIRON)
3966 self.cluster = cluster = self.cfg.GetClusterInfo()
3967 # validate params changes
3968 if self.op.beparams:
3969 objects.UpgradeBeParams(self.op.beparams)
3970 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3971 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3973 if self.op.ndparams:
3974 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3975 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3977 # TODO: we need a more general way to handle resetting
3978 # cluster-level parameters to default values
3979 if self.new_ndparams["oob_program"] == "":
3980 self.new_ndparams["oob_program"] = \
3981 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3983 if self.op.hv_state:
3984 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3985 self.cluster.hv_state_static)
3986 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3987 for hv, values in new_hv_state.items())
3989 if self.op.disk_state:
3990 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3991 self.cluster.disk_state_static)
3992 self.new_disk_state = \
3993 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3994 for name, values in svalues.items()))
3995 for storage, svalues in new_disk_state.items())
3998 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4001 all_instances = self.cfg.GetAllInstancesInfo().values()
4003 for group in self.cfg.GetAllNodeGroupsInfo().values():
4004 instances = frozenset([inst for inst in all_instances
4005 if compat.any(node in group.members
4006 for node in inst.all_nodes)])
4007 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4008 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4010 new_ipolicy, instances)
4012 violations.update(new)
4015 self.LogWarning("After the ipolicy change the following instances"
4016 " violate them: %s",
4017 utils.CommaJoin(utils.NiceSort(violations)))
4019 if self.op.nicparams:
4020 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4021 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4022 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4025 # check all instances for consistency
4026 for instance in self.cfg.GetAllInstancesInfo().values():
4027 for nic_idx, nic in enumerate(instance.nics):
4028 params_copy = copy.deepcopy(nic.nicparams)
4029 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4031 # check parameter syntax
4033 objects.NIC.CheckParameterSyntax(params_filled)
4034 except errors.ConfigurationError, err:
4035 nic_errors.append("Instance %s, nic/%d: %s" %
4036 (instance.name, nic_idx, err))
4038 # if we're moving instances to routed, check that they have an ip
4039 target_mode = params_filled[constants.NIC_MODE]
4040 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4041 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4042 " address" % (instance.name, nic_idx))
4044 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4045 "\n".join(nic_errors))
4047 # hypervisor list/parameters
4048 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4049 if self.op.hvparams:
4050 for hv_name, hv_dict in self.op.hvparams.items():
4051 if hv_name not in self.new_hvparams:
4052 self.new_hvparams[hv_name] = hv_dict
4054 self.new_hvparams[hv_name].update(hv_dict)
4056 # disk template parameters
4057 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4058 if self.op.diskparams:
4059 for dt_name, dt_params in self.op.diskparams.items():
4060 if dt_name not in self.new_diskparams:
4061 self.new_diskparams[dt_name] = dt_params
4063 self.new_diskparams[dt_name].update(dt_params)
4065 # os hypervisor parameters
4066 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4068 for os_name, hvs in self.op.os_hvp.items():
4069 if os_name not in self.new_os_hvp:
4070 self.new_os_hvp[os_name] = hvs
4072 for hv_name, hv_dict in hvs.items():
4073 if hv_name not in self.new_os_hvp[os_name]:
4074 self.new_os_hvp[os_name][hv_name] = hv_dict
4076 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4079 self.new_osp = objects.FillDict(cluster.osparams, {})
4080 if self.op.osparams:
4081 for os_name, osp in self.op.osparams.items():
4082 if os_name not in self.new_osp:
4083 self.new_osp[os_name] = {}
4085 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4088 if not self.new_osp[os_name]:
4089 # we removed all parameters
4090 del self.new_osp[os_name]
4092 # check the parameter validity (remote check)
4093 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4094 os_name, self.new_osp[os_name])
4096 # changes to the hypervisor list
4097 if self.op.enabled_hypervisors is not None:
4098 self.hv_list = self.op.enabled_hypervisors
4099 for hv in self.hv_list:
4100 # if the hypervisor doesn't already exist in the cluster
4101 # hvparams, we initialize it to empty, and then (in both
4102 # cases) we make sure to fill the defaults, as we might not
4103 # have a complete defaults list if the hypervisor wasn't enabled before
4105 if hv not in new_hvp:
4107 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4108 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4110 self.hv_list = cluster.enabled_hypervisors
4112 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4113 # either the enabled list has changed, or the parameters have, validate
4114 for hv_name, hv_params in self.new_hvparams.items():
4115 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4116 (self.op.enabled_hypervisors and
4117 hv_name in self.op.enabled_hypervisors)):
4118 # either this is a new hypervisor, or its parameters have changed
4119 hv_class = hypervisor.GetHypervisor(hv_name)
4120 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4121 hv_class.CheckParameterSyntax(hv_params)
4122 _CheckHVParams(self, node_list, hv_name, hv_params)
4125 # no need to check any newly-enabled hypervisors, since the
4126 # defaults have already been checked in the above code-block
4127 for os_name, os_hvp in self.new_os_hvp.items():
4128 for hv_name, hv_params in os_hvp.items():
4129 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4130 # we need to fill in the new os_hvp on top of the actual hv_p
4131 cluster_defaults = self.new_hvparams.get(hv_name, {})
4132 new_osp = objects.FillDict(cluster_defaults, hv_params)
4133 hv_class = hypervisor.GetHypervisor(hv_name)
4134 hv_class.CheckParameterSyntax(new_osp)
4135 _CheckHVParams(self, node_list, hv_name, new_osp)
4137 if self.op.default_iallocator:
4138 alloc_script = utils.FindFile(self.op.default_iallocator,
4139 constants.IALLOCATOR_SEARCH_PATH,
4141 if alloc_script is None:
4142 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4143 " specified" % self.op.default_iallocator,
4146 def Exec(self, feedback_fn):
4147 """Change the parameters of the cluster.
4150 if self.op.vg_name is not None:
4151 new_volume = self.op.vg_name
4154 if new_volume != self.cfg.GetVGName():
4155 self.cfg.SetVGName(new_volume)
4157 feedback_fn("Cluster LVM configuration already in desired"
4158 " state, not changing")
4159 if self.op.drbd_helper is not None:
4160 new_helper = self.op.drbd_helper
4163 if new_helper != self.cfg.GetDRBDHelper():
4164 self.cfg.SetDRBDHelper(new_helper)
4166 feedback_fn("Cluster DRBD helper already in desired state,"
4168 if self.op.hvparams:
4169 self.cluster.hvparams = self.new_hvparams
4171 self.cluster.os_hvp = self.new_os_hvp
4172 if self.op.enabled_hypervisors is not None:
4173 self.cluster.hvparams = self.new_hvparams
4174 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4175 if self.op.beparams:
4176 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4177 if self.op.nicparams:
4178 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4180 self.cluster.ipolicy = self.new_ipolicy
4181 if self.op.osparams:
4182 self.cluster.osparams = self.new_osp
4183 if self.op.ndparams:
4184 self.cluster.ndparams = self.new_ndparams
4185 if self.op.diskparams:
4186 self.cluster.diskparams = self.new_diskparams
4187 if self.op.hv_state:
4188 self.cluster.hv_state_static = self.new_hv_state
4189 if self.op.disk_state:
4190 self.cluster.disk_state_static = self.new_disk_state
4192 if self.op.candidate_pool_size is not None:
4193 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4194 # we need to update the pool size here, otherwise the save will fail
4195 _AdjustCandidatePool(self, [])
4197 if self.op.maintain_node_health is not None:
4198 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4199 feedback_fn("Note: CONFD was disabled at build time, node health"
4200 " maintenance is not useful (still enabling it)")
4201 self.cluster.maintain_node_health = self.op.maintain_node_health
4203 if self.op.prealloc_wipe_disks is not None:
4204 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4206 if self.op.add_uids is not None:
4207 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4209 if self.op.remove_uids is not None:
4210 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4212 if self.op.uid_pool is not None:
4213 self.cluster.uid_pool = self.op.uid_pool
4215 if self.op.default_iallocator is not None:
4216 self.cluster.default_iallocator = self.op.default_iallocator
4218 if self.op.reserved_lvs is not None:
4219 self.cluster.reserved_lvs = self.op.reserved_lvs
4221 if self.op.use_external_mip_script is not None:
4222 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4224 def helper_os(aname, mods, desc):
4226 lst = getattr(self.cluster, aname)
4227 for key, val in mods:
4228 if key == constants.DDM_ADD:
4230 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4233 elif key == constants.DDM_REMOVE:
4237 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4239 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4241 if self.op.hidden_os:
4242 helper_os("hidden_os", self.op.hidden_os, "hidden")
4244 if self.op.blacklisted_os:
4245 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4247 if self.op.master_netdev:
4248 master_params = self.cfg.GetMasterNetworkParameters()
4249 ems = self.cfg.GetUseExternalMipScript()
4250 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4251 self.cluster.master_netdev)
4252 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4254 result.Raise("Could not disable the master ip")
4255 feedback_fn("Changing master_netdev from %s to %s" %
4256 (master_params.netdev, self.op.master_netdev))
4257 self.cluster.master_netdev = self.op.master_netdev
4259 if self.op.master_netmask:
4260 master_params = self.cfg.GetMasterNetworkParameters()
4261 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4262 result = self.rpc.call_node_change_master_netmask(master_params.name,
4263 master_params.netmask,
4264 self.op.master_netmask,
4266 master_params.netdev)
4268 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4271 self.cluster.master_netmask = self.op.master_netmask
4273 self.cfg.Update(self.cluster, feedback_fn)
4275 if self.op.master_netdev:
4276 master_params = self.cfg.GetMasterNetworkParameters()
4277 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4278 self.op.master_netdev)
4279 ems = self.cfg.GetUseExternalMipScript()
4280 result = self.rpc.call_node_activate_master_ip(master_params.name,
4283 self.LogWarning("Could not re-enable the master ip on"
4284 " the master, please restart manually: %s",
4288 def _UploadHelper(lu, nodes, fname):
4289 """Helper for uploading a file and showing warnings.
4292 if os.path.exists(fname):
4293 result = lu.rpc.call_upload_file(nodes, fname)
4294 for to_node, to_result in result.items():
4295 msg = to_result.fail_msg
4297 msg = ("Copy of file %s to node %s failed: %s" %
4298 (fname, to_node, msg))
4299 lu.proc.LogWarning(msg)
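# Note: _UploadHelper intentionally does nothing when the file is absent on
# the master (optional files such as the RAPI users file are simply not
# distributed in that case) and it only warns about per-node copy failures,
# so a single unreachable node does not abort the whole redistribution.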
4302 def _ComputeAncillaryFiles(cluster, redist):
4303 """Compute files external to Ganeti which need to be consistent.
4305 @type redist: boolean
4306 @param redist: Whether to include files which need to be redistributed
4309 # Compute files for all nodes
4311 constants.SSH_KNOWN_HOSTS_FILE,
4312 constants.CONFD_HMAC_KEY,
4313 constants.CLUSTER_DOMAIN_SECRET_FILE,
4314 constants.SPICE_CERT_FILE,
4315 constants.SPICE_CACERT_FILE,
4316 constants.RAPI_USERS_FILE,
4320 files_all.update(constants.ALL_CERT_FILES)
4321 files_all.update(ssconf.SimpleStore().GetFileList())
4323 # we need to ship at least the RAPI certificate
4324 files_all.add(constants.RAPI_CERT_FILE)
4326 if cluster.modify_etc_hosts:
4327 files_all.add(constants.ETC_HOSTS)
4329 if cluster.use_external_mip_script:
4330 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4332 # Files which are optional, these must:
4333 # - be present in one other category as well
4334 # - either exist or not exist on all nodes of that category (mc, vm all)
4336 constants.RAPI_USERS_FILE,
4339 # Files which should only be on master candidates
4343 files_mc.add(constants.CLUSTER_CONF_FILE)
4345 # Files which should only be on VM-capable nodes
4346 files_vm = set(filename
4347 for hv_name in cluster.enabled_hypervisors
4348 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4350 files_opt |= set(filename
4351 for hv_name in cluster.enabled_hypervisors
4352 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4354 # Filenames in each category must be unique
4355 all_files_set = files_all | files_mc | files_vm
4356 assert (len(all_files_set) ==
4357 sum(map(len, [files_all, files_mc, files_vm]))), \
4358 "Found file listed in more than one file list"
4360 # Optional files must be present in one other category
4361 assert all_files_set.issuperset(files_opt), \
4362 "Optional file not in a different required list"
4364 return (files_all, files_opt, files_mc, files_vm)
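# Illustrative sketch (not part of the original code): callers of
# _ComputeAncillaryFiles rely on the two invariants enforced by the
# assertions above -- the per-category sets are disjoint and every optional
# file also belongs to a required category.  The hypothetical checker below
# merely restates them for documentation purposes.
def _ExampleCheckAncillaryFiles(files_all, files_opt, files_mc, files_vm):
  all_files_set = files_all | files_mc | files_vm
  if len(all_files_set) != sum(map(len, [files_all, files_mc, files_vm])):
    raise errors.ProgrammerError("File listed in more than one category")
  if not all_files_set.issuperset(files_opt):
    raise errors.ProgrammerError("Optional file not in a required category")
  return all_files_set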
4367 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4368 """Distribute additional files which are part of the cluster configuration.
4370 ConfigWriter takes care of distributing the config and ssconf files, but
4371 there are more files which should be distributed to all nodes. This function
4372 makes sure those are copied.
4374 @param lu: calling logical unit
4375 @param additional_nodes: list of nodes not in the config to distribute to
4376 @type additional_vm: boolean
4377 @param additional_vm: whether the additional nodes are vm-capable or not
4380 # Gather target nodes
4381 cluster = lu.cfg.GetClusterInfo()
4382 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4384 online_nodes = lu.cfg.GetOnlineNodeList()
4385 online_set = frozenset(online_nodes)
4386 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4388 if additional_nodes is not None:
4389 online_nodes.extend(additional_nodes)
4391 vm_nodes.extend(additional_nodes)
4393 # Never distribute to master node
4394 for nodelist in [online_nodes, vm_nodes]:
4395 if master_info.name in nodelist:
4396 nodelist.remove(master_info.name)
4399 (files_all, _, files_mc, files_vm) = \
4400 _ComputeAncillaryFiles(cluster, True)
4402 # Never re-distribute configuration file from here
4403 assert not (constants.CLUSTER_CONF_FILE in files_all or
4404 constants.CLUSTER_CONF_FILE in files_vm)
4405 assert not files_mc, "Master candidates not handled in this function"
4408 (online_nodes, files_all),
4409 (vm_nodes, files_vm),
4413 for (node_list, files) in filemap:
4415 _UploadHelper(lu, node_list, fname)
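# Note: the file map above pairs all online nodes with the files every node
# must carry and the vm-capable subset with the hypervisor-specific files;
# master-candidate-only files (e.g. the cluster configuration) are excluded
# on purpose since ConfigWriter already distributes them, and the master
# node itself is removed from both target lists before uploading.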
4418 class LUClusterRedistConf(NoHooksLU):
4419 """Force the redistribution of cluster configuration.
4421 This is a very simple LU.
4426 def ExpandNames(self):
4427 self.needed_locks = {
4428 locking.LEVEL_NODE: locking.ALL_SET,
4430 self.share_locks[locking.LEVEL_NODE] = 1
4432 def Exec(self, feedback_fn):
4433 """Redistribute the configuration.
4436 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4437 _RedistributeAncillaryFiles(self)
4440 class LUClusterActivateMasterIp(NoHooksLU):
4441 """Activate the master IP on the master node.
4444 def Exec(self, feedback_fn):
4445 """Activate the master IP.
4448 master_params = self.cfg.GetMasterNetworkParameters()
4449 ems = self.cfg.GetUseExternalMipScript()
4450 result = self.rpc.call_node_activate_master_ip(master_params.name,
4452 result.Raise("Could not activate the master IP")
4455 class LUClusterDeactivateMasterIp(NoHooksLU):
4456 """Deactivate the master IP on the master node.
4459 def Exec(self, feedback_fn):
4460 """Deactivate the master IP.
4463 master_params = self.cfg.GetMasterNetworkParameters()
4464 ems = self.cfg.GetUseExternalMipScript()
4465 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4467 result.Raise("Could not deactivate the master IP")
4470 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4471 """Sleep and poll for an instance's disk to sync.
4474 if not instance.disks or disks is not None and not disks:
4477 disks = _ExpandCheckDisks(instance, disks)
4480 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4482 node = instance.primary_node
4485 lu.cfg.SetDiskID(dev, node)
4487 # TODO: Convert to utils.Retry
4490 degr_retries = 10 # in seconds, as we sleep 1 second each time
4494 cumul_degraded = False
4495 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4496 msg = rstats.fail_msg
4498 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4501 raise errors.RemoteError("Can't contact node %s for mirror data,"
4502 " aborting." % node)
4505 rstats = rstats.payload
4507 for i, mstat in enumerate(rstats):
4509 lu.LogWarning("Can't compute data for node %s/%s",
4510 node, disks[i].iv_name)
4513 cumul_degraded = (cumul_degraded or
4514 (mstat.is_degraded and mstat.sync_percent is None))
4515 if mstat.sync_percent is not None:
4517 if mstat.estimated_time is not None:
4518 rem_time = ("%s remaining (estimated)" %
4519 utils.FormatSeconds(mstat.estimated_time))
4520 max_time = mstat.estimated_time
4522 rem_time = "no time estimate"
4523 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4524 (disks[i].iv_name, mstat.sync_percent, rem_time))
4526 # if we're done but degraded, let's do a few small retries, to
4527 # make sure we see a stable and not transient situation; therefore
4528 # we force restart of the loop
4529 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4530 logging.info("Degraded disks found, %d retries left", degr_retries)
4538 time.sleep(min(60, max_time))
4541 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4542 return not cumul_degraded
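# Note: each pass of the loop above re-queries the mirror status, prints
# per-device progress and then sleeps for at most 60 seconds, bounded by the
# estimated sync time reported by the still-syncing disks.  When the disks
# look finished but are still flagged as degraded, up to ten extra
# one-second passes are made so that a transient state is not mistaken for a
# stable one; the function returns True only if no disk ended up degraded.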
4545 def _BlockdevFind(lu, node, dev, instance):
4546 """Wrapper around call_blockdev_find to annotate diskparams.
4548 @param lu: A reference to the lu object
4549 @param node: The node to call out
4550 @param dev: The device to find
4551 @param instance: The instance object the device belongs to
4552 @returns The result of the rpc call
4555 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4556 return lu.rpc.call_blockdev_find(node, disk)
4559 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4560 """Wrapper around L{_CheckDiskConsistencyInner}.
4563 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4564 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4568 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4570 """Check that mirrors are not degraded.
4572 @attention: The device has to be annotated already.
4574 The ldisk parameter, if True, will change the test from the
4575 is_degraded attribute (which represents overall non-ok status for
4576 the device(s)) to the ldisk (representing the local storage status).
4579 lu.cfg.SetDiskID(dev, node)
4583 if on_primary or dev.AssembleOnSecondary():
4584 rstats = lu.rpc.call_blockdev_find(node, dev)
4585 msg = rstats.fail_msg
4587 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4589 elif not rstats.payload:
4590 lu.LogWarning("Can't find disk on node %s", node)
4594 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4596 result = result and not rstats.payload.is_degraded
4599 for child in dev.children:
4600 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4606 class LUOobCommand(NoHooksLU):
4607 """Logical unit for OOB handling.
4611 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4613 def ExpandNames(self):
4614 """Gather locks we need.
4617 if self.op.node_names:
4618 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4619 lock_names = self.op.node_names
4621 lock_names = locking.ALL_SET
4623 self.needed_locks = {
4624 locking.LEVEL_NODE: lock_names,
4627 def CheckPrereq(self):
4628 """Check prerequisites.
4631 - the node exists in the configuration
4634 Any errors are signaled by raising errors.OpPrereqError.
4638 self.master_node = self.cfg.GetMasterNode()
4640 assert self.op.power_delay >= 0.0
4642 if self.op.node_names:
4643 if (self.op.command in self._SKIP_MASTER and
4644 self.master_node in self.op.node_names):
4645 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4646 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4648 if master_oob_handler:
4649 additional_text = ("run '%s %s %s' if you want to operate on the"
4650 " master regardless") % (master_oob_handler,
4654 additional_text = "it does not support out-of-band operations"
4656 raise errors.OpPrereqError(("Operating on the master node %s is not"
4657 " allowed for %s; %s") %
4658 (self.master_node, self.op.command,
4659 additional_text), errors.ECODE_INVAL)
4661 self.op.node_names = self.cfg.GetNodeList()
4662 if self.op.command in self._SKIP_MASTER:
4663 self.op.node_names.remove(self.master_node)
4665 if self.op.command in self._SKIP_MASTER:
4666 assert self.master_node not in self.op.node_names
4668 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4670 raise errors.OpPrereqError("Node %s not found" % node_name,
4673 self.nodes.append(node)
4675 if (not self.op.ignore_status and
4676 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4677 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4678 " not marked offline") % node_name,
4681 def Exec(self, feedback_fn):
4682 """Execute OOB and return result if we expect any.
4685 master_node = self.master_node
4688 for idx, node in enumerate(utils.NiceSort(self.nodes,
4689 key=lambda node: node.name)):
4690 node_entry = [(constants.RS_NORMAL, node.name)]
4691 ret.append(node_entry)
4693 oob_program = _SupportsOob(self.cfg, node)
4696 node_entry.append((constants.RS_UNAVAIL, None))
4699 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4700 self.op.command, oob_program, node.name)
4701 result = self.rpc.call_run_oob(master_node, oob_program,
4702 self.op.command, node.name,
4706 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4707 node.name, result.fail_msg)
4708 node_entry.append((constants.RS_NODATA, None))
4711 self._CheckPayload(result)
4712 except errors.OpExecError, err:
4713 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4715 node_entry.append((constants.RS_NODATA, None))
4717 if self.op.command == constants.OOB_HEALTH:
4718 # For health we should log important events
4719 for item, status in result.payload:
4720 if status in [constants.OOB_STATUS_WARNING,
4721 constants.OOB_STATUS_CRITICAL]:
4722 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4723 item, node.name, status)
4725 if self.op.command == constants.OOB_POWER_ON:
4727 elif self.op.command == constants.OOB_POWER_OFF:
4728 node.powered = False
4729 elif self.op.command == constants.OOB_POWER_STATUS:
4730 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4731 if powered != node.powered:
4732 logging.warning(("Recorded power state (%s) of node '%s' does not"
4733 " match actual power state (%s)"), node.powered,
4736 # For configuration-changing commands we should update the node
4737 if self.op.command in (constants.OOB_POWER_ON,
4738 constants.OOB_POWER_OFF):
4739 self.cfg.Update(node, feedback_fn)
4741 node_entry.append((constants.RS_NORMAL, result.payload))
4743 if (self.op.command == constants.OOB_POWER_ON and
4744 idx < len(self.nodes) - 1):
4745 time.sleep(self.op.power_delay)
4749 def _CheckPayload(self, result):
4750 """Checks if the payload is valid.
4752 @param result: RPC result
4753 @raises errors.OpExecError: If payload is not valid
4757 if self.op.command == constants.OOB_HEALTH:
4758 if not isinstance(result.payload, list):
4759 errs.append("command 'health' is expected to return a list but got %s" %
4760 type(result.payload))
4762 for item, status in result.payload:
4763 if status not in constants.OOB_STATUSES:
4764 errs.append("health item '%s' has invalid status '%s'" %
4767 if self.op.command == constants.OOB_POWER_STATUS:
4768 if not isinstance(result.payload, dict):
4769 errs.append("power-status is expected to return a dict but got %s" %
4770 type(result.payload))
4772 if self.op.command in [
4773 constants.OOB_POWER_ON,
4774 constants.OOB_POWER_OFF,
4775 constants.OOB_POWER_CYCLE,
4777 if result.payload is not None:
4778 errs.append("%s is expected to not return payload but got '%s'" %
4779 (self.op.command, result.payload))
4782 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4783 utils.CommaJoin(errs))
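# Illustrative sketch (not part of the original code): payload shapes that
# _CheckPayload above accepts for the various OOB commands.  The item names
# and values are made up purely for documentation purposes.
_EXAMPLE_OOB_PAYLOADS = {
  constants.OOB_HEALTH: [
    ("PSU0", constants.OOB_STATUS_OK),
    ("FAN2", constants.OOB_STATUS_WARNING),
    ],
  constants.OOB_POWER_STATUS: {
    constants.OOB_POWER_STATUS_POWERED: True,
    },
  constants.OOB_POWER_ON: None,
  constants.OOB_POWER_OFF: None,
  constants.OOB_POWER_CYCLE: None,
  }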
4786 class _OsQuery(_QueryBase):
4787 FIELDS = query.OS_FIELDS
4789 def ExpandNames(self, lu):
4790 # Lock all nodes in shared mode
4791 # Temporary removal of locks, should be reverted later
4792 # TODO: reintroduce locks when they are lighter-weight
4793 lu.needed_locks = {}
4794 #self.share_locks[locking.LEVEL_NODE] = 1
4795 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4797 # The following variables interact with _QueryBase._GetNames
4799 self.wanted = self.names
4801 self.wanted = locking.ALL_SET
4803 self.do_locking = self.use_locking
4805 def DeclareLocks(self, lu, level):
4809 def _DiagnoseByOS(rlist):
4810 """Remaps a per-node return list into an a per-os per-node dictionary
4812 @param rlist: a map with node names as keys and OS objects as values
4815 @return: a dictionary with osnames as keys and as value another
4816 map, with nodes as keys and tuples of (path, status, diagnose,
4817 variants, parameters, api_versions) as values, eg::
4819 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4820 (/srv/..., False, "invalid api")],
4821 "node2": [(/srv/..., True, "", [], [])]}
4826 # we build here the list of nodes that didn't fail the RPC (at RPC
4827 # level), so that nodes with a non-responding node daemon don't
4828 # make all OSes invalid
4829 good_nodes = [node_name for node_name in rlist
4830 if not rlist[node_name].fail_msg]
4831 for node_name, nr in rlist.items():
4832 if nr.fail_msg or not nr.payload:
4834 for (name, path, status, diagnose, variants,
4835 params, api_versions) in nr.payload:
4836 if name not in all_os:
4837 # build a list of nodes for this os containing empty lists
4838 # for each node in node_list
4840 for nname in good_nodes:
4841 all_os[name][nname] = []
4842 # convert params from [name, help] to (name, help)
4843 params = [tuple(v) for v in params]
4844 all_os[name][node_name].append((path, status, diagnose,
4845 variants, params, api_versions))
4848 def _GetQueryData(self, lu):
4849 """Computes the list of nodes and their attributes.
4852 # Locking is not used
4853 assert not (compat.any(lu.glm.is_owned(level)
4854 for level in locking.LEVELS
4855 if level != locking.LEVEL_CLUSTER) or
4856 self.do_locking or self.use_locking)
4858 valid_nodes = [node.name
4859 for node in lu.cfg.GetAllNodesInfo().values()
4860 if not node.offline and node.vm_capable]
4861 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4862 cluster = lu.cfg.GetClusterInfo()
4866 for (os_name, os_data) in pol.items():
4867 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4868 hidden=(os_name in cluster.hidden_os),
4869 blacklisted=(os_name in cluster.blacklisted_os))
4873 api_versions = set()
4875 for idx, osl in enumerate(os_data.values()):
4876 info.valid = bool(info.valid and osl and osl[0][1])
4880 (node_variants, node_params, node_api) = osl[0][3:6]
4883 variants.update(node_variants)
4884 parameters.update(node_params)
4885 api_versions.update(node_api)
4887 # Filter out inconsistent values
4888 variants.intersection_update(node_variants)
4889 parameters.intersection_update(node_params)
4890 api_versions.intersection_update(node_api)
4892 info.variants = list(variants)
4893 info.parameters = list(parameters)
4894 info.api_versions = list(api_versions)
4896 data[os_name] = info
4898 # Prepare data in requested order
4899 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4903 class LUOsDiagnose(NoHooksLU):
4904 """Logical unit for OS diagnose/query.
4910 def _BuildFilter(fields, names):
4911 """Builds a filter for querying OSes.
4914 name_filter = qlang.MakeSimpleFilter("name", names)
4916 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4917 # respective field is not requested
4918 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4919 for fname in ["hidden", "blacklisted"]
4920 if fname not in fields]
4921 if "valid" not in fields:
4922 status_filter.append([qlang.OP_TRUE, "valid"])
4925 status_filter.insert(0, qlang.OP_AND)
4927 status_filter = None
4929 if name_filter and status_filter:
4930 return [qlang.OP_AND, name_filter, status_filter]
4934 return status_filter
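# Note: for example, with fields ["name", "variants"] and names
# ["lenny-image"] the filter built above is roughly
#   [qlang.OP_AND,
#    <simple name filter for "lenny-image">,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]
# i.e. hidden, blacklisted and invalid OSes are filtered out unless the
# corresponding field was explicitly requested.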
4936 def CheckArguments(self):
4937 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4938 self.op.output_fields, False)
4940 def ExpandNames(self):
4941 self.oq.ExpandNames(self)
4943 def Exec(self, feedback_fn):
4944 return self.oq.OldStyleQuery(self)
4947 class LUNodeRemove(LogicalUnit):
4948 """Logical unit for removing a node.
4951 HPATH = "node-remove"
4952 HTYPE = constants.HTYPE_NODE
4954 def BuildHooksEnv(self):
4959 "OP_TARGET": self.op.node_name,
4960 "NODE_NAME": self.op.node_name,
4963 def BuildHooksNodes(self):
4964 """Build hooks nodes.
4966 This doesn't run on the target node in the pre phase as a failed
4967 node would then be impossible to remove.
4970 all_nodes = self.cfg.GetNodeList()
4972 all_nodes.remove(self.op.node_name)
4975 return (all_nodes, all_nodes)
4977 def CheckPrereq(self):
4978 """Check prerequisites.
4981 - the node exists in the configuration
4982 - it does not have primary or secondary instances
4983 - it's not the master
4985 Any errors are signaled by raising errors.OpPrereqError.
4988 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4989 node = self.cfg.GetNodeInfo(self.op.node_name)
4990 assert node is not None
4992 masternode = self.cfg.GetMasterNode()
4993 if node.name == masternode:
4994 raise errors.OpPrereqError("Node is the master node, failover to another"
4995 " node is required", errors.ECODE_INVAL)
4997 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4998 if node.name in instance.all_nodes:
4999 raise errors.OpPrereqError("Instance %s is still running on the node,"
5000 " please remove first" % instance_name,
5002 self.op.node_name = node.name
5005 def Exec(self, feedback_fn):
5006 """Removes the node from the cluster.
5010 logging.info("Stopping the node daemon and removing configs from node %s",
5013 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5015 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5018 # Promote nodes to master candidate as needed
5019 _AdjustCandidatePool(self, exceptions=[node.name])
5020 self.context.RemoveNode(node.name)
5022 # Run post hooks on the node before it's removed
5023 _RunPostHook(self, node.name)
5025 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5026 msg = result.fail_msg
5028 self.LogWarning("Errors encountered on the remote node while leaving"
5029 " the cluster: %s", msg)
5031 # Remove node from our /etc/hosts
5032 if self.cfg.GetClusterInfo().modify_etc_hosts:
5033 master_node = self.cfg.GetMasterNode()
5034 result = self.rpc.call_etc_hosts_modify(master_node,
5035 constants.ETC_HOSTS_REMOVE,
5037 result.Raise("Can't update hosts file with new host data")
5038 _RedistributeAncillaryFiles(self)
5041 class _NodeQuery(_QueryBase):
5042 FIELDS = query.NODE_FIELDS
5044 def ExpandNames(self, lu):
5045 lu.needed_locks = {}
5046 lu.share_locks = _ShareAll()
5049 self.wanted = _GetWantedNodes(lu, self.names)
5051 self.wanted = locking.ALL_SET
5053 self.do_locking = (self.use_locking and
5054 query.NQ_LIVE in self.requested_data)
5057 # If any non-static field is requested we need to lock the nodes
5058 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5060 def DeclareLocks(self, lu, level):
5063 def _GetQueryData(self, lu):
5064 """Computes the list of nodes and their attributes.
5067 all_info = lu.cfg.GetAllNodesInfo()
5069 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5071 # Gather data as requested
5072 if query.NQ_LIVE in self.requested_data:
5073 # filter out non-vm_capable nodes
5074 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5076 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5077 [lu.cfg.GetHypervisorType()])
5078 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5079 for (name, nresult) in node_data.items()
5080 if not nresult.fail_msg and nresult.payload)
5084 if query.NQ_INST in self.requested_data:
5085 node_to_primary = dict([(name, set()) for name in nodenames])
5086 node_to_secondary = dict([(name, set()) for name in nodenames])
5088 inst_data = lu.cfg.GetAllInstancesInfo()
5090 for inst in inst_data.values():
5091 if inst.primary_node in node_to_primary:
5092 node_to_primary[inst.primary_node].add(inst.name)
5093 for secnode in inst.secondary_nodes:
5094 if secnode in node_to_secondary:
5095 node_to_secondary[secnode].add(inst.name)
5097 node_to_primary = None
5098 node_to_secondary = None
5100 if query.NQ_OOB in self.requested_data:
5101 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5102 for name, node in all_info.iteritems())
5106 if query.NQ_GROUP in self.requested_data:
5107 groups = lu.cfg.GetAllNodeGroupsInfo()
5111 return query.NodeQueryData([all_info[name] for name in nodenames],
5112 live_data, lu.cfg.GetMasterNode(),
5113 node_to_primary, node_to_secondary, groups,
5114 oob_support, lu.cfg.GetClusterInfo())
5117 class LUNodeQuery(NoHooksLU):
5118 """Logical unit for querying nodes.
5121 # pylint: disable=W0142
5124 def CheckArguments(self):
5125 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5126 self.op.output_fields, self.op.use_locking)
5128 def ExpandNames(self):
5129 self.nq.ExpandNames(self)
5131 def DeclareLocks(self, level):
5132 self.nq.DeclareLocks(self, level)
5134 def Exec(self, feedback_fn):
5135 return self.nq.OldStyleQuery(self)
5138 class LUNodeQueryvols(NoHooksLU):
5139 """Logical unit for getting volumes on node(s).
5143 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5144 _FIELDS_STATIC = utils.FieldSet("node")
5146 def CheckArguments(self):
5147 _CheckOutputFields(static=self._FIELDS_STATIC,
5148 dynamic=self._FIELDS_DYNAMIC,
5149 selected=self.op.output_fields)
5151 def ExpandNames(self):
5152 self.share_locks = _ShareAll()
5153 self.needed_locks = {}
5155 if not self.op.nodes:
5156 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5158 self.needed_locks[locking.LEVEL_NODE] = \
5159 _GetWantedNodes(self, self.op.nodes)
5161 def Exec(self, feedback_fn):
5162 """Computes the list of nodes and their attributes.
5165 nodenames = self.owned_locks(locking.LEVEL_NODE)
5166 volumes = self.rpc.call_node_volumes(nodenames)
5168 ilist = self.cfg.GetAllInstancesInfo()
5169 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5172 for node in nodenames:
5173 nresult = volumes[node]
5176 msg = nresult.fail_msg
5178 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5181 node_vols = sorted(nresult.payload,
5182 key=operator.itemgetter("dev"))
5184 for vol in node_vols:
5186 for field in self.op.output_fields:
5189 elif field == "phys":
5193 elif field == "name":
5195 elif field == "size":
5196 val = int(float(vol["size"]))
5197 elif field == "instance":
5198 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5200 raise errors.ParameterError(field)
5201 node_output.append(str(val))
5203 output.append(node_output)
5208 class LUNodeQueryStorage(NoHooksLU):
5209 """Logical unit for getting information on storage units on node(s).
5212 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5215 def CheckArguments(self):
5216 _CheckOutputFields(static=self._FIELDS_STATIC,
5217 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5218 selected=self.op.output_fields)
5220 def ExpandNames(self):
5221 self.share_locks = _ShareAll()
5222 self.needed_locks = {}
5225 self.needed_locks[locking.LEVEL_NODE] = \
5226 _GetWantedNodes(self, self.op.nodes)
5228 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5230 def Exec(self, feedback_fn):
5231 """Computes the list of nodes and their attributes.
5234 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5236 # Always get name to sort by
5237 if constants.SF_NAME in self.op.output_fields:
5238 fields = self.op.output_fields[:]
5240 fields = [constants.SF_NAME] + self.op.output_fields
5242 # Never ask for node or type as it's only known to the LU
5243 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5244 while extra in fields:
5245 fields.remove(extra)
5247 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5248 name_idx = field_idx[constants.SF_NAME]
5250 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5251 data = self.rpc.call_storage_list(self.nodes,
5252 self.op.storage_type, st_args,
5253 self.op.name, fields)
5257 for node in utils.NiceSort(self.nodes):
5258 nresult = data[node]
5262 msg = nresult.fail_msg
5264 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5267 rows = dict([(row[name_idx], row) for row in nresult.payload])
5269 for name in utils.NiceSort(rows.keys()):
5274 for field in self.op.output_fields:
5275 if field == constants.SF_NODE:
5277 elif field == constants.SF_TYPE:
5278 val = self.op.storage_type
5279 elif field in field_idx:
5280 val = row[field_idx[field]]
5282 raise errors.ParameterError(field)
5291 class _InstanceQuery(_QueryBase):
5292 FIELDS = query.INSTANCE_FIELDS
5294 def ExpandNames(self, lu):
5295 lu.needed_locks = {}
5296 lu.share_locks = _ShareAll()
5299 self.wanted = _GetWantedInstances(lu, self.names)
5301 self.wanted = locking.ALL_SET
5303 self.do_locking = (self.use_locking and
5304 query.IQ_LIVE in self.requested_data)
5306 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5307 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5308 lu.needed_locks[locking.LEVEL_NODE] = []
5309 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5311 self.do_grouplocks = (self.do_locking and
5312 query.IQ_NODES in self.requested_data)
5314 def DeclareLocks(self, lu, level):
5316 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5317 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5319 # Lock all groups used by instances optimistically; this requires going
5320 # via the node before it's locked, requiring verification later on
5321 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5323 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5324 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5325 elif level == locking.LEVEL_NODE:
5326 lu._LockInstancesNodes() # pylint: disable=W0212
5329 def _CheckGroupLocks(lu):
5330 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5331 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5333 # Check if node groups for locked instances are still correct
5334 for instance_name in owned_instances:
5335 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5337 def _GetQueryData(self, lu):
5338 """Computes the list of instances and their attributes.
5341 if self.do_grouplocks:
5342 self._CheckGroupLocks(lu)
5344 cluster = lu.cfg.GetClusterInfo()
5345 all_info = lu.cfg.GetAllInstancesInfo()
5347 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5349 instance_list = [all_info[name] for name in instance_names]
5350 nodes = frozenset(itertools.chain(*(inst.all_nodes
5351 for inst in instance_list)))
5352 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5355 wrongnode_inst = set()
5357 # Gather data as requested
5358 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5360 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5362 result = node_data[name]
5364 # offline nodes will be in both lists
5365 assert result.fail_msg
5366 offline_nodes.append(name)
5368 bad_nodes.append(name)
5369 elif result.payload:
5370 for inst in result.payload:
5371 if inst in all_info:
5372 if all_info[inst].primary_node == name:
5373 live_data.update(result.payload)
5375 wrongnode_inst.add(inst)
5377 # orphan instance; we don't list it here as we don't
5378 # handle this case yet in the output of instance listing
5379 logging.warning("Orphan instance '%s' found on node %s",
5381 # else no instance is alive
5385 if query.IQ_DISKUSAGE in self.requested_data:
5386 disk_usage = dict((inst.name,
5387 _ComputeDiskSize(inst.disk_template,
5388 [{constants.IDISK_SIZE: disk.size}
5389 for disk in inst.disks]))
5390 for inst in instance_list)
5394 if query.IQ_CONSOLE in self.requested_data:
5396 for inst in instance_list:
5397 if inst.name in live_data:
5398 # Instance is running
5399 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5401 consinfo[inst.name] = None
5402 assert set(consinfo.keys()) == set(instance_names)
5406 if query.IQ_NODES in self.requested_data:
5407 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5409 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5410 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5411 for uuid in set(map(operator.attrgetter("group"),
5417 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5418 disk_usage, offline_nodes, bad_nodes,
5419 live_data, wrongnode_inst, consinfo,
5423 class LUQuery(NoHooksLU):
5424 """Query for resources/items of a certain kind.
5427 # pylint: disable=W0142
5430 def CheckArguments(self):
5431 qcls = _GetQueryImplementation(self.op.what)
5433 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5435 def ExpandNames(self):
5436 self.impl.ExpandNames(self)
5438 def DeclareLocks(self, level):
5439 self.impl.DeclareLocks(self, level)
5441 def Exec(self, feedback_fn):
5442 return self.impl.NewStyleQuery(self)
5445 class LUQueryFields(NoHooksLU):
5446 """Query for resources/items of a certain kind.
5449 # pylint: disable=W0142
5452 def CheckArguments(self):
5453 self.qcls = _GetQueryImplementation(self.op.what)
5455 def ExpandNames(self):
5456 self.needed_locks = {}
5458 def Exec(self, feedback_fn):
5459 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5462 class LUNodeModifyStorage(NoHooksLU):
5463 """Logical unit for modifying a storage volume on a node.
5468 def CheckArguments(self):
5469 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5471 storage_type = self.op.storage_type
5474 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5476 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5477 " modified" % storage_type,
5480 diff = set(self.op.changes.keys()) - modifiable
5482 raise errors.OpPrereqError("The following fields can not be modified for"
5483 " storage units of type '%s': %r" %
5484 (storage_type, list(diff)),
5487 def ExpandNames(self):
5488 self.needed_locks = {
5489 locking.LEVEL_NODE: self.op.node_name,
5492 def Exec(self, feedback_fn):
5493 """Computes the list of nodes and their attributes.
5496 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5497 result = self.rpc.call_storage_modify(self.op.node_name,
5498 self.op.storage_type, st_args,
5499 self.op.name, self.op.changes)
5500 result.Raise("Failed to modify storage unit '%s' on %s" %
5501 (self.op.name, self.op.node_name))
5504 class LUNodeAdd(LogicalUnit):
5505 """Logical unit for adding node to the cluster.
5509 HTYPE = constants.HTYPE_NODE
5510 _NFLAGS = ["master_capable", "vm_capable"]
5512 def CheckArguments(self):
5513 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5514 # validate/normalize the node name
5515 self.hostname = netutils.GetHostname(name=self.op.node_name,
5516 family=self.primary_ip_family)
5517 self.op.node_name = self.hostname.name
5519 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5520 raise errors.OpPrereqError("Cannot readd the master node",
5523 if self.op.readd and self.op.group:
5524 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5525 " being readded", errors.ECODE_INVAL)
5527 def BuildHooksEnv(self):
5530 This will run on all nodes before, and on all nodes + the new node after.
5534 "OP_TARGET": self.op.node_name,
5535 "NODE_NAME": self.op.node_name,
5536 "NODE_PIP": self.op.primary_ip,
5537 "NODE_SIP": self.op.secondary_ip,
5538 "MASTER_CAPABLE": str(self.op.master_capable),
5539 "VM_CAPABLE": str(self.op.vm_capable),
5542 def BuildHooksNodes(self):
5543 """Build hooks nodes.
5546 # Exclude added node
5547 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5548 post_nodes = pre_nodes + [self.op.node_name, ]
5550 return (pre_nodes, post_nodes)
5552 def CheckPrereq(self):
5553 """Check prerequisites.
5556 - the new node is not already in the config
5558 - its parameters (single/dual homed) match the cluster
5560 Any errors are signaled by raising errors.OpPrereqError.
5564 hostname = self.hostname
5565 node = hostname.name
5566 primary_ip = self.op.primary_ip = hostname.ip
5567 if self.op.secondary_ip is None:
5568 if self.primary_ip_family == netutils.IP6Address.family:
5569 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5570 " IPv4 address must be given as secondary",
5572 self.op.secondary_ip = primary_ip
5574 secondary_ip = self.op.secondary_ip
5575 if not netutils.IP4Address.IsValid(secondary_ip):
5576 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5577 " address" % secondary_ip, errors.ECODE_INVAL)
5579 node_list = cfg.GetNodeList()
5580 if not self.op.readd and node in node_list:
5581 raise errors.OpPrereqError("Node %s is already in the configuration" %
5582 node, errors.ECODE_EXISTS)
5583 elif self.op.readd and node not in node_list:
5584 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5587 self.changed_primary_ip = False
5589 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5590 if self.op.readd and node == existing_node_name:
5591 if existing_node.secondary_ip != secondary_ip:
5592 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5593 " address configuration as before",
5595 if existing_node.primary_ip != primary_ip:
5596 self.changed_primary_ip = True
5600 if (existing_node.primary_ip == primary_ip or
5601 existing_node.secondary_ip == primary_ip or
5602 existing_node.primary_ip == secondary_ip or
5603 existing_node.secondary_ip == secondary_ip):
5604 raise errors.OpPrereqError("New node ip address(es) conflict with"
5605 " existing node %s" % existing_node.name,
5606 errors.ECODE_NOTUNIQUE)
5608 # After this 'if' block, None is no longer a valid value for the
5609 # _capable op attributes
5611 old_node = self.cfg.GetNodeInfo(node)
5612 assert old_node is not None, "Can't retrieve locked node %s" % node
5613 for attr in self._NFLAGS:
5614 if getattr(self.op, attr) is None:
5615 setattr(self.op, attr, getattr(old_node, attr))
5617 for attr in self._NFLAGS:
5618 if getattr(self.op, attr) is None:
5619 setattr(self.op, attr, True)
5621 if self.op.readd and not self.op.vm_capable:
5622 pri, sec = cfg.GetNodeInstances(node)
5624 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5625 " flag set to false, but it already holds"
5626 " instances" % node,
5629 # check that the type of the node (single versus dual homed) is the
5630 # same as for the master
5631 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5632 master_singlehomed = myself.secondary_ip == myself.primary_ip
5633 newbie_singlehomed = secondary_ip == primary_ip
5634 if master_singlehomed != newbie_singlehomed:
5635 if master_singlehomed:
5636 raise errors.OpPrereqError("The master has no secondary ip but the"
5637 " new node has one",
5640 raise errors.OpPrereqError("The master has a secondary ip but the"
5641 " new node doesn't have one",
5644 # checks reachability
5645 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5646 raise errors.OpPrereqError("Node not reachable by ping",
5647 errors.ECODE_ENVIRON)
5649 if not newbie_singlehomed:
5650 # check reachability from my secondary ip to newbie's secondary ip
5651 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5652 source=myself.secondary_ip):
5653 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5654 " based ping to node daemon port",
5655 errors.ECODE_ENVIRON)
5662 if self.op.master_capable:
5663 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5665 self.master_candidate = False
5668 self.new_node = old_node
5670 node_group = cfg.LookupNodeGroup(self.op.group)
5671 self.new_node = objects.Node(name=node,
5672 primary_ip=primary_ip,
5673 secondary_ip=secondary_ip,
5674 master_candidate=self.master_candidate,
5675 offline=False, drained=False,
5678 if self.op.ndparams:
5679 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5681 if self.op.hv_state:
5682 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5684 if self.op.disk_state:
5685 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5687 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5688 # it a property on the base class.
5689 result = rpc.DnsOnlyRunner().call_version([node])[node]
5690 result.Raise("Can't get version information from node %s" % node)
5691 if constants.PROTOCOL_VERSION == result.payload:
5692 logging.info("Communication to node %s fine, sw version %s match",
5693 node, result.payload)
5695       raise errors.OpPrereqError("Version mismatch: master version %s,"
5696 " node version %s" %
5697 (constants.PROTOCOL_VERSION, result.payload),
5698 errors.ECODE_ENVIRON)
5700 def Exec(self, feedback_fn):
5701 """Adds the new node to the cluster.
5704 new_node = self.new_node
5705 node = new_node.name
5707 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5710     # We are adding a new node, so we assume it's powered
5711 new_node.powered = True
5713 # for re-adds, reset the offline/drained/master-candidate flags;
5714 # we need to reset here, otherwise offline would prevent RPC calls
5715 # later in the procedure; this also means that if the re-add
5716 # fails, we are left with a non-offlined, broken node
5718 new_node.drained = new_node.offline = False # pylint: disable=W0201
5719 self.LogInfo("Readding a node, the offline/drained flags were reset")
5720 # if we demote the node, we do cleanup later in the procedure
5721 new_node.master_candidate = self.master_candidate
5722 if self.changed_primary_ip:
5723 new_node.primary_ip = self.op.primary_ip
5725 # copy the master/vm_capable flags
5726 for attr in self._NFLAGS:
5727 setattr(new_node, attr, getattr(self.op, attr))
5729 # notify the user about any possible mc promotion
5730 if new_node.master_candidate:
5731 self.LogInfo("Node will be a master candidate")
5733 if self.op.ndparams:
5734 new_node.ndparams = self.op.ndparams
5736 new_node.ndparams = {}
5738 if self.op.hv_state:
5739 new_node.hv_state_static = self.new_hv_state
5741 if self.op.disk_state:
5742 new_node.disk_state_static = self.new_disk_state
5744 # Add node to our /etc/hosts, and add key to known_hosts
5745 if self.cfg.GetClusterInfo().modify_etc_hosts:
5746 master_node = self.cfg.GetMasterNode()
5747 result = self.rpc.call_etc_hosts_modify(master_node,
5748 constants.ETC_HOSTS_ADD,
5751 result.Raise("Can't update hosts file with new host data")
5753 if new_node.secondary_ip != new_node.primary_ip:
5754 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5757 node_verify_list = [self.cfg.GetMasterNode()]
5758 node_verify_param = {
5759 constants.NV_NODELIST: ([node], {}),
5760 # TODO: do a node-net-test as well?
5763 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5764 self.cfg.GetClusterName())
5765 for verifier in node_verify_list:
5766 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5767 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5769 for failed in nl_payload:
5770 feedback_fn("ssh/hostname verification failed"
5771 " (checking from %s): %s" %
5772 (verifier, nl_payload[failed]))
5773 raise errors.OpExecError("ssh/hostname verification failed")
5776 _RedistributeAncillaryFiles(self)
5777 self.context.ReaddNode(new_node)
5778 # make sure we redistribute the config
5779 self.cfg.Update(new_node, feedback_fn)
5780 # and make sure the new node will not have old files around
5781 if not new_node.master_candidate:
5782 result = self.rpc.call_node_demote_from_mc(new_node.name)
5783 msg = result.fail_msg
5785 self.LogWarning("Node failed to demote itself from master"
5786 " candidate status: %s" % msg)
5788 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5789 additional_vm=self.op.vm_capable)
5790 self.context.AddNode(new_node, self.proc.GetECId())
5793 class LUNodeSetParams(LogicalUnit):
5794 """Modifies the parameters of a node.
5796 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5797 to the node role (as _ROLE_*)
5798 @cvar _R2F: a dictionary from node role to tuples of flags
5799 @cvar _FLAGS: a list of attribute names corresponding to the flags
5802 HPATH = "node-modify"
5803 HTYPE = constants.HTYPE_NODE
5805 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5807 (True, False, False): _ROLE_CANDIDATE,
5808 (False, True, False): _ROLE_DRAINED,
5809 (False, False, True): _ROLE_OFFLINE,
5810 (False, False, False): _ROLE_REGULAR,
5812 _R2F = dict((v, k) for k, v in _F2R.items())
5813 _FLAGS = ["master_candidate", "drained", "offline"]
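  # Annotation (not part of the original module): _F2R maps a
  # (master_candidate, drained, offline) flag tuple to a single role and _R2F
  # inverts it, e.g. _F2R[(True, False, False)] is _ROLE_CANDIDATE and
  # _R2F[_ROLE_OFFLINE] is (False, False, True). Exec below uses _R2F to turn
  # the computed new role back into the three node flags.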
5815 def CheckArguments(self):
5816 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5817 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5818 self.op.master_capable, self.op.vm_capable,
5819 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5821 if all_mods.count(None) == len(all_mods):
5822 raise errors.OpPrereqError("Please pass at least one modification",
5824 if all_mods.count(True) > 1:
5825 raise errors.OpPrereqError("Can't set the node into more than one"
5826 " state at the same time",
5829 # Boolean value that tells us whether we might be demoting from MC
5830 self.might_demote = (self.op.master_candidate == False or
5831 self.op.offline == True or
5832 self.op.drained == True or
5833 self.op.master_capable == False)
5835 if self.op.secondary_ip:
5836 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5837 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5838 " address" % self.op.secondary_ip,
5841 self.lock_all = self.op.auto_promote and self.might_demote
5842 self.lock_instances = self.op.secondary_ip is not None
5844 def _InstanceFilter(self, instance):
5845 """Filter for getting affected instances.
5848 return (instance.disk_template in constants.DTS_INT_MIRROR and
5849 self.op.node_name in instance.all_nodes)
5851 def ExpandNames(self):
5853 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5855 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5857 # Since modifying a node can have severe effects on currently running
5858     # operations, the resource lock is acquired at least in shared mode
5859 self.needed_locks[locking.LEVEL_NODE_RES] = \
5860 self.needed_locks[locking.LEVEL_NODE]
5862 # Get node resource and instance locks in shared mode; they are not used
5863 # for anything but read-only access
5864 self.share_locks[locking.LEVEL_NODE_RES] = 1
5865 self.share_locks[locking.LEVEL_INSTANCE] = 1
5867 if self.lock_instances:
5868 self.needed_locks[locking.LEVEL_INSTANCE] = \
5869 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5871 def BuildHooksEnv(self):
5874 This runs on the master node.
5878 "OP_TARGET": self.op.node_name,
5879 "MASTER_CANDIDATE": str(self.op.master_candidate),
5880 "OFFLINE": str(self.op.offline),
5881 "DRAINED": str(self.op.drained),
5882 "MASTER_CAPABLE": str(self.op.master_capable),
5883 "VM_CAPABLE": str(self.op.vm_capable),
5886 def BuildHooksNodes(self):
5887 """Build hooks nodes.
5890 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5893 def CheckPrereq(self):
5894 """Check prerequisites.
5896     This checks the prerequisites for changing the node's parameters.
5899 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5901 if self.lock_instances:
5902 affected_instances = \
5903 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5905 # Verify instance locks
5906 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5907 wanted_instances = frozenset(affected_instances.keys())
5908 if wanted_instances - owned_instances:
5909 raise errors.OpPrereqError("Instances affected by changing node %s's"
5910 " secondary IP address have changed since"
5911 " locks were acquired, wanted '%s', have"
5912 " '%s'; retry the operation" %
5914 utils.CommaJoin(wanted_instances),
5915 utils.CommaJoin(owned_instances)),
5918 affected_instances = None
5920 if (self.op.master_candidate is not None or
5921 self.op.drained is not None or
5922 self.op.offline is not None):
5923 # we can't change the master's node flags
5924 if self.op.node_name == self.cfg.GetMasterNode():
5925 raise errors.OpPrereqError("The master role can be changed"
5926 " only via master-failover",
5929 if self.op.master_candidate and not node.master_capable:
5930 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5931 " it a master candidate" % node.name,
5934 if self.op.vm_capable == False:
5935 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5937 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5938 " the vm_capable flag" % node.name,
5941 if node.master_candidate and self.might_demote and not self.lock_all:
5942 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5943       # check if, after removing the current node, we're missing master candidates
5945 (mc_remaining, mc_should, _) = \
5946 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5947 if mc_remaining < mc_should:
5948 raise errors.OpPrereqError("Not enough master candidates, please"
5949 " pass auto promote option to allow"
5950 " promotion (--auto-promote or RAPI"
5951 " auto_promote=True)", errors.ECODE_STATE)
5953 self.old_flags = old_flags = (node.master_candidate,
5954 node.drained, node.offline)
5955 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5956 self.old_role = old_role = self._F2R[old_flags]
5958 # Check for ineffective changes
5959 for attr in self._FLAGS:
5960 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5961 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5962 setattr(self.op, attr, None)
5964 # Past this point, any flag change to False means a transition
5965 # away from the respective state, as only real changes are kept
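# Annotation (not original code), an illustrative example: a request with
# offline=False for a node whose offline flag is already False is reset to
# None by the loop above, so the role computation below only ever sees
# genuine transitions.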
5967 # TODO: We might query the real power state if it supports OOB
5968 if _SupportsOob(self.cfg, node):
5969 if self.op.offline is False and not (node.powered or
5970 self.op.powered == True):
5971 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5972 " offline status can be reset") %
5974 elif self.op.powered is not None:
5975 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5976 " as it does not support out-of-band"
5977 " handling") % self.op.node_name)
5979     # If we're being de-offlined or un-drained, we'll promote ourselves to MC if needed
5980 if (self.op.drained == False or self.op.offline == False or
5981 (self.op.master_capable and not node.master_capable)):
5982 if _DecideSelfPromotion(self):
5983 self.op.master_candidate = True
5984 self.LogInfo("Auto-promoting node to master candidate")
5986 # If we're no longer master capable, we'll demote ourselves from MC
5987 if self.op.master_capable == False and node.master_candidate:
5988 self.LogInfo("Demoting from master candidate")
5989 self.op.master_candidate = False
5992 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5993 if self.op.master_candidate:
5994 new_role = self._ROLE_CANDIDATE
5995 elif self.op.drained:
5996 new_role = self._ROLE_DRAINED
5997 elif self.op.offline:
5998 new_role = self._ROLE_OFFLINE
5999 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6000       # False is still in new flags, which means we're un-setting (the current) flag
6002 new_role = self._ROLE_REGULAR
6003 else: # no new flags, nothing, keep old role
6006 self.new_role = new_role
6008 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6009 # Trying to transition out of offline status
6010 result = self.rpc.call_version([node.name])[node.name]
6012 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6013 " to report its version: %s" %
6014 (node.name, result.fail_msg),
6017 self.LogWarning("Transitioning node from offline to online state"
6018 " without using re-add. Please make sure the node"
6021 if self.op.secondary_ip:
6022 # Ok even without locking, because this can't be changed by any LU
6023 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6024 master_singlehomed = master.secondary_ip == master.primary_ip
6025 if master_singlehomed and self.op.secondary_ip:
6026 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6027 " homed cluster", errors.ECODE_INVAL)
6029 assert not (frozenset(affected_instances) -
6030 self.owned_locks(locking.LEVEL_INSTANCE))
6033 if affected_instances:
6034 raise errors.OpPrereqError("Cannot change secondary IP address:"
6035 " offline node has instances (%s)"
6036 " configured to use it" %
6037 utils.CommaJoin(affected_instances.keys()))
6039 # On online nodes, check that no instances are running, and that
6040 # the node has the new ip and we can reach it.
6041 for instance in affected_instances.values():
6042 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6043 msg="cannot change secondary ip")
6045 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6046 if master.name != node.name:
6047 # check reachability from master secondary ip to new secondary ip
6048 if not netutils.TcpPing(self.op.secondary_ip,
6049 constants.DEFAULT_NODED_PORT,
6050 source=master.secondary_ip):
6051 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6052 " based ping to node daemon port",
6053 errors.ECODE_ENVIRON)
6055 if self.op.ndparams:
6056 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6057 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6058 self.new_ndparams = new_ndparams
6060 if self.op.hv_state:
6061 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6062 self.node.hv_state_static)
6064 if self.op.disk_state:
6065 self.new_disk_state = \
6066 _MergeAndVerifyDiskState(self.op.disk_state,
6067 self.node.disk_state_static)
6069 def Exec(self, feedback_fn):
6074 old_role = self.old_role
6075 new_role = self.new_role
6079 if self.op.ndparams:
6080 node.ndparams = self.new_ndparams
6082 if self.op.powered is not None:
6083 node.powered = self.op.powered
6085 if self.op.hv_state:
6086 node.hv_state_static = self.new_hv_state
6088 if self.op.disk_state:
6089 node.disk_state_static = self.new_disk_state
6091 for attr in ["master_capable", "vm_capable"]:
6092 val = getattr(self.op, attr)
6094 setattr(node, attr, val)
6095 result.append((attr, str(val)))
6097 if new_role != old_role:
6098 # Tell the node to demote itself, if no longer MC and not offline
6099 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6100 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6102 self.LogWarning("Node failed to demote itself: %s", msg)
6104 new_flags = self._R2F[new_role]
6105 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6107 result.append((desc, str(nf)))
6108 (node.master_candidate, node.drained, node.offline) = new_flags
6110 # we locked all nodes, we adjust the CP before updating this node
6112 _AdjustCandidatePool(self, [node.name])
6114 if self.op.secondary_ip:
6115 node.secondary_ip = self.op.secondary_ip
6116 result.append(("secondary_ip", self.op.secondary_ip))
6118 # this will trigger configuration file update, if needed
6119 self.cfg.Update(node, feedback_fn)
6121     # this will trigger job queue propagation or cleanup if the mc flag changed
6123 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6124 self.context.ReaddNode(node)
6129 class LUNodePowercycle(NoHooksLU):
6130 """Powercycles a node.
6135 def CheckArguments(self):
6136 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6137 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6138 raise errors.OpPrereqError("The node is the master and the force"
6139 " parameter was not set",
6142 def ExpandNames(self):
6143 """Locking for PowercycleNode.
6145 This is a last-resort option and shouldn't block on other
6146 jobs. Therefore, we grab no locks.
6149 self.needed_locks = {}
6151 def Exec(self, feedback_fn):
6155 result = self.rpc.call_node_powercycle(self.op.node_name,
6156 self.cfg.GetHypervisorType())
6157 result.Raise("Failed to schedule the reboot")
6158 return result.payload
6161 class LUClusterQuery(NoHooksLU):
6162 """Query cluster configuration.
6167 def ExpandNames(self):
6168 self.needed_locks = {}
6170 def Exec(self, feedback_fn):
6171 """Return cluster config.
6174 cluster = self.cfg.GetClusterInfo()
6177 # Filter just for enabled hypervisors
6178 for os_name, hv_dict in cluster.os_hvp.items():
6179 os_hvp[os_name] = {}
6180 for hv_name, hv_params in hv_dict.items():
6181 if hv_name in cluster.enabled_hypervisors:
6182 os_hvp[os_name][hv_name] = hv_params
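# Annotation (not original code), an illustrative example with hypothetical
# data: if cluster.os_hvp is {"debian": {"kvm": {...}, "xen-pvm": {...}}} and
# only "kvm" is enabled, os_hvp ends up as {"debian": {"kvm": {...}}}.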
6184 # Convert ip_family to ip_version
6185 primary_ip_version = constants.IP4_VERSION
6186 if cluster.primary_ip_family == netutils.IP6Address.family:
6187 primary_ip_version = constants.IP6_VERSION
6190 "software_version": constants.RELEASE_VERSION,
6191 "protocol_version": constants.PROTOCOL_VERSION,
6192 "config_version": constants.CONFIG_VERSION,
6193 "os_api_version": max(constants.OS_API_VERSIONS),
6194 "export_version": constants.EXPORT_VERSION,
6195 "architecture": runtime.GetArchInfo(),
6196 "name": cluster.cluster_name,
6197 "master": cluster.master_node,
6198 "default_hypervisor": cluster.primary_hypervisor,
6199 "enabled_hypervisors": cluster.enabled_hypervisors,
6200 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6201 for hypervisor_name in cluster.enabled_hypervisors]),
6203 "beparams": cluster.beparams,
6204 "osparams": cluster.osparams,
6205 "ipolicy": cluster.ipolicy,
6206 "nicparams": cluster.nicparams,
6207 "ndparams": cluster.ndparams,
6208 "diskparams": cluster.diskparams,
6209 "candidate_pool_size": cluster.candidate_pool_size,
6210 "master_netdev": cluster.master_netdev,
6211 "master_netmask": cluster.master_netmask,
6212 "use_external_mip_script": cluster.use_external_mip_script,
6213 "volume_group_name": cluster.volume_group_name,
6214 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6215 "file_storage_dir": cluster.file_storage_dir,
6216 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6217 "maintain_node_health": cluster.maintain_node_health,
6218 "ctime": cluster.ctime,
6219 "mtime": cluster.mtime,
6220 "uuid": cluster.uuid,
6221 "tags": list(cluster.GetTags()),
6222 "uid_pool": cluster.uid_pool,
6223 "default_iallocator": cluster.default_iallocator,
6224 "reserved_lvs": cluster.reserved_lvs,
6225 "primary_ip_version": primary_ip_version,
6226 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6227 "hidden_os": cluster.hidden_os,
6228 "blacklisted_os": cluster.blacklisted_os,
6234 class LUClusterConfigQuery(NoHooksLU):
6235 """Return configuration values.
6240 def CheckArguments(self):
6241 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6243 def ExpandNames(self):
6244 self.cq.ExpandNames(self)
6246 def DeclareLocks(self, level):
6247 self.cq.DeclareLocks(self, level)
6249 def Exec(self, feedback_fn):
6250 result = self.cq.OldStyleQuery(self)
6252 assert len(result) == 1
6257 class _ClusterQuery(_QueryBase):
6258 FIELDS = query.CLUSTER_FIELDS
6260 #: Do not sort (there is only one item)
6263 def ExpandNames(self, lu):
6264 lu.needed_locks = {}
6266 # The following variables interact with _QueryBase._GetNames
6267 self.wanted = locking.ALL_SET
6268 self.do_locking = self.use_locking
6271 raise errors.OpPrereqError("Can not use locking for cluster queries",
6274 def DeclareLocks(self, lu, level):
6277 def _GetQueryData(self, lu):
6278 """Computes the list of nodes and their attributes.
6281 # Locking is not used
6282 assert not (compat.any(lu.glm.is_owned(level)
6283 for level in locking.LEVELS
6284 if level != locking.LEVEL_CLUSTER) or
6285 self.do_locking or self.use_locking)
6287 if query.CQ_CONFIG in self.requested_data:
6288 cluster = lu.cfg.GetClusterInfo()
6290 cluster = NotImplemented
6292 if query.CQ_QUEUE_DRAINED in self.requested_data:
6293 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6295 drain_flag = NotImplemented
6297 if query.CQ_WATCHER_PAUSE in self.requested_data:
6298 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6300 watcher_pause = NotImplemented
6302 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6305 class LUInstanceActivateDisks(NoHooksLU):
6306 """Bring up an instance's disks.
6311 def ExpandNames(self):
6312 self._ExpandAndLockInstance()
6313 self.needed_locks[locking.LEVEL_NODE] = []
6314 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6316 def DeclareLocks(self, level):
6317 if level == locking.LEVEL_NODE:
6318 self._LockInstancesNodes()
6320 def CheckPrereq(self):
6321 """Check prerequisites.
6323 This checks that the instance is in the cluster.
6326 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6327 assert self.instance is not None, \
6328 "Cannot retrieve locked instance %s" % self.op.instance_name
6329 _CheckNodeOnline(self, self.instance.primary_node)
6331 def Exec(self, feedback_fn):
6332 """Activate the disks.
6335 disks_ok, disks_info = \
6336 _AssembleInstanceDisks(self, self.instance,
6337 ignore_size=self.op.ignore_size)
6339 raise errors.OpExecError("Cannot activate block devices")
6344 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6346 """Prepare the block devices for an instance.
6348 This sets up the block devices on all nodes.
6350 @type lu: L{LogicalUnit}
6351 @param lu: the logical unit on whose behalf we execute
6352 @type instance: L{objects.Instance}
6353   @param instance: the instance whose disks we assemble
6354 @type disks: list of L{objects.Disk} or None
6355 @param disks: which disks to assemble (or all, if None)
6356 @type ignore_secondaries: boolean
6357 @param ignore_secondaries: if true, errors on secondary nodes
6358 won't result in an error return from the function
6359 @type ignore_size: boolean
6360 @param ignore_size: if true, the current known size of the disk
6361 will not be used during the disk activation, useful for cases
6362 when the size is wrong
6363   @return: a tuple of (disks_ok, device_info), where device_info is a list
6364       of (host, instance_visible_name, node_visible_name) tuples
6365       with the mapping from node devices to instance devices
6370 iname = instance.name
6371 disks = _ExpandCheckDisks(instance, disks)
6373   # With the two-pass mechanism we try to reduce the window of
6374   # opportunity for the race condition of switching DRBD to primary
6375   # before handshaking has occurred, but we do not eliminate it
6377 # The proper fix would be to wait (with some limits) until the
6378 # connection has been made and drbd transitions from WFConnection
6379 # into any other network-connected state (Connected, SyncTarget,
6382 # 1st pass, assemble on all nodes in secondary mode
6383 for idx, inst_disk in enumerate(disks):
6384 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6386 node_disk = node_disk.Copy()
6387 node_disk.UnsetSize()
6388 lu.cfg.SetDiskID(node_disk, node)
6389 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6391 msg = result.fail_msg
6393 is_offline_secondary = (node in instance.secondary_nodes and
6395 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6396 " (is_primary=False, pass=1): %s",
6397 inst_disk.iv_name, node, msg)
6398 if not (ignore_secondaries or is_offline_secondary):
6401 # FIXME: race condition on drbd migration to primary
6403 # 2nd pass, do only the primary node
6404 for idx, inst_disk in enumerate(disks):
6407 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6408 if node != instance.primary_node:
6411 node_disk = node_disk.Copy()
6412 node_disk.UnsetSize()
6413 lu.cfg.SetDiskID(node_disk, node)
6414 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6416 msg = result.fail_msg
6418 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6419 " (is_primary=True, pass=2): %s",
6420 inst_disk.iv_name, node, msg)
6423 dev_path = result.payload
6425 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6427 # leave the disks configured for the primary node
6428 # this is a workaround that would be fixed better by
6429 # improving the logical/physical id handling
6431 lu.cfg.SetDiskID(disk, instance.primary_node)
6433 return disks_ok, device_info
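# Usage sketch (annotation, not original code), mirroring the call in
# LUInstanceActivateDisks.Exec above; ignore_size=True is just an example:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance,
#                                                 ignore_size=True)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")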
6436 def _StartInstanceDisks(lu, instance, force):
6437 """Start the disks of an instance.
6440 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6441 ignore_secondaries=force)
6443 _ShutdownInstanceDisks(lu, instance)
6444 if force is not None and not force:
6445 lu.proc.LogWarning("", hint="If the message above refers to a"
6447 " you can retry the operation using '--force'.")
6448 raise errors.OpExecError("Disk consistency error")
6451 class LUInstanceDeactivateDisks(NoHooksLU):
6452 """Shutdown an instance's disks.
6457 def ExpandNames(self):
6458 self._ExpandAndLockInstance()
6459 self.needed_locks[locking.LEVEL_NODE] = []
6460 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6462 def DeclareLocks(self, level):
6463 if level == locking.LEVEL_NODE:
6464 self._LockInstancesNodes()
6466 def CheckPrereq(self):
6467 """Check prerequisites.
6469 This checks that the instance is in the cluster.
6472 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6473 assert self.instance is not None, \
6474 "Cannot retrieve locked instance %s" % self.op.instance_name
6476 def Exec(self, feedback_fn):
6477 """Deactivate the disks
6480 instance = self.instance
6482 _ShutdownInstanceDisks(self, instance)
6484 _SafeShutdownInstanceDisks(self, instance)
6487 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6488 """Shutdown block devices of an instance.
6490   This function checks that the instance is not running before calling
6491 _ShutdownInstanceDisks.
6494 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6495 _ShutdownInstanceDisks(lu, instance, disks=disks)
6498 def _ExpandCheckDisks(instance, disks):
6499 """Return the instance disks selected by the disks list
6501 @type disks: list of L{objects.Disk} or None
6502 @param disks: selected disks
6503 @rtype: list of L{objects.Disk}
6504 @return: selected instance disks to act on
6508 return instance.disks
6510 if not set(disks).issubset(instance.disks):
6511 raise errors.ProgrammerError("Can only act on disks belonging to the"
6516 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6517 """Shutdown block devices of an instance.
6519 This does the shutdown on all nodes of the instance.
6521 If the ignore_primary is false, errors on the primary node are
6526 disks = _ExpandCheckDisks(instance, disks)
6529 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6530 lu.cfg.SetDiskID(top_disk, node)
6531 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6532 msg = result.fail_msg
6534 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6535 disk.iv_name, node, msg)
6536 if ((node == instance.primary_node and not ignore_primary) or
6537 (node != instance.primary_node and not result.offline)):
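# Usage sketch (annotation, not original code):
#
#   # safe variant, refuses to act on a running instance
#   _SafeShutdownInstanceDisks(lu, instance)
#   # direct variant, e.g. after a failed start as in LUInstanceStartup.Exec
#   _ShutdownInstanceDisks(lu, instance)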
6542 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6543 """Checks if a node has enough free memory.
6545   This function checks if a given node has the needed amount of free
6546   memory. In case the node has less memory or we cannot get the
6547   information from the node, this function raises an OpPrereqError
6550 @type lu: C{LogicalUnit}
6551 @param lu: a logical unit from which we get configuration data
6553 @param node: the node to check
6554 @type reason: C{str}
6555 @param reason: string to use in the error message
6556 @type requested: C{int}
6557 @param requested: the amount of memory in MiB to check for
6558 @type hypervisor_name: C{str}
6559 @param hypervisor_name: the hypervisor to ask for memory stats
6561 @return: node current free memory
6562 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6563 we cannot check the node
6566 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6567 nodeinfo[node].Raise("Can't get data from node %s" % node,
6568 prereq=True, ecode=errors.ECODE_ENVIRON)
6569 (_, _, (hv_info, )) = nodeinfo[node].payload
6571 free_mem = hv_info.get("memory_free", None)
6572 if not isinstance(free_mem, int):
6573 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6574 " was '%s'" % (node, free_mem),
6575 errors.ECODE_ENVIRON)
6576 if requested > free_mem:
6577 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6578 " needed %s MiB, available %s MiB" %
6579 (node, reason, requested, free_mem),
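# Usage sketch (annotation, not original code), mirroring the call in
# LUInstanceStartup.CheckPrereq below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)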
6584 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6585 """Checks if nodes have enough free disk space in the all VGs.
6587   This function checks if all given nodes have the needed amount of
6588   free disk. In case any node has less disk or we cannot get the
6589   information from the node, this function raises an OpPrereqError
6592 @type lu: C{LogicalUnit}
6593 @param lu: a logical unit from which we get configuration data
6594 @type nodenames: C{list}
6595 @param nodenames: the list of node names to check
6596 @type req_sizes: C{dict}
6597 @param req_sizes: the hash of vg and corresponding amount of disk in
6599 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6600 or we cannot check the node
6603 for vg, req_size in req_sizes.items():
6604 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
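# Usage sketch (annotation, not original code; node and VG names are made
# up): req_sizes maps each volume group to the space needed in MiB, so
#
#   _CheckNodesFreeDiskPerVG(self, ["node1.example.com"], {"xenvg": 2048})
#
# verifies that node1 has at least 2 GiB free in the "xenvg" volume group.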
6607 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6608 """Checks if nodes have enough free disk space in the specified VG.
6610   This function checks if all given nodes have the needed amount of
6611   free disk. In case any node has less disk or we cannot get the
6612   information from the node, this function raises an OpPrereqError
6615 @type lu: C{LogicalUnit}
6616 @param lu: a logical unit from which we get configuration data
6617 @type nodenames: C{list}
6618 @param nodenames: the list of node names to check
6620 @param vg: the volume group to check
6621 @type requested: C{int}
6622 @param requested: the amount of disk in MiB to check for
6623 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6624 or we cannot check the node
6627 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6628 for node in nodenames:
6629 info = nodeinfo[node]
6630 info.Raise("Cannot get current information from node %s" % node,
6631 prereq=True, ecode=errors.ECODE_ENVIRON)
6632 (_, (vg_info, ), _) = info.payload
6633 vg_free = vg_info.get("vg_free", None)
6634 if not isinstance(vg_free, int):
6635 raise errors.OpPrereqError("Can't compute free disk space on node"
6636 " %s for vg %s, result was '%s'" %
6637 (node, vg, vg_free), errors.ECODE_ENVIRON)
6638 if requested > vg_free:
6639 raise errors.OpPrereqError("Not enough disk space on target node %s"
6640 " vg %s: required %d MiB, available %d MiB" %
6641 (node, vg, requested, vg_free),
6645 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6646 """Checks if nodes have enough physical CPUs
6648 This function checks if all given nodes have the needed number of
6649   physical CPUs. In case any node has fewer CPUs or we cannot get the
6650 information from the node, this function raises an OpPrereqError
6653 @type lu: C{LogicalUnit}
6654 @param lu: a logical unit from which we get configuration data
6655 @type nodenames: C{list}
6656 @param nodenames: the list of node names to check
6657 @type requested: C{int}
6658 @param requested: the minimum acceptable number of physical CPUs
6659 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6660 or we cannot check the node
6663 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6664 for node in nodenames:
6665 info = nodeinfo[node]
6666 info.Raise("Cannot get current information from node %s" % node,
6667 prereq=True, ecode=errors.ECODE_ENVIRON)
6668 (_, _, (hv_info, )) = info.payload
6669 num_cpus = hv_info.get("cpu_total", None)
6670 if not isinstance(num_cpus, int):
6671 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6672 " on node %s, result was '%s'" %
6673 (node, num_cpus), errors.ECODE_ENVIRON)
6674 if requested > num_cpus:
6675 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6676 "required" % (node, num_cpus, requested),
6680 class LUInstanceStartup(LogicalUnit):
6681 """Starts an instance.
6684 HPATH = "instance-start"
6685 HTYPE = constants.HTYPE_INSTANCE
6688 def CheckArguments(self):
6690 if self.op.beparams:
6691 # fill the beparams dict
6692 objects.UpgradeBeParams(self.op.beparams)
6693 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6695 def ExpandNames(self):
6696 self._ExpandAndLockInstance()
6697 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6699 def DeclareLocks(self, level):
6700 if level == locking.LEVEL_NODE_RES:
6701 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6703 def BuildHooksEnv(self):
6706 This runs on master, primary and secondary nodes of the instance.
6710 "FORCE": self.op.force,
6713 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6717 def BuildHooksNodes(self):
6718 """Build hooks nodes.
6721 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6724 def CheckPrereq(self):
6725 """Check prerequisites.
6727 This checks that the instance is in the cluster.
6730 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6731 assert self.instance is not None, \
6732 "Cannot retrieve locked instance %s" % self.op.instance_name
6735 if self.op.hvparams:
6736 # check hypervisor parameter syntax (locally)
6737 cluster = self.cfg.GetClusterInfo()
6738 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6739 filled_hvp = cluster.FillHV(instance)
6740 filled_hvp.update(self.op.hvparams)
6741 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6742 hv_type.CheckParameterSyntax(filled_hvp)
6743 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6745 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6747 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6749 if self.primary_offline and self.op.ignore_offline_nodes:
6750 self.proc.LogWarning("Ignoring offline primary node")
6752 if self.op.hvparams or self.op.beparams:
6753 self.proc.LogWarning("Overridden parameters are ignored")
6755 _CheckNodeOnline(self, instance.primary_node)
6757 bep = self.cfg.GetClusterInfo().FillBE(instance)
6758 bep.update(self.op.beparams)
6760     # check that the instance's network bridges exist
6761 _CheckInstanceBridgesExist(self, instance)
6763 remote_info = self.rpc.call_instance_info(instance.primary_node,
6765 instance.hypervisor)
6766 remote_info.Raise("Error checking node %s" % instance.primary_node,
6767 prereq=True, ecode=errors.ECODE_ENVIRON)
6768 if not remote_info.payload: # not running already
6769 _CheckNodeFreeMemory(self, instance.primary_node,
6770 "starting instance %s" % instance.name,
6771 bep[constants.BE_MINMEM], instance.hypervisor)
6773 def Exec(self, feedback_fn):
6774 """Start the instance.
6777 instance = self.instance
6778 force = self.op.force
6780 if not self.op.no_remember:
6781 self.cfg.MarkInstanceUp(instance.name)
6783 if self.primary_offline:
6784 assert self.op.ignore_offline_nodes
6785 self.proc.LogInfo("Primary node offline, marked instance as started")
6787 node_current = instance.primary_node
6789 _StartInstanceDisks(self, instance, force)
6792 self.rpc.call_instance_start(node_current,
6793 (instance, self.op.hvparams,
6795 self.op.startup_paused)
6796 msg = result.fail_msg
6798 _ShutdownInstanceDisks(self, instance)
6799 raise errors.OpExecError("Could not start instance: %s" % msg)
6802 class LUInstanceReboot(LogicalUnit):
6803 """Reboot an instance.
6806 HPATH = "instance-reboot"
6807 HTYPE = constants.HTYPE_INSTANCE
6810 def ExpandNames(self):
6811 self._ExpandAndLockInstance()
6813 def BuildHooksEnv(self):
6816 This runs on master, primary and secondary nodes of the instance.
6820 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6821 "REBOOT_TYPE": self.op.reboot_type,
6822 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6825 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6829 def BuildHooksNodes(self):
6830 """Build hooks nodes.
6833 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6836 def CheckPrereq(self):
6837 """Check prerequisites.
6839 This checks that the instance is in the cluster.
6842 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6843 assert self.instance is not None, \
6844 "Cannot retrieve locked instance %s" % self.op.instance_name
6845 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6846 _CheckNodeOnline(self, instance.primary_node)
6848     # check that the instance's network bridges exist
6849 _CheckInstanceBridgesExist(self, instance)
6851 def Exec(self, feedback_fn):
6852 """Reboot the instance.
6855 instance = self.instance
6856 ignore_secondaries = self.op.ignore_secondaries
6857 reboot_type = self.op.reboot_type
6859 remote_info = self.rpc.call_instance_info(instance.primary_node,
6861 instance.hypervisor)
6862 remote_info.Raise("Error checking node %s" % instance.primary_node)
6863 instance_running = bool(remote_info.payload)
6865 node_current = instance.primary_node
6867 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6868 constants.INSTANCE_REBOOT_HARD]:
6869 for disk in instance.disks:
6870 self.cfg.SetDiskID(disk, node_current)
6871 result = self.rpc.call_instance_reboot(node_current, instance,
6873 self.op.shutdown_timeout)
6874 result.Raise("Could not reboot instance")
6876 if instance_running:
6877 result = self.rpc.call_instance_shutdown(node_current, instance,
6878 self.op.shutdown_timeout)
6879 result.Raise("Could not shutdown instance for full reboot")
6880 _ShutdownInstanceDisks(self, instance)
6882 self.LogInfo("Instance %s was already stopped, starting now",
6884 _StartInstanceDisks(self, instance, ignore_secondaries)
6885 result = self.rpc.call_instance_start(node_current,
6886 (instance, None, None), False)
6887 msg = result.fail_msg
6889 _ShutdownInstanceDisks(self, instance)
6890 raise errors.OpExecError("Could not start instance for"
6891 " full reboot: %s" % msg)
6893 self.cfg.MarkInstanceUp(instance.name)
6896 class LUInstanceShutdown(LogicalUnit):
6897 """Shutdown an instance.
6900 HPATH = "instance-stop"
6901 HTYPE = constants.HTYPE_INSTANCE
6904 def ExpandNames(self):
6905 self._ExpandAndLockInstance()
6907 def BuildHooksEnv(self):
6910 This runs on master, primary and secondary nodes of the instance.
6913 env = _BuildInstanceHookEnvByObject(self, self.instance)
6914 env["TIMEOUT"] = self.op.timeout
6917 def BuildHooksNodes(self):
6918 """Build hooks nodes.
6921 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6924 def CheckPrereq(self):
6925 """Check prerequisites.
6927 This checks that the instance is in the cluster.
6930 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6931 assert self.instance is not None, \
6932 "Cannot retrieve locked instance %s" % self.op.instance_name
6934 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6936 self.primary_offline = \
6937 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6939 if self.primary_offline and self.op.ignore_offline_nodes:
6940 self.proc.LogWarning("Ignoring offline primary node")
6942 _CheckNodeOnline(self, self.instance.primary_node)
6944 def Exec(self, feedback_fn):
6945 """Shutdown the instance.
6948 instance = self.instance
6949 node_current = instance.primary_node
6950 timeout = self.op.timeout
6952 if not self.op.no_remember:
6953 self.cfg.MarkInstanceDown(instance.name)
6955 if self.primary_offline:
6956 assert self.op.ignore_offline_nodes
6957 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6959 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6960 msg = result.fail_msg
6962 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6964 _ShutdownInstanceDisks(self, instance)
6967 class LUInstanceReinstall(LogicalUnit):
6968 """Reinstall an instance.
6971 HPATH = "instance-reinstall"
6972 HTYPE = constants.HTYPE_INSTANCE
6975 def ExpandNames(self):
6976 self._ExpandAndLockInstance()
6978 def BuildHooksEnv(self):
6981 This runs on master, primary and secondary nodes of the instance.
6984 return _BuildInstanceHookEnvByObject(self, self.instance)
6986 def BuildHooksNodes(self):
6987 """Build hooks nodes.
6990 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6993 def CheckPrereq(self):
6994 """Check prerequisites.
6996 This checks that the instance is in the cluster and is not running.
6999 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7000 assert instance is not None, \
7001 "Cannot retrieve locked instance %s" % self.op.instance_name
7002 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7003 " offline, cannot reinstall")
7005 if instance.disk_template == constants.DT_DISKLESS:
7006 raise errors.OpPrereqError("Instance '%s' has no disks" %
7007 self.op.instance_name,
7009 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7011 if self.op.os_type is not None:
7013 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7014 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7015 instance_os = self.op.os_type
7017 instance_os = instance.os
7019 nodelist = list(instance.all_nodes)
7021 if self.op.osparams:
7022 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7023 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7024 self.os_inst = i_osdict # the new dict (without defaults)
7028 self.instance = instance
7030 def Exec(self, feedback_fn):
7031 """Reinstall the instance.
7034 inst = self.instance
7036 if self.op.os_type is not None:
7037 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7038 inst.os = self.op.os_type
7039 # Write to configuration
7040 self.cfg.Update(inst, feedback_fn)
7042 _StartInstanceDisks(self, inst, None)
7044 feedback_fn("Running the instance OS create scripts...")
7045 # FIXME: pass debug option from opcode to backend
7046 result = self.rpc.call_instance_os_add(inst.primary_node,
7047 (inst, self.os_inst), True,
7048 self.op.debug_level)
7049 result.Raise("Could not install OS for instance %s on node %s" %
7050 (inst.name, inst.primary_node))
7052 _ShutdownInstanceDisks(self, inst)
7055 class LUInstanceRecreateDisks(LogicalUnit):
7056 """Recreate an instance's missing disks.
7059 HPATH = "instance-recreate-disks"
7060 HTYPE = constants.HTYPE_INSTANCE
7063 _MODIFYABLE = frozenset([
7064 constants.IDISK_SIZE,
7065 constants.IDISK_MODE,
7068 # New or changed disk parameters may have different semantics
7069 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7070 constants.IDISK_ADOPT,
7072     # TODO: Implement support for changing the VG while recreating
7074 constants.IDISK_METAVG,
7077 def CheckArguments(self):
7078 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7079 # Normalize and convert deprecated list of disk indices
7080 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
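# Annotation (not original code), an illustrative example: a deprecated
# disks=[2, 0] opcode value is normalized here to [(0, {}), (2, {})], so the
# rest of the LU can treat every entry as an (index, parameters) pair.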
7082 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7084 raise errors.OpPrereqError("Some disks have been specified more than"
7085 " once: %s" % utils.CommaJoin(duplicates),
7088 for (idx, params) in self.op.disks:
7089 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7090 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7092 raise errors.OpPrereqError("Parameters for disk %s try to change"
7093 " unmodifyable parameter(s): %s" %
7094 (idx, utils.CommaJoin(unsupported)),
7097 def ExpandNames(self):
7098 self._ExpandAndLockInstance()
7099 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7101 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7102 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7104 self.needed_locks[locking.LEVEL_NODE] = []
7105 self.needed_locks[locking.LEVEL_NODE_RES] = []
7107 def DeclareLocks(self, level):
7108 if level == locking.LEVEL_NODE:
7109 # if we replace the nodes, we only need to lock the old primary,
7110 # otherwise we need to lock all nodes for disk re-creation
7111 primary_only = bool(self.op.nodes)
7112 self._LockInstancesNodes(primary_only=primary_only)
7113 elif level == locking.LEVEL_NODE_RES:
7115 self.needed_locks[locking.LEVEL_NODE_RES] = \
7116 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7118 def BuildHooksEnv(self):
7121 This runs on master, primary and secondary nodes of the instance.
7124 return _BuildInstanceHookEnvByObject(self, self.instance)
7126 def BuildHooksNodes(self):
7127 """Build hooks nodes.
7130 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7133 def CheckPrereq(self):
7134 """Check prerequisites.
7136 This checks that the instance is in the cluster and is not running.
7139 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7140 assert instance is not None, \
7141 "Cannot retrieve locked instance %s" % self.op.instance_name
7143 if len(self.op.nodes) != len(instance.all_nodes):
7144 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7145 " %d replacement nodes were specified" %
7146 (instance.name, len(instance.all_nodes),
7147 len(self.op.nodes)),
7149 assert instance.disk_template != constants.DT_DRBD8 or \
7150 len(self.op.nodes) == 2
7151 assert instance.disk_template != constants.DT_PLAIN or \
7152 len(self.op.nodes) == 1
7153 primary_node = self.op.nodes[0]
7155 primary_node = instance.primary_node
7156 _CheckNodeOnline(self, primary_node)
7158 if instance.disk_template == constants.DT_DISKLESS:
7159 raise errors.OpPrereqError("Instance '%s' has no disks" %
7160 self.op.instance_name, errors.ECODE_INVAL)
7162     # if we replace nodes *and* the old primary is offline, we don't check the instance state
7164 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7165 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7166 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7167 if not (self.op.nodes and old_pnode.offline):
7168 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7169 msg="cannot recreate disks")
7172 self.disks = dict(self.op.disks)
7174 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7176 maxidx = max(self.disks.keys())
7177 if maxidx >= len(instance.disks):
7178 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7181 if (self.op.nodes and
7182 sorted(self.disks.keys()) != range(len(instance.disks))):
7183 raise errors.OpPrereqError("Can't recreate disks partially and"
7184 " change the nodes at the same time",
7187 self.instance = instance
7189 def Exec(self, feedback_fn):
7190 """Recreate the disks.
7193 instance = self.instance
7195 assert (self.owned_locks(locking.LEVEL_NODE) ==
7196 self.owned_locks(locking.LEVEL_NODE_RES))
7199 mods = [] # keeps track of needed changes
7201 for idx, disk in enumerate(instance.disks):
7203 changes = self.disks[idx]
7205 # Disk should not be recreated
7209 # update secondaries for disks, if needed
7210 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7211 # need to update the nodes and minors
7212 assert len(self.op.nodes) == 2
7213 assert len(disk.logical_id) == 6 # otherwise disk internals
7215 (_, _, old_port, _, _, old_secret) = disk.logical_id
7216 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7217 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7218 new_minors[0], new_minors[1], old_secret)
7219 assert len(disk.logical_id) == len(new_id)
7223 mods.append((idx, new_id, changes))
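# Annotation (not original code): as the unpacking above shows, a DRBD8
# logical_id is a 6-tuple (node_a, node_b, port, minor_a, minor_b, secret);
# only the nodes and minors are replaced here, the port and secret are kept.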
7225 # now that we have passed all asserts above, we can apply the mods
7226 # in a single run (to avoid partial changes)
7227 for idx, new_id, changes in mods:
7228 disk = instance.disks[idx]
7229 if new_id is not None:
7230 assert disk.dev_type == constants.LD_DRBD8
7231 disk.logical_id = new_id
7233 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7234 mode=changes.get(constants.IDISK_MODE, None))
7236 # change primary node, if needed
7238 instance.primary_node = self.op.nodes[0]
7239 self.LogWarning("Changing the instance's nodes, you will have to"
7240 " remove any disks left on the older nodes manually")
7243 self.cfg.Update(instance, feedback_fn)
7245 _CreateDisks(self, instance, to_skip=to_skip)
7248 class LUInstanceRename(LogicalUnit):
7249 """Rename an instance.
7252 HPATH = "instance-rename"
7253 HTYPE = constants.HTYPE_INSTANCE
7255 def CheckArguments(self):
7259 if self.op.ip_check and not self.op.name_check:
7260 # TODO: make the ip check more flexible and not depend on the name check
7261 raise errors.OpPrereqError("IP address check requires a name check",
7264 def BuildHooksEnv(self):
7267 This runs on master, primary and secondary nodes of the instance.
7270 env = _BuildInstanceHookEnvByObject(self, self.instance)
7271 env["INSTANCE_NEW_NAME"] = self.op.new_name
7274 def BuildHooksNodes(self):
7275 """Build hooks nodes.
7278 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7281 def CheckPrereq(self):
7282 """Check prerequisites.
7284 This checks that the instance is in the cluster and is not running.
7287 self.op.instance_name = _ExpandInstanceName(self.cfg,
7288 self.op.instance_name)
7289 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7290 assert instance is not None
7291 _CheckNodeOnline(self, instance.primary_node)
7292 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7293 msg="cannot rename")
7294 self.instance = instance
7296 new_name = self.op.new_name
7297 if self.op.name_check:
7298 hostname = netutils.GetHostname(name=new_name)
7299 if hostname.name != new_name:
7300 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7302 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7303 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7304 " same as given hostname '%s'") %
7305 (hostname.name, self.op.new_name),
7307 new_name = self.op.new_name = hostname.name
7308 if (self.op.ip_check and
7309 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7310 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7311 (hostname.ip, new_name),
7312 errors.ECODE_NOTUNIQUE)
7314 instance_list = self.cfg.GetInstanceList()
7315 if new_name in instance_list and new_name != instance.name:
7316 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7317 new_name, errors.ECODE_EXISTS)
7319 def Exec(self, feedback_fn):
7320 """Rename the instance.
7323 inst = self.instance
7324 old_name = inst.name
7326 rename_file_storage = False
7327 if (inst.disk_template in constants.DTS_FILEBASED and
7328 self.op.new_name != inst.name):
7329 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7330 rename_file_storage = True
7332 self.cfg.RenameInstance(inst.name, self.op.new_name)
7333 # Change the instance lock. This is definitely safe while we hold the BGL.
7334 # Otherwise the new lock would have to be added in acquired mode.
7336 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7337 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7339 # re-read the instance from the configuration after rename
7340 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7342 if rename_file_storage:
7343 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7344 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7345 old_file_storage_dir,
7346 new_file_storage_dir)
7347 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7348 " (but the instance has been renamed in Ganeti)" %
7349 (inst.primary_node, old_file_storage_dir,
7350 new_file_storage_dir))
7352 _StartInstanceDisks(self, inst, None)
7354 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7355 old_name, self.op.debug_level)
7356 msg = result.fail_msg
7358 msg = ("Could not run OS rename script for instance %s on node %s"
7359 " (but the instance has been renamed in Ganeti): %s" %
7360 (inst.name, inst.primary_node, msg))
7361 self.proc.LogWarning(msg)
7363 _ShutdownInstanceDisks(self, inst)
7368 class LUInstanceRemove(LogicalUnit):
7369 """Remove an instance.
7372 HPATH = "instance-remove"
7373 HTYPE = constants.HTYPE_INSTANCE
7376 def ExpandNames(self):
7377 self._ExpandAndLockInstance()
7378 self.needed_locks[locking.LEVEL_NODE] = []
7379 self.needed_locks[locking.LEVEL_NODE_RES] = []
7380 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7382 def DeclareLocks(self, level):
7383 if level == locking.LEVEL_NODE:
7384 self._LockInstancesNodes()
7385 elif level == locking.LEVEL_NODE_RES:
7387 self.needed_locks[locking.LEVEL_NODE_RES] = \
7388 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7390 def BuildHooksEnv(self):
7393 This runs on master, primary and secondary nodes of the instance.
7396 env = _BuildInstanceHookEnvByObject(self, self.instance)
7397 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7400 def BuildHooksNodes(self):
7401 """Build hooks nodes.
7404 nl = [self.cfg.GetMasterNode()]
7405 nl_post = list(self.instance.all_nodes) + nl
7406 return (nl, nl_post)
7408 def CheckPrereq(self):
7409 """Check prerequisites.
7411 This checks that the instance is in the cluster.
7414 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7415 assert self.instance is not None, \
7416 "Cannot retrieve locked instance %s" % self.op.instance_name
7418 def Exec(self, feedback_fn):
7419 """Remove the instance.
7422 instance = self.instance
7423 logging.info("Shutting down instance %s on node %s",
7424 instance.name, instance.primary_node)
7426 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7427 self.op.shutdown_timeout)
7428 msg = result.fail_msg
7430 if self.op.ignore_failures:
7431 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7433 raise errors.OpExecError("Could not shutdown instance %s on"
7435 (instance.name, instance.primary_node, msg))
7437 assert (self.owned_locks(locking.LEVEL_NODE) ==
7438 self.owned_locks(locking.LEVEL_NODE_RES))
7439 assert not (set(instance.all_nodes) -
7440 self.owned_locks(locking.LEVEL_NODE)), \
7441 "Not owning correct locks"
7443 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7446 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7447 """Utility function to remove an instance.
7450 logging.info("Removing block devices for instance %s", instance.name)
7452 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7453 if not ignore_failures:
7454 raise errors.OpExecError("Can't remove instance's disks")
7455 feedback_fn("Warning: can't remove instance's disks")
7457 logging.info("Removing instance %s out of cluster config", instance.name)
7459 lu.cfg.RemoveInstance(instance.name)
7461 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7462 "Instance lock removal conflict"
7464 # Remove lock for the instance
7465 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7468 class LUInstanceQuery(NoHooksLU):
7469 """Logical unit for querying instances.
7472 # pylint: disable=W0142
7475 def CheckArguments(self):
7476 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7477 self.op.output_fields, self.op.use_locking)
7479 def ExpandNames(self):
7480 self.iq.ExpandNames(self)
7482 def DeclareLocks(self, level):
7483 self.iq.DeclareLocks(self, level)
7485 def Exec(self, feedback_fn):
7486 return self.iq.OldStyleQuery(self)
7489 class LUInstanceFailover(LogicalUnit):
7490 """Failover an instance.
7493 HPATH = "instance-failover"
7494 HTYPE = constants.HTYPE_INSTANCE
7497 def CheckArguments(self):
7498 """Check the arguments.
7501 self.iallocator = getattr(self.op, "iallocator", None)
7502 self.target_node = getattr(self.op, "target_node", None)
7504 def ExpandNames(self):
7505 self._ExpandAndLockInstance()
7507 if self.op.target_node is not None:
7508 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7510 self.needed_locks[locking.LEVEL_NODE] = []
7511 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7513 self.needed_locks[locking.LEVEL_NODE_RES] = []
7514 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7516 ignore_consistency = self.op.ignore_consistency
7517 shutdown_timeout = self.op.shutdown_timeout
7518 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7521 ignore_consistency=ignore_consistency,
7522 shutdown_timeout=shutdown_timeout,
7523 ignore_ipolicy=self.op.ignore_ipolicy)
7524 self.tasklets = [self._migrater]
7526 def DeclareLocks(self, level):
7527 if level == locking.LEVEL_NODE:
7528 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7529 if instance.disk_template in constants.DTS_EXT_MIRROR:
7530 if self.op.target_node is None:
7531 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7533 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7534 self.op.target_node]
7535 del self.recalculate_locks[locking.LEVEL_NODE]
7537 self._LockInstancesNodes()
7538 elif level == locking.LEVEL_NODE_RES:
7540 self.needed_locks[locking.LEVEL_NODE_RES] = \
7541 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7543 def BuildHooksEnv(self):
7546 This runs on master, primary and secondary nodes of the instance.
7549 instance = self._migrater.instance
7550 source_node = instance.primary_node
7551 target_node = self.op.target_node
7553 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7554 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7555 "OLD_PRIMARY": source_node,
7556 "NEW_PRIMARY": target_node,
7559 if instance.disk_template in constants.DTS_INT_MIRROR:
7560 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7561 env["NEW_SECONDARY"] = source_node
7563 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7565 env.update(_BuildInstanceHookEnvByObject(self, instance))
7569 def BuildHooksNodes(self):
7570 """Build hooks nodes.
7573 instance = self._migrater.instance
7574 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7575 return (nl, nl + [instance.primary_node])
7578 class LUInstanceMigrate(LogicalUnit):
7579 """Migrate an instance.
7581 This is migration without shutting down, compared to the failover,
7582 which is done with shutdown.
7585 HPATH = "instance-migrate"
7586 HTYPE = constants.HTYPE_INSTANCE
7589 def ExpandNames(self):
7590 self._ExpandAndLockInstance()
7592 if self.op.target_node is not None:
7593 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7595 self.needed_locks[locking.LEVEL_NODE] = []
7596 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7598 self.needed_locks[locking.LEVEL_NODE_RES] = []
7599 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7602 TLMigrateInstance(self, self.op.instance_name,
7603 cleanup=self.op.cleanup,
7605 fallback=self.op.allow_failover,
7606 allow_runtime_changes=self.op.allow_runtime_changes,
7607 ignore_ipolicy=self.op.ignore_ipolicy)
7608 self.tasklets = [self._migrater]
7610 def DeclareLocks(self, level):
7611 if level == locking.LEVEL_NODE:
7612 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7613 if instance.disk_template in constants.DTS_EXT_MIRROR:
7614 if self.op.target_node is None:
7615 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7617 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7618 self.op.target_node]
7619 del self.recalculate_locks[locking.LEVEL_NODE]
7621 self._LockInstancesNodes()
7622 elif level == locking.LEVEL_NODE_RES:
7624 self.needed_locks[locking.LEVEL_NODE_RES] = \
7625 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7627 def BuildHooksEnv(self):
7630 This runs on master, primary and secondary nodes of the instance.
7633 instance = self._migrater.instance
7634 source_node = instance.primary_node
7635 target_node = self.op.target_node
7636 env = _BuildInstanceHookEnvByObject(self, instance)
7638 "MIGRATE_LIVE": self._migrater.live,
7639 "MIGRATE_CLEANUP": self.op.cleanup,
7640 "OLD_PRIMARY": source_node,
7641 "NEW_PRIMARY": target_node,
7642 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7645 if instance.disk_template in constants.DTS_INT_MIRROR:
7646 env["OLD_SECONDARY"] = target_node
7647 env["NEW_SECONDARY"] = source_node
7649 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7653 def BuildHooksNodes(self):
7654 """Build hooks nodes.
7657 instance = self._migrater.instance
7658 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7659 return (nl, nl + [instance.primary_node])
7662 class LUInstanceMove(LogicalUnit):
7663 """Move an instance by data-copying.
7666 HPATH = "instance-move"
7667 HTYPE = constants.HTYPE_INSTANCE
7670 def ExpandNames(self):
7671 self._ExpandAndLockInstance()
7672 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7673 self.op.target_node = target_node
7674 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7675 self.needed_locks[locking.LEVEL_NODE_RES] = []
7676 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7678 def DeclareLocks(self, level):
7679 if level == locking.LEVEL_NODE:
7680 self._LockInstancesNodes(primary_only=True)
7681 elif level == locking.LEVEL_NODE_RES:
7683 self.needed_locks[locking.LEVEL_NODE_RES] = \
7684 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7686 def BuildHooksEnv(self):
7689 This runs on master, primary and secondary nodes of the instance.
7693 "TARGET_NODE": self.op.target_node,
7694 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7696 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7699 def BuildHooksNodes(self):
7700 """Build hooks nodes.
7704 self.cfg.GetMasterNode(),
7705 self.instance.primary_node,
7706 self.op.target_node,
7710 def CheckPrereq(self):
7711 """Check prerequisites.
7713 This checks that the instance is in the cluster.
7716 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7717 assert self.instance is not None, \
7718 "Cannot retrieve locked instance %s" % self.op.instance_name
7720 node = self.cfg.GetNodeInfo(self.op.target_node)
7721 assert node is not None, \
7722 "Cannot retrieve locked node %s" % self.op.target_node
7724 self.target_node = target_node = node.name
7726 if target_node == instance.primary_node:
7727 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7728 (instance.name, target_node),
7731 bep = self.cfg.GetClusterInfo().FillBE(instance)
7733 for idx, dsk in enumerate(instance.disks):
7734 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7735 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7736 " cannot copy" % idx, errors.ECODE_STATE)
7738 _CheckNodeOnline(self, target_node)
7739 _CheckNodeNotDrained(self, target_node)
7740 _CheckNodeVmCapable(self, target_node)
7741 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7742 self.cfg.GetNodeGroup(node.group))
7743 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7744 ignore=self.op.ignore_ipolicy)
7746 if instance.admin_state == constants.ADMINST_UP:
7747 # check memory requirements on the target node
7748 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7749 instance.name, bep[constants.BE_MAXMEM],
7750 instance.hypervisor)
7752 self.LogInfo("Not checking memory on the secondary node as"
7753 " instance will not be started")
7755 # check bridge existence
7756 _CheckInstanceBridgesExist(self, instance, node=target_node)
7758 def Exec(self, feedback_fn):
7759 """Move an instance.
7761 The move is done by shutting it down on its present node, copying
7762 the data over (slow) and starting it on the new node.
7765 instance = self.instance
7767 source_node = instance.primary_node
7768 target_node = self.target_node
7770 self.LogInfo("Shutting down instance %s on source node %s",
7771 instance.name, source_node)
7773 assert (self.owned_locks(locking.LEVEL_NODE) ==
7774 self.owned_locks(locking.LEVEL_NODE_RES))
7776 result = self.rpc.call_instance_shutdown(source_node, instance,
7777 self.op.shutdown_timeout)
7778 msg = result.fail_msg
7780 if self.op.ignore_consistency:
7781 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7782 " Proceeding anyway. Please make sure node"
7783 " %s is down. Error details: %s",
7784 instance.name, source_node, source_node, msg)
7786 raise errors.OpExecError("Could not shutdown instance %s on"
7788 (instance.name, source_node, msg))
7790 # create the target disks
7792 _CreateDisks(self, instance, target_node=target_node)
7793 except errors.OpExecError:
7794 self.LogWarning("Device creation failed, reverting...")
7796 _RemoveDisks(self, instance, target_node=target_node)
7798 self.cfg.ReleaseDRBDMinors(instance.name)
7801 cluster_name = self.cfg.GetClusterInfo().cluster_name
7804 # activate, get path, copy the data over
7805 for idx, disk in enumerate(instance.disks):
7806 self.LogInfo("Copying data for disk %d", idx)
7807 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7808 instance.name, True, idx)
7810 self.LogWarning("Can't assemble newly created disk %d: %s",
7811 idx, result.fail_msg)
7812 errs.append(result.fail_msg)
7814 dev_path = result.payload
7815 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7816 target_node, dev_path,
7819 self.LogWarning("Can't copy data over for disk %d: %s",
7820 idx, result.fail_msg)
7821 errs.append(result.fail_msg)
7825 self.LogWarning("Some disks failed to copy, aborting")
7827 _RemoveDisks(self, instance, target_node=target_node)
7829 self.cfg.ReleaseDRBDMinors(instance.name)
7830 raise errors.OpExecError("Errors during disk copy: %s" %
7833 instance.primary_node = target_node
7834 self.cfg.Update(instance, feedback_fn)
7836 self.LogInfo("Removing the disks on the original node")
7837 _RemoveDisks(self, instance, target_node=source_node)
7839 # Only start the instance if it's marked as up
7840 if instance.admin_state == constants.ADMINST_UP:
7841 self.LogInfo("Starting instance %s on node %s",
7842 instance.name, target_node)
7844 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7845 ignore_secondaries=True)
7847 _ShutdownInstanceDisks(self, instance)
7848 raise errors.OpExecError("Can't activate the instance's disks")
7850 result = self.rpc.call_instance_start(target_node,
7851 (instance, None, None), False)
7852 msg = result.fail_msg
7854 _ShutdownInstanceDisks(self, instance)
7855 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7856 (instance.name, target_node, msg))
7859 class LUNodeMigrate(LogicalUnit):
7860 """Migrate all instances from a node.
7863 HPATH = "node-migrate"
7864 HTYPE = constants.HTYPE_NODE
7867 def CheckArguments(self):
7870 def ExpandNames(self):
7871 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7873 self.share_locks = _ShareAll()
7874 self.needed_locks = {
7875 locking.LEVEL_NODE: [self.op.node_name],
7878 def BuildHooksEnv(self):
7881 This runs on the master, the primary and all the secondaries.
7885 "NODE_NAME": self.op.node_name,
7886 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7889 def BuildHooksNodes(self):
7890 """Build hooks nodes.
7893 nl = [self.cfg.GetMasterNode()]
7896 def CheckPrereq(self):
7899 def Exec(self, feedback_fn):
7900 # Prepare jobs for migrating instances
7901 allow_runtime_changes = self.op.allow_runtime_changes
7903 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7906 iallocator=self.op.iallocator,
7907 target_node=self.op.target_node,
7908 allow_runtime_changes=allow_runtime_changes,
7909 ignore_ipolicy=self.op.ignore_ipolicy)]
7910 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7913 # TODO: Run iallocator in this opcode and pass correct placement options to
7914 # OpInstanceMigrate. Since other jobs can modify the cluster between
7915 # running the iallocator and the actual migration, a good consistency model
7916 # will have to be found.
7918 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7919 frozenset([self.op.node_name]))
7921 return ResultWithJobs(jobs)
7924 class TLMigrateInstance(Tasklet):
7925 """Tasklet class for instance migration.
7928 @ivar live: whether the migration will be done live or non-live;
7929 this variable is initialized only after CheckPrereq has run
7930 @type cleanup: boolean
7931 @ivar cleanup: Whether we are cleaning up after a failed migration
7932 @type iallocator: string
7933 @ivar iallocator: The iallocator used to determine target_node
7934 @type target_node: string
7935 @ivar target_node: If given, the target_node to reallocate the instance to
7936 @type failover: boolean
7937 @ivar failover: Whether operation results in failover or migration
7938 @type fallback: boolean
7939 @ivar fallback: Whether fallback to failover is allowed if migration not
7941 @type ignore_consistency: boolean
7942 @ivar ignore_consistency: Whether we should ignore consistency between source
7944 @type shutdown_timeout: int
7945 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7946 @type ignore_ipolicy: bool
7947 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7952 _MIGRATION_POLL_INTERVAL = 1 # seconds
7953 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7955 def __init__(self, lu, instance_name, cleanup=False,
7956 failover=False, fallback=False,
7957 ignore_consistency=False,
7958 allow_runtime_changes=True,
7959 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7960 ignore_ipolicy=False):
7961 """Initializes this class.
7964 Tasklet.__init__(self, lu)
7967 self.instance_name = instance_name
7968 self.cleanup = cleanup
7969 self.live = False # will be overridden later
7970 self.failover = failover
7971 self.fallback = fallback
7972 self.ignore_consistency = ignore_consistency
7973 self.shutdown_timeout = shutdown_timeout
7974 self.ignore_ipolicy = ignore_ipolicy
7975 self.allow_runtime_changes = allow_runtime_changes
7977 def CheckPrereq(self):
7978 """Check prerequisites.
7980 This checks that the instance is in the cluster.
7983 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7984 instance = self.cfg.GetInstanceInfo(instance_name)
7985 assert instance is not None
7986 self.instance = instance
7987 cluster = self.cfg.GetClusterInfo()
7989 if (not self.cleanup and
7990 not instance.admin_state == constants.ADMINST_UP and
7991 not self.failover and self.fallback):
7992 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7993 " switching to failover")
7994 self.failover = True
7996 if instance.disk_template not in constants.DTS_MIRRORED:
8001 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8002 " %s" % (instance.disk_template, text),
8005 if instance.disk_template in constants.DTS_EXT_MIRROR:
8006 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8008 if self.lu.op.iallocator:
8009 self._RunAllocator()
8011 # We set self.target_node as it is required by
8013 self.target_node = self.lu.op.target_node
8015 # Check that the target node is correct in terms of instance policy
8016 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8017 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8018 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8019 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8020 ignore=self.ignore_ipolicy)
8022 # self.target_node is already populated, either directly or by the
8024 target_node = self.target_node
8025 if self.target_node == instance.primary_node:
8026 raise errors.OpPrereqError("Cannot migrate instance %s"
8027 " to its primary (%s)" %
8028 (instance.name, instance.primary_node))
8030 if len(self.lu.tasklets) == 1:
8031 # It is safe to release locks only when we're the only tasklet
8033 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8034 keep=[instance.primary_node, self.target_node])
8037 secondary_nodes = instance.secondary_nodes
8038 if not secondary_nodes:
8039 raise errors.ConfigurationError("No secondary node but using"
8040 " %s disk template" %
8041 instance.disk_template)
8042 target_node = secondary_nodes[0]
8043 if self.lu.op.iallocator or (self.lu.op.target_node and
8044 self.lu.op.target_node != target_node):
8046 text = "failed over"
8049 raise errors.OpPrereqError("Instances with disk template %s cannot"
8050 " be %s to arbitrary nodes"
8051 " (neither an iallocator nor a target"
8052 " node can be passed)" %
8053 (instance.disk_template, text),
8055 nodeinfo = self.cfg.GetNodeInfo(target_node)
8056 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8057 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8058 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8059 ignore=self.ignore_ipolicy)
8061 i_be = cluster.FillBE(instance)
8063 # check memory requirements on the secondary node
8064 if (not self.cleanup and
8065 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8066 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8067 "migrating instance %s" %
8069 i_be[constants.BE_MINMEM],
8070 instance.hypervisor)
8072 self.lu.LogInfo("Not checking memory on the secondary node as"
8073 " instance will not be started")
8075 # check if failover must be forced instead of migration
8076 if (not self.cleanup and not self.failover and
8077 i_be[constants.BE_ALWAYS_FAILOVER]):
8078 self.lu.LogInfo("Instance configured to always failover; fallback"
8080 self.failover = True
8082 # check bridge existence
8083 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8085 if not self.cleanup:
8086 _CheckNodeNotDrained(self.lu, target_node)
8087 if not self.failover:
8088 result = self.rpc.call_instance_migratable(instance.primary_node,
8090 if result.fail_msg and self.fallback:
8091 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8093 self.failover = True
8095 result.Raise("Can't migrate, please use failover",
8096 prereq=True, ecode=errors.ECODE_STATE)
8098 assert not (self.failover and self.cleanup)
8100 if not self.failover:
8101 if self.lu.op.live is not None and self.lu.op.mode is not None:
8102 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8103 " parameters are accepted",
8105 if self.lu.op.live is not None:
8107 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8109 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8110 # reset the 'live' parameter to None so that repeated
8111 # invocations of CheckPrereq do not raise an exception
8112 self.lu.op.live = None
8113 elif self.lu.op.mode is None:
8114 # read the default value from the hypervisor
8115 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8116 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8118 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8120 # Failover is never live
8123 if not (self.failover or self.cleanup):
8124 remote_info = self.rpc.call_instance_info(instance.primary_node,
8126 instance.hypervisor)
8127 remote_info.Raise("Error checking instance on node %s" %
8128 instance.primary_node)
8129 instance_running = bool(remote_info.payload)
8130 if instance_running:
8131 self.current_mem = int(remote_info.payload["memory"])
8133 def _RunAllocator(self):
8134 """Run the allocator based on input opcode.
8137 # FIXME: add a self.ignore_ipolicy option
8138 ial = IAllocator(self.cfg, self.rpc,
8139 mode=constants.IALLOCATOR_MODE_RELOC,
8140 name=self.instance_name,
8141 relocate_from=[self.instance.primary_node],
8144 ial.Run(self.lu.op.iallocator)
8147 raise errors.OpPrereqError("Can't compute nodes using"
8148 " iallocator '%s': %s" %
8149 (self.lu.op.iallocator, ial.info),
8151 if len(ial.result) != ial.required_nodes:
8152 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8153 " of nodes (%s), required %s" %
8154 (self.lu.op.iallocator, len(ial.result),
8155 ial.required_nodes), errors.ECODE_FAULT)
8156 self.target_node = ial.result[0]
8157 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8158 self.instance_name, self.lu.op.iallocator,
8159 utils.CommaJoin(ial.result))
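# Illustrative note: in IALLOCATOR_MODE_RELOC the allocator returns a list
# of node names; after the length check above, the first entry is used as
# the node the instance will be relocated to.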
8161 def _WaitUntilSync(self):
8162 """Poll with custom rpc for disk sync.
8164 This uses our own step-based rpc call.
8167 self.feedback_fn("* wait until resync is done")
8171 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8173 (self.instance.disks,
8176 for node, nres in result.items():
8177 nres.Raise("Cannot resync disks on node %s" % node)
8178 node_done, node_percent = nres.payload
8179 all_done = all_done and node_done
8180 if node_percent is not None:
8181 min_percent = min(min_percent, node_percent)
8183 if min_percent < 100:
8184 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8187 def _EnsureSecondary(self, node):
8188 """Demote a node to secondary.
8191 self.feedback_fn("* switching node %s to secondary mode" % node)
8193 for dev in self.instance.disks:
8194 self.cfg.SetDiskID(dev, node)
8196 result = self.rpc.call_blockdev_close(node, self.instance.name,
8197 self.instance.disks)
8198 result.Raise("Cannot change disk to secondary on node %s" % node)
8200 def _GoStandalone(self):
8201 """Disconnect from the network.
8204 self.feedback_fn("* changing into standalone mode")
8205 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8206 self.instance.disks)
8207 for node, nres in result.items():
8208 nres.Raise("Cannot disconnect disks node %s" % node)
8210 def _GoReconnect(self, multimaster):
8211 """Reconnect to the network.
8217 msg = "single-master"
8218 self.feedback_fn("* changing disks into %s mode" % msg)
8219 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8220 (self.instance.disks, self.instance),
8221 self.instance.name, multimaster)
8222 for node, nres in result.items():
8223 nres.Raise("Cannot change disks config on node %s" % node)
8225 def _ExecCleanup(self):
8226 """Try to cleanup after a failed migration.
8228 The cleanup is done by:
8229 - check that the instance is running only on one node
8230 (and update the config if needed)
8231 - change disks on its secondary node to secondary
8232 - wait until disks are fully synchronized
8233 - disconnect from the network
8234 - change disks into single-master mode
8235 - wait again until disks are fully synchronized
8238 instance = self.instance
8239 target_node = self.target_node
8240 source_node = self.source_node
8242 # check running on only one node
8243 self.feedback_fn("* checking where the instance actually runs"
8244 " (if this hangs, the hypervisor might be in"
8246 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8247 for node, result in ins_l.items():
8248 result.Raise("Can't contact node %s" % node)
8250 runningon_source = instance.name in ins_l[source_node].payload
8251 runningon_target = instance.name in ins_l[target_node].payload
8253 if runningon_source and runningon_target:
8254 raise errors.OpExecError("Instance seems to be running on two nodes,"
8255 " or the hypervisor is confused; you will have"
8256 " to ensure manually that it runs only on one"
8257 " and restart this operation")
8259 if not (runningon_source or runningon_target):
8260 raise errors.OpExecError("Instance does not seem to be running at all;"
8261 " in this case it's safer to repair by"
8262 " running 'gnt-instance stop' to ensure disk"
8263 " shutdown, and then restarting it")
8265 if runningon_target:
8266 # the migration has actually succeeded, we need to update the config
8267 self.feedback_fn("* instance running on secondary node (%s),"
8268 " updating config" % target_node)
8269 instance.primary_node = target_node
8270 self.cfg.Update(instance, self.feedback_fn)
8271 demoted_node = source_node
8273 self.feedback_fn("* instance confirmed to be running on its"
8274 " primary node (%s)" % source_node)
8275 demoted_node = target_node
8277 if instance.disk_template in constants.DTS_INT_MIRROR:
8278 self._EnsureSecondary(demoted_node)
8280 self._WaitUntilSync()
8281 except errors.OpExecError:
8282 # we ignore errors here, since if the device is standalone, it
8283 # won't be able to sync
8285 self._GoStandalone()
8286 self._GoReconnect(False)
8287 self._WaitUntilSync()
8289 self.feedback_fn("* done")
8291 def _RevertDiskStatus(self):
8292 """Try to revert the disk status after a failed migration.
8295 target_node = self.target_node
8296 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8300 self._EnsureSecondary(target_node)
8301 self._GoStandalone()
8302 self._GoReconnect(False)
8303 self._WaitUntilSync()
8304 except errors.OpExecError, err:
8305 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8306 " please try to recover the instance manually;"
8307 " error '%s'" % str(err))
8309 def _AbortMigration(self):
8310 """Call the hypervisor code to abort a started migration.
8313 instance = self.instance
8314 target_node = self.target_node
8315 source_node = self.source_node
8316 migration_info = self.migration_info
8318 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8322 abort_msg = abort_result.fail_msg
8324 logging.error("Aborting migration failed on target node %s: %s",
8325 target_node, abort_msg)
8326 # Don't raise an exception here, as we still have to try to revert the
8327 # disk status, even if this step failed.
8329 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8330 instance, False, self.live)
8331 abort_msg = abort_result.fail_msg
8333 logging.error("Aborting migration failed on source node %s: %s",
8334 source_node, abort_msg)
8336 def _ExecMigration(self):
8337 """Migrate an instance.
8339 The migrate is done by:
8340 - change the disks into dual-master mode
8341 - wait until disks are fully synchronized again
8342 - migrate the instance
8343 - change disks on the new secondary node (the old primary) to secondary
8344 - wait until disks are fully synchronized
8345 - change disks into single-master mode
8348 instance = self.instance
8349 target_node = self.target_node
8350 source_node = self.source_node
8352 # Check for hypervisor version mismatch and warn the user.
8353 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8354 None, [self.instance.hypervisor])
8355 for ninfo in nodeinfo.values():
8356 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8358 (_, _, (src_info, )) = nodeinfo[source_node].payload
8359 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8361 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8362 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8363 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8364 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8365 if src_version != dst_version:
8366 self.feedback_fn("* warning: hypervisor version mismatch between"
8367 " source (%s) and target (%s) node" %
8368 (src_version, dst_version))
8370 self.feedback_fn("* checking disk consistency between source and target")
8371 for (idx, dev) in enumerate(instance.disks):
8372 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8373 raise errors.OpExecError("Disk %s is degraded or not fully"
8374 " synchronized on target node,"
8375 " aborting migration" % idx)
8377 if self.current_mem > self.tgt_free_mem:
8378 if not self.allow_runtime_changes:
8379 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8380 " free memory to fit instance %s on target"
8381 " node %s (have %dMB, need %dMB)" %
8382 (instance.name, target_node,
8383 self.tgt_free_mem, self.current_mem))
8384 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8385 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8388 rpcres.Raise("Cannot modify instance runtime memory")
8390 # First get the migration information from the remote node
8391 result = self.rpc.call_migration_info(source_node, instance)
8392 msg = result.fail_msg
8394 log_err = ("Failed fetching source migration information from %s: %s" %
8396 logging.error(log_err)
8397 raise errors.OpExecError(log_err)
8399 self.migration_info = migration_info = result.payload
8401 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8402 # Then switch the disks to master/master mode
8403 self._EnsureSecondary(target_node)
8404 self._GoStandalone()
8405 self._GoReconnect(True)
8406 self._WaitUntilSync()
8408 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8409 result = self.rpc.call_accept_instance(target_node,
8412 self.nodes_ip[target_node])
8414 msg = result.fail_msg
8416 logging.error("Instance pre-migration failed, trying to revert"
8417 " disk status: %s", msg)
8418 self.feedback_fn("Pre-migration failed, aborting")
8419 self._AbortMigration()
8420 self._RevertDiskStatus()
8421 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8422 (instance.name, msg))
8424 self.feedback_fn("* migrating instance to %s" % target_node)
8425 result = self.rpc.call_instance_migrate(source_node, instance,
8426 self.nodes_ip[target_node],
8428 msg = result.fail_msg
8430 logging.error("Instance migration failed, trying to revert"
8431 " disk status: %s", msg)
8432 self.feedback_fn("Migration failed, aborting")
8433 self._AbortMigration()
8434 self._RevertDiskStatus()
8435 raise errors.OpExecError("Could not migrate instance %s: %s" %
8436 (instance.name, msg))
8438 self.feedback_fn("* starting memory transfer")
8439 last_feedback = time.time()
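# Illustrative timing: the polling loop below asks the hypervisor for the
# migration status every _MIGRATION_POLL_INTERVAL (1 second) and emits a
# progress line roughly every _MIGRATION_FEEDBACK_INTERVAL (10 seconds)
# while the transfer is still active.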
8441 result = self.rpc.call_instance_get_migration_status(source_node,
8443 msg = result.fail_msg
8444 ms = result.payload # MigrationStatus instance
8445 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8446 logging.error("Instance migration failed, trying to revert"
8447 " disk status: %s", msg)
8448 self.feedback_fn("Migration failed, aborting")
8449 self._AbortMigration()
8450 self._RevertDiskStatus()
8451 raise errors.OpExecError("Could not migrate instance %s: %s" %
8452 (instance.name, msg))
8454 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8455 self.feedback_fn("* memory transfer complete")
8458 if (utils.TimeoutExpired(last_feedback,
8459 self._MIGRATION_FEEDBACK_INTERVAL) and
8460 ms.transferred_ram is not None):
8461 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8462 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8463 last_feedback = time.time()
8465 time.sleep(self._MIGRATION_POLL_INTERVAL)
8467 result = self.rpc.call_instance_finalize_migration_src(source_node,
8471 msg = result.fail_msg
8473 logging.error("Instance migration succeeded, but finalization failed"
8474 " on the source node: %s", msg)
8475 raise errors.OpExecError("Could not finalize instance migration: %s" %
8478 instance.primary_node = target_node
8480 # distribute new instance config to the other nodes
8481 self.cfg.Update(instance, self.feedback_fn)
8483 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8487 msg = result.fail_msg
8489 logging.error("Instance migration succeeded, but finalization failed"
8490 " on the target node: %s", msg)
8491 raise errors.OpExecError("Could not finalize instance migration: %s" %
8494 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8495 self._EnsureSecondary(source_node)
8496 self._WaitUntilSync()
8497 self._GoStandalone()
8498 self._GoReconnect(False)
8499 self._WaitUntilSync()
8501 # If the instance's disk template is `rbd' and there was a successful
8502 # migration, unmap the device from the source node.
8503 if self.instance.disk_template == constants.DT_RBD:
8504 disks = _ExpandCheckDisks(instance, instance.disks)
8505 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8507 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8508 msg = result.fail_msg
8510 logging.error("Migration was successful, but couldn't unmap the"
8511 " block device %s on source node %s: %s",
8512 disk.iv_name, source_node, msg)
8513 logging.error("You need to unmap the device %s manually on %s",
8514 disk.iv_name, source_node)
8516 self.feedback_fn("* done")
8518 def _ExecFailover(self):
8519 """Failover an instance.
8521 The failover is done by shutting it down on its present node and
8522 starting it on the secondary.
8525 instance = self.instance
8526 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8528 source_node = instance.primary_node
8529 target_node = self.target_node
8531 if instance.admin_state == constants.ADMINST_UP:
8532 self.feedback_fn("* checking disk consistency between source and target")
8533 for (idx, dev) in enumerate(instance.disks):
8534 # for drbd, these are drbd over lvm
8535 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8537 if primary_node.offline:
8538 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8540 (primary_node.name, idx, target_node))
8541 elif not self.ignore_consistency:
8542 raise errors.OpExecError("Disk %s is degraded on target node,"
8543 " aborting failover" % idx)
8545 self.feedback_fn("* not checking disk consistency as instance is not"
8548 self.feedback_fn("* shutting down instance on source node")
8549 logging.info("Shutting down instance %s on node %s",
8550 instance.name, source_node)
8552 result = self.rpc.call_instance_shutdown(source_node, instance,
8553 self.shutdown_timeout)
8554 msg = result.fail_msg
8556 if self.ignore_consistency or primary_node.offline:
8557 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8558 " proceeding anyway; please make sure node"
8559 " %s is down; error details: %s",
8560 instance.name, source_node, source_node, msg)
8562 raise errors.OpExecError("Could not shutdown instance %s on"
8564 (instance.name, source_node, msg))
8566 self.feedback_fn("* deactivating the instance's disks on source node")
8567 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8568 raise errors.OpExecError("Can't shut down the instance's disks")
8570 instance.primary_node = target_node
8571 # distribute new instance config to the other nodes
8572 self.cfg.Update(instance, self.feedback_fn)
8574 # Only start the instance if it's marked as up
8575 if instance.admin_state == constants.ADMINST_UP:
8576 self.feedback_fn("* activating the instance's disks on target node %s" %
8578 logging.info("Starting instance %s on node %s",
8579 instance.name, target_node)
8581 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8582 ignore_secondaries=True)
8584 _ShutdownInstanceDisks(self.lu, instance)
8585 raise errors.OpExecError("Can't activate the instance's disks")
8587 self.feedback_fn("* starting the instance on the target node %s" %
8589 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8591 msg = result.fail_msg
8593 _ShutdownInstanceDisks(self.lu, instance)
8594 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8595 (instance.name, target_node, msg))
8597 def Exec(self, feedback_fn):
8598 """Perform the migration.
8601 self.feedback_fn = feedback_fn
8602 self.source_node = self.instance.primary_node
8604 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8605 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8606 self.target_node = self.instance.secondary_nodes[0]
8607 # Otherwise self.target_node has been populated either
8608 # directly, or through an iallocator.
8610 self.all_nodes = [self.source_node, self.target_node]
8611 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8612 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8615 feedback_fn("Failover instance %s" % self.instance.name)
8616 self._ExecFailover()
8618 feedback_fn("Migrating instance %s" % self.instance.name)
8621 return self._ExecCleanup()
8623 return self._ExecMigration()
8626 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8628 """Wrapper around L{_CreateBlockDevInner}.
8630 This method annotates the root device first.
8633 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8634 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8638 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8640 """Create a tree of block devices on a given node.
8642 If this device type has to be created on secondaries, create it and
8645 If not, just recurse to children keeping the same 'force' value.
8647 @attention: The device has to be annotated already.
8649 @param lu: the lu on whose behalf we execute
8650 @param node: the node on which to create the device
8651 @type instance: L{objects.Instance}
8652 @param instance: the instance which owns the device
8653 @type device: L{objects.Disk}
8654 @param device: the device to create
8655 @type force_create: boolean
8656 @param force_create: whether to force creation of this device; this
8657 will be changed to True whenever we find a device which has
8658 CreateOnSecondary() attribute
8659 @param info: the extra 'metadata' we should attach to the device
8660 (this will be represented as a LVM tag)
8661 @type force_open: boolean
8662 @param force_open: this parameter will be passed to the
8663 L{backend.BlockdevCreate} function where it specifies
8664 whether we run on primary or not, and it affects both
8665 the child assembly and the device's own Open() execution
8668 if device.CreateOnSecondary():
8672 for child in device.children:
8673 _CreateBlockDevInner(lu, node, instance, child, force_create,
8676 if not force_create:
8679 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8682 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8683 """Create a single block device on a given node.
8685 This will not recurse over children of the device, so they must be
8688 @param lu: the lu on whose behalf we execute
8689 @param node: the node on which to create the device
8690 @type instance: L{objects.Instance}
8691 @param instance: the instance which owns the device
8692 @type device: L{objects.Disk}
8693 @param device: the device to create
8694 @param info: the extra 'metadata' we should attach to the device
8695 (this will be represented as a LVM tag)
8696 @type force_open: boolean
8697 @param force_open: this parameter will be passed to the
8698 L{backend.BlockdevCreate} function where it specifies
8699 whether we run on primary or not, and it affects both
8700 the child assembly and the device's own Open() execution
8703 lu.cfg.SetDiskID(device, node)
8704 result = lu.rpc.call_blockdev_create(node, device, device.size,
8705 instance.name, force_open, info)
8706 result.Raise("Can't create block device %s on"
8707 " node %s for instance %s" % (device, node, instance.name))
8708 if device.physical_id is None:
8709 device.physical_id = result.payload
8712 def _GenerateUniqueNames(lu, exts):
8713 """Generate a suitable LV name.
8715 This will generate a logical volume name for the given instance.
8720 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8721 results.append("%s%s" % (new_id, val))
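# Illustrative example (hypothetical values): _GenerateUniqueNames(lu,
# [".disk0", ".disk1"]) returns names such as ["<uuid1>.disk0",
# "<uuid2>.disk1"], i.e. each extension is prefixed with its own freshly
# generated unique ID.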
8725 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8726 iv_name, p_minor, s_minor):
8727 """Generate a drbd8 device complete with its children.
8730 assert len(vgnames) == len(names) == 2
8731 port = lu.cfg.AllocatePort()
8732 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8734 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8735 logical_id=(vgnames[0], names[0]),
8737 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8738 logical_id=(vgnames[1], names[1]),
8740 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8741 logical_id=(primary, secondary, port,
8744 children=[dev_data, dev_meta],
8745 iv_name=iv_name, params={})
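# Sketch of the resulting layout (illustrative): the DRBD8 device is backed
# by two LV children -- a data volume of the requested size and a
# DRBD_META_SIZE metadata volume -- with the peer nodes and the allocated
# port, minors and shared secret encoded in its logical_id.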
8749 _DISK_TEMPLATE_NAME_PREFIX = {
8750 constants.DT_PLAIN: "",
8751 constants.DT_RBD: ".rbd",
8755 _DISK_TEMPLATE_DEVICE_TYPE = {
8756 constants.DT_PLAIN: constants.LD_LV,
8757 constants.DT_FILE: constants.LD_FILE,
8758 constants.DT_SHARED_FILE: constants.LD_FILE,
8759 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8760 constants.DT_RBD: constants.LD_RBD,
8764 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8765 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8766 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8767 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8768 """Generate the entire disk layout for a given template type.
8771 #TODO: compute space requirements
8773 vgname = lu.cfg.GetVGName()
8774 disk_count = len(disk_info)
8777 if template_name == constants.DT_DISKLESS:
8779 elif template_name == constants.DT_DRBD8:
8780 if len(secondary_nodes) != 1:
8781 raise errors.ProgrammerError("Wrong template configuration")
8782 remote_node = secondary_nodes[0]
8783 minors = lu.cfg.AllocateDRBDMinor(
8784 [primary_node, remote_node] * len(disk_info), instance_name)
8786 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8788 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8791 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8792 for i in range(disk_count)]):
8793 names.append(lv_prefix + "_data")
8794 names.append(lv_prefix + "_meta")
8795 for idx, disk in enumerate(disk_info):
8796 disk_index = idx + base_index
8797 data_vg = disk.get(constants.IDISK_VG, vgname)
8798 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8799 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8800 disk[constants.IDISK_SIZE],
8802 names[idx * 2:idx * 2 + 2],
8803 "disk/%d" % disk_index,
8804 minors[idx * 2], minors[idx * 2 + 1])
8805 disk_dev.mode = disk[constants.IDISK_MODE]
8806 disks.append(disk_dev)
8809 raise errors.ProgrammerError("Wrong template configuration")
8811 if template_name == constants.DT_FILE:
8813 elif template_name == constants.DT_SHARED_FILE:
8814 _req_shr_file_storage()
8816 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8817 if name_prefix is None:
8820 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8821 (name_prefix, base_index + i)
8822 for i in range(disk_count)])
8824 if template_name == constants.DT_PLAIN:
8825 def logical_id_fn(idx, _, disk):
8826 vg = disk.get(constants.IDISK_VG, vgname)
8827 return (vg, names[idx])
8828 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8830 lambda _, disk_index, disk: (file_driver,
8831 "%s/disk%d" % (file_storage_dir,
8833 elif template_name == constants.DT_BLOCK:
8835 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8836 disk[constants.IDISK_ADOPT])
8837 elif template_name == constants.DT_RBD:
8838 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8840 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8842 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8844 for idx, disk in enumerate(disk_info):
8845 disk_index = idx + base_index
8846 size = disk[constants.IDISK_SIZE]
8847 feedback_fn("* disk %s, size %s" %
8848 (disk_index, utils.FormatUnit(size, "h")))
8849 disks.append(objects.Disk(dev_type=dev_type, size=size,
8850 logical_id=logical_id_fn(idx, disk_index, disk),
8851 iv_name="disk/%d" % disk_index,
8852 mode=disk[constants.IDISK_MODE],
8858 def _GetInstanceInfoText(instance):
8859 """Compute that text that should be added to the disk's metadata.
8862 return "originstname+%s" % instance.name
8865 def _CalcEta(time_taken, written, total_size):
8866 """Calculates the ETA based on size written and total size.
8868 @param time_taken: The time taken so far
8869 @param written: amount written so far
8870 @param total_size: The total size of data to be written
8871 @return: The remaining time in seconds
8874 avg_time = time_taken / float(written)
8875 return (total_size - written) * avg_time
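# Worked example (hypothetical figures): if 512 MiB of a 2048 MiB disk were
# written in 30 seconds, the average is 30/512 seconds per MiB, so the
# remaining 1536 MiB give an ETA of 1536 * 30 / 512 = 90 seconds.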
8878 def _WipeDisks(lu, instance):
8879 """Wipes instance disks.
8881 @type lu: L{LogicalUnit}
8882 @param lu: the logical unit on whose behalf we execute
8883 @type instance: L{objects.Instance}
8884 @param instance: the instance whose disks we should create
8885 @return: the success of the wipe
8888 node = instance.primary_node
8890 for device in instance.disks:
8891 lu.cfg.SetDiskID(device, node)
8893 logging.info("Pause sync of instance %s disks", instance.name)
8894 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8895 (instance.disks, instance),
8897 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8899 for idx, success in enumerate(result.payload):
8901 logging.warn("pause-sync of instance %s for disks %d failed",
8905 for idx, device in enumerate(instance.disks):
8906 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8907 # MAX_WIPE_CHUNK at max
8908 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8909 constants.MIN_WIPE_CHUNK_PERCENT)
8910 # we _must_ make this an int, otherwise rounding errors will
8912 wipe_chunk_size = int(wipe_chunk_size)
8914 lu.LogInfo("* Wiping disk %d", idx)
8915 logging.info("Wiping disk %d for instance %s, node %s using"
8916 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8921 start_time = time.time()
8923 while offset < size:
8924 wipe_size = min(wipe_chunk_size, size - offset)
8925 logging.debug("Wiping disk %d, offset %s, chunk %s",
8926 idx, offset, wipe_size)
8927 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8929 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8930 (idx, offset, wipe_size))
8933 if now - last_output >= 60:
8934 eta = _CalcEta(now - start_time, offset, size)
8935 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8936 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8939 logging.info("Resume sync of instance %s disks", instance.name)
8941 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8942 (instance.disks, instance),
8946 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8947 " please have a look at the status and troubleshoot"
8948 " the issue: %s", node, result.fail_msg)
8950 for idx, success in enumerate(result.payload):
8952 lu.LogWarning("Resume sync of disk %d failed, please have a"
8953 " look at the status and troubleshoot the issue", idx)
8954 logging.warn("resume-sync of instance %s for disks %d failed",
8958 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8959 """Create all disks for an instance.
8961 This abstracts away some work from AddInstance.
8963 @type lu: L{LogicalUnit}
8964 @param lu: the logical unit on whose behalf we execute
8965 @type instance: L{objects.Instance}
8966 @param instance: the instance whose disks we should create
8968 @param to_skip: list of indices to skip
8969 @type target_node: string
8970 @param target_node: if passed, overrides the target node for creation
8972 @return: the success of the creation
8975 info = _GetInstanceInfoText(instance)
8976 if target_node is None:
8977 pnode = instance.primary_node
8978 all_nodes = instance.all_nodes
8983 if instance.disk_template in constants.DTS_FILEBASED:
8984 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8985 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8987 result.Raise("Failed to create directory '%s' on"
8988 " node %s" % (file_storage_dir, pnode))
8990 # Note: this needs to be kept in sync with adding of disks in
8991 # LUInstanceSetParams
8992 for idx, device in enumerate(instance.disks):
8993 if to_skip and idx in to_skip:
8995 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8997 for node in all_nodes:
8998 f_create = node == pnode
8999 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
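# Descriptive note: f_create (and thus force_open) is True only on the
# primary node -- or on the overriding target node -- so secondary nodes
# only get devices created when _CreateBlockDevInner finds a
# CreateOnSecondary() device in the tree.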
9002 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9003 """Remove all disks for an instance.
9005 This abstracts away some work from `AddInstance()` and
9006 `RemoveInstance()`. Note that in case some of the devices couldn't
9007 be removed, the removal will continue with the other ones (compare
9008 with `_CreateDisks()`).
9010 @type lu: L{LogicalUnit}
9011 @param lu: the logical unit on whose behalf we execute
9012 @type instance: L{objects.Instance}
9013 @param instance: the instance whose disks we should remove
9014 @type target_node: string
9015 @param target_node: used to override the node on which to remove the disks
9017 @return: the success of the removal
9020 logging.info("Removing block devices for instance %s", instance.name)
9023 ports_to_release = set()
9024 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9025 for (idx, device) in enumerate(anno_disks):
9027 edata = [(target_node, device)]
9029 edata = device.ComputeNodeTree(instance.primary_node)
9030 for node, disk in edata:
9031 lu.cfg.SetDiskID(disk, node)
9032 result = lu.rpc.call_blockdev_remove(node, disk)
9034 lu.LogWarning("Could not remove disk %s on node %s,"
9035 " continuing anyway: %s", idx, node, result.fail_msg)
9036 if not (result.offline and node != instance.primary_node):
9039 # if this is a DRBD disk, return its port to the pool
9040 if device.dev_type in constants.LDS_DRBD:
9041 ports_to_release.add(device.logical_id[2])
9043 if all_result or ignore_failures:
9044 for port in ports_to_release:
9045 lu.cfg.AddTcpUdpPort(port)
9047 if instance.disk_template == constants.DT_FILE:
9048 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9052 tgt = instance.primary_node
9053 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9055 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9056 file_storage_dir, instance.primary_node, result.fail_msg)
9062 def _ComputeDiskSizePerVG(disk_template, disks):
9063 """Compute disk size requirements in the volume group
9066 def _compute(disks, payload):
9067 """Universal algorithm.
9072 vgs[disk[constants.IDISK_VG]] = \
9073 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9077 # Required free disk space as a function of disk and swap space
9079 constants.DT_DISKLESS: {},
9080 constants.DT_PLAIN: _compute(disks, 0),
9081 # 128 MB are added for drbd metadata for each disk
9082 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9083 constants.DT_FILE: {},
9084 constants.DT_SHARED_FILE: {},
9087 if disk_template not in req_size_dict:
9088 raise errors.ProgrammerError("Disk template '%s' size requirement"
9089 " is unknown" % disk_template)
9091 return req_size_dict[disk_template]
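# Illustrative example (hypothetical values): a plain instance with a
# 512 MiB disk in VG "xenvg" and a 1024 MiB disk in VG "data" yields
# {"xenvg": 512, "data": 1024}; with DT_DRBD8 each disk additionally adds
# DRBD_META_SIZE to its volume group.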
9094 def _ComputeDiskSize(disk_template, disks):
9095 """Compute disk size requirements according to disk template
9098 # Required free disk space as a function of disk and swap space
9100 constants.DT_DISKLESS: None,
9101 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9102 # 128 MB are added for drbd metadata for each disk
9104 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9105 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9106 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9107 constants.DT_BLOCK: 0,
9108 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9111 if disk_template not in req_size_dict:
9112 raise errors.ProgrammerError("Disk template '%s' size requirement"
9113 " is unknown" % disk_template)
9115 return req_size_dict[disk_template]
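# Worked example (hypothetical values): two 1024 MiB disks under DT_DRBD8
# require 2 * (1024 + DRBD_META_SIZE) = 2304 MiB with the 128 MiB metadata
# size mentioned above, while under DT_PLAIN they require 2048 MiB.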
9118 def _FilterVmNodes(lu, nodenames):
9119 """Filters out non-vm_capable nodes from a list.
9121 @type lu: L{LogicalUnit}
9122 @param lu: the logical unit for which we check
9123 @type nodenames: list
9124 @param nodenames: the list of nodes on which we should check
9126 @return: the list of vm-capable nodes
9129 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9130 return [name for name in nodenames if name not in vm_nodes]
9133 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9134 """Hypervisor parameter validation.
9136 This function abstracts the hypervisor parameter validation to be
9137 used in both instance create and instance modify.
9139 @type lu: L{LogicalUnit}
9140 @param lu: the logical unit for which we check
9141 @type nodenames: list
9142 @param nodenames: the list of nodes on which we should check
9143 @type hvname: string
9144 @param hvname: the name of the hypervisor we should use
9145 @type hvparams: dict
9146 @param hvparams: the parameters which we need to check
9147 @raise errors.OpPrereqError: if the parameters are not valid
9150 nodenames = _FilterVmNodes(lu, nodenames)
9152 cluster = lu.cfg.GetClusterInfo()
9153 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9155 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9156 for node in nodenames:
9160 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9163 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9164 """OS parameters validation.
9166 @type lu: L{LogicalUnit}
9167 @param lu: the logical unit for which we check
9168 @type required: boolean
9169 @param required: whether the validation should fail if the OS is not
9171 @type nodenames: list
9172 @param nodenames: the list of nodes on which we should check
9173 @type osname: string
9174 @param osname: the name of the OS we should use
9175 @type osparams: dict
9176 @param osparams: the parameters which we need to check
9177 @raise errors.OpPrereqError: if the parameters are not valid
9180 nodenames = _FilterVmNodes(lu, nodenames)
9181 result = lu.rpc.call_os_validate(nodenames, required, osname,
9182 [constants.OS_VALIDATE_PARAMETERS],
9184 for node, nres in result.items():
9185 # we don't check for offline cases since this should be run only
9186 # against the master node and/or an instance's nodes
9187 nres.Raise("OS Parameters validation failed on node %s" % node)
9188 if not nres.payload:
9189 lu.LogInfo("OS %s not found on node %s, validation skipped",
9193 class LUInstanceCreate(LogicalUnit):
9194 """Create an instance.
9197 HPATH = "instance-add"
9198 HTYPE = constants.HTYPE_INSTANCE
9201 def CheckArguments(self):
9205 # do not require name_check to ease forward/backward compatibility
9207 if self.op.no_install and self.op.start:
9208 self.LogInfo("No-installation mode selected, disabling startup")
9209 self.op.start = False
9210 # validate/normalize the instance name
9211 self.op.instance_name = \
9212 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9214 if self.op.ip_check and not self.op.name_check:
9215 # TODO: make the ip check more flexible and not depend on the name check
9216 raise errors.OpPrereqError("Cannot do IP address check without a name"
9217 " check", errors.ECODE_INVAL)
9219 # check nics' parameter names
9220 for nic in self.op.nics:
9221 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9223 # check disks. parameter names and consistent adopt/no-adopt strategy
9224 has_adopt = has_no_adopt = False
9225 for disk in self.op.disks:
9226 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9227 if constants.IDISK_ADOPT in disk:
9231 if has_adopt and has_no_adopt:
9232 raise errors.OpPrereqError("Either all disks are adopted or none is",
9235 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9236 raise errors.OpPrereqError("Disk adoption is not supported for the"
9237 " '%s' disk template" %
9238 self.op.disk_template,
9240 if self.op.iallocator is not None:
9241 raise errors.OpPrereqError("Disk adoption not allowed with an"
9242 " iallocator script", errors.ECODE_INVAL)
9243 if self.op.mode == constants.INSTANCE_IMPORT:
9244 raise errors.OpPrereqError("Disk adoption not allowed for"
9245 " instance import", errors.ECODE_INVAL)
9247 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9248 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9249 " but no 'adopt' parameter given" %
9250 self.op.disk_template,
9253 self.adopt_disks = has_adopt
9255 # instance name verification
9256 if self.op.name_check:
9257 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9258 self.op.instance_name = self.hostname1.name
9259 # used in CheckPrereq for ip ping check
9260 self.check_ip = self.hostname1.ip
9262 self.check_ip = None
9264 # file storage checks
9265 if (self.op.file_driver and
9266 not self.op.file_driver in constants.FILE_DRIVER):
9267 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9268 self.op.file_driver, errors.ECODE_INVAL)
9270 if self.op.disk_template == constants.DT_FILE:
9271 opcodes.RequireFileStorage()
9272 elif self.op.disk_template == constants.DT_SHARED_FILE:
9273 opcodes.RequireSharedFileStorage()
9275 ### Node/iallocator related checks
9276 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9278 if self.op.pnode is not None:
9279 if self.op.disk_template in constants.DTS_INT_MIRROR:
9280 if self.op.snode is None:
9281 raise errors.OpPrereqError("The networked disk templates need"
9282 " a mirror node", errors.ECODE_INVAL)
9284 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9286 self.op.snode = None
9288 self._cds = _GetClusterDomainSecret()
9290 if self.op.mode == constants.INSTANCE_IMPORT:
9291 # On import force_variant must be True, because if we forced it at
9292 # initial install, our only chance when importing it back is that it
9294 self.op.force_variant = True
9296 if self.op.no_install:
9297 self.LogInfo("No-installation mode has no effect during import")
9299 elif self.op.mode == constants.INSTANCE_CREATE:
9300 if self.op.os_type is None:
9301 raise errors.OpPrereqError("No guest OS specified",
9302 errors.ECODE_INVAL)
9303 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9304 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9305 " installation" % self.op.os_type,
9306 errors.ECODE_INVAL)
9307 if self.op.disk_template is None:
9308 raise errors.OpPrereqError("No disk template specified",
9309 errors.ECODE_INVAL)
9311 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9312 # Check handshake to ensure both clusters have the same domain secret
9313 src_handshake = self.op.source_handshake
9314 if not src_handshake:
9315 raise errors.OpPrereqError("Missing source handshake",
9316 errors.ECODE_INVAL)
9318 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9319 src_handshake)
9320 if errmsg:
9321 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9322 errors.ECODE_INVAL)
9324 # Load and check source CA
9325 self.source_x509_ca_pem = self.op.source_x509_ca
9326 if not self.source_x509_ca_pem:
9327 raise errors.OpPrereqError("Missing source X509 CA",
9328 errors.ECODE_INVAL)
9330 try:
9331 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9332 self._cds)
9333 except OpenSSL.crypto.Error, err:
9334 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9335 (err, ), errors.ECODE_INVAL)
9337 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9338 if errcode is not None:
9339 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9340 errors.ECODE_INVAL)
9342 self.source_x509_ca = cert
9344 src_instance_name = self.op.source_instance_name
9345 if not src_instance_name:
9346 raise errors.OpPrereqError("Missing source instance name",
9347 errors.ECODE_INVAL)
9349 self.source_instance_name = \
9350 netutils.GetHostname(name=src_instance_name).name
9352 else:
9353 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9354 self.op.mode, errors.ECODE_INVAL)
9356 def ExpandNames(self):
9357 """ExpandNames for CreateInstance.
9359 Figure out the right locks for instance creation.
9362 self.needed_locks = {}
9364 instance_name = self.op.instance_name
9365 # this is just a preventive check, but someone might still add this
9366 # instance in the meantime, and creation will fail at lock-add time
9367 if instance_name in self.cfg.GetInstanceList():
9368 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9369 instance_name, errors.ECODE_EXISTS)
9371 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9373 if self.op.iallocator:
9374 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9375 # specifying a group on instance creation and then selecting nodes from
9376 # that group
9377 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9378 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9379 else:
9380 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9381 nodelist = [self.op.pnode]
9382 if self.op.snode is not None:
9383 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9384 nodelist.append(self.op.snode)
9385 self.needed_locks[locking.LEVEL_NODE] = nodelist
9386 # Lock resources of instance's primary and secondary nodes (copy to
9387 # prevent accidential modification)
9388 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9390 # in case of import lock the source node too
9391 if self.op.mode == constants.INSTANCE_IMPORT:
9392 src_node = self.op.src_node
9393 src_path = self.op.src_path
9395 if src_path is None:
9396 self.op.src_path = src_path = self.op.instance_name
9398 if src_node is None:
9399 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9400 self.op.src_node = None
9401 if os.path.isabs(src_path):
9402 raise errors.OpPrereqError("Importing an instance from a path"
9403 " requires a source node option",
9406 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9407 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9408 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9409 if not os.path.isabs(src_path):
9410 self.op.src_path = src_path = \
9411 utils.PathJoin(constants.EXPORT_DIR, src_path)
9413 def _RunAllocator(self):
9414 """Run the allocator based on input opcode.
9417 nics = [n.ToDict() for n in self.nics]
9418 ial = IAllocator(self.cfg, self.rpc,
9419 mode=constants.IALLOCATOR_MODE_ALLOC,
9420 name=self.op.instance_name,
9421 disk_template=self.op.disk_template,
9422 tags=self.op.tags,
9423 os=self.op.os_type,
9424 vcpus=self.be_full[constants.BE_VCPUS],
9425 memory=self.be_full[constants.BE_MAXMEM],
9426 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9427 disks=self.disks,
9428 nics=nics,
9429 hypervisor=self.op.hypervisor,
9430 )
9432 ial.Run(self.op.iallocator)
9434 if not ial.success:
9435 raise errors.OpPrereqError("Can't compute nodes using"
9436 " iallocator '%s': %s" %
9437 (self.op.iallocator, ial.info),
9438 errors.ECODE_NORES)
9439 if len(ial.result) != ial.required_nodes:
9440 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9441 " of nodes (%s), required %s" %
9442 (self.op.iallocator, len(ial.result),
9443 ial.required_nodes), errors.ECODE_FAULT)
9444 self.op.pnode = ial.result[0]
9445 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9446 self.op.instance_name, self.op.iallocator,
9447 utils.CommaJoin(ial.result))
9448 if ial.required_nodes == 2:
9449 self.op.snode = ial.result[1]
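# Note: the allocator returns an ordered node list; result[0] becomes the
# primary node and, for mirrored disk templates needing two nodes,
# result[1] becomes the secondary.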
9451 def BuildHooksEnv(self):
9454 This runs on master, primary and secondary nodes of the instance.
9458 "ADD_MODE": self.op.mode,
9460 if self.op.mode == constants.INSTANCE_IMPORT:
9461 env["SRC_NODE"] = self.op.src_node
9462 env["SRC_PATH"] = self.op.src_path
9463 env["SRC_IMAGES"] = self.src_images
9465 env.update(_BuildInstanceHookEnv(
9466 name=self.op.instance_name,
9467 primary_node=self.op.pnode,
9468 secondary_nodes=self.secondaries,
9469 status=self.op.start,
9470 os_type=self.op.os_type,
9471 minmem=self.be_full[constants.BE_MINMEM],
9472 maxmem=self.be_full[constants.BE_MAXMEM],
9473 vcpus=self.be_full[constants.BE_VCPUS],
9474 nics=_NICListToTuple(self, self.nics),
9475 disk_template=self.op.disk_template,
9476 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9477 for d in self.disks],
9478 bep=self.be_full,
9479 hvp=self.hv_full,
9480 hypervisor_name=self.op.hypervisor,
9481 tags=self.op.tags,
9482 ))
9484 return env
9486 def BuildHooksNodes(self):
9487 """Build hooks nodes.
9490 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9493 def _ReadExportInfo(self):
9494 """Reads the export information from disk.
9496 It will override the opcode source node and path with the actual
9497 information, if these two were not specified before.
9499 @return: the export information
9502 assert self.op.mode == constants.INSTANCE_IMPORT
9504 src_node = self.op.src_node
9505 src_path = self.op.src_path
9507 if src_node is None:
9508 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9509 exp_list = self.rpc.call_export_list(locked_nodes)
9510 found = False
9511 for node in exp_list:
9512 if exp_list[node].fail_msg:
9513 continue
9514 if src_path in exp_list[node].payload:
9515 found = True
9516 self.op.src_node = src_node = node
9517 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9518 src_path)
9519 break
9520 if not found:
9521 raise errors.OpPrereqError("No export found for relative path %s" %
9522 src_path, errors.ECODE_INVAL)
9524 _CheckNodeOnline(self, src_node)
9525 result = self.rpc.call_export_info(src_node, src_path)
9526 result.Raise("No export or invalid export found in dir %s" % src_path)
9528 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9529 if not export_info.has_section(constants.INISECT_EXP):
9530 raise errors.ProgrammerError("Corrupted export config",
9531 errors.ECODE_ENVIRON)
9533 ei_version = export_info.get(constants.INISECT_EXP, "version")
9534 if (int(ei_version) != constants.EXPORT_VERSION):
9535 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9536 (ei_version, constants.EXPORT_VERSION),
9537 errors.ECODE_ENVIRON)
9539 return export_info
9540 def _ReadExportParams(self, einfo):
9541 """Use export parameters as defaults.
9543 In case the opcode doesn't specify (as in override) some instance
9544 parameters, then try to use them from the export information, if
9548 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9550 if self.op.disk_template is None:
9551 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9552 self.op.disk_template = einfo.get(constants.INISECT_INS,
9553 "disk_template")
9554 if self.op.disk_template not in constants.DISK_TEMPLATES:
9555 raise errors.OpPrereqError("Disk template specified in configuration"
9556 " file is not one of the allowed values:"
9557 " %s" % " ".join(constants.DISK_TEMPLATES))
9558 else:
9559 raise errors.OpPrereqError("No disk template specified and the export"
9560 " is missing the disk_template information",
9561 errors.ECODE_INVAL)
9563 if not self.op.disks:
9564 disks = []
9565 # TODO: import the disk iv_name too
9566 for idx in range(constants.MAX_DISKS):
9567 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9568 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9569 disks.append({constants.IDISK_SIZE: disk_sz})
9570 self.op.disks = disks
9571 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9572 raise errors.OpPrereqError("No disk info specified and the export"
9573 " is missing the disk information",
9576 if not self.op.nics:
9577 nics = []
9578 for idx in range(constants.MAX_NICS):
9579 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9580 ndict = {}
9581 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9582 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9583 ndict[name] = v
9584 nics.append(ndict)
9585 else:
9586 break
9587 self.op.nics = nics
9589 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9590 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9592 if (self.op.hypervisor is None and
9593 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9594 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9596 if einfo.has_section(constants.INISECT_HYP):
9597 # use the export parameters but do not override the ones
9598 # specified by the user
9599 for name, value in einfo.items(constants.INISECT_HYP):
9600 if name not in self.op.hvparams:
9601 self.op.hvparams[name] = value
9603 if einfo.has_section(constants.INISECT_BEP):
9604 # use the parameters, without overriding
9605 for name, value in einfo.items(constants.INISECT_BEP):
9606 if name not in self.op.beparams:
9607 self.op.beparams[name] = value
9608 # Compatibility for the old "memory" be param
9609 if name == constants.BE_MEMORY:
9610 if constants.BE_MAXMEM not in self.op.beparams:
9611 self.op.beparams[constants.BE_MAXMEM] = value
9612 if constants.BE_MINMEM not in self.op.beparams:
9613 self.op.beparams[constants.BE_MINMEM] = value
9614 else:
9615 # try to read the parameters old style, from the main section
9616 for name in constants.BES_PARAMETERS:
9617 if (name not in self.op.beparams and
9618 einfo.has_option(constants.INISECT_INS, name)):
9619 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9621 if einfo.has_section(constants.INISECT_OSP):
9622 # use the parameters, without overriding
9623 for name, value in einfo.items(constants.INISECT_OSP):
9624 if name not in self.op.osparams:
9625 self.op.osparams[name] = value
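# Note: throughout this method export values are only used to fill in
# parameters the opcode did not set, so anything given explicitly in the
# opcode keeps precedence over what was recorded in the export.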
9627 def _RevertToDefaults(self, cluster):
9628 """Revert the instance parameters to the default values.
9632 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9633 for name in self.op.hvparams.keys():
9634 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9635 del self.op.hvparams[name]
9637 be_defs = cluster.SimpleFillBE({})
9638 for name in self.op.beparams.keys():
9639 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9640 del self.op.beparams[name]
9642 nic_defs = cluster.SimpleFillNIC({})
9643 for nic in self.op.nics:
9644 for name in constants.NICS_PARAMETERS:
9645 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9646 del nic[name]
9648 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9649 for name in self.op.osparams.keys():
9650 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9651 del self.op.osparams[name]
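# Note: dropping parameters that currently equal the cluster defaults means
# the new instance keeps tracking those defaults instead of freezing the
# present values; this is what the identify_defaults handling in CheckPrereq
# relies on.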
9653 def _CalculateFileStorageDir(self):
9654 """Calculate final instance file storage dir.
9657 # file storage dir calculation/check
9658 self.instance_file_storage_dir = None
9659 if self.op.disk_template in constants.DTS_FILEBASED:
9660 # build the full file storage dir path
9661 joinargs = []
9663 if self.op.disk_template == constants.DT_SHARED_FILE:
9664 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9665 else:
9666 get_fsd_fn = self.cfg.GetFileStorageDir
9668 cfg_storagedir = get_fsd_fn()
9669 if not cfg_storagedir:
9670 raise errors.OpPrereqError("Cluster file storage dir not defined")
9671 joinargs.append(cfg_storagedir)
9673 if self.op.file_storage_dir is not None:
9674 joinargs.append(self.op.file_storage_dir)
9676 joinargs.append(self.op.instance_name)
9678 # pylint: disable=W0142
9679 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
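# The resulting path is roughly
#   <cluster storage dir>[/<op.file_storage_dir>]/<instance name>
# e.g. /srv/ganeti/file-storage/myinstance (illustrative path only).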
9681 def CheckPrereq(self): # pylint: disable=R0914
9682 """Check prerequisites.
9685 self._CalculateFileStorageDir()
9687 if self.op.mode == constants.INSTANCE_IMPORT:
9688 export_info = self._ReadExportInfo()
9689 self._ReadExportParams(export_info)
9690 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9691 else:
9692 self._old_instance_name = None
9694 if (not self.cfg.GetVGName() and
9695 self.op.disk_template not in constants.DTS_NOT_LVM):
9696 raise errors.OpPrereqError("Cluster does not support lvm-based"
9697 " instances", errors.ECODE_STATE)
9699 if (self.op.hypervisor is None or
9700 self.op.hypervisor == constants.VALUE_AUTO):
9701 self.op.hypervisor = self.cfg.GetHypervisorType()
9703 cluster = self.cfg.GetClusterInfo()
9704 enabled_hvs = cluster.enabled_hypervisors
9705 if self.op.hypervisor not in enabled_hvs:
9706 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9707 " cluster (%s)" % (self.op.hypervisor,
9708 ",".join(enabled_hvs)),
9711 # Check tag validity
9712 for tag in self.op.tags:
9713 objects.TaggableObject.ValidateTag(tag)
9715 # check hypervisor parameter syntax (locally)
9716 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9717 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9718 self.op.hvparams)
9719 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9720 hv_type.CheckParameterSyntax(filled_hvp)
9721 self.hv_full = filled_hvp
9722 # check that we don't specify global parameters on an instance
9723 _CheckGlobalHvParams(self.op.hvparams)
9725 # fill and remember the beparams dict
9726 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9727 for param, value in self.op.beparams.iteritems():
9728 if value == constants.VALUE_AUTO:
9729 self.op.beparams[param] = default_beparams[param]
9730 objects.UpgradeBeParams(self.op.beparams)
9731 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9732 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9734 # build os parameters
9735 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9737 # now that hvp/bep are in final format, let's reset to defaults,
9739 if self.op.identify_defaults:
9740 self._RevertToDefaults(cluster)
9742 # NIC buildup
9743 self.nics = []
9744 for idx, nic in enumerate(self.op.nics):
9745 nic_mode_req = nic.get(constants.INIC_MODE, None)
9746 nic_mode = nic_mode_req
9747 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9748 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9750 # in routed mode, for the first nic, the default ip is 'auto'
9751 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9752 default_ip_mode = constants.VALUE_AUTO
9753 else:
9754 default_ip_mode = constants.VALUE_NONE
9756 # ip validity checks
9757 ip = nic.get(constants.INIC_IP, default_ip_mode)
9758 if ip is None or ip.lower() == constants.VALUE_NONE:
9759 nic_ip = None
9760 elif ip.lower() == constants.VALUE_AUTO:
9761 if not self.op.name_check:
9762 raise errors.OpPrereqError("IP address set to auto but name checks"
9763 " have been skipped",
9764 errors.ECODE_INVAL)
9765 nic_ip = self.hostname1.ip
9766 else:
9767 if not netutils.IPAddress.IsValid(ip):
9768 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9769 errors.ECODE_INVAL)
9770 nic_ip = ip
9772 # TODO: check the ip address for uniqueness
9773 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9774 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9777 # MAC address verification
9778 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9779 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9780 mac = utils.NormalizeAndValidateMac(mac)
9782 try:
9783 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9784 except errors.ReservationError:
9785 raise errors.OpPrereqError("MAC address %s already in use"
9786 " in cluster" % mac,
9787 errors.ECODE_NOTUNIQUE)
9789 # Build nic parameters
9790 link = nic.get(constants.INIC_LINK, None)
9791 if link == constants.VALUE_AUTO:
9792 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9793 nicparams = {}
9794 if nic_mode_req:
9795 nicparams[constants.NIC_MODE] = nic_mode
9796 if link:
9797 nicparams[constants.NIC_LINK] = link
9799 check_params = cluster.SimpleFillNIC(nicparams)
9800 objects.NIC.CheckParameterSyntax(check_params)
9801 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9803 # disk checks/pre-build
9804 default_vg = self.cfg.GetVGName()
9805 self.disks = []
9806 for disk in self.op.disks:
9807 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9808 if mode not in constants.DISK_ACCESS_SET:
9809 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9810 mode, errors.ECODE_INVAL)
9811 size = disk.get(constants.IDISK_SIZE, None)
9812 if size is None:
9813 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9814 try:
9815 size = int(size)
9816 except (TypeError, ValueError):
9817 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9818 errors.ECODE_INVAL)
9820 data_vg = disk.get(constants.IDISK_VG, default_vg)
9821 new_disk = {
9822 constants.IDISK_SIZE: size,
9823 constants.IDISK_MODE: mode,
9824 constants.IDISK_VG: data_vg,
9825 }
9826 if constants.IDISK_METAVG in disk:
9827 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9828 if constants.IDISK_ADOPT in disk:
9829 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9830 self.disks.append(new_disk)
9832 if self.op.mode == constants.INSTANCE_IMPORT:
9833 disk_images = []
9834 for idx in range(len(self.disks)):
9835 option = "disk%d_dump" % idx
9836 if export_info.has_option(constants.INISECT_INS, option):
9837 # FIXME: are the old os-es, disk sizes, etc. useful?
9838 export_name = export_info.get(constants.INISECT_INS, option)
9839 image = utils.PathJoin(self.op.src_path, export_name)
9840 disk_images.append(image)
9841 else:
9842 disk_images.append(False)
9844 self.src_images = disk_images
9846 if self.op.instance_name == self._old_instance_name:
9847 for idx, nic in enumerate(self.nics):
9848 if nic.mac == constants.VALUE_AUTO:
9849 nic_mac_ini = "nic%d_mac" % idx
9850 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9852 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9854 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9855 if self.op.ip_check:
9856 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9857 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9858 (self.check_ip, self.op.instance_name),
9859 errors.ECODE_NOTUNIQUE)
9861 #### mac address generation
9862 # By generating here the mac address both the allocator and the hooks get
9863 # the real final mac address rather than the 'auto' or 'generate' value.
9864 # There is a race condition between the generation and the instance object
9865 # creation, which means that we know the mac is valid now, but we're not
9866 # sure it will be when we actually add the instance. If things go bad
9867 # adding the instance will abort because of a duplicate mac, and the
9868 # creation job will fail.
9869 for nic in self.nics:
9870 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9871 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9875 if self.op.iallocator is not None:
9876 self._RunAllocator()
9878 # Release all unneeded node locks
9879 _ReleaseLocks(self, locking.LEVEL_NODE,
9880 keep=filter(None, [self.op.pnode, self.op.snode,
9881 self.op.src_node]))
9882 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9883 keep=filter(None, [self.op.pnode, self.op.snode,
9884 self.op.src_node]))
9886 #### node related checks
9888 # check primary node
9889 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9890 assert self.pnode is not None, \
9891 "Cannot retrieve locked node %s" % self.op.pnode
9892 if pnode.offline:
9893 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9894 pnode.name, errors.ECODE_STATE)
9895 if pnode.drained:
9896 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9897 pnode.name, errors.ECODE_STATE)
9898 if not pnode.vm_capable:
9899 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9900 " '%s'" % pnode.name, errors.ECODE_STATE)
9902 self.secondaries = []
9904 # mirror node verification
9905 if self.op.disk_template in constants.DTS_INT_MIRROR:
9906 if self.op.snode == pnode.name:
9907 raise errors.OpPrereqError("The secondary node cannot be the"
9908 " primary node", errors.ECODE_INVAL)
9909 _CheckNodeOnline(self, self.op.snode)
9910 _CheckNodeNotDrained(self, self.op.snode)
9911 _CheckNodeVmCapable(self, self.op.snode)
9912 self.secondaries.append(self.op.snode)
9914 snode = self.cfg.GetNodeInfo(self.op.snode)
9915 if pnode.group != snode.group:
9916 self.LogWarning("The primary and secondary nodes are in two"
9917 " different node groups; the disk parameters"
9918 " from the first disk's node group will be"
9921 nodenames = [pnode.name] + self.secondaries
9923 # Verify instance specs
9924 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9925 ispec = {
9926 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9927 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9928 constants.ISPEC_DISK_COUNT: len(self.disks),
9929 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9930 constants.ISPEC_NIC_COUNT: len(self.nics),
9931 constants.ISPEC_SPINDLE_USE: spindle_use,
9932 }
9934 group_info = self.cfg.GetNodeGroup(pnode.group)
9935 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9936 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9937 if not self.op.ignore_ipolicy and res:
9938 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9939 " policy: %s") % (pnode.group,
9940 utils.CommaJoin(res)),
9943 if not self.adopt_disks:
9944 if self.op.disk_template == constants.DT_RBD:
9945 # _CheckRADOSFreeSpace() is just a placeholder.
9946 # Any function that checks prerequisites can be placed here.
9947 # Check if there is enough space on the RADOS cluster.
9948 _CheckRADOSFreeSpace()
9950 # Check lv size requirements, if not adopting
9951 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9952 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9954 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9955 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9956 disk[constants.IDISK_ADOPT])
9957 for disk in self.disks])
9958 if len(all_lvs) != len(self.disks):
9959 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9960 errors.ECODE_INVAL)
9961 for lv_name in all_lvs:
9962 try:
9963 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9964 # to ReserveLV uses the same syntax
9965 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9966 except errors.ReservationError:
9967 raise errors.OpPrereqError("LV named %s used by another instance" %
9968 lv_name, errors.ECODE_NOTUNIQUE)
9970 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9971 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9973 node_lvs = self.rpc.call_lv_list([pnode.name],
9974 vg_names.payload.keys())[pnode.name]
9975 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9976 node_lvs = node_lvs.payload
9978 delta = all_lvs.difference(node_lvs.keys())
9979 if delta:
9980 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9981 utils.CommaJoin(delta),
9982 errors.ECODE_INVAL)
9983 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9984 if online_lvs:
9985 raise errors.OpPrereqError("Online logical volumes found, cannot"
9986 " adopt: %s" % utils.CommaJoin(online_lvs),
9987 errors.ECODE_STATE)
9988 # update the size of disk based on what is found
9989 for dsk in self.disks:
9990 dsk[constants.IDISK_SIZE] = \
9991 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9992 dsk[constants.IDISK_ADOPT])][0]))
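# Note: for adopted LVs the size is taken from the lv_list RPC payload above
# (element 0, the volume size in MiB), while element 2 is the in-use/online
# flag that was used to reject volumes still attached to something else.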
9994 elif self.op.disk_template == constants.DT_BLOCK:
9995 # Normalize and de-duplicate device paths
9996 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9997 for disk in self.disks])
9998 if len(all_disks) != len(self.disks):
9999 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10000 errors.ECODE_INVAL)
10001 baddisks = [d for d in all_disks
10002 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10003 if baddisks:
10004 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10005 " cannot be adopted" %
10006 (", ".join(baddisks),
10007 constants.ADOPTABLE_BLOCKDEV_ROOT),
10008 errors.ECODE_INVAL)
10010 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10011 list(all_disks))[pnode.name]
10012 node_disks.Raise("Cannot get block device information from node %s" %
10013 pnode.name)
10014 node_disks = node_disks.payload
10015 delta = all_disks.difference(node_disks.keys())
10016 if delta:
10017 raise errors.OpPrereqError("Missing block device(s): %s" %
10018 utils.CommaJoin(delta),
10019 errors.ECODE_INVAL)
10020 for dsk in self.disks:
10021 dsk[constants.IDISK_SIZE] = \
10022 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10024 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10026 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10027 # check OS parameters (remotely)
10028 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10030 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10032 # memory check on primary node
10033 #TODO(dynmem): use MINMEM for checking
10034 if self.op.start:
10035 _CheckNodeFreeMemory(self, self.pnode.name,
10036 "creating instance %s" % self.op.instance_name,
10037 self.be_full[constants.BE_MAXMEM],
10038 self.op.hypervisor)
10040 self.dry_run_result = list(nodenames)
10042 def Exec(self, feedback_fn):
10043 """Create and add the instance to the cluster.
10046 instance = self.op.instance_name
10047 pnode_name = self.pnode.name
10049 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10050 self.owned_locks(locking.LEVEL_NODE)), \
10051 "Node locks differ from node resource locks"
10053 ht_kind = self.op.hypervisor
10054 if ht_kind in constants.HTS_REQ_PORT:
10055 network_port = self.cfg.AllocatePort()
10056 else:
10057 network_port = None
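# Only hypervisors listed in HTS_REQ_PORT get a cluster-allocated TCP port,
# typically used for the graphical console (e.g. VNC); the others run
# without a reserved port.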
10059 # This is ugly but we got a chicken-egg problem here
10060 # We can only take the group disk parameters, as the instance
10061 # has no disks yet (we are generating them right here).
10062 node = self.cfg.GetNodeInfo(pnode_name)
10063 nodegroup = self.cfg.GetNodeGroup(node.group)
10064 disks = _GenerateDiskTemplate(self,
10065 self.op.disk_template,
10066 instance, pnode_name,
10067 self.secondaries,
10068 self.disks,
10069 self.instance_file_storage_dir,
10070 self.op.file_driver,
10071 0,
10072 feedback_fn,
10073 self.cfg.GetGroupDiskParams(nodegroup))
10075 iobj = objects.Instance(name=instance, os=self.op.os_type,
10076 primary_node=pnode_name,
10077 nics=self.nics, disks=disks,
10078 disk_template=self.op.disk_template,
10079 admin_state=constants.ADMINST_DOWN,
10080 network_port=network_port,
10081 beparams=self.op.beparams,
10082 hvparams=self.op.hvparams,
10083 hypervisor=self.op.hypervisor,
10084 osparams=self.op.osparams,
10085 )
10087 if self.op.tags:
10088 for tag in self.op.tags:
10089 iobj.AddTag(tag)
10091 if self.adopt_disks:
10092 if self.op.disk_template == constants.DT_PLAIN:
10093 # rename LVs to the newly-generated names; we need to construct
10094 # 'fake' LV disks with the old data, plus the new unique_id
10095 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10096 rename_to = []
10097 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10098 rename_to.append(t_dsk.logical_id)
10099 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10100 self.cfg.SetDiskID(t_dsk, pnode_name)
10101 result = self.rpc.call_blockdev_rename(pnode_name,
10102 zip(tmp_disks, rename_to))
10103 result.Raise("Failed to rename adopted LVs")
10104 else:
10105 feedback_fn("* creating instance disks...")
10106 try:
10107 _CreateDisks(self, iobj)
10108 except errors.OpExecError:
10109 self.LogWarning("Device creation failed, reverting...")
10110 try:
10111 _RemoveDisks(self, iobj)
10112 finally:
10113 self.cfg.ReleaseDRBDMinors(instance)
10114 raise
10116 feedback_fn("adding instance %s to cluster config" % instance)
10118 self.cfg.AddInstance(iobj, self.proc.GetECId())
10120 # Declare that we don't want to remove the instance lock anymore, as we've
10121 # added the instance to the config
10122 del self.remove_locks[locking.LEVEL_INSTANCE]
10124 if self.op.mode == constants.INSTANCE_IMPORT:
10125 # Release unused nodes
10126 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10127 else:
10128 # Release all nodes
10129 _ReleaseLocks(self, locking.LEVEL_NODE)
10131 disk_abort = False
10132 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10133 feedback_fn("* wiping instance disks...")
10134 try:
10135 _WipeDisks(self, iobj)
10136 except errors.OpExecError, err:
10137 logging.exception("Wiping disks failed")
10138 self.LogWarning("Wiping instance disks failed (%s)", err)
10139 disk_abort = True
10141 if disk_abort:
10142 # Something is already wrong with the disks, don't do anything else
10143 pass
10144 elif self.op.wait_for_sync:
10145 disk_abort = not _WaitForSync(self, iobj)
10146 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10147 # make sure the disks are not degraded (still sync-ing is ok)
10148 feedback_fn("* checking mirrors status")
10149 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10150 else:
10151 disk_abort = False
10153 if disk_abort:
10154 _RemoveDisks(self, iobj)
10155 self.cfg.RemoveInstance(iobj.name)
10156 # Make sure the instance lock gets removed
10157 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10158 raise errors.OpExecError("There are some degraded disks for"
10159 " this instance")
10161 # Release all node resource locks
10162 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10164 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10165 # we need to set the disks ID to the primary node, since the
10166 # preceding code might or might have not done it, depending on
10167 # disk template and other options
10168 for disk in iobj.disks:
10169 self.cfg.SetDiskID(disk, pnode_name)
10170 if self.op.mode == constants.INSTANCE_CREATE:
10171 if not self.op.no_install:
10172 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10173 not self.op.wait_for_sync)
10174 if pause_sync:
10175 feedback_fn("* pausing disk sync to install instance OS")
10176 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10177 (iobj.disks,
10178 iobj), True)
10179 for idx, success in enumerate(result.payload):
10180 if not success:
10181 logging.warn("pause-sync of instance %s for disk %d failed",
10182 instance, idx)
10184 feedback_fn("* running the instance OS create scripts...")
10185 # FIXME: pass debug option from opcode to backend
10186 os_add_result = \
10187 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10188 self.op.debug_level)
10189 if pause_sync:
10190 feedback_fn("* resuming disk sync")
10191 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10192 (iobj.disks,
10193 iobj), False)
10194 for idx, success in enumerate(result.payload):
10195 if not success:
10196 logging.warn("resume-sync of instance %s for disk %d failed",
10197 instance, idx)
10199 os_add_result.Raise("Could not add os for instance %s"
10200 " on node %s" % (instance, pnode_name))
10203 elif self.op.mode == constants.INSTANCE_IMPORT:
10204 feedback_fn("* running the instance OS import scripts...")
10205 transfers = []
10208 for idx, image in enumerate(self.src_images):
10209 if not image:
10210 continue
10212 # FIXME: pass debug option from opcode to backend
10213 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10214 constants.IEIO_FILE, (image, ),
10215 constants.IEIO_SCRIPT,
10216 (iobj.disks[idx], idx),
10217 None)
10218 transfers.append(dt)
10220 import_result = \
10221 masterd.instance.TransferInstanceData(self, feedback_fn,
10222 self.op.src_node, pnode_name,
10223 self.pnode.secondary_ip,
10224 iobj, transfers)
10225 if not compat.all(import_result):
10226 self.LogWarning("Some disks for instance %s on node %s were not"
10227 " imported successfully" % (instance, pnode_name))
10229 rename_from = self._old_instance_name
10231 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10232 feedback_fn("* preparing remote import...")
10233 # The source cluster will stop the instance before attempting to make
10234 # a connection. In some cases stopping an instance can take a long
10235 # time, hence the shutdown timeout is added to the connection
10237 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10238 self.op.source_shutdown_timeout)
10239 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10241 assert iobj.primary_node == self.pnode.name
10242 disk_results = \
10243 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10244 self.source_x509_ca,
10245 self._cds, timeouts)
10246 if not compat.all(disk_results):
10247 # TODO: Should the instance still be started, even if some disks
10248 # failed to import (valid for local imports, too)?
10249 self.LogWarning("Some disks for instance %s on node %s were not"
10250 " imported successfully" % (instance, pnode_name))
10252 rename_from = self.source_instance_name
10254 else:
10255 # also checked in the prereq part
10256 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10257 % self.op.mode)
10259 # Run rename script on newly imported instance
10260 assert iobj.name == instance
10261 feedback_fn("Running rename script for %s" % instance)
10262 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10263 rename_from,
10264 self.op.debug_level)
10265 if result.fail_msg:
10266 self.LogWarning("Failed to run rename script for %s on node"
10267 " %s: %s" % (instance, pnode_name, result.fail_msg))
10269 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10271 if self.op.start:
10272 iobj.admin_state = constants.ADMINST_UP
10273 self.cfg.Update(iobj, feedback_fn)
10274 logging.info("Starting instance %s on node %s", instance, pnode_name)
10275 feedback_fn("* starting instance...")
10276 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10278 result.Raise("Could not start instance")
10280 return list(iobj.all_nodes)
10283 def _CheckRADOSFreeSpace():
10284 """Compute disk size requirements inside the RADOS cluster.
10287 # For the RADOS cluster we assume there is always enough space.
10288 pass
10291 class LUInstanceConsole(NoHooksLU):
10292 """Connect to an instance's console.
10294 This is somewhat special in that it returns the command line that
10295 you need to run on the master node in order to connect to the
10301 def ExpandNames(self):
10302 self.share_locks = _ShareAll()
10303 self._ExpandAndLockInstance()
10305 def CheckPrereq(self):
10306 """Check prerequisites.
10308 This checks that the instance is in the cluster.
10311 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10312 assert self.instance is not None, \
10313 "Cannot retrieve locked instance %s" % self.op.instance_name
10314 _CheckNodeOnline(self, self.instance.primary_node)
10316 def Exec(self, feedback_fn):
10317 """Connect to the console of an instance
10320 instance = self.instance
10321 node = instance.primary_node
10323 node_insts = self.rpc.call_instance_list([node],
10324 [instance.hypervisor])[node]
10325 node_insts.Raise("Can't get node information from %s" % node)
10327 if instance.name not in node_insts.payload:
10328 if instance.admin_state == constants.ADMINST_UP:
10329 state = constants.INSTST_ERRORDOWN
10330 elif instance.admin_state == constants.ADMINST_DOWN:
10331 state = constants.INSTST_ADMINDOWN
10332 else:
10333 state = constants.INSTST_ADMINOFFLINE
10334 raise errors.OpExecError("Instance %s is not running (state %s)" %
10335 (instance.name, state))
10337 logging.debug("Connecting to console of %s on %s", instance.name, node)
10339 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10342 def _GetInstanceConsole(cluster, instance):
10343 """Returns console information for an instance.
10345 @type cluster: L{objects.Cluster}
10346 @type instance: L{objects.Instance}
10350 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10351 # beparams and hvparams are passed separately, to avoid editing the
10352 # instance and then saving the defaults in the instance itself.
10353 hvparams = cluster.FillHV(instance)
10354 beparams = cluster.FillBE(instance)
10355 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10357 assert console.instance == instance.name
10358 assert console.Validate()
10360 return console.ToDict()
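# The serialized dict is what the job result carries back to the caller;
# clients such as "gnt-instance console" rebuild the actual console command
# line from it (see the LUInstanceConsole docstring above).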
10363 class LUInstanceReplaceDisks(LogicalUnit):
10364 """Replace the disks of an instance.
10367 HPATH = "mirrors-replace"
10368 HTYPE = constants.HTYPE_INSTANCE
10369 REQ_BGL = False
10371 def CheckArguments(self):
10372 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10373 self.op.iallocator)
10375 def ExpandNames(self):
10376 self._ExpandAndLockInstance()
10378 assert locking.LEVEL_NODE not in self.needed_locks
10379 assert locking.LEVEL_NODE_RES not in self.needed_locks
10380 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10382 assert self.op.iallocator is None or self.op.remote_node is None, \
10383 "Conflicting options"
10385 if self.op.remote_node is not None:
10386 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10388 # Warning: do not remove the locking of the new secondary here
10389 # unless DRBD8.AddChildren is changed to work in parallel;
10390 # currently it doesn't since parallel invocations of
10391 # FindUnusedMinor will conflict
10392 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10393 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10394 else:
10395 self.needed_locks[locking.LEVEL_NODE] = []
10396 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10398 if self.op.iallocator is not None:
10399 # iallocator will select a new node in the same group
10400 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10402 self.needed_locks[locking.LEVEL_NODE_RES] = []
10404 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10405 self.op.iallocator, self.op.remote_node,
10406 self.op.disks, False, self.op.early_release,
10407 self.op.ignore_ipolicy)
10409 self.tasklets = [self.replacer]
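# All real work (prerequisite checks and execution) is delegated to the
# TLReplaceDisks tasklet registered above; the LU itself only deals with
# argument checking, locking and hooks.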
10411 def DeclareLocks(self, level):
10412 if level == locking.LEVEL_NODEGROUP:
10413 assert self.op.remote_node is None
10414 assert self.op.iallocator is not None
10415 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10417 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10418 # Lock all groups used by instance optimistically; this requires going
10419 # via the node before it's locked, requiring verification later on
10420 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10421 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10423 elif level == locking.LEVEL_NODE:
10424 if self.op.iallocator is not None:
10425 assert self.op.remote_node is None
10426 assert not self.needed_locks[locking.LEVEL_NODE]
10428 # Lock member nodes of all locked groups
10429 self.needed_locks[locking.LEVEL_NODE] = [node_name
10430 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10431 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10432 else:
10433 self._LockInstancesNodes()
10434 elif level == locking.LEVEL_NODE_RES:
10436 self.needed_locks[locking.LEVEL_NODE_RES] = \
10437 self.needed_locks[locking.LEVEL_NODE]
10439 def BuildHooksEnv(self):
10440 """Build hooks env.
10442 This runs on the master, the primary and all the secondaries.
10445 instance = self.replacer.instance
10447 "MODE": self.op.mode,
10448 "NEW_SECONDARY": self.op.remote_node,
10449 "OLD_SECONDARY": instance.secondary_nodes[0],
10450 }
10451 env.update(_BuildInstanceHookEnvByObject(self, instance))
10452 return env
10454 def BuildHooksNodes(self):
10455 """Build hooks nodes.
10458 instance = self.replacer.instance
10459 nl = [
10460 self.cfg.GetMasterNode(),
10461 instance.primary_node,
10462 ]
10463 if self.op.remote_node is not None:
10464 nl.append(self.op.remote_node)
10465 return nl, nl
10467 def CheckPrereq(self):
10468 """Check prerequisites.
10471 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10472 self.op.iallocator is None)
10474 # Verify if node group locks are still correct
10475 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10476 if owned_groups:
10477 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10479 return LogicalUnit.CheckPrereq(self)
10482 class TLReplaceDisks(Tasklet):
10483 """Replaces disks for an instance.
10485 Note: Locking is not within the scope of this class.
10488 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10489 disks, delay_iallocator, early_release, ignore_ipolicy):
10490 """Initializes this class.
10493 Tasklet.__init__(self, lu)
10496 self.instance_name = instance_name
10497 self.mode = mode
10498 self.iallocator_name = iallocator_name
10499 self.remote_node = remote_node
10500 self.disks = disks
10501 self.delay_iallocator = delay_iallocator
10502 self.early_release = early_release
10503 self.ignore_ipolicy = ignore_ipolicy
10506 self.instance = None
10507 self.new_node = None
10508 self.target_node = None
10509 self.other_node = None
10510 self.remote_node_info = None
10511 self.node_secondary_ip = None
10513 @staticmethod
10514 def CheckArguments(mode, remote_node, iallocator):
10515 """Helper function for users of this class.
10518 # check for valid parameter combination
10519 if mode == constants.REPLACE_DISK_CHG:
10520 if remote_node is None and iallocator is None:
10521 raise errors.OpPrereqError("When changing the secondary either an"
10522 " iallocator script must be used or the"
10523 " new node given", errors.ECODE_INVAL)
10525 if remote_node is not None and iallocator is not None:
10526 raise errors.OpPrereqError("Give either the iallocator or the new"
10527 " secondary, not both", errors.ECODE_INVAL)
10529 elif remote_node is not None or iallocator is not None:
10530 # Not replacing the secondary
10531 raise errors.OpPrereqError("The iallocator and new node options can"
10532 " only be used when changing the"
10533 " secondary node", errors.ECODE_INVAL)
10535 @staticmethod
10536 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10537 """Compute a new secondary node using an IAllocator.
10540 ial = IAllocator(lu.cfg, lu.rpc,
10541 mode=constants.IALLOCATOR_MODE_RELOC,
10542 name=instance_name,
10543 relocate_from=list(relocate_from))
10545 ial.Run(iallocator_name)
10547 if not ial.success:
10548 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10549 " %s" % (iallocator_name, ial.info),
10550 errors.ECODE_NORES)
10552 if len(ial.result) != ial.required_nodes:
10553 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10554 " of nodes (%s), required %s" %
10556 len(ial.result), ial.required_nodes),
10557 errors.ECODE_FAULT)
10559 remote_node_name = ial.result[0]
10561 lu.LogInfo("Selected new secondary for instance '%s': %s",
10562 instance_name, remote_node_name)
10564 return remote_node_name
10566 def _FindFaultyDisks(self, node_name):
10567 """Wrapper for L{_FindFaultyInstanceDisks}.
10570 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10571 node_name, True)
10573 def _CheckDisksActivated(self, instance):
10574 """Checks if the instance disks are activated.
10576 @param instance: The instance to check disks
10577 @return: True if they are activated, False otherwise
10580 nodes = instance.all_nodes
10582 for idx, dev in enumerate(instance.disks):
10583 for node in nodes:
10584 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10585 self.cfg.SetDiskID(dev, node)
10587 result = _BlockdevFind(self, node, dev, instance)
10589 if result.offline:
10590 continue
10591 elif result.fail_msg or not result.payload:
10592 return False
10594 return True
10596 def CheckPrereq(self):
10597 """Check prerequisites.
10599 This checks that the instance is in the cluster.
10602 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10603 assert instance is not None, \
10604 "Cannot retrieve locked instance %s" % self.instance_name
10606 if instance.disk_template != constants.DT_DRBD8:
10607 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10608 " instances", errors.ECODE_INVAL)
10610 if len(instance.secondary_nodes) != 1:
10611 raise errors.OpPrereqError("The instance has a strange layout,"
10612 " expected one secondary but found %d" %
10613 len(instance.secondary_nodes),
10614 errors.ECODE_FAULT)
10616 if not self.delay_iallocator:
10617 self._CheckPrereq2()
10619 def _CheckPrereq2(self):
10620 """Check prerequisites, second part.
10622 This function should always be part of CheckPrereq. It was separated and is
10623 now called from Exec because during node evacuation iallocator was only
10624 called with an unmodified cluster model, not taking planned changes into
10628 instance = self.instance
10629 secondary_node = instance.secondary_nodes[0]
10631 if self.iallocator_name is None:
10632 remote_node = self.remote_node
10633 else:
10634 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10635 instance.name, instance.secondary_nodes)
10637 if remote_node is None:
10638 self.remote_node_info = None
10639 else:
10640 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10641 "Remote node '%s' is not locked" % remote_node
10643 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10644 assert self.remote_node_info is not None, \
10645 "Cannot retrieve locked node %s" % remote_node
10647 if remote_node == self.instance.primary_node:
10648 raise errors.OpPrereqError("The specified node is the primary node of"
10649 " the instance", errors.ECODE_INVAL)
10651 if remote_node == secondary_node:
10652 raise errors.OpPrereqError("The specified node is already the"
10653 " secondary node of the instance",
10654 errors.ECODE_INVAL)
10656 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10657 constants.REPLACE_DISK_CHG):
10658 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10659 errors.ECODE_INVAL)
10661 if self.mode == constants.REPLACE_DISK_AUTO:
10662 if not self._CheckDisksActivated(instance):
10663 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10664 " first" % self.instance_name,
10665 errors.ECODE_STATE)
10666 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10667 faulty_secondary = self._FindFaultyDisks(secondary_node)
10669 if faulty_primary and faulty_secondary:
10670 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10671 " one node and can not be repaired"
10672 " automatically" % self.instance_name,
10673 errors.ECODE_STATE)
10675 if faulty_primary:
10676 self.disks = faulty_primary
10677 self.target_node = instance.primary_node
10678 self.other_node = secondary_node
10679 check_nodes = [self.target_node, self.other_node]
10680 elif faulty_secondary:
10681 self.disks = faulty_secondary
10682 self.target_node = secondary_node
10683 self.other_node = instance.primary_node
10684 check_nodes = [self.target_node, self.other_node]
10685 else:
10686 self.disks = []
10687 check_nodes = []
10689 else:
10690 # Non-automatic modes
10691 if self.mode == constants.REPLACE_DISK_PRI:
10692 self.target_node = instance.primary_node
10693 self.other_node = secondary_node
10694 check_nodes = [self.target_node, self.other_node]
10696 elif self.mode == constants.REPLACE_DISK_SEC:
10697 self.target_node = secondary_node
10698 self.other_node = instance.primary_node
10699 check_nodes = [self.target_node, self.other_node]
10701 elif self.mode == constants.REPLACE_DISK_CHG:
10702 self.new_node = remote_node
10703 self.other_node = instance.primary_node
10704 self.target_node = secondary_node
10705 check_nodes = [self.new_node, self.other_node]
10707 _CheckNodeNotDrained(self.lu, remote_node)
10708 _CheckNodeVmCapable(self.lu, remote_node)
10710 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10711 assert old_node_info is not None
10712 if old_node_info.offline and not self.early_release:
10713 # doesn't make sense to delay the release
10714 self.early_release = True
10715 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10716 " early-release mode", secondary_node)
10718 else:
10719 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10720 self.mode)
10722 # If not specified all disks should be replaced
10723 if not self.disks:
10724 self.disks = range(len(self.instance.disks))
10726 # TODO: This is ugly, but right now we can't distinguish between internal
10727 # submitted opcode and external one. We should fix that.
10728 if self.remote_node_info:
10729 # We change the node, lets verify it still meets instance policy
10730 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10731 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10732 new_group_info)
10733 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10734 ignore=self.ignore_ipolicy)
10736 for node in check_nodes:
10737 _CheckNodeOnline(self.lu, node)
10739 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10740 self.other_node,
10741 self.target_node]
10742 if node_name is not None)
10744 # Release unneeded node and node resource locks
10745 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10746 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10748 # Release any owned node group
10749 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10750 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10752 # Check whether disks are valid
10753 for disk_idx in self.disks:
10754 instance.FindDisk(disk_idx)
10756 # Get secondary node IP addresses
10757 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10758 in self.cfg.GetMultiNodeInfo(touched_nodes))
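# Note: DRBD replication traffic runs over the nodes' secondary IPs, which
# is why only those addresses are collected here for the reconfiguration
# done later in the Exec helpers.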
10760 def Exec(self, feedback_fn):
10761 """Execute disk replacement.
10763 This dispatches the disk replacement to the appropriate handler.
10766 if self.delay_iallocator:
10767 self._CheckPrereq2()
10770 # Verify owned locks before starting operation
10771 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10772 assert set(owned_nodes) == set(self.node_secondary_ip), \
10773 ("Incorrect node locks, owning %s, expected %s" %
10774 (owned_nodes, self.node_secondary_ip.keys()))
10775 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10776 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10778 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10779 assert list(owned_instances) == [self.instance_name], \
10780 "Instance '%s' not locked" % self.instance_name
10782 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10783 "Should not own any node group lock at this point"
10785 if not self.disks:
10786 feedback_fn("No disks need replacement for instance '%s'" %
10787 self.instance.name)
10788 return
10790 feedback_fn("Replacing disk(s) %s for instance '%s'" %
10791 (utils.CommaJoin(self.disks), self.instance.name))
10792 feedback_fn("Current primary node: %s", self.instance.primary_node)
10793 feedback_fn("Current secondary node: %s",
10794 utils.CommaJoin(self.instance.secondary_nodes))
10796 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10798 # Activate the instance disks if we're replacing them on a down instance
10799 if activate_disks:
10800 _StartInstanceDisks(self.lu, self.instance, True)
10802 try:
10803 # Should we replace the secondary node?
10804 if self.new_node is not None:
10805 fn = self._ExecDrbd8Secondary
10806 else:
10807 fn = self._ExecDrbd8DiskOnly
10809 result = fn(feedback_fn)
10810 finally:
10811 # Deactivate the instance disks if we're replacing them on a
10812 # down instance
10813 if activate_disks:
10814 _SafeShutdownInstanceDisks(self.lu, self.instance)
10816 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10818 if __debug__:
10819 # Verify owned locks
10820 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10821 nodes = frozenset(self.node_secondary_ip)
10822 assert ((self.early_release and not owned_nodes) or
10823 (not self.early_release and not (set(owned_nodes) - nodes))), \
10824 ("Not owning the correct locks, early_release=%s, owned=%r,"
10825 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10829 def _CheckVolumeGroup(self, nodes):
10830 self.lu.LogInfo("Checking volume groups")
10832 vgname = self.cfg.GetVGName()
10834 # Make sure volume group exists on all involved nodes
10835 results = self.rpc.call_vg_list(nodes)
10836 if not results:
10837 raise errors.OpExecError("Can't list volume groups on the nodes")
10839 for node in nodes:
10840 res = results[node]
10841 res.Raise("Error checking node %s" % node)
10842 if vgname not in res.payload:
10843 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10846 def _CheckDisksExistence(self, nodes):
10847 # Check disk existence
10848 for idx, dev in enumerate(self.instance.disks):
10849 if idx not in self.disks:
10850 continue
10852 for node in nodes:
10853 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10854 self.cfg.SetDiskID(dev, node)
10856 result = _BlockdevFind(self, node, dev, self.instance)
10858 msg = result.fail_msg
10859 if msg or not result.payload:
10861 msg = "disk not found"
10862 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10865 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10866 for idx, dev in enumerate(self.instance.disks):
10867 if idx not in self.disks:
10868 continue
10870 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10871 (idx, node_name))
10873 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10874 on_primary, ldisk=ldisk):
10875 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10876 " replace disks for instance %s" %
10877 (node_name, self.instance.name))
10879 def _CreateNewStorage(self, node_name):
10880 """Create new storage on the primary or secondary node.
10882 This is only used for same-node replaces, not for changing the
10883 secondary node, hence we don't want to modify the existing disk.
10885 """
10886 iv_names = {}
10888 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10889 for idx, dev in enumerate(disks):
10890 if idx not in self.disks:
10891 continue
10893 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10895 self.cfg.SetDiskID(dev, node_name)
10897 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10898 names = _GenerateUniqueNames(self.lu, lv_names)
10900 (data_disk, meta_disk) = dev.children
10901 vg_data = data_disk.logical_id[0]
10902 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10903 logical_id=(vg_data, names[0]),
10904 params=data_disk.params)
10905 vg_meta = meta_disk.logical_id[0]
10906 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10907 logical_id=(vg_meta, names[1]),
10908 params=meta_disk.params)
10910 new_lvs = [lv_data, lv_meta]
10911 old_lvs = [child.Copy() for child in dev.children]
10912 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10914 # we pass force_create=True to force the LVM creation
10915 for new_lv in new_lvs:
10916 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10917 _GetInstanceInfoText(self.instance), False)
10919 return iv_names
10921 def _CheckDevices(self, node_name, iv_names):
10922 for name, (dev, _, _) in iv_names.iteritems():
10923 self.cfg.SetDiskID(dev, node_name)
10925 result = _BlockdevFind(self, node_name, dev, self.instance)
10927 msg = result.fail_msg
10928 if msg or not result.payload:
10930 msg = "disk not found"
10931 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10934 if result.payload.is_degraded:
10935 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10937 def _RemoveOldStorage(self, node_name, iv_names):
10938 for name, (_, old_lvs, _) in iv_names.iteritems():
10939 self.lu.LogInfo("Remove logical volumes for %s" % name)
10941 for lv in old_lvs:
10942 self.cfg.SetDiskID(lv, node_name)
10944 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10945 if msg:
10946 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10947 hint="remove unused LVs manually")
10949 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10950 """Replace a disk on the primary or secondary for DRBD 8.
10952 The algorithm for replace is quite complicated:
10954 1. for each disk to be replaced:
10956 1. create new LVs on the target node with unique names
10957 1. detach old LVs from the drbd device
10958 1. rename old LVs to name_replaced.<time_t>
10959 1. rename new LVs to old LVs
10960 1. attach the new LVs (with the old names now) to the drbd device
10962 1. wait for sync across all devices
10964 1. for each modified disk:
10966 1. remove old LVs (which have the name name_replaces.<time_t>)
10968 Failures are not very well handled.
10970 """
10972 steps_total = 6
10973 # Step: check device activation
10974 self.lu.LogStep(1, steps_total, "Check device existence")
10975 self._CheckDisksExistence([self.other_node, self.target_node])
10976 self._CheckVolumeGroup([self.target_node, self.other_node])
10978 # Step: check other node consistency
10979 self.lu.LogStep(2, steps_total, "Check peer consistency")
10980 self._CheckDisksConsistency(self.other_node,
10981 self.other_node == self.instance.primary_node,
10984 # Step: create new storage
10985 self.lu.LogStep(3, steps_total, "Allocate new storage")
10986 iv_names = self._CreateNewStorage(self.target_node)
10988 # Step: for each lv, detach+rename*2+attach
10989 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10990 for dev, old_lvs, new_lvs in iv_names.itervalues():
10991 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10993 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10994 old_lvs)
10995 result.Raise("Can't detach drbd from local storage on node"
10996 " %s for device %s" % (self.target_node, dev.iv_name))
10998 #cfg.Update(instance)
11000 # ok, we created the new LVs, so now we know we have the needed
11001 # storage; as such, we proceed on the target node to rename
11002 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11003 # using the assumption that logical_id == physical_id (which in
11004 # turn is the unique_id on that node)
11006 # FIXME(iustin): use a better name for the replaced LVs
11007 temp_suffix = int(time.time())
11008 ren_fn = lambda d, suff: (d.physical_id[0],
11009 d.physical_id[1] + "_replaced-%s" % suff)
11011 # Build the rename list based on what LVs exist on the node
11012 rename_old_to_new = []
11013 for to_ren in old_lvs:
11014 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11015 if not result.fail_msg and result.payload:
11017 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11019 self.lu.LogInfo("Renaming the old LVs on the target node")
11020 result = self.rpc.call_blockdev_rename(self.target_node,
11022 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11024 # Now we rename the new LVs to the old LVs
11025 self.lu.LogInfo("Renaming the new LVs on the target node")
11026 rename_new_to_old = [(new, old.physical_id)
11027 for old, new in zip(old_lvs, new_lvs)]
11028 result = self.rpc.call_blockdev_rename(self.target_node,
11030 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11032 # Intermediate steps of in memory modifications
11033 for old, new in zip(old_lvs, new_lvs):
11034 new.logical_id = old.logical_id
11035 self.cfg.SetDiskID(new, self.target_node)
11037 # We need to modify old_lvs so that removal later removes the
11038 # right LVs, not the newly added ones; note that old_lvs is a
11040 for disk in old_lvs:
11041 disk.logical_id = ren_fn(disk, temp_suffix)
11042 self.cfg.SetDiskID(disk, self.target_node)
11044 # Now that the new lvs have the old name, we can add them to the device
11045 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11046 result = self.rpc.call_blockdev_addchildren(self.target_node,
11047 (dev, self.instance), new_lvs)
11048 msg = result.fail_msg
11050 for new_lv in new_lvs:
11051 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11054 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11055 hint=("cleanup manually the unused logical"
11057 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11059 cstep = itertools.count(5)
11061 if self.early_release:
11062 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11063 self._RemoveOldStorage(self.target_node, iv_names)
11064 # TODO: Check if releasing locks early still makes sense
11065 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11067 # Release all resource locks except those used by the instance
11068 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11069 keep=self.node_secondary_ip.keys())
11071 # Release all node locks while waiting for sync
11072 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11074 # TODO: Can the instance lock be downgraded here? Take the optional disk
11075 # shutdown in the caller into consideration.
11078 # This can fail as the old devices are degraded and _WaitForSync
11079 # does a combined result over all disks, so we don't check its return value
11080 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11081 _WaitForSync(self.lu, self.instance)
11083 # Check all devices manually
11084 self._CheckDevices(self.instance.primary_node, iv_names)
11086 # Step: remove old storage
11087 if not self.early_release:
11088 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11089 self._RemoveOldStorage(self.target_node, iv_names)
11091 def _ExecDrbd8Secondary(self, feedback_fn):
11092 """Replace the secondary node for DRBD 8.
11094 The algorithm for replace is quite complicated:
11095 - for all disks of the instance:
11096 - create new LVs on the new node with same names
11097 - shutdown the drbd device on the old secondary
11098 - disconnect the drbd network on the primary
11099 - create the drbd device on the new secondary
11100 - network attach the drbd on the primary, using an artifice:
11101 the drbd code for Attach() will connect to the network if it
11102 finds a device which is connected to the good local disks but
11103 not network enabled
11104 - wait for sync across all devices
11105 - remove all disks from the old secondary
11107 Failures are not very well handled.
11112 pnode = self.instance.primary_node
11114 # Step: check device activation
11115 self.lu.LogStep(1, steps_total, "Check device existence")
11116 self._CheckDisksExistence([self.instance.primary_node])
11117 self._CheckVolumeGroup([self.instance.primary_node])
11119 # Step: check other node consistency
11120 self.lu.LogStep(2, steps_total, "Check peer consistency")
11121 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11123 # Step: create new storage
11124 self.lu.LogStep(3, steps_total, "Allocate new storage")
11125 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11126 for idx, dev in enumerate(disks):
11127 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11128 (self.new_node, idx))
11129 # we pass force_create=True to force LVM creation
11130 for new_lv in dev.children:
11131 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11132 True, _GetInstanceInfoText(self.instance), False)
11134 # Step 4: drbd minors and drbd setup changes
11135 # after this, we must manually remove the drbd minors on both the
11136 # error and the success paths
11137 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11138 minors = self.cfg.AllocateDRBDMinor([self.new_node
11139 for dev in self.instance.disks],
11140 self.instance.name)
11141 logging.debug("Allocated minors %r", minors)
11144 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11145 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
11146 (self.new_node, idx))
11147 # create new devices on new_node; note that we create two IDs:
11148 # one without port, so the drbd will be activated without
11149 # networking information on the new node at this stage, and one
11150 # with network, for the latter activation in step 4
11151 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11152 if self.instance.primary_node == o_node1:
11155 assert self.instance.primary_node == o_node2, "Three-node instance?"
11158 new_alone_id = (self.instance.primary_node, self.new_node, None,
11159 p_minor, new_minor, o_secret)
11160 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11161 p_minor, new_minor, o_secret)
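# Illustrative sketch (hypothetical values): with primary node "node1", new
# secondary "node3", o_port 11000, p_minor 0, new_minor 2 and a shared secret
# "s3cr3t", the two logical IDs would be
#   new_alone_id = ("node1", "node3", None, 0, 2, "s3cr3t")
#   new_net_id   = ("node1", "node3", 11000, 0, 2, "s3cr3t")
# i.e. the same tuple except that the standalone variant carries no port.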
11163 iv_names[idx] = (dev, dev.children, new_net_id)
11164 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11166 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11167 logical_id=new_alone_id,
11168 children=dev.children,
11171 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11174 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11176 _GetInstanceInfoText(self.instance), False)
11177 except errors.GenericError:
11178 self.cfg.ReleaseDRBDMinors(self.instance.name)
11181 # We have new devices, shutdown the drbd on the old secondary
11182 for idx, dev in enumerate(self.instance.disks):
11183 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11184 self.cfg.SetDiskID(dev, self.target_node)
11185 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11186 (dev, self.instance)).fail_msg
11188 self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
11189 " node: %s" % (idx, msg),
11190 hint=("Please cleanup this device manually as"
11191 " soon as possible"))
11193 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11194 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11195 self.instance.disks)[pnode]
11197 msg = result.fail_msg
11199 # detaches didn't succeed (unlikely)
11200 self.cfg.ReleaseDRBDMinors(self.instance.name)
11201 raise errors.OpExecError("Can't detach the disks from the network on"
11202 " old node: %s" % (msg,))
11204 # if we managed to detach at least one, we update all the disks of
11205 # the instance to point to the new secondary
11206 self.lu.LogInfo("Updating instance configuration")
11207 for dev, _, new_logical_id in iv_names.itervalues():
11208 dev.logical_id = new_logical_id
11209 self.cfg.SetDiskID(dev, self.instance.primary_node)
11211 self.cfg.Update(self.instance, feedback_fn)
11213 # Release all node locks (the configuration has been updated)
11214 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11216 # and now perform the drbd attach
11217 self.lu.LogInfo("Attaching primary drbds to new secondary"
11218 " (standalone => connected)")
11219 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11221 self.node_secondary_ip,
11222 (self.instance.disks, self.instance),
11223 self.instance.name,
11225 for to_node, to_result in result.items():
11226 msg = to_result.fail_msg
11228 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11230 hint=("please do a gnt-instance info to see the"
11231 " status of disks"))
11233 cstep = itertools.count(5)
11235 if self.early_release:
11236 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11237 self._RemoveOldStorage(self.target_node, iv_names)
11238 # TODO: Check if releasing locks early still makes sense
11239 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11241 # Release all resource locks except those used by the instance
11242 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11243 keep=self.node_secondary_ip.keys())
11245 # TODO: Can the instance lock be downgraded here? Take the optional disk
11246 # shutdown in the caller into consideration.
11249 # This can fail as the old devices are degraded and _WaitForSync
11250 # does a combined result over all disks, so we don't check its return value
11251 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11252 _WaitForSync(self.lu, self.instance)
11254 # Check all devices manually
11255 self._CheckDevices(self.instance.primary_node, iv_names)
11257 # Step: remove old storage
11258 if not self.early_release:
11259 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11260 self._RemoveOldStorage(self.target_node, iv_names)
11263 class LURepairNodeStorage(NoHooksLU):
11264 """Repairs the volume group on a node.
11269 def CheckArguments(self):
11270 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11272 storage_type = self.op.storage_type
11274 if (constants.SO_FIX_CONSISTENCY not in
11275 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11276 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11277 " repaired" % storage_type,
11278 errors.ECODE_INVAL)
11280 def ExpandNames(self):
11281 self.needed_locks = {
11282 locking.LEVEL_NODE: [self.op.node_name],
11285 def _CheckFaultyDisks(self, instance, node_name):
11286 """Ensure faulty disks abort the opcode or at least warn."""
11288 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11290 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11291 " node '%s'" % (instance.name, node_name),
11292 errors.ECODE_STATE)
11293 except errors.OpPrereqError, err:
11294 if self.op.ignore_consistency:
11295 self.proc.LogWarning(str(err.args[0]))
11299 def CheckPrereq(self):
11300 """Check prerequisites.
11303 # Check whether any instance on this node has faulty disks
11304 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11305 if inst.admin_state != constants.ADMINST_UP:
11307 check_nodes = set(inst.all_nodes)
11308 check_nodes.discard(self.op.node_name)
11309 for inst_node_name in check_nodes:
11310 self._CheckFaultyDisks(inst, inst_node_name)
11312 def Exec(self, feedback_fn):
11313 feedback_fn("Repairing storage unit '%s' on %s ..." %
11314 (self.op.name, self.op.node_name))
11316 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11317 result = self.rpc.call_storage_execute(self.op.node_name,
11318 self.op.storage_type, st_args,
11320 constants.SO_FIX_CONSISTENCY)
11321 result.Raise("Failed to repair storage unit '%s' on %s" %
11322 (self.op.name, self.op.node_name))
11325 class LUNodeEvacuate(NoHooksLU):
11326 """Evacuates instances off a list of nodes.
11331 _MODE2IALLOCATOR = {
11332 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11333 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11334 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11336 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11337 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11338 constants.IALLOCATOR_NEVAC_MODES)
11340 def CheckArguments(self):
11341 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11343 def ExpandNames(self):
11344 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11346 if self.op.remote_node is not None:
11347 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11348 assert self.op.remote_node
11350 if self.op.remote_node == self.op.node_name:
11351 raise errors.OpPrereqError("Can not use evacuated node as a new"
11352 " secondary node", errors.ECODE_INVAL)
11354 if self.op.mode != constants.NODE_EVAC_SEC:
11355 raise errors.OpPrereqError("Without the use of an iallocator only"
11356 " secondary instances can be evacuated",
11357 errors.ECODE_INVAL)
11360 self.share_locks = _ShareAll()
11361 self.needed_locks = {
11362 locking.LEVEL_INSTANCE: [],
11363 locking.LEVEL_NODEGROUP: [],
11364 locking.LEVEL_NODE: [],
11367 # Determine nodes (via group) optimistically, needs verification once locks
11368 # have been acquired
11369 self.lock_nodes = self._DetermineNodes()
11371 def _DetermineNodes(self):
11372 """Gets the list of nodes to operate on.
11375 if self.op.remote_node is None:
11376 # Iallocator will choose any node(s) in the same group
11377 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11379 group_nodes = frozenset([self.op.remote_node])
11381 # Determine nodes to be locked
11382 return set([self.op.node_name]) | group_nodes
11384 def _DetermineInstances(self):
11385 """Builds list of instances to operate on.
11388 assert self.op.mode in constants.NODE_EVAC_MODES
11390 if self.op.mode == constants.NODE_EVAC_PRI:
11391 # Primary instances only
11392 inst_fn = _GetNodePrimaryInstances
11393 assert self.op.remote_node is None, \
11394 "Evacuating primary instances requires iallocator"
11395 elif self.op.mode == constants.NODE_EVAC_SEC:
11396 # Secondary instances only
11397 inst_fn = _GetNodeSecondaryInstances
11400 assert self.op.mode == constants.NODE_EVAC_ALL
11401 inst_fn = _GetNodeInstances
11402 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11404 raise errors.OpPrereqError("Due to an issue with the iallocator"
11405 " interface it is not possible to evacuate"
11406 " all instances at once; specify explicitly"
11407 " whether to evacuate primary or secondary"
11409 errors.ECODE_INVAL)
11411 return inst_fn(self.cfg, self.op.node_name)
11413 def DeclareLocks(self, level):
11414 if level == locking.LEVEL_INSTANCE:
11415 # Lock instances optimistically, needs verification once node and group
11416 # locks have been acquired
11417 self.needed_locks[locking.LEVEL_INSTANCE] = \
11418 set(i.name for i in self._DetermineInstances())
11420 elif level == locking.LEVEL_NODEGROUP:
11421 # Lock node groups for all potential target nodes optimistically, needs
11422 # verification once nodes have been acquired
11423 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11424 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11426 elif level == locking.LEVEL_NODE:
11427 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11429 def CheckPrereq(self):
11431 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11432 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11433 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11435 need_nodes = self._DetermineNodes()
11437 if not owned_nodes.issuperset(need_nodes):
11438 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11439 " locks were acquired, current nodes are"
11440 " '%s', used to be '%s'; retry the"
11442 (self.op.node_name,
11443 utils.CommaJoin(need_nodes),
11444 utils.CommaJoin(owned_nodes)),
11445 errors.ECODE_STATE)
11447 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11448 if owned_groups != wanted_groups:
11449 raise errors.OpExecError("Node groups changed since locks were acquired,"
11450 " current groups are '%s', used to be '%s';"
11451 " retry the operation" %
11452 (utils.CommaJoin(wanted_groups),
11453 utils.CommaJoin(owned_groups)))
11455 # Determine affected instances
11456 self.instances = self._DetermineInstances()
11457 self.instance_names = [i.name for i in self.instances]
11459 if set(self.instance_names) != owned_instances:
11460 raise errors.OpExecError("Instances on node '%s' changed since locks"
11461 " were acquired, current instances are '%s',"
11462 " used to be '%s'; retry the operation" %
11463 (self.op.node_name,
11464 utils.CommaJoin(self.instance_names),
11465 utils.CommaJoin(owned_instances)))
11467 if self.instance_names:
11468 self.LogInfo("Evacuating instances from node '%s': %s",
11470 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11472 self.LogInfo("No instances to evacuate from node '%s'",
11475 if self.op.remote_node is not None:
11476 for i in self.instances:
11477 if i.primary_node == self.op.remote_node:
11478 raise errors.OpPrereqError("Node %s is the primary node of"
11479 " instance %s, cannot use it as"
11481 (self.op.remote_node, i.name),
11482 errors.ECODE_INVAL)
11484 def Exec(self, feedback_fn):
11485 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11487 if not self.instance_names:
11488 # No instances to evacuate
11491 elif self.op.iallocator is not None:
11492 # TODO: Implement relocation to other group
11493 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11494 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11495 instances=list(self.instance_names))
11497 ial.Run(self.op.iallocator)
11499 if not ial.success:
11500 raise errors.OpPrereqError("Can't compute node evacuation using"
11501 " iallocator '%s': %s" %
11502 (self.op.iallocator, ial.info),
11503 errors.ECODE_NORES)
11505 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11507 elif self.op.remote_node is not None:
11508 assert self.op.mode == constants.NODE_EVAC_SEC
11510 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11511 remote_node=self.op.remote_node,
11513 mode=constants.REPLACE_DISK_CHG,
11514 early_release=self.op.early_release)]
11515 for instance_name in self.instance_names
11519 raise errors.ProgrammerError("No iallocator or remote node")
11521 return ResultWithJobs(jobs)
11524 def _SetOpEarlyRelease(early_release, op):
11525 """Sets C{early_release} flag on opcodes if available.
11529 op.early_release = early_release
11530 except AttributeError:
11531 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11536 def _NodeEvacDest(use_nodes, group, nodes):
11537 """Returns group or nodes depending on caller's choice.
11541 return utils.CommaJoin(nodes)
11546 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11547 """Unpacks the result of change-group and node-evacuate iallocator requests.
11549 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11550 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11552 @type lu: L{LogicalUnit}
11553 @param lu: Logical unit instance
11554 @type alloc_result: tuple/list
11555 @param alloc_result: Result from iallocator
11556 @type early_release: bool
11557 @param early_release: Whether to release locks early if possible
11558 @type use_nodes: bool
11559 @param use_nodes: Whether to display node names instead of groups
11562 (moved, failed, jobs) = alloc_result
11565 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11566 for (name, reason) in failed)
11567 lu.LogWarning("Unable to evacuate instances %s", failreason)
11568 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11571 lu.LogInfo("Instances to be moved: %s",
11572 utils.CommaJoin("%s (to %s)" %
11573 (name, _NodeEvacDest(use_nodes, group, nodes))
11574 for (name, group, nodes) in moved))
11576 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11577 map(opcodes.OpCode.LoadOpCode, ops))
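# Illustrative sketch (hypothetical values, not part of the original module):
# a node-evacuation iallocator result as unpacked above has roughly the shape
#   alloc_result = (
#     [("inst1", "group1", ["node2", "node3"])],       # moved
#     [("inst2", "not enough memory")],                # failed
#     [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}], ...],  # jobs (opcode dicts)
#   )
# Each entry in "jobs" is one job, i.e. a list of serialized opcodes, which
# _SetOpEarlyRelease then patches with the requested early_release flag.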
11581 class LUInstanceGrowDisk(LogicalUnit):
11582 """Grow a disk of an instance.
11585 HPATH = "disk-grow"
11586 HTYPE = constants.HTYPE_INSTANCE
11589 def ExpandNames(self):
11590 self._ExpandAndLockInstance()
11591 self.needed_locks[locking.LEVEL_NODE] = []
11592 self.needed_locks[locking.LEVEL_NODE_RES] = []
11593 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11594 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11596 def DeclareLocks(self, level):
11597 if level == locking.LEVEL_NODE:
11598 self._LockInstancesNodes()
11599 elif level == locking.LEVEL_NODE_RES:
11601 self.needed_locks[locking.LEVEL_NODE_RES] = \
11602 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11604 def BuildHooksEnv(self):
11605 """Build hooks env.
11607 This runs on the master, the primary and all the secondaries.
11611 "DISK": self.op.disk,
11612 "AMOUNT": self.op.amount,
11613 "ABSOLUTE": self.op.absolute,
11615 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11618 def BuildHooksNodes(self):
11619 """Build hooks nodes.
11622 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11625 def CheckPrereq(self):
11626 """Check prerequisites.
11628 This checks that the instance is in the cluster.
11631 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11632 assert instance is not None, \
11633 "Cannot retrieve locked instance %s" % self.op.instance_name
11634 nodenames = list(instance.all_nodes)
11635 for node in nodenames:
11636 _CheckNodeOnline(self, node)
11638 self.instance = instance
11640 if instance.disk_template not in constants.DTS_GROWABLE:
11641 raise errors.OpPrereqError("Instance's disk layout does not support"
11642 " growing", errors.ECODE_INVAL)
11644 self.disk = instance.FindDisk(self.op.disk)
11646 if self.op.absolute:
11647 self.target = self.op.amount
11648 self.delta = self.target - self.disk.size
11650 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11651 "current disk size (%s)" %
11652 (utils.FormatUnit(self.target, "h"),
11653 utils.FormatUnit(self.disk.size, "h")),
11654 errors.ECODE_STATE)
11656 self.delta = self.op.amount
11657 self.target = self.disk.size + self.delta
11659 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11660 utils.FormatUnit(self.delta, "h"),
11661 errors.ECODE_INVAL)
11663 if instance.disk_template not in (constants.DT_FILE,
11664 constants.DT_SHARED_FILE,
11666 # TODO: check the free disk space for file, when that feature will be
11668 _CheckNodesFreeDiskPerVG(self, nodenames,
11669 self.disk.ComputeGrowth(self.delta))
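# Worked example (hypothetical numbers): for a 10240 MB disk, a relative
# request with amount=2048 yields delta=2048 and target=12288, while an
# absolute request with amount=20480 yields target=20480 and delta=10240;
# a negative delta is rejected in either case.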
11671 def Exec(self, feedback_fn):
11672 """Execute disk grow.
11675 instance = self.instance
11678 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11679 assert (self.owned_locks(locking.LEVEL_NODE) ==
11680 self.owned_locks(locking.LEVEL_NODE_RES))
11682 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11684 raise errors.OpExecError("Cannot activate block device to grow")
11686 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11687 (self.op.disk, instance.name,
11688 utils.FormatUnit(self.delta, "h"),
11689 utils.FormatUnit(self.target, "h")))
11691 # First run all grow ops in dry-run mode
11692 for node in instance.all_nodes:
11693 self.cfg.SetDiskID(disk, node)
11694 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11696 result.Raise("Grow request failed to node %s" % node)
11698 # We know that (as far as we can test) operations across different
11699 # nodes will succeed; time to run it for real
11700 for node in instance.all_nodes:
11701 self.cfg.SetDiskID(disk, node)
11702 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11704 result.Raise("Grow request failed to node %s" % node)
11706 # TODO: Rewrite code to work properly
11707 # DRBD goes into sync mode for a short amount of time after executing the
11708 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11709 # calling "resize" in sync mode fails. Sleeping for a short amount of
11710 # time is a work-around.
11713 disk.RecordGrow(self.delta)
11714 self.cfg.Update(instance, feedback_fn)
11716 # Changes have been recorded, release node lock
11717 _ReleaseLocks(self, locking.LEVEL_NODE)
11719 # Downgrade lock while waiting for sync
11720 self.glm.downgrade(locking.LEVEL_INSTANCE)
11722 if self.op.wait_for_sync:
11723 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11725 self.proc.LogWarning("Disk sync-ing has not returned a good"
11726 " status; please check the instance")
11727 if instance.admin_state != constants.ADMINST_UP:
11728 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11729 elif instance.admin_state != constants.ADMINST_UP:
11730 self.proc.LogWarning("Not shutting down the disk even though the instance"
11731 " is not supposed to be running, because wait for"
11732 " sync was not requested")
11734 assert self.owned_locks(locking.LEVEL_NODE_RES)
11735 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11738 class LUInstanceQueryData(NoHooksLU):
11739 """Query runtime instance data.
11744 def ExpandNames(self):
11745 self.needed_locks = {}
11747 # Use locking if requested or when non-static information is wanted
11748 if not (self.op.static or self.op.use_locking):
11749 self.LogWarning("Non-static data requested, locks need to be acquired")
11750 self.op.use_locking = True
11752 if self.op.instances or not self.op.use_locking:
11753 # Expand instance names right here
11754 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11756 # Will use acquired locks
11757 self.wanted_names = None
11759 if self.op.use_locking:
11760 self.share_locks = _ShareAll()
11762 if self.wanted_names is None:
11763 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11765 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11767 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11768 self.needed_locks[locking.LEVEL_NODE] = []
11769 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11771 def DeclareLocks(self, level):
11772 if self.op.use_locking:
11773 if level == locking.LEVEL_NODEGROUP:
11774 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11776 # Lock all groups used by instances optimistically; this requires going
11777 # via the node before it's locked, requiring verification later on
11778 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11779 frozenset(group_uuid
11780 for instance_name in owned_instances
11782 self.cfg.GetInstanceNodeGroups(instance_name))
11784 elif level == locking.LEVEL_NODE:
11785 self._LockInstancesNodes()
11787 def CheckPrereq(self):
11788 """Check prerequisites.
11790 This only checks the optional instance list against the existing names.
11793 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11794 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11795 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11797 if self.wanted_names is None:
11798 assert self.op.use_locking, "Locking was not used"
11799 self.wanted_names = owned_instances
11801 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11803 if self.op.use_locking:
11804 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11807 assert not (owned_instances or owned_groups or owned_nodes)
11809 self.wanted_instances = instances.values()
11811 def _ComputeBlockdevStatus(self, node, instance, dev):
11812 """Returns the status of a block device
11815 if self.op.static or not node:
11818 self.cfg.SetDiskID(dev, node)
11820 result = self.rpc.call_blockdev_find(node, dev)
11824 result.Raise("Can't compute disk status for %s" % instance.name)
11826 status = result.payload
11830 return (status.dev_path, status.major, status.minor,
11831 status.sync_percent, status.estimated_time,
11832 status.is_degraded, status.ldisk_status)
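# Illustrative sketch (hypothetical values): for a healthy, fully synced DRBD
# device this tuple might look like
#   ("/dev/drbd0", 147, 0, None, None, False, constants.LDS_OKAY)
# with sync_percent and estimated_time typically only set while a resync is
# in progress.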
11834 def _ComputeDiskStatus(self, instance, snode, dev):
11835 """Compute block device status.
11838 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11840 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11842 def _ComputeDiskStatusInner(self, instance, snode, dev):
11843 """Compute block device status.
11845 @attention: The device has to be annotated already.
11848 if dev.dev_type in constants.LDS_DRBD:
11849 # we change the snode then (otherwise we use the one passed in)
11850 if dev.logical_id[0] == instance.primary_node:
11851 snode = dev.logical_id[1]
11853 snode = dev.logical_id[0]
11855 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11857 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11860 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11867 "iv_name": dev.iv_name,
11868 "dev_type": dev.dev_type,
11869 "logical_id": dev.logical_id,
11870 "physical_id": dev.physical_id,
11871 "pstatus": dev_pstatus,
11872 "sstatus": dev_sstatus,
11873 "children": dev_children,
11878 def Exec(self, feedback_fn):
11879 """Gather and return data"""
11882 cluster = self.cfg.GetClusterInfo()
11884 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11885 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11887 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11888 for node in nodes.values()))
11890 group2name_fn = lambda uuid: groups[uuid].name
11892 for instance in self.wanted_instances:
11893 pnode = nodes[instance.primary_node]
11895 if self.op.static or pnode.offline:
11896 remote_state = None
11898 self.LogWarning("Primary node %s is marked offline, returning static"
11899 " information only for instance %s" %
11900 (pnode.name, instance.name))
11902 remote_info = self.rpc.call_instance_info(instance.primary_node,
11904 instance.hypervisor)
11905 remote_info.Raise("Error checking node %s" % instance.primary_node)
11906 remote_info = remote_info.payload
11907 if remote_info and "state" in remote_info:
11908 remote_state = "up"
11910 if instance.admin_state == constants.ADMINST_UP:
11911 remote_state = "down"
11913 remote_state = instance.admin_state
11915 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11918 snodes_group_uuids = [nodes[snode_name].group
11919 for snode_name in instance.secondary_nodes]
11921 result[instance.name] = {
11922 "name": instance.name,
11923 "config_state": instance.admin_state,
11924 "run_state": remote_state,
11925 "pnode": instance.primary_node,
11926 "pnode_group_uuid": pnode.group,
11927 "pnode_group_name": group2name_fn(pnode.group),
11928 "snodes": instance.secondary_nodes,
11929 "snodes_group_uuids": snodes_group_uuids,
11930 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11932 # this happens to be the same format used for hooks
11933 "nics": _NICListToTuple(self, instance.nics),
11934 "disk_template": instance.disk_template,
11936 "hypervisor": instance.hypervisor,
11937 "network_port": instance.network_port,
11938 "hv_instance": instance.hvparams,
11939 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11940 "be_instance": instance.beparams,
11941 "be_actual": cluster.FillBE(instance),
11942 "os_instance": instance.osparams,
11943 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11944 "serial_no": instance.serial_no,
11945 "mtime": instance.mtime,
11946 "ctime": instance.ctime,
11947 "uuid": instance.uuid,
11953 def PrepareContainerMods(mods, private_fn):
11954 """Prepares a list of container modifications by adding a private data field.
11956 @type mods: list of tuples; (operation, index, parameters)
11957 @param mods: List of modifications
11958 @type private_fn: callable or None
11959 @param private_fn: Callable for constructing a private data field for a
11964 if private_fn is None:
11969 return [(op, idx, params, fn()) for (op, idx, params) in mods]
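# Illustrative sketch (hypothetical values): given
#   mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]
# PrepareContainerMods(mods, None) returns
#   [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}, None)]
# i.e. each tuple simply gains a private-data slot (an instance of private_fn
# when one is given, as done for NICs with _InstNicModPrivate).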
11972 #: Type description for changes as returned by L{ApplyContainerMods}'s
11974 _TApplyContModsCbChanges = \
11975 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11976 ht.TNonEmptyString,
11981 def ApplyContainerMods(kind, container, chgdesc, mods,
11982 create_fn, modify_fn, remove_fn):
11983 """Applies descriptions in C{mods} to C{container}.
11986 @param kind: One-word item description
11987 @type container: list
11988 @param container: Container to modify
11989 @type chgdesc: None or list
11990 @param chgdesc: List of applied changes
11992 @param mods: Modifications as returned by L{PrepareContainerMods}
11993 @type create_fn: callable
11994 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11995 receives absolute item index, parameters and private data object as added
11996 by L{PrepareContainerMods}, returns tuple containing new item and changes
11998 @type modify_fn: callable
11999 @param modify_fn: Callback for modifying an existing item
12000 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12001 and private data object as added by L{PrepareContainerMods}, returns
12003 @type remove_fn: callable
12004 @param remove_fn: Callback on removing item; receives absolute item index,
12005 item and private data object as added by L{PrepareContainerMods}
12008 for (op, idx, params, private) in mods:
12011 absidx = len(container) - 1
12013 raise IndexError("Not accepting negative indices other than -1")
12014 elif idx > len(container):
12015 raise IndexError("Got %s index %s, but there are only %s" %
12016 (kind, idx, len(container)))
12022 if op == constants.DDM_ADD:
12023 # Calculate where item will be added
12025 addidx = len(container)
12029 if create_fn is None:
12032 (item, changes) = create_fn(addidx, params, private)
12035 container.append(item)
12038 assert idx <= len(container)
12039 # list.insert does so before the specified index
12040 container.insert(idx, item)
12042 # Retrieve existing item
12044 item = container[absidx]
12046 raise IndexError("Invalid %s index %s" % (kind, idx))
12048 if op == constants.DDM_REMOVE:
12051 if remove_fn is not None:
12052 remove_fn(absidx, item, private)
12054 changes = [("%s/%s" % (kind, absidx), "remove")]
12056 assert container[absidx] == item
12057 del container[absidx]
12058 elif op == constants.DDM_MODIFY:
12059 if modify_fn is not None:
12060 changes = modify_fn(absidx, item, params, private)
12062 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12064 assert _TApplyContModsCbChanges(changes)
12066 if not (chgdesc is None or changes is None):
12067 chgdesc.extend(changes)
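# Illustrative sketch (hypothetical callbacks, not part of the original
# module): a typical caller prepares and applies NIC modifications roughly as
#   chgdesc = []
#   mods = PrepareContainerMods(op.nics, _InstNicModPrivate)
#   ApplyContainerMods("NIC", nics, chgdesc, mods,
#                      create_fn, modify_fn, remove_fn)
# where create_fn/modify_fn/remove_fn follow the signatures documented above;
# afterwards "nics" has been modified in place and "chgdesc" holds
# (name, value) change pairs such as ("nic.link/0", "br0").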
12070 def _UpdateIvNames(base_index, disks):
12071 """Updates the C{iv_name} attribute of disks.
12073 @type disks: list of L{objects.Disk}
12076 for (idx, disk) in enumerate(disks):
12077 disk.iv_name = "disk/%s" % (base_index + idx, )
12080 class _InstNicModPrivate:
12081 """Data structure for network interface modifications.
12083 Used by L{LUInstanceSetParams}.
12086 def __init__(self):
12091 class LUInstanceSetParams(LogicalUnit):
12092 """Modifies an instance's parameters.
12095 HPATH = "instance-modify"
12096 HTYPE = constants.HTYPE_INSTANCE
12100 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12101 assert ht.TList(mods)
12102 assert not mods or len(mods[0]) in (2, 3)
12104 if mods and len(mods[0]) == 2:
12108 for op, params in mods:
12109 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12110 result.append((op, -1, params))
12114 raise errors.OpPrereqError("Only one %s add or remove operation is"
12115 " supported at a time" % kind,
12116 errors.ECODE_INVAL)
12118 result.append((constants.DDM_MODIFY, op, params))
12120 assert verify_fn(result)
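# Illustrative sketch (hypothetical input): the legacy 2-tuple syntax
#   [("add", {"size": 1024}), (0, {"mode": "ro"})]
# is upgraded here to the indexed 3-tuple form
#   [("add", -1, {"size": 1024}), ("modify", 0, {"mode": "ro"})]
# i.e. add/remove operations get index -1 and bare indices become explicit
# "modify" operations.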
12127 def _CheckMods(kind, mods, key_types, item_fn):
12128 """Ensures requested disk/NIC modifications are valid.
12131 for (op, _, params) in mods:
12132 assert ht.TDict(params)
12134 utils.ForceDictType(params, key_types)
12136 if op == constants.DDM_REMOVE:
12138 raise errors.OpPrereqError("No settings should be passed when"
12139 " removing a %s" % kind,
12140 errors.ECODE_INVAL)
12141 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12142 item_fn(op, params)
12144 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12147 def _VerifyDiskModification(op, params):
12148 """Verifies a disk modification.
12151 if op == constants.DDM_ADD:
12152 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12153 if mode not in constants.DISK_ACCESS_SET:
12154 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12155 errors.ECODE_INVAL)
12157 size = params.get(constants.IDISK_SIZE, None)
12159 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12160 constants.IDISK_SIZE, errors.ECODE_INVAL)
12164 except (TypeError, ValueError), err:
12165 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12166 errors.ECODE_INVAL)
12168 params[constants.IDISK_SIZE] = size
12170 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12171 raise errors.OpPrereqError("Disk size change not possible, use"
12172 " grow-disk", errors.ECODE_INVAL)
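# Illustrative sketch (hypothetical values): a valid "add" request carries at
# least a size, e.g.
#   {constants.IDISK_SIZE: 2048, constants.IDISK_MODE: constants.DISK_RDWR}
# whereas passing IDISK_SIZE with a "modify" request is rejected above in
# favour of the dedicated grow-disk operation.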
12175 def _VerifyNicModification(op, params):
12176 """Verifies a network interface modification.
12179 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12180 ip = params.get(constants.INIC_IP, None)
12183 elif ip.lower() == constants.VALUE_NONE:
12184 params[constants.INIC_IP] = None
12185 elif not netutils.IPAddress.IsValid(ip):
12186 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12187 errors.ECODE_INVAL)
12189 bridge = params.get("bridge", None)
12190 link = params.get(constants.INIC_LINK, None)
12191 if bridge and link:
12192 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12193 " at the same time", errors.ECODE_INVAL)
12194 elif bridge and bridge.lower() == constants.VALUE_NONE:
12195 params["bridge"] = None
12196 elif link and link.lower() == constants.VALUE_NONE:
12197 params[constants.INIC_LINK] = None
12199 if op == constants.DDM_ADD:
12200 macaddr = params.get(constants.INIC_MAC, None)
12201 if macaddr is None:
12202 params[constants.INIC_MAC] = constants.VALUE_AUTO
12204 if constants.INIC_MAC in params:
12205 macaddr = params[constants.INIC_MAC]
12206 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12207 macaddr = utils.NormalizeAndValidateMac(macaddr)
12209 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12210 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12211 " modifying an existing NIC",
12212 errors.ECODE_INVAL)
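# Illustrative sketch (hypothetical values): adding a NIC with
#   {constants.INIC_IP: "none", constants.INIC_LINK: "br0"}
# ends up with INIC_IP normalized to None and an auto-generated MAC, while
# passing both "bridge" and "link" in one request is rejected.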
12214 def CheckArguments(self):
12215 if not (self.op.nics or self.op.disks or self.op.disk_template or
12216 self.op.hvparams or self.op.beparams or self.op.os_name or
12217 self.op.offline is not None or self.op.runtime_mem):
12218 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12220 if self.op.hvparams:
12221 _CheckGlobalHvParams(self.op.hvparams)
12224 self._UpgradeDiskNicMods("disk", self.op.disks,
12225 opcodes.OpInstanceSetParams.TestDiskModifications)
12227 self._UpgradeDiskNicMods("NIC", self.op.nics,
12228 opcodes.OpInstanceSetParams.TestNicModifications)
12230 # Check disk modifications
12231 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12232 self._VerifyDiskModification)
12234 if self.op.disks and self.op.disk_template is not None:
12235 raise errors.OpPrereqError("Disk template conversion and other disk"
12236 " changes not supported at the same time",
12237 errors.ECODE_INVAL)
12239 if (self.op.disk_template and
12240 self.op.disk_template in constants.DTS_INT_MIRROR and
12241 self.op.remote_node is None):
12242 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12243 " one requires specifying a secondary node",
12244 errors.ECODE_INVAL)
12246 # Check NIC modifications
12247 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12248 self._VerifyNicModification)
12250 def ExpandNames(self):
12251 self._ExpandAndLockInstance()
12252 # Can't even acquire node locks in shared mode as upcoming changes in
12253 # Ganeti 2.6 will start to modify the node object on disk conversion
12254 self.needed_locks[locking.LEVEL_NODE] = []
12255 self.needed_locks[locking.LEVEL_NODE_RES] = []
12256 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12258 def DeclareLocks(self, level):
12259 # TODO: Acquire group lock in shared mode (disk parameters)
12260 if level == locking.LEVEL_NODE:
12261 self._LockInstancesNodes()
12262 if self.op.disk_template and self.op.remote_node:
12263 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12264 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12265 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12267 self.needed_locks[locking.LEVEL_NODE_RES] = \
12268 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12270 def BuildHooksEnv(self):
12271 """Build hooks env.
12273 This runs on the master, primary and secondaries.
12277 if constants.BE_MINMEM in self.be_new:
12278 args["minmem"] = self.be_new[constants.BE_MINMEM]
12279 if constants.BE_MAXMEM in self.be_new:
12280 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12281 if constants.BE_VCPUS in self.be_new:
12282 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12283 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12284 # information at all.
12286 if self._new_nics is not None:
12289 for nic in self._new_nics:
12290 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12291 mode = nicparams[constants.NIC_MODE]
12292 link = nicparams[constants.NIC_LINK]
12293 nics.append((nic.ip, nic.mac, mode, link))
12295 args["nics"] = nics
12297 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12298 if self.op.disk_template:
12299 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12300 if self.op.runtime_mem:
12301 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12305 def BuildHooksNodes(self):
12306 """Build hooks nodes.
12309 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12312 def _PrepareNicModification(self, params, private, old_ip, old_params,
12314 update_params_dict = dict([(key, params[key])
12315 for key in constants.NICS_PARAMETERS
12318 if "bridge" in params:
12319 update_params_dict[constants.NIC_LINK] = params["bridge"]
12321 new_params = _GetUpdatedParams(old_params, update_params_dict)
12322 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12324 new_filled_params = cluster.SimpleFillNIC(new_params)
12325 objects.NIC.CheckParameterSyntax(new_filled_params)
12327 new_mode = new_filled_params[constants.NIC_MODE]
12328 if new_mode == constants.NIC_MODE_BRIDGED:
12329 bridge = new_filled_params[constants.NIC_LINK]
12330 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12332 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12334 self.warn.append(msg)
12336 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12338 elif new_mode == constants.NIC_MODE_ROUTED:
12339 ip = params.get(constants.INIC_IP, old_ip)
12341 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12342 " on a routed NIC", errors.ECODE_INVAL)
12344 if constants.INIC_MAC in params:
12345 mac = params[constants.INIC_MAC]
12347 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12348 errors.ECODE_INVAL)
12349 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12350 # otherwise generate the MAC address
12351 params[constants.INIC_MAC] = \
12352 self.cfg.GenerateMAC(self.proc.GetECId())
12354 # or validate/reserve the current one
12356 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12357 except errors.ReservationError:
12358 raise errors.OpPrereqError("MAC address '%s' already in use"
12359 " in cluster" % mac,
12360 errors.ECODE_NOTUNIQUE)
12362 private.params = new_params
12363 private.filled = new_filled_params
12365 def CheckPrereq(self):
12366 """Check prerequisites.
12368 This only checks the instance list against the existing names.
12371 # checking the new params on the primary/secondary nodes
12373 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12374 cluster = self.cluster = self.cfg.GetClusterInfo()
12375 assert self.instance is not None, \
12376 "Cannot retrieve locked instance %s" % self.op.instance_name
12377 pnode = instance.primary_node
12378 nodelist = list(instance.all_nodes)
12379 pnode_info = self.cfg.GetNodeInfo(pnode)
12380 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12382 # Prepare disk/NIC modifications
12383 self.diskmod = PrepareContainerMods(self.op.disks, None)
12384 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12387 if self.op.os_name and not self.op.force:
12388 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12389 self.op.force_variant)
12390 instance_os = self.op.os_name
12392 instance_os = instance.os
12394 assert not (self.op.disk_template and self.op.disks), \
12395 "Can't modify disk template and apply disk changes at the same time"
12397 if self.op.disk_template:
12398 if instance.disk_template == self.op.disk_template:
12399 raise errors.OpPrereqError("Instance already has disk template %s" %
12400 instance.disk_template, errors.ECODE_INVAL)
12402 if (instance.disk_template,
12403 self.op.disk_template) not in self._DISK_CONVERSIONS:
12404 raise errors.OpPrereqError("Unsupported disk template conversion from"
12405 " %s to %s" % (instance.disk_template,
12406 self.op.disk_template),
12407 errors.ECODE_INVAL)
12408 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12409 msg="cannot change disk template")
12410 if self.op.disk_template in constants.DTS_INT_MIRROR:
12411 if self.op.remote_node == pnode:
12412 raise errors.OpPrereqError("Given new secondary node %s is the same"
12413 " as the primary node of the instance" %
12414 self.op.remote_node, errors.ECODE_STATE)
12415 _CheckNodeOnline(self, self.op.remote_node)
12416 _CheckNodeNotDrained(self, self.op.remote_node)
12417 # FIXME: here we assume that the old instance type is DT_PLAIN
12418 assert instance.disk_template == constants.DT_PLAIN
12419 disks = [{constants.IDISK_SIZE: d.size,
12420 constants.IDISK_VG: d.logical_id[0]}
12421 for d in instance.disks]
12422 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12423 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12425 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12426 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12427 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12428 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12429 ignore=self.op.ignore_ipolicy)
12430 if pnode_info.group != snode_info.group:
12431 self.LogWarning("The primary and secondary nodes are in two"
12432 " different node groups; the disk parameters"
12433 " from the first disk's node group will be"
12436 # hvparams processing
12437 if self.op.hvparams:
12438 hv_type = instance.hypervisor
12439 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12440 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12441 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12444 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12445 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12446 self.hv_proposed = self.hv_new = hv_new # the new actual values
12447 self.hv_inst = i_hvdict # the new dict (without defaults)
12449 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12451 self.hv_new = self.hv_inst = {}
12453 # beparams processing
12454 if self.op.beparams:
12455 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12457 objects.UpgradeBeParams(i_bedict)
12458 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12459 be_new = cluster.SimpleFillBE(i_bedict)
12460 self.be_proposed = self.be_new = be_new # the new actual values
12461 self.be_inst = i_bedict # the new dict (without defaults)
12463 self.be_new = self.be_inst = {}
12464 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12465 be_old = cluster.FillBE(instance)
12467 # CPU param validation -- checking every time a parameter is
12468 # changed to cover all cases where either CPU mask or vcpus have
12470 if (constants.BE_VCPUS in self.be_proposed and
12471 constants.HV_CPU_MASK in self.hv_proposed):
12473 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12474 # Verify mask is consistent with number of vCPUs. Can skip this
12475 # test if only 1 entry in the CPU mask, which means same mask
12476 # is applied to all vCPUs.
12477 if (len(cpu_list) > 1 and
12478 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12479 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12481 (self.be_proposed[constants.BE_VCPUS],
12482 self.hv_proposed[constants.HV_CPU_MASK]),
12483 errors.ECODE_INVAL)
12485 # Only perform this test if a new CPU mask is given
12486 if constants.HV_CPU_MASK in self.hv_new:
12487 # Calculate the largest CPU number requested
12488 max_requested_cpu = max(map(max, cpu_list))
12489 # Check that all of the instance's nodes have enough physical CPUs to
12490 # satisfy the requested CPU mask
12491 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12492 max_requested_cpu + 1, instance.hypervisor)
12494 # osparams processing
12495 if self.op.osparams:
12496 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12497 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12498 self.os_inst = i_osdict # the new dict (without defaults)
12504 #TODO(dynmem): do the appropriate check involving MINMEM
12505 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12506 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12507 mem_check_list = [pnode]
12508 if be_new[constants.BE_AUTO_BALANCE]:
12509 # either we changed auto_balance to yes or it was already set before
12510 mem_check_list.extend(instance.secondary_nodes)
12511 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12512 instance.hypervisor)
12513 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12514 [instance.hypervisor])
12515 pninfo = nodeinfo[pnode]
12516 msg = pninfo.fail_msg
12518 # Assume the primary node is unreachable and go ahead
12519 self.warn.append("Can't get info from primary node %s: %s" %
12522 (_, _, (pnhvinfo, )) = pninfo.payload
12523 if not isinstance(pnhvinfo.get("memory_free", None), int):
12524 self.warn.append("Node data from primary node %s doesn't contain"
12525 " free memory information" % pnode)
12526 elif instance_info.fail_msg:
12527 self.warn.append("Can't get instance runtime information: %s" %
12528 instance_info.fail_msg)
12530 if instance_info.payload:
12531 current_mem = int(instance_info.payload["memory"])
12533 # Assume instance not running
12534 # (there is a slight race condition here, but it's not very
12535 # probable, and we have no other way to check)
12536 # TODO: Describe race condition
12538 #TODO(dynmem): do the appropriate check involving MINMEM
12539 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12540 pnhvinfo["memory_free"])
12542 raise errors.OpPrereqError("This change will prevent the instance"
12543 " from starting, due to %d MB of memory"
12544 " missing on its primary node" %
12546 errors.ECODE_NORES)
12548 if be_new[constants.BE_AUTO_BALANCE]:
12549 for node, nres in nodeinfo.items():
12550 if node not in instance.secondary_nodes:
12552 nres.Raise("Can't get info from secondary node %s" % node,
12553 prereq=True, ecode=errors.ECODE_STATE)
12554 (_, _, (nhvinfo, )) = nres.payload
12555 if not isinstance(nhvinfo.get("memory_free", None), int):
12556 raise errors.OpPrereqError("Secondary node %s didn't return free"
12557 " memory information" % node,
12558 errors.ECODE_STATE)
12559 #TODO(dynmem): do the appropriate check involving MINMEM
12560 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12561 raise errors.OpPrereqError("This change will prevent the instance"
12562 " from failover to its secondary node"
12563 " %s, due to not enough memory" % node,
12564 errors.ECODE_STATE)
12566 if self.op.runtime_mem:
12567 remote_info = self.rpc.call_instance_info(instance.primary_node,
12569 instance.hypervisor)
12570 remote_info.Raise("Error checking node %s" % instance.primary_node)
12571 if not remote_info.payload: # not running already
12572 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12573 errors.ECODE_STATE)
12575 current_memory = remote_info.payload["memory"]
12576 if (not self.op.force and
12577 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12578 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12579 raise errors.OpPrereqError("Instance %s must have between %d"
12580 " and %d MB of memory unless --force is"
12581 " given" % (instance.name,
12582 self.be_proposed[constants.BE_MINMEM],
12583 self.be_proposed[constants.BE_MAXMEM]),
12584 errors.ECODE_INVAL)
12586 if self.op.runtime_mem > current_memory:
12587 _CheckNodeFreeMemory(self, instance.primary_node,
12588 "ballooning memory for instance %s" %
12590 self.op.runtime_mem - current_memory,
12591 instance.hypervisor)
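# Worked example (hypothetical numbers): with BE_MINMEM=512, BE_MAXMEM=2048
# and a running instance currently using 1024 MB, runtime_mem=4096 is
# rejected unless --force is given, while runtime_mem=2048 only requires the
# primary node to have 1024 MB of free memory for the ballooning.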
12593 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12594 raise errors.OpPrereqError("Disk operations not supported for"
12595 " diskless instances",
12596 errors.ECODE_INVAL)
12598 def _PrepareNicCreate(_, params, private):
12599 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12600 return (None, None)
12602 def _PrepareNicMod(_, nic, params, private):
12603 self._PrepareNicModification(params, private, nic.ip,
12604 nic.nicparams, cluster, pnode)
12607 # Verify NIC changes (operating on copy)
12608 nics = instance.nics[:]
12609 ApplyContainerMods("NIC", nics, None, self.nicmod,
12610 _PrepareNicCreate, _PrepareNicMod, None)
12611 if len(nics) > constants.MAX_NICS:
12612 raise errors.OpPrereqError("Instance has too many network interfaces"
12613 " (%d), cannot add more" % constants.MAX_NICS,
12614 errors.ECODE_STATE)
12616 # Verify disk changes (operating on a copy)
12617 disks = instance.disks[:]
12618 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12619 if len(disks) > constants.MAX_DISKS:
12620 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12621 " more" % constants.MAX_DISKS,
12622 errors.ECODE_STATE)
12624 if self.op.offline is not None:
12625 if self.op.offline:
12626 msg = "can't change to offline"
12628 msg = "can't change to online"
12629 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12631 # Pre-compute NIC changes (necessary to use result in hooks)
12632 self._nic_chgdesc = []
12634 # Operate on copies as this is still in prereq
12635 nics = [nic.Copy() for nic in instance.nics]
12636 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12637 self._CreateNewNic, self._ApplyNicMods, None)
12638 self._new_nics = nics
12640 self._new_nics = None
12642 def _ConvertPlainToDrbd(self, feedback_fn):
12643 """Converts an instance from plain to drbd.
12646 feedback_fn("Converting template to drbd")
12647 instance = self.instance
12648 pnode = instance.primary_node
12649 snode = self.op.remote_node
12651 assert instance.disk_template == constants.DT_PLAIN
12653 # create a fake disk info for _GenerateDiskTemplate
12654 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12655 constants.IDISK_VG: d.logical_id[0]}
12656 for d in instance.disks]
12657 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12658 instance.name, pnode, [snode],
12659 disk_info, None, None, 0, feedback_fn,
12661 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12663 info = _GetInstanceInfoText(instance)
12664 feedback_fn("Creating additional volumes...")
12665 # first, create the missing data and meta devices
12666 for disk in anno_disks:
12667 # unfortunately this is... not too nice
12668 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12670 for child in disk.children:
12671 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12672 # at this stage, all new LVs have been created, we can rename the
12674 feedback_fn("Renaming original volumes...")
12675 rename_list = [(o, n.children[0].logical_id)
12676 for (o, n) in zip(instance.disks, new_disks)]
12677 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12678 result.Raise("Failed to rename original LVs")
12680 feedback_fn("Initializing DRBD devices...")
12681 # all child devices are in place, we can now create the DRBD devices
12682 for disk in anno_disks:
12683 for node in [pnode, snode]:
12684 f_create = node == pnode
12685 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12687 # at this point, the instance has been modified
12688 instance.disk_template = constants.DT_DRBD8
12689 instance.disks = new_disks
12690 self.cfg.Update(instance, feedback_fn)
12692 # Release node locks while waiting for sync
12693 _ReleaseLocks(self, locking.LEVEL_NODE)
12695 # disks are created, waiting for sync
12696 disk_abort = not _WaitForSync(self, instance,
12697 oneshot=not self.op.wait_for_sync)
12698 if disk_abort:
12699 raise errors.OpExecError("There are some degraded disks for"
12700 " this instance, please cleanup manually")
12702 # Node resource locks will be released by caller
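# Illustrative sketch (editor's addition, not part of the module): the fake
# disk_info built at the top of _ConvertPlainToDrbd is a list of dicts, one
# per existing LV, roughly of this shape (values are made up):
#
#   [{constants.IDISK_SIZE: 10240,     # size in MiB, taken from d.size
#     constants.IDISK_MODE: "rw",      # taken from d.mode
#     constants.IDISK_VG: "xenvg"}]    # volume group from d.logical_id[0]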
12704 def _ConvertDrbdToPlain(self, feedback_fn):
12705 """Converts an instance from drbd to plain.
12708 instance = self.instance
12710 assert len(instance.secondary_nodes) == 1
12711 assert instance.disk_template == constants.DT_DRBD8
12713 pnode = instance.primary_node
12714 snode = instance.secondary_nodes[0]
12715 feedback_fn("Converting template to plain")
12717 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12718 new_disks = [d.children[0] for d in instance.disks]
12720 # copy over size and mode
12721 for parent, child in zip(old_disks, new_disks):
12722 child.size = parent.size
12723 child.mode = parent.mode
12725 # this is a DRBD disk, return its port to the pool
12726 # NOTE: this must be done right before the call to cfg.Update!
12727 for disk in old_disks:
12728 tcp_port = disk.logical_id[2]
12729 self.cfg.AddTcpUdpPort(tcp_port)
12731 # update instance structure
12732 instance.disks = new_disks
12733 instance.disk_template = constants.DT_PLAIN
12734 self.cfg.Update(instance, feedback_fn)
12736 # Release locks in case removing disks takes a while
12737 _ReleaseLocks(self, locking.LEVEL_NODE)
12739 feedback_fn("Removing volumes on the secondary node...")
12740 for disk in old_disks:
12741 self.cfg.SetDiskID(disk, snode)
12742 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12743 if msg:
12744 self.LogWarning("Could not remove block device %s on node %s,"
12745 " continuing anyway: %s", disk.iv_name, snode, msg)
12747 feedback_fn("Removing unneeded volumes on the primary node...")
12748 for idx, disk in enumerate(old_disks):
12749 meta = disk.children[1]
12750 self.cfg.SetDiskID(meta, pnode)
12751 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12752 if msg:
12753 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12754 " continuing anyway: %s", idx, pnode, msg)
12756 def _CreateNewDisk(self, idx, params, _):
12757 """Creates a new disk.
12760 instance = self.instance
12763 if instance.disk_template in constants.DTS_FILEBASED:
12764 (file_driver, file_path) = instance.disks[0].logical_id
12765 file_path = os.path.dirname(file_path)
12766 else:
12767 file_driver = file_path = None
12769 disk = \
12770 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12771 instance.primary_node, instance.secondary_nodes,
12772 [params], file_path, file_driver, idx,
12773 self.Log, self.diskparams)[0]
12775 info = _GetInstanceInfoText(instance)
12777 logging.info("Creating volume %s for instance %s",
12778 disk.iv_name, instance.name)
12779 # Note: this needs to be kept in sync with _CreateDisks
12781 for node in instance.all_nodes:
12782 f_create = (node == instance.primary_node)
12783 try:
12784 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12785 except errors.OpExecError, err:
12786 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12787 disk.iv_name, disk, node, err)
12790 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12794 def _ModifyDisk(idx, disk, params, _):
12795 """Modifies a disk.
12798 disk.mode = params[constants.IDISK_MODE]
12801 ("disk.mode/%d" % idx, disk.mode),
12804 def _RemoveDisk(self, idx, root, _):
12808 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12809 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12810 self.cfg.SetDiskID(disk, node)
12811 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12812 if msg:
12813 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12814 " continuing anyway", idx, node, msg)
12816 # if this is a DRBD disk, return its port to the pool
12817 if root.dev_type in constants.LDS_DRBD:
12818 self.cfg.AddTcpUdpPort(root.logical_id[2])
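# Example (editor's addition) of the change-description tuples the disk
# helpers above contribute to the Exec() result; values are illustrative:
#
#   ("disk/0", "add:size=10240,mode=rw")   # from _CreateNewDisk
#   ("disk.mode/1", "ro")                  # from _ModifyDisk
#   ("disk_template", "drbd")              # appended later in Exec()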
12821 def _CreateNewNic(idx, params, private):
12822 """Creates data structure for a new network interface.
12825 mac = params[constants.INIC_MAC]
12826 ip = params.get(constants.INIC_IP, None)
12827 nicparams = private.params
12829 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12831 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12832 (mac, ip, private.filled[constants.NIC_MODE],
12833 private.filled[constants.NIC_LINK])),
12837 def _ApplyNicMods(idx, nic, params, private):
12838 """Modifies a network interface.
12843 for key in [constants.INIC_MAC, constants.INIC_IP]:
12844 if key in params:
12845 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12846 setattr(nic, key, params[key])
12849 nic.nicparams = private.params
12851 for (key, val) in params.items():
12852 changes.append(("nic.%s/%d" % (key, idx), val))
12856 def Exec(self, feedback_fn):
12857 """Modifies an instance.
12859 All parameters take effect only at the next restart of the instance.
12862 # Process here the warnings from CheckPrereq, as we don't have a
12863 # feedback_fn there.
12864 # TODO: Replace with self.LogWarning
12865 for warn in self.warn:
12866 feedback_fn("WARNING: %s" % warn)
12868 assert ((self.op.disk_template is None) ^
12869 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12870 "Not owning any node resource locks"
12872 result = []
12873 instance = self.instance
12876 if self.op.runtime_mem:
12877 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12879 self.op.runtime_mem)
12880 rpcres.Raise("Cannot modify instance runtime memory")
12881 result.append(("runtime_memory", self.op.runtime_mem))
12883 # Apply disk changes
12884 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12885 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12886 _UpdateIvNames(0, instance.disks)
12888 if self.op.disk_template:
12890 check_nodes = set(instance.all_nodes)
12891 if self.op.remote_node:
12892 check_nodes.add(self.op.remote_node)
12893 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12894 owned = self.owned_locks(level)
12895 assert not (check_nodes - owned), \
12896 ("Not owning the correct locks, owning %r, expected at least %r" %
12897 (owned, check_nodes))
12899 r_shut = _ShutdownInstanceDisks(self, instance)
12900 if not r_shut:
12901 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12902 " proceed with disk template conversion")
12903 mode = (instance.disk_template, self.op.disk_template)
12904 try:
12905 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12906 except:
12907 self.cfg.ReleaseDRBDMinors(instance.name)
12908 raise
12909 result.append(("disk_template", self.op.disk_template))
12911 assert instance.disk_template == self.op.disk_template, \
12912 ("Expected disk template '%s', found '%s'" %
12913 (self.op.disk_template, instance.disk_template))
12915 # Release node and resource locks if there are any (they might already have
12916 # been released during disk conversion)
12917 _ReleaseLocks(self, locking.LEVEL_NODE)
12918 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12920 # Apply NIC changes
12921 if self._new_nics is not None:
12922 instance.nics = self._new_nics
12923 result.extend(self._nic_chgdesc)
12926 if self.op.hvparams:
12927 instance.hvparams = self.hv_inst
12928 for key, val in self.op.hvparams.iteritems():
12929 result.append(("hv/%s" % key, val))
12932 if self.op.beparams:
12933 instance.beparams = self.be_inst
12934 for key, val in self.op.beparams.iteritems():
12935 result.append(("be/%s" % key, val))
12938 if self.op.os_name:
12939 instance.os = self.op.os_name
12942 if self.op.osparams:
12943 instance.osparams = self.os_inst
12944 for key, val in self.op.osparams.iteritems():
12945 result.append(("os/%s" % key, val))
12947 if self.op.offline is None:
12948 # Ignore
12949 pass
12950 elif self.op.offline:
12951 # Mark instance as offline
12952 self.cfg.MarkInstanceOffline(instance.name)
12953 result.append(("admin_state", constants.ADMINST_OFFLINE))
12954 else:
12955 # Mark instance as online, but stopped
12956 self.cfg.MarkInstanceDown(instance.name)
12957 result.append(("admin_state", constants.ADMINST_DOWN))
12959 self.cfg.Update(instance, feedback_fn)
12961 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12962 self.owned_locks(locking.LEVEL_NODE)), \
12963 "All node locks should have been released by now"
12965 return result
12967 _DISK_CONVERSIONS = {
12968 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12969 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12970 }
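# Illustrative note (editor's addition): Exec() above dispatches template
# conversions through this table, keyed by (current, requested) template:
#
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   conversion_fn = self._DISK_CONVERSIONS[mode]   # -> _ConvertPlainToDrbd
#   conversion_fn(self, feedback_fn)
#
# Pairs without an entry are expected to be rejected during the prerequisite
# checks (not shown in this excerpt).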
12973 class LUInstanceChangeGroup(LogicalUnit):
12974 HPATH = "instance-change-group"
12975 HTYPE = constants.HTYPE_INSTANCE
12978 def ExpandNames(self):
12979 self.share_locks = _ShareAll()
12980 self.needed_locks = {
12981 locking.LEVEL_NODEGROUP: [],
12982 locking.LEVEL_NODE: [],
12985 self._ExpandAndLockInstance()
12987 if self.op.target_groups:
12988 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12989 self.op.target_groups)
12990 else:
12991 self.req_target_uuids = None
12993 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12995 def DeclareLocks(self, level):
12996 if level == locking.LEVEL_NODEGROUP:
12997 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12999 if self.req_target_uuids:
13000 lock_groups = set(self.req_target_uuids)
13002 # Lock all groups used by instance optimistically; this requires going
13003 # via the node before it's locked, requiring verification later on
13004 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13005 lock_groups.update(instance_groups)
13006 else:
13007 # No target groups, need to lock all of them
13008 lock_groups = locking.ALL_SET
13010 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13012 elif level == locking.LEVEL_NODE:
13013 if self.req_target_uuids:
13014 # Lock all nodes used by instances
13015 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13016 self._LockInstancesNodes()
13018 # Lock all nodes in all potential target groups
13019 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13020 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13021 member_nodes = [node_name
13022 for group in lock_groups
13023 for node_name in self.cfg.GetNodeGroup(group).members]
13024 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13025 else:
13026 # Lock all nodes as all groups are potential targets
13027 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13029 def CheckPrereq(self):
13030 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13031 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13032 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13034 assert (self.req_target_uuids is None or
13035 owned_groups.issuperset(self.req_target_uuids))
13036 assert owned_instances == set([self.op.instance_name])
13038 # Get instance information
13039 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13041 # Check if node groups for locked instance are still correct
13042 assert owned_nodes.issuperset(self.instance.all_nodes), \
13043 ("Instance %s's nodes changed while we kept the lock" %
13044 self.op.instance_name)
13046 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13049 if self.req_target_uuids:
13050 # User requested specific target groups
13051 self.target_uuids = frozenset(self.req_target_uuids)
13052 else:
13053 # All groups except those used by the instance are potential targets
13054 self.target_uuids = owned_groups - inst_groups
13056 conflicting_groups = self.target_uuids & inst_groups
13057 if conflicting_groups:
13058 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13059 " used by the instance '%s'" %
13060 (utils.CommaJoin(conflicting_groups),
13061 self.op.instance_name),
13062 errors.ECODE_INVAL)
13064 if not self.target_uuids:
13065 raise errors.OpPrereqError("There are no possible target groups",
13066 errors.ECODE_INVAL)
13068 def BuildHooksEnv(self):
13069 """Build hooks env.
13072 assert self.target_uuids
13075 "TARGET_GROUPS": " ".join(self.target_uuids),
13078 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13082 def BuildHooksNodes(self):
13083 """Build hooks nodes.
13086 mn = self.cfg.GetMasterNode()
13087 return ([mn], [mn])
13089 def Exec(self, feedback_fn):
13090 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13092 assert instances == [self.op.instance_name], "Instance not locked"
13094 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13095 instances=instances, target_groups=list(self.target_uuids))
13097 ial.Run(self.op.iallocator)
13099 if not ial.success:
13100 raise errors.OpPrereqError("Can't compute solution for changing group of"
13101 " instance '%s' using iallocator '%s': %s" %
13102 (self.op.instance_name, self.op.iallocator,
13104 errors.ECODE_NORES)
13106 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13108 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13109 " instance '%s'", len(jobs), self.op.instance_name)
13111 return ResultWithJobs(jobs)
13114 class LUBackupQuery(NoHooksLU):
13115 """Query the exports list
13120 def CheckArguments(self):
13121 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13122 ["node", "export"], self.op.use_locking)
13124 def ExpandNames(self):
13125 self.expq.ExpandNames(self)
13127 def DeclareLocks(self, level):
13128 self.expq.DeclareLocks(self, level)
13130 def Exec(self, feedback_fn):
13131 result = {}
13133 for (node, expname) in self.expq.OldStyleQuery(self):
13134 if expname is None:
13135 result[node] = False
13136 else:
13137 result.setdefault(node, []).append(expname)
13139 return result
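# Example (editor's addition) of the mapping returned above; node and export
# names are hypothetical:
#
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}   # False: the export list query failed there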
13142 class _ExportQuery(_QueryBase):
13143 FIELDS = query.EXPORT_FIELDS
13145 #: The node name is not a unique key for this query
13146 SORT_FIELD = "node"
13148 def ExpandNames(self, lu):
13149 lu.needed_locks = {}
13151 # The following variables interact with _QueryBase._GetNames
13152 if self.names:
13153 self.wanted = _GetWantedNodes(lu, self.names)
13154 else:
13155 self.wanted = locking.ALL_SET
13157 self.do_locking = self.use_locking
13159 if self.do_locking:
13160 lu.share_locks = _ShareAll()
13161 lu.needed_locks = {
13162 locking.LEVEL_NODE: self.wanted,
13165 def DeclareLocks(self, lu, level):
13166 pass
13168 def _GetQueryData(self, lu):
13169 """Computes the list of nodes and their attributes.
13172 # Locking is not used
13174 assert not (compat.any(lu.glm.is_owned(level)
13175 for level in locking.LEVELS
13176 if level != locking.LEVEL_CLUSTER) or
13177 self.do_locking or self.use_locking)
13179 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13181 result = []
13183 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13184 if nres.fail_msg:
13185 result.append((node, None))
13186 else:
13187 result.extend((node, expname) for expname in nres.payload)
13189 return result
13192 class LUBackupPrepare(NoHooksLU):
13193 """Prepares an instance for an export and returns useful information.
13198 def ExpandNames(self):
13199 self._ExpandAndLockInstance()
13201 def CheckPrereq(self):
13202 """Check prerequisites.
13205 instance_name = self.op.instance_name
13207 self.instance = self.cfg.GetInstanceInfo(instance_name)
13208 assert self.instance is not None, \
13209 "Cannot retrieve locked instance %s" % self.op.instance_name
13210 _CheckNodeOnline(self, self.instance.primary_node)
13212 self._cds = _GetClusterDomainSecret()
13214 def Exec(self, feedback_fn):
13215 """Prepares an instance for an export.
13218 instance = self.instance
13220 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13221 salt = utils.GenerateSecret(8)
13223 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13224 result = self.rpc.call_x509_cert_create(instance.primary_node,
13225 constants.RIE_CERT_VALIDITY)
13226 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13228 (name, cert_pem) = result.payload
13230 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13234 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13235 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13237 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13243 class LUBackupExport(LogicalUnit):
13244 """Export an instance to an image in the cluster.
13247 HPATH = "instance-export"
13248 HTYPE = constants.HTYPE_INSTANCE
13251 def CheckArguments(self):
13252 """Check the arguments.
13255 self.x509_key_name = self.op.x509_key_name
13256 self.dest_x509_ca_pem = self.op.destination_x509_ca
13258 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13259 if not self.x509_key_name:
13260 raise errors.OpPrereqError("Missing X509 key name for encryption",
13261 errors.ECODE_INVAL)
13263 if not self.dest_x509_ca_pem:
13264 raise errors.OpPrereqError("Missing destination X509 CA",
13265 errors.ECODE_INVAL)
13267 def ExpandNames(self):
13268 self._ExpandAndLockInstance()
13270 # Lock all nodes for local exports
13271 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13272 # FIXME: lock only instance primary and destination node
13274 # Sad but true, for now we have to lock all nodes, as we don't know where
13275 # the previous export might be, and in this LU we search for it and
13276 # remove it from its current node. In the future we could fix this by:
13277 # - making a tasklet to search (share-lock all), then create the
13278 # new one, then one to remove, after
13279 # - removing the removal operation altogether
13280 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13282 def DeclareLocks(self, level):
13283 """Last minute lock declaration."""
13284 # All nodes are locked anyway, so nothing to do here.
13286 def BuildHooksEnv(self):
13287 """Build hooks env.
13289 This will run on the master, primary node and target node.
13293 "EXPORT_MODE": self.op.mode,
13294 "EXPORT_NODE": self.op.target_node,
13295 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13296 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13297 # TODO: Generic function for boolean env variables
13298 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13301 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13305 def BuildHooksNodes(self):
13306 """Build hooks nodes.
13309 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13311 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13312 nl.append(self.op.target_node)
13316 def CheckPrereq(self):
13317 """Check prerequisites.
13319 This checks that the instance and node names are valid.
13322 instance_name = self.op.instance_name
13324 self.instance = self.cfg.GetInstanceInfo(instance_name)
13325 assert self.instance is not None, \
13326 "Cannot retrieve locked instance %s" % self.op.instance_name
13327 _CheckNodeOnline(self, self.instance.primary_node)
13329 if (self.op.remove_instance and
13330 self.instance.admin_state == constants.ADMINST_UP and
13331 not self.op.shutdown):
13332 raise errors.OpPrereqError("Can not remove instance without shutting it"
13333 " down before", errors.ECODE_STATE)
13335 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13336 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13337 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13338 assert self.dst_node is not None
13340 _CheckNodeOnline(self, self.dst_node.name)
13341 _CheckNodeNotDrained(self, self.dst_node.name)
13344 self.dest_disk_info = None
13345 self.dest_x509_ca = None
13347 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13348 self.dst_node = None
13350 if len(self.op.target_node) != len(self.instance.disks):
13351 raise errors.OpPrereqError(("Received destination information for %s"
13352 " disks, but instance %s has %s disks") %
13353 (len(self.op.target_node), instance_name,
13354 len(self.instance.disks)),
13355 errors.ECODE_INVAL)
13357 cds = _GetClusterDomainSecret()
13359 # Check X509 key name
13360 try:
13361 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13362 except (TypeError, ValueError), err:
13363 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13365 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13366 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13367 errors.ECODE_INVAL)
13369 # Load and verify CA
13370 try:
13371 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13372 except OpenSSL.crypto.Error, err:
13373 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13374 (err, ), errors.ECODE_INVAL)
13376 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13377 if errcode is not None:
13378 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13379 (msg, ), errors.ECODE_INVAL)
13381 self.dest_x509_ca = cert
13383 # Verify target information
13384 disk_info = []
13385 for idx, disk_data in enumerate(self.op.target_node):
13386 try:
13387 (host, port, magic) = \
13388 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13389 except errors.GenericError, err:
13390 raise errors.OpPrereqError("Target info for disk %s: %s" %
13391 (idx, err), errors.ECODE_INVAL)
13393 disk_info.append((host, port, magic))
13395 assert len(disk_info) == len(self.op.target_node)
13396 self.dest_disk_info = disk_info
13398 else:
13399 raise errors.ProgrammerError("Unhandled export mode %r" %
13400 self.op.mode)
13402 # instance disk type verification
13403 # TODO: Implement export support for file-based disks
13404 for disk in self.instance.disks:
13405 if disk.dev_type == constants.LD_FILE:
13406 raise errors.OpPrereqError("Export not supported for instances with"
13407 " file-based disks", errors.ECODE_INVAL)
13409 def _CleanupExports(self, feedback_fn):
13410 """Removes exports of current instance from all other nodes.
13412 If an instance in a cluster with nodes A..D was exported to node C, its
13413 exports will be removed from the nodes A, B and D.
13416 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13418 nodelist = self.cfg.GetNodeList()
13419 nodelist.remove(self.dst_node.name)
13421 # on one-node clusters nodelist will be empty after the removal; if we
13422 # proceeded, the backup would be removed because OpBackupQuery substitutes
13423 # an empty list with the full cluster node list.
13424 iname = self.instance.name
13425 if nodelist:
13426 feedback_fn("Removing old exports for instance %s" % iname)
13427 exportlist = self.rpc.call_export_list(nodelist)
13428 for node in exportlist:
13429 if exportlist[node].fail_msg:
13430 continue
13431 if iname in exportlist[node].payload:
13432 msg = self.rpc.call_export_remove(node, iname).fail_msg
13433 if msg:
13434 self.LogWarning("Could not remove older export for instance %s"
13435 " on node %s: %s", iname, node, msg)
13437 def Exec(self, feedback_fn):
13438 """Export an instance to an image in the cluster.
13441 assert self.op.mode in constants.EXPORT_MODES
13443 instance = self.instance
13444 src_node = instance.primary_node
13446 if self.op.shutdown:
13447 # shutdown the instance, but not the disks
13448 feedback_fn("Shutting down instance %s" % instance.name)
13449 result = self.rpc.call_instance_shutdown(src_node, instance,
13450 self.op.shutdown_timeout)
13451 # TODO: Maybe ignore failures if ignore_remove_failures is set
13452 result.Raise("Could not shutdown instance %s on"
13453 " node %s" % (instance.name, src_node))
13455 # set the disks ID correctly since call_instance_start needs the
13456 # correct drbd minor to create the symlinks
13457 for disk in instance.disks:
13458 self.cfg.SetDiskID(disk, src_node)
13460 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13462 if activate_disks:
13463 # Activate the instance disks if we're exporting a stopped instance
13464 feedback_fn("Activating disks for %s" % instance.name)
13465 _StartInstanceDisks(self, instance, None)
13468 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13471 helper.CreateSnapshots()
13473 if (self.op.shutdown and
13474 instance.admin_state == constants.ADMINST_UP and
13475 not self.op.remove_instance):
13476 assert not activate_disks
13477 feedback_fn("Starting instance %s" % instance.name)
13478 result = self.rpc.call_instance_start(src_node,
13479 (instance, None, None), False)
13480 msg = result.fail_msg
13481 if msg:
13482 feedback_fn("Failed to start instance: %s" % msg)
13483 _ShutdownInstanceDisks(self, instance)
13484 raise errors.OpExecError("Could not start instance: %s" % msg)
13486 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13487 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13488 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13489 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13490 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13492 (key_name, _, _) = self.x509_key_name
13494 dest_ca_pem = \
13495 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13496 self.dest_x509_ca)
13498 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13499 key_name, dest_ca_pem,
13500 timeouts)
13504 # Check for backwards compatibility
13505 assert len(dresults) == len(instance.disks)
13506 assert compat.all(isinstance(i, bool) for i in dresults), \
13507 "Not all results are boolean: %r" % dresults
13511 feedback_fn("Deactivating disks for %s" % instance.name)
13512 _ShutdownInstanceDisks(self, instance)
13514 if not (compat.all(dresults) and fin_resu):
13515 failures = []
13516 if not fin_resu:
13517 failures.append("export finalization")
13518 if not compat.all(dresults):
13519 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13520 if not dsk)
13521 failures.append("disk export: disk(s) %s" % fdsk)
13523 raise errors.OpExecError("Export failed, errors in %s" %
13524 utils.CommaJoin(failures))
13526 # At this point, the export was successful, we can cleanup/finish
13528 # Remove instance if requested
13529 if self.op.remove_instance:
13530 feedback_fn("Removing instance %s" % instance.name)
13531 _RemoveInstance(self, feedback_fn, instance,
13532 self.op.ignore_remove_failures)
13534 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13535 self._CleanupExports(feedback_fn)
13537 return fin_resu, dresults
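# Sketch (editor's addition, an assumption about typical usage rather than
# code from the module): a consumer of the (fin_resu, dresults) pair returned
# above can derive overall success the same way Exec() itself does:
#
#   fin_resu, dresults = ...               # e.g. (True, [True, False])
#   ok = fin_resu and compat.all(dresults)
#   failed = [idx for (idx, dsk) in enumerate(dresults) if not dsk]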
13540 class LUBackupRemove(NoHooksLU):
13541 """Remove exports related to the named instance.
13546 def ExpandNames(self):
13547 self.needed_locks = {}
13548 # We need all nodes to be locked in order for RemoveExport to work, but we
13549 # don't need to lock the instance itself, as nothing will happen to it (and
13550 # we can remove exports also for a removed instance)
13551 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13553 def Exec(self, feedback_fn):
13554 """Remove any export.
13557 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13558 # If the instance was not found we'll try with the name that was passed in.
13559 # This will only work if it was an FQDN, though.
13560 fqdn_warn = False
13561 if not instance_name:
13562 fqdn_warn = True
13563 instance_name = self.op.instance_name
13565 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13566 exportlist = self.rpc.call_export_list(locked_nodes)
13567 found = False
13568 for node in exportlist:
13569 msg = exportlist[node].fail_msg
13570 if msg:
13571 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13572 continue
13573 if instance_name in exportlist[node].payload:
13574 found = True
13575 result = self.rpc.call_export_remove(node, instance_name)
13576 msg = result.fail_msg
13577 if msg:
13578 logging.error("Could not remove export for instance %s"
13579 " on node %s: %s", instance_name, node, msg)
13581 if fqdn_warn and not found:
13582 feedback_fn("Export not found. If trying to remove an export belonging"
13583 " to a deleted instance please use its Fully Qualified"
13587 class LUGroupAdd(LogicalUnit):
13588 """Logical unit for creating node groups.
13591 HPATH = "group-add"
13592 HTYPE = constants.HTYPE_GROUP
13595 def ExpandNames(self):
13596 # We need the new group's UUID here so that we can create and acquire the
13597 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13598 # that it should not check whether the UUID exists in the configuration.
13599 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13600 self.needed_locks = {}
13601 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13603 def CheckPrereq(self):
13604 """Check prerequisites.
13606 This checks that the given group name is not an existing node group
13610 try:
13611 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13612 except errors.OpPrereqError:
13613 pass
13614 else:
13615 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13616 " node group (UUID: %s)" %
13617 (self.op.group_name, existing_uuid),
13618 errors.ECODE_EXISTS)
13620 if self.op.ndparams:
13621 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13623 if self.op.hv_state:
13624 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13625 else:
13626 self.new_hv_state = None
13628 if self.op.disk_state:
13629 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13630 else:
13631 self.new_disk_state = None
13633 if self.op.diskparams:
13634 for templ in constants.DISK_TEMPLATES:
13635 if templ in self.op.diskparams:
13636 utils.ForceDictType(self.op.diskparams[templ],
13637 constants.DISK_DT_TYPES)
13638 self.new_diskparams = self.op.diskparams
13639 try:
13640 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13641 except errors.OpPrereqError, err:
13642 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13643 errors.ECODE_INVAL)
13644 else:
13645 self.new_diskparams = {}
13647 if self.op.ipolicy:
13648 cluster = self.cfg.GetClusterInfo()
13649 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13650 try:
13651 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13652 except errors.ConfigurationError, err:
13653 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13654 errors.ECODE_INVAL)
13656 def BuildHooksEnv(self):
13657 """Build hooks env.
13661 "GROUP_NAME": self.op.group_name,
13664 def BuildHooksNodes(self):
13665 """Build hooks nodes.
13668 mn = self.cfg.GetMasterNode()
13669 return ([mn], [mn])
13671 def Exec(self, feedback_fn):
13672 """Add the node group to the cluster.
13675 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13676 uuid=self.group_uuid,
13677 alloc_policy=self.op.alloc_policy,
13678 ndparams=self.op.ndparams,
13679 diskparams=self.new_diskparams,
13680 ipolicy=self.op.ipolicy,
13681 hv_state_static=self.new_hv_state,
13682 disk_state_static=self.new_disk_state)
13684 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13685 del self.remove_locks[locking.LEVEL_NODEGROUP]
13688 class LUGroupAssignNodes(NoHooksLU):
13689 """Logical unit for assigning nodes to groups.
13694 def ExpandNames(self):
13695 # These raise errors.OpPrereqError on their own:
13696 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13697 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13699 # We want to lock all the affected nodes and groups. We have readily
13700 # available the list of nodes, and the *destination* group. To gather the
13701 # list of "source" groups, we need to fetch node information later on.
13702 self.needed_locks = {
13703 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13704 locking.LEVEL_NODE: self.op.nodes,
13707 def DeclareLocks(self, level):
13708 if level == locking.LEVEL_NODEGROUP:
13709 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13711 # Try to get all affected nodes' groups without having the group or node
13712 # lock yet. Needs verification later in the code flow.
13713 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13715 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13717 def CheckPrereq(self):
13718 """Check prerequisites.
13721 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13722 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13723 frozenset(self.op.nodes))
13725 expected_locks = (set([self.group_uuid]) |
13726 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13727 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13728 if actual_locks != expected_locks:
13729 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13730 " current groups are '%s', used to be '%s'" %
13731 (utils.CommaJoin(expected_locks),
13732 utils.CommaJoin(actual_locks)))
13734 self.node_data = self.cfg.GetAllNodesInfo()
13735 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13736 instance_data = self.cfg.GetAllInstancesInfo()
13738 if self.group is None:
13739 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13740 (self.op.group_name, self.group_uuid))
13742 (new_splits, previous_splits) = \
13743 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13744 for node in self.op.nodes],
13745 self.node_data, instance_data)
13747 if new_splits:
13748 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13750 if not self.op.force:
13751 raise errors.OpExecError("The following instances get split by this"
13752 " change and --force was not given: %s" %
13753 fmt_new_splits)
13754 else:
13755 self.LogWarning("This operation will split the following instances: %s",
13756 fmt_new_splits)
13758 if previous_splits:
13759 self.LogWarning("In addition, these already-split instances continue"
13760 " to be split across groups: %s",
13761 utils.CommaJoin(utils.NiceSort(previous_splits)))
13763 def Exec(self, feedback_fn):
13764 """Assign nodes to a new group.
13767 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13769 self.cfg.AssignGroupNodes(mods)
13772 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13773 """Check for split instances after a node assignment.
13775 This method considers a series of node assignments as an atomic operation,
13776 and returns information about split instances after applying the set of
13779 In particular, it returns information about newly split instances, and
13780 instances that were already split, and remain so after the change.
13782 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13785 @type changes: list of (node_name, new_group_uuid) pairs.
13786 @param changes: list of node assignments to consider.
13787 @param node_data: a dict with data for all nodes
13788 @param instance_data: a dict with all instances to consider
13789 @rtype: a two-tuple
13790 @return: a list of instances that were previously okay and become split as a
13791 consequence of this change, and a list of instances that were previously
13792 split and this change does not fix.
13795 changed_nodes = dict((node, group) for node, group in changes
13796 if node_data[node].group != group)
13798 all_split_instances = set()
13799 previously_split_instances = set()
13801 def InstanceNodes(instance):
13802 return [instance.primary_node] + list(instance.secondary_nodes)
13804 for inst in instance_data.values():
13805 if inst.disk_template not in constants.DTS_INT_MIRROR:
13806 continue
13808 instance_nodes = InstanceNodes(inst)
13810 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13811 previously_split_instances.add(inst.name)
13813 if len(set(changed_nodes.get(node, node_data[node].group)
13814 for node in instance_nodes)) > 1:
13815 all_split_instances.add(inst.name)
13817 return (list(all_split_instances - previously_split_instances),
13818 list(previously_split_instances & all_split_instances))
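# Worked example (editor's addition, hypothetical data): moving only node2 to
# group B splits a DRBD instance that lives on (node1, node2), both currently
# in group A:
#
#   changes = [("node2", "uuid-of-group-B")]
#   # node_data["node1"].group == node_data["node2"].group == "uuid-of-group-A"
#   # -> that instance appears in the first returned list (newly split);
#   #    instances already spanning two groups land in the second list.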
13821 class _GroupQuery(_QueryBase):
13822 FIELDS = query.GROUP_FIELDS
13824 def ExpandNames(self, lu):
13825 lu.needed_locks = {}
13827 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13828 self._cluster = lu.cfg.GetClusterInfo()
13829 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13831 if not self.names:
13832 self.wanted = [name_to_uuid[name]
13833 for name in utils.NiceSort(name_to_uuid.keys())]
13834 else:
13835 # Accept names to be either names or UUIDs.
13836 missing = []
13837 self.wanted = []
13838 all_uuid = frozenset(self._all_groups.keys())
13840 for name in self.names:
13841 if name in all_uuid:
13842 self.wanted.append(name)
13843 elif name in name_to_uuid:
13844 self.wanted.append(name_to_uuid[name])
13845 else:
13846 missing.append(name)
13848 if missing:
13849 raise errors.OpPrereqError("Some groups do not exist: %s" %
13850 utils.CommaJoin(missing),
13851 errors.ECODE_NOENT)
13853 def DeclareLocks(self, lu, level):
13854 pass
13856 def _GetQueryData(self, lu):
13857 """Computes the list of node groups and their attributes.
13860 do_nodes = query.GQ_NODE in self.requested_data
13861 do_instances = query.GQ_INST in self.requested_data
13863 group_to_nodes = None
13864 group_to_instances = None
13866 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13867 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13868 # latter GetAllInstancesInfo() is not enough, for we have to go through
13869 # instance->node. Hence, we will need to process nodes even if we only need
13870 # instance information.
13871 if do_nodes or do_instances:
13872 all_nodes = lu.cfg.GetAllNodesInfo()
13873 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13874 node_to_group = {}
13876 for node in all_nodes.values():
13877 if node.group in group_to_nodes:
13878 group_to_nodes[node.group].append(node.name)
13879 node_to_group[node.name] = node.group
13881 if do_instances:
13882 all_instances = lu.cfg.GetAllInstancesInfo()
13883 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13885 for instance in all_instances.values():
13886 node = instance.primary_node
13887 if node in node_to_group:
13888 group_to_instances[node_to_group[node]].append(instance.name)
13890 if not do_nodes:
13891 # Do not pass on node information if it was not requested.
13892 group_to_nodes = None
13894 return query.GroupQueryData(self._cluster,
13895 [self._all_groups[uuid]
13896 for uuid in self.wanted],
13897 group_to_nodes, group_to_instances,
13898 query.GQ_DISKPARAMS in self.requested_data)
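# Example shapes (editor's addition, hypothetical UUIDs/names) of the two
# optional mappings computed above; each stays None when not requested:
#
#   group_to_nodes     = {"uuid-a": ["node1", "node2"], "uuid-b": []}
#   group_to_instances = {"uuid-a": ["inst1"], "uuid-b": []}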
13901 class LUGroupQuery(NoHooksLU):
13902 """Logical unit for querying node groups.
13907 def CheckArguments(self):
13908 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13909 self.op.output_fields, False)
13911 def ExpandNames(self):
13912 self.gq.ExpandNames(self)
13914 def DeclareLocks(self, level):
13915 self.gq.DeclareLocks(self, level)
13917 def Exec(self, feedback_fn):
13918 return self.gq.OldStyleQuery(self)
13921 class LUGroupSetParams(LogicalUnit):
13922 """Modifies the parameters of a node group.
13925 HPATH = "group-modify"
13926 HTYPE = constants.HTYPE_GROUP
13929 def CheckArguments(self):
13930 all_changes = [
13931 self.op.ndparams,
13932 self.op.diskparams,
13933 self.op.alloc_policy,
13934 self.op.hv_state,
13935 self.op.disk_state,
13936 self.op.ipolicy,
13937 ]
13939 if all_changes.count(None) == len(all_changes):
13940 raise errors.OpPrereqError("Please pass at least one modification",
13941 errors.ECODE_INVAL)
13943 def ExpandNames(self):
13944 # This raises errors.OpPrereqError on its own:
13945 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13947 self.needed_locks = {
13948 locking.LEVEL_INSTANCE: [],
13949 locking.LEVEL_NODEGROUP: [self.group_uuid],
13952 self.share_locks[locking.LEVEL_INSTANCE] = 1
13954 def DeclareLocks(self, level):
13955 if level == locking.LEVEL_INSTANCE:
13956 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13958 # Lock instances optimistically, needs verification once group lock has
13960 self.needed_locks[locking.LEVEL_INSTANCE] = \
13961 self.cfg.GetNodeGroupInstances(self.group_uuid)
13964 def _UpdateAndVerifyDiskParams(old, new):
13965 """Updates and verifies disk parameters.
13968 new_params = _GetUpdatedParams(old, new)
13969 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13970 return new_params
13972 def CheckPrereq(self):
13973 """Check prerequisites.
13976 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13978 # Check if locked instances are still correct
13979 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13981 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13982 cluster = self.cfg.GetClusterInfo()
13984 if self.group is None:
13985 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13986 (self.op.group_name, self.group_uuid))
13988 if self.op.ndparams:
13989 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13990 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
13991 self.new_ndparams = new_ndparams
13993 if self.op.diskparams:
13994 diskparams = self.group.diskparams
13995 uavdp = self._UpdateAndVerifyDiskParams
13996 # For each disktemplate subdict update and verify the values
13997 new_diskparams = dict((dt,
13998 uavdp(diskparams.get(dt, {}),
13999 self.op.diskparams[dt]))
14000 for dt in constants.DISK_TEMPLATES
14001 if dt in self.op.diskparams)
14002 # Now that we have all the subdicts of diskparams ready, let's merge the
14003 # actual dict with all updated subdicts
14004 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14005 try:
14006 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14007 except errors.OpPrereqError, err:
14008 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14009 errors.ECODE_INVAL)
14011 if self.op.hv_state:
14012 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14013 self.group.hv_state_static)
14015 if self.op.disk_state:
14016 self.new_disk_state = \
14017 _MergeAndVerifyDiskState(self.op.disk_state,
14018 self.group.disk_state_static)
14020 if self.op.ipolicy:
14021 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14025 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14026 inst_filter = lambda inst: inst.name in owned_instances
14027 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14028 violations = \
14029 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14030 self.group),
14031 new_ipolicy, instances)
14033 if violations:
14034 self.LogWarning("After the ipolicy change the following instances"
14035 " violate it: %s",
14036 utils.CommaJoin(violations))
14038 def BuildHooksEnv(self):
14039 """Build hooks env.
14043 "GROUP_NAME": self.op.group_name,
14044 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14047 def BuildHooksNodes(self):
14048 """Build hooks nodes.
14051 mn = self.cfg.GetMasterNode()
14052 return ([mn], [mn])
14054 def Exec(self, feedback_fn):
14055 """Modifies the node group.
14058 result = []
14060 if self.op.ndparams:
14061 self.group.ndparams = self.new_ndparams
14062 result.append(("ndparams", str(self.group.ndparams)))
14064 if self.op.diskparams:
14065 self.group.diskparams = self.new_diskparams
14066 result.append(("diskparams", str(self.group.diskparams)))
14068 if self.op.alloc_policy:
14069 self.group.alloc_policy = self.op.alloc_policy
14071 if self.op.hv_state:
14072 self.group.hv_state_static = self.new_hv_state
14074 if self.op.disk_state:
14075 self.group.disk_state_static = self.new_disk_state
14077 if self.op.ipolicy:
14078 self.group.ipolicy = self.new_ipolicy
14080 self.cfg.Update(self.group, feedback_fn)
14082 return result
14084 class LUGroupRemove(LogicalUnit):
14085 HPATH = "group-remove"
14086 HTYPE = constants.HTYPE_GROUP
14089 def ExpandNames(self):
14090 # This raises errors.OpPrereqError on its own:
14091 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14092 self.needed_locks = {
14093 locking.LEVEL_NODEGROUP: [self.group_uuid],
14096 def CheckPrereq(self):
14097 """Check prerequisites.
14099 This checks that the given group name exists as a node group, that it is
14100 empty (i.e., contains no nodes), and that it is not the last group of the
14104 # Verify that the group is empty.
14105 group_nodes = [node.name
14106 for node in self.cfg.GetAllNodesInfo().values()
14107 if node.group == self.group_uuid]
14109 if group_nodes:
14110 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14111 " nodes: %s" %
14112 (self.op.group_name,
14113 utils.CommaJoin(utils.NiceSort(group_nodes))),
14114 errors.ECODE_STATE)
14116 # Verify the cluster would not be left group-less.
14117 if len(self.cfg.GetNodeGroupList()) == 1:
14118 raise errors.OpPrereqError("Group '%s' is the only group,"
14119 " cannot be removed" %
14120 self.op.group_name,
14121 errors.ECODE_STATE)
14123 def BuildHooksEnv(self):
14124 """Build hooks env.
14128 "GROUP_NAME": self.op.group_name,
14131 def BuildHooksNodes(self):
14132 """Build hooks nodes.
14135 mn = self.cfg.GetMasterNode()
14136 return ([mn], [mn])
14138 def Exec(self, feedback_fn):
14139 """Remove the node group.
14142 try:
14143 self.cfg.RemoveNodeGroup(self.group_uuid)
14144 except errors.ConfigurationError:
14145 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14146 (self.op.group_name, self.group_uuid))
14148 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14151 class LUGroupRename(LogicalUnit):
14152 HPATH = "group-rename"
14153 HTYPE = constants.HTYPE_GROUP
14156 def ExpandNames(self):
14157 # This raises errors.OpPrereqError on its own:
14158 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14160 self.needed_locks = {
14161 locking.LEVEL_NODEGROUP: [self.group_uuid],
14164 def CheckPrereq(self):
14165 """Check prerequisites.
14167 Ensures requested new name is not yet used.
14170 try:
14171 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14172 except errors.OpPrereqError:
14173 pass
14174 else:
14175 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14176 " node group (UUID: %s)" %
14177 (self.op.new_name, new_name_uuid),
14178 errors.ECODE_EXISTS)
14180 def BuildHooksEnv(self):
14181 """Build hooks env.
14185 "OLD_NAME": self.op.group_name,
14186 "NEW_NAME": self.op.new_name,
14189 def BuildHooksNodes(self):
14190 """Build hooks nodes.
14193 mn = self.cfg.GetMasterNode()
14195 all_nodes = self.cfg.GetAllNodesInfo()
14196 all_nodes.pop(mn, None)
14198 run_nodes = [mn]
14199 run_nodes.extend(node.name for node in all_nodes.values()
14200 if node.group == self.group_uuid)
14202 return (run_nodes, run_nodes)
14204 def Exec(self, feedback_fn):
14205 """Rename the node group.
14208 group = self.cfg.GetNodeGroup(self.group_uuid)
14210 if group is None:
14211 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14212 (self.op.group_name, self.group_uuid))
14214 group.name = self.op.new_name
14215 self.cfg.Update(group, feedback_fn)
14217 return self.op.new_name
14220 class LUGroupEvacuate(LogicalUnit):
14221 HPATH = "group-evacuate"
14222 HTYPE = constants.HTYPE_GROUP
14225 def ExpandNames(self):
14226 # This raises errors.OpPrereqError on its own:
14227 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14229 if self.op.target_groups:
14230 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14231 self.op.target_groups)
14232 else:
14233 self.req_target_uuids = []
14235 if self.group_uuid in self.req_target_uuids:
14236 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14237 " as a target group (targets are %s)" %
14238 (self.group_uuid,
14239 utils.CommaJoin(self.req_target_uuids)),
14240 errors.ECODE_INVAL)
14242 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14244 self.share_locks = _ShareAll()
14245 self.needed_locks = {
14246 locking.LEVEL_INSTANCE: [],
14247 locking.LEVEL_NODEGROUP: [],
14248 locking.LEVEL_NODE: [],
14251 def DeclareLocks(self, level):
14252 if level == locking.LEVEL_INSTANCE:
14253 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14255 # Lock instances optimistically, needs verification once node and group
14256 # locks have been acquired
14257 self.needed_locks[locking.LEVEL_INSTANCE] = \
14258 self.cfg.GetNodeGroupInstances(self.group_uuid)
14260 elif level == locking.LEVEL_NODEGROUP:
14261 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14263 if self.req_target_uuids:
14264 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14266 # Lock all groups used by instances optimistically; this requires going
14267 # via the node before it's locked, requiring verification later on
14268 lock_groups.update(group_uuid
14269 for instance_name in
14270 self.owned_locks(locking.LEVEL_INSTANCE)
14271 for group_uuid in
14272 self.cfg.GetInstanceNodeGroups(instance_name))
14273 else:
14274 # No target groups, need to lock all of them
14275 lock_groups = locking.ALL_SET
14277 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14279 elif level == locking.LEVEL_NODE:
14280 # This will only lock the nodes in the group to be evacuated which
14281 # contain actual instances
14282 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14283 self._LockInstancesNodes()
14285 # Lock all nodes in group to be evacuated and target groups
14286 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14287 assert self.group_uuid in owned_groups
14288 member_nodes = [node_name
14289 for group in owned_groups
14290 for node_name in self.cfg.GetNodeGroup(group).members]
14291 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14293 def CheckPrereq(self):
14294 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14295 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14296 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14298 assert owned_groups.issuperset(self.req_target_uuids)
14299 assert self.group_uuid in owned_groups
14301 # Check if locked instances are still correct
14302 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14304 # Get instance information
14305 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14307 # Check if node groups for locked instances are still correct
14308 _CheckInstancesNodeGroups(self.cfg, self.instances,
14309 owned_groups, owned_nodes, self.group_uuid)
14311 if self.req_target_uuids:
14312 # User requested specific target groups
14313 self.target_uuids = self.req_target_uuids
14314 else:
14315 # All groups except the one to be evacuated are potential targets
14316 self.target_uuids = [group_uuid for group_uuid in owned_groups
14317 if group_uuid != self.group_uuid]
14319 if not self.target_uuids:
14320 raise errors.OpPrereqError("There are no possible target groups",
14321 errors.ECODE_INVAL)
14323 def BuildHooksEnv(self):
14324 """Build hooks env.
14328 "GROUP_NAME": self.op.group_name,
14329 "TARGET_GROUPS": " ".join(self.target_uuids),
14332 def BuildHooksNodes(self):
14333 """Build hooks nodes.
14336 mn = self.cfg.GetMasterNode()
14338 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14340 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14342 return (run_nodes, run_nodes)
14344 def Exec(self, feedback_fn):
14345 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14347 assert self.group_uuid not in self.target_uuids
14349 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14350 instances=instances, target_groups=self.target_uuids)
14352 ial.Run(self.op.iallocator)
14354 if not ial.success:
14355 raise errors.OpPrereqError("Can't compute group evacuation using"
14356 " iallocator '%s': %s" %
14357 (self.op.iallocator, ial.info),
14358 errors.ECODE_NORES)
14360 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14362 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14363 len(jobs), self.op.group_name)
14365 return ResultWithJobs(jobs)
14368 class TagsLU(NoHooksLU): # pylint: disable=W0223
14369 """Generic tags LU.
14371 This is an abstract class which is the parent of all the other tags LUs.
14374 def ExpandNames(self):
14375 self.group_uuid = None
14376 self.needed_locks = {}
14378 if self.op.kind == constants.TAG_NODE:
14379 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14380 lock_level = locking.LEVEL_NODE
14381 lock_name = self.op.name
14382 elif self.op.kind == constants.TAG_INSTANCE:
14383 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14384 lock_level = locking.LEVEL_INSTANCE
14385 lock_name = self.op.name
14386 elif self.op.kind == constants.TAG_NODEGROUP:
14387 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14388 lock_level = locking.LEVEL_NODEGROUP
14389 lock_name = self.group_uuid
14390 else:
14391 lock_level = None
14392 lock_name = None
14394 if lock_level and getattr(self.op, "use_locking", True):
14395 self.needed_locks[lock_level] = lock_name
14397 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14398 # not possible to acquire the BGL based on opcode parameters)
14400 def CheckPrereq(self):
14401 """Check prerequisites.
14404 if self.op.kind == constants.TAG_CLUSTER:
14405 self.target = self.cfg.GetClusterInfo()
14406 elif self.op.kind == constants.TAG_NODE:
14407 self.target = self.cfg.GetNodeInfo(self.op.name)
14408 elif self.op.kind == constants.TAG_INSTANCE:
14409 self.target = self.cfg.GetInstanceInfo(self.op.name)
14410 elif self.op.kind == constants.TAG_NODEGROUP:
14411 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14412 else:
14413 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14414 str(self.op.kind), errors.ECODE_INVAL)
14417 class LUTagsGet(TagsLU):
14418 """Returns the tags of a given object.
14423 def ExpandNames(self):
14424 TagsLU.ExpandNames(self)
14426 # Share locks as this is only a read operation
14427 self.share_locks = _ShareAll()
14429 def Exec(self, feedback_fn):
14430 """Returns the tag list.
14433 return list(self.target.GetTags())
14436 class LUTagsSearch(NoHooksLU):
14437 """Searches the tags for a given pattern.
14442 def ExpandNames(self):
14443 self.needed_locks = {}
14445 def CheckPrereq(self):
14446 """Check prerequisites.
14448 This checks the pattern passed for validity by compiling it.
14451 try:
14452 self.re = re.compile(self.op.pattern)
14453 except re.error, err:
14454 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14455 (self.op.pattern, err), errors.ECODE_INVAL)
14457 def Exec(self, feedback_fn):
14458 """Returns the tag list.
14461 cfg = self.cfg
14462 tgts = [("/cluster", cfg.GetClusterInfo())]
14463 ilist = cfg.GetAllInstancesInfo().values()
14464 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14465 nlist = cfg.GetAllNodesInfo().values()
14466 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14467 tgts.extend(("/nodegroup/%s" % n.name, n)
14468 for n in cfg.GetAllNodeGroupsInfo().values())
14469 results = []
14470 for path, target in tgts:
14471 for tag in target.GetTags():
14472 if self.re.search(tag):
14473 results.append((path, tag))
14474 return results
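# Example result (editor's addition, hypothetical names/tags) of a tag search;
# each hit is a (path, tag) pair using the path prefixes built above:
#
#   [("/instances/inst1.example.com", "env:prod"),
#    ("/nodes/node1.example.com", "env:prod")]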
14477 class LUTagsSet(TagsLU):
14478 """Sets a tag on a given object.
14483 def CheckPrereq(self):
14484 """Check prerequisites.
14486 This checks the type and length of the tag name and value.
14489 TagsLU.CheckPrereq(self)
14490 for tag in self.op.tags:
14491 objects.TaggableObject.ValidateTag(tag)
14493 def Exec(self, feedback_fn):
14497 try:
14498 for tag in self.op.tags:
14499 self.target.AddTag(tag)
14500 except errors.TagError, err:
14501 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14502 self.cfg.Update(self.target, feedback_fn)
14505 class LUTagsDel(TagsLU):
14506 """Delete a list of tags from a given object.
14511 def CheckPrereq(self):
14512 """Check prerequisites.
14514 This checks that we have the given tag.
14517 TagsLU.CheckPrereq(self)
14518 for tag in self.op.tags:
14519 objects.TaggableObject.ValidateTag(tag)
14520 del_tags = frozenset(self.op.tags)
14521 cur_tags = self.target.GetTags()
14523 diff_tags = del_tags - cur_tags
14524 if diff_tags:
14525 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14526 raise errors.OpPrereqError("Tag(s) %s not found" %
14527 (utils.CommaJoin(diff_names), ),
14528 errors.ECODE_NOENT)
14530 def Exec(self, feedback_fn):
14531 """Remove the tag from the object.
14534 for tag in self.op.tags:
14535 self.target.RemoveTag(tag)
14536 self.cfg.Update(self.target, feedback_fn)
14539 class LUTestDelay(NoHooksLU):
14540 """Sleep for a specified amount of time.
14542 This LU sleeps on the master and/or nodes for a specified amount of
14548 def ExpandNames(self):
14549 """Expand names and set required locks.
14551 This expands the node list, if any.
14554 self.needed_locks = {}
14555 if self.op.on_nodes:
14556 # _GetWantedNodes can be used here, but is not always appropriate to use
14557 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14558 # more information.
14559 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14560 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14562 def _TestDelay(self):
14563 """Do the actual sleep.
14566 if self.op.on_master:
14567 if not utils.TestDelay(self.op.duration):
14568 raise errors.OpExecError("Error during master delay test")
14569 if self.op.on_nodes:
14570 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14571 for node, node_result in result.items():
14572 node_result.Raise("Failure during rpc call to node %s" % node)
14574 def Exec(self, feedback_fn):
14575 """Execute the test delay opcode, with the wanted repetitions.
14578 if self.op.repeat == 0:
14579 self._TestDelay()
14580 else:
14581 top_value = self.op.repeat - 1
14582 for i in range(self.op.repeat):
14583 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14584 self._TestDelay()
14587 class LUTestJqueue(NoHooksLU):
14588 """Utility LU to test some aspects of the job queue.
14593 # Must be lower than default timeout for WaitForJobChange to see whether it
14594 # notices changed jobs
14595 _CLIENT_CONNECT_TIMEOUT = 20.0
14596 _CLIENT_CONFIRM_TIMEOUT = 60.0
14599 def _NotifyUsingSocket(cls, cb, errcls):
14600 """Opens a Unix socket and waits for another program to connect.
14603 @param cb: Callback to send socket name to client
14604 @type errcls: class
14605 @param errcls: Exception class to use for errors
14608 # Using a temporary directory as there's no easy way to create temporary
14609 # sockets without writing a custom loop around tempfile.mktemp and
14610 # socket.bind
14611 tmpdir = tempfile.mkdtemp()
14612 try:
14613 tmpsock = utils.PathJoin(tmpdir, "sock")
14615 logging.debug("Creating temporary socket at %s", tmpsock)
14616 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14617 try:
14618 sock.bind(tmpsock)
14619 sock.listen(1)
14621 # Send details to client
14622 cb(tmpsock)
14624 # Wait for client to connect before continuing
14625 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14626 try:
14627 (conn, _) = sock.accept()
14628 except socket.error, err:
14629 raise errcls("Client didn't connect in time (%s)" % err)
14630 finally:
14631 sock.close()
14632 finally:
14633 # Remove as soon as client is connected
14634 shutil.rmtree(tmpdir)
14636 # Wait for client to close
14639 # pylint: disable=E1101
14640 # Instance of '_socketobject' has no ... member
14641 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14643 except socket.error, err:
14644 raise errcls("Client failed to confirm notification (%s)" % err)
14648 def _SendNotification(self, test, arg, sockname):
14649 """Sends a notification to the client.
14652 @param test: Test name
14653 @param arg: Test argument (depends on test)
14654 @type sockname: string
14655 @param sockname: Socket path
14658 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14660 def _Notify(self, prereq, test, arg):
14661 """Notifies the client of a test.
14664 @param prereq: Whether this is a prereq-phase test
14666 @param test: Test name
14667 @param arg: Test argument (depends on test)
14671 errcls = errors.OpPrereqError
14673 errcls = errors.OpExecError
14675 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14679 def CheckArguments(self):
14680 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14681 self.expandnames_calls = 0
14683 def ExpandNames(self):
14684 checkargs_calls = getattr(self, "checkargs_calls", 0)
14685 if checkargs_calls < 1:
14686 raise errors.ProgrammerError("CheckArguments was not called")
14688 self.expandnames_calls += 1
14690 if self.op.notify_waitlock:
14691 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14693 self.LogInfo("Expanding names")
14695 # Get lock on master node (just to get a lock, not for a particular reason)
14696 self.needed_locks = {
14697 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14700 def Exec(self, feedback_fn):
14701 if self.expandnames_calls < 1:
14702 raise errors.ProgrammerError("ExpandNames was not called")
14704 if self.op.notify_exec:
14705 self._Notify(False, constants.JQT_EXEC, None)
14707 self.LogInfo("Executing")
14709 if self.op.log_messages:
14710 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14711 for idx, msg in enumerate(self.op.log_messages):
14712 self.LogInfo("Sending log message %s", idx + 1)
14713 feedback_fn(constants.JQT_MSGPREFIX + msg)
14714 # Report how many test messages have been sent
14715 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14718 raise errors.OpExecError("Opcode failure was requested")
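# Illustrative sketch, not part of the original module: the handshake used by
# _NotifyUsingSocket above boils down to binding a throw-away AF_UNIX socket,
# telling the client its path, accepting with a timeout and then waiting for
# the client to close the connection as its confirmation.  A minimal
# standalone pair of helpers with hypothetical names (the real method also
# distinguishes prereq/exec phases and raises Ganeti-specific exceptions):
import os
import shutil
import socket
import tempfile

def _example_wait_for_client(notify_fn, connect_timeout=20.0,
                             confirm_timeout=60.0):
  """Bind a temporary Unix socket, announce it and wait for the peer."""
  tmpdir = tempfile.mkdtemp()
  try:
    path = os.path.join(tmpdir, "sock")
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.bind(path)
      sock.listen(1)
      notify_fn(path)            # tell the client where to connect
      sock.settimeout(connect_timeout)
      (conn, _) = sock.accept()  # raises socket.timeout if nobody connects
    finally:
      sock.close()
    try:
      conn.settimeout(confirm_timeout)
      conn.recv(1)               # the peer closing counts as confirmation
    finally:
      conn.close()
  finally:
    shutil.rmtree(tmpdir)

def _example_confirm_notification(path):
  """Client side of the handshake: connect, then close again to confirm."""
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    sock.connect(path)
  finally:
    sock.close()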
14723 class IAllocator(object):
14724 """IAllocator framework.
14726 An IAllocator instance has four sets of attributes:
14727 - cfg that is needed to query the cluster
14728 - input data (all members of the _KEYS class attribute are required)
14729 - four buffer attributes (in|out_data|text), that represent the
14730 input (to the external script) in text and data structure format,
14731 and the output from it, again in two formats
14732 - the result variables from the script (success, info, result) for
14736 # pylint: disable=R0902
14737 # lots of instance attributes
14739 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14741 self.rpc = rpc_runner
14742 # init buffer variables
14743 self.in_text = self.out_text = self.in_data = self.out_data = None
14744 # init all input fields so that pylint is happy
14746 self.memory = self.disks = self.disk_template = self.spindle_use = None
14747 self.os = self.tags = self.nics = self.vcpus = None
14748 self.hypervisor = None
14749 self.relocate_from = None
14751 self.instances = None
14752 self.evac_mode = None
14753 self.target_groups = []
14755 self.required_nodes = None
14756 # init result fields
14757 self.success = self.info = self.result = None
14760 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14762 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14763 " IAllocator" % self.mode)
14765 keyset = [n for (n, _) in keydata]
14768 if key not in keyset:
14769 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14770 " IAllocator" % key)
14771 setattr(self, key, kwargs[key])
14774 if key not in kwargs:
14775 raise errors.ProgrammerError("Missing input parameter '%s' to"
14776 " IAllocator" % key)
14777 self._BuildInputData(compat.partial(fn, self), keydata)
14779 def _ComputeClusterData(self):
14780 """Compute the generic allocator input data.
14782 This is the data that is independent of the actual operation.
14786 cluster_info = cfg.GetClusterInfo()
14789 "version": constants.IALLOCATOR_VERSION,
14790 "cluster_name": cfg.GetClusterName(),
14791 "cluster_tags": list(cluster_info.GetTags()),
14792 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14793 "ipolicy": cluster_info.ipolicy,
14795 ninfo = cfg.GetAllNodesInfo()
14796 iinfo = cfg.GetAllInstancesInfo().values()
14797 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14800 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14802 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14803 hypervisor_name = self.hypervisor
14804 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14805 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14807 hypervisor_name = cluster_info.primary_hypervisor
14809 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14812 self.rpc.call_all_instances_info(node_list,
14813 cluster_info.enabled_hypervisors)
14815 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14817 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14818 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14819 i_list, config_ndata)
14820 assert len(data["nodes"]) == len(ninfo), \
14821 "Incomplete node data computed"
14823 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14825 self.in_data = data
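# Illustrative sketch, not part of the original module: a heavily trimmed
# example of the structure assembled above, using placeholder values and only
# keys that appear in this module; real input carries many more fields per
# node and per instance.
_EXAMPLE_IALLOCATOR_INPUT = {
  "version": 2,                       # assumed value of IALLOCATOR_VERSION
  "cluster_name": "cluster.example.com",
  "enabled_hypervisors": ["xen-pvm"],
  "nodegroups": {
    "uuid-group1": {"name": "default", "alloc_policy": "preferred"},
  },
  "nodes": {
    "node1.example.com": {
      "group": "uuid-group1",
      "offline": False,
      "drained": False,
      "total_memory": 32768,
      "free_memory": 20480,
      "total_disk": 512000,
      "free_disk": 256000,
      "total_cpus": 8,
    },
  },
  "instances": {
    "inst1.example.com": {
      "admin_state": "up",
      "vcpus": 2,
      "memory": 2048,
      "nodes": ["node1.example.com"],
      "disk_template": "drbd",
      "hypervisor": "xen-pvm",
    },
  },
  # a mode-specific "request" section is merged in later by _BuildInputData
}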
14828 def _ComputeNodeGroupData(cfg):
14829 """Compute node groups data.
14832 cluster = cfg.GetClusterInfo()
14833 ng = dict((guuid, {
14834 "name": gdata.name,
14835 "alloc_policy": gdata.alloc_policy,
14836 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14838 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14843 def _ComputeBasicNodeData(cfg, node_cfg):
14844 """Compute global node data.
14847 @returns: a dict mapping each node name to its dict of static node attributes
14850 # fill in static (config-based) values
14851 node_results = dict((ninfo.name, {
14852 "tags": list(ninfo.GetTags()),
14853 "primary_ip": ninfo.primary_ip,
14854 "secondary_ip": ninfo.secondary_ip,
14855 "offline": ninfo.offline,
14856 "drained": ninfo.drained,
14857 "master_candidate": ninfo.master_candidate,
14858 "group": ninfo.group,
14859 "master_capable": ninfo.master_capable,
14860 "vm_capable": ninfo.vm_capable,
14861 "ndparams": cfg.GetNdParams(ninfo),
14863 for ninfo in node_cfg.values())
14865 return node_results
14868 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14870 """Compute global node data.
14872 @param node_results: the basic node structures as filled from the config
14875 #TODO(dynmem): compute the right data on MAX and MIN memory
14876 # make a copy of the current dict
14877 node_results = dict(node_results)
14878 for nname, nresult in node_data.items():
14879 assert nname in node_results, "Missing basic data for node %s" % nname
14880 ninfo = node_cfg[nname]
14882 if not (ninfo.offline or ninfo.drained):
14883 nresult.Raise("Can't get data for node %s" % nname)
14884 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14886 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14888 for attr in ["memory_total", "memory_free", "memory_dom0",
14889 "vg_size", "vg_free", "cpu_total"]:
14890 if attr not in remote_info:
14891 raise errors.OpExecError("Node '%s' didn't return attribute"
14892 " '%s'" % (nname, attr))
14893 if not isinstance(remote_info[attr], int):
14894 raise errors.OpExecError("Node '%s' returned invalid value"
14896 (nname, attr, remote_info[attr]))
14897 # compute memory used by primary instances
14898 i_p_mem = i_p_up_mem = 0
14899 for iinfo, beinfo in i_list:
14900 if iinfo.primary_node == nname:
14901 i_p_mem += beinfo[constants.BE_MAXMEM]
14902 if iinfo.name not in node_iinfo[nname].payload:
14905 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14906 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14907 remote_info["memory_free"] -= max(0, i_mem_diff)
14909 if iinfo.admin_state == constants.ADMINST_UP:
14910 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14912 # compute memory used by instances
14914 "total_memory": remote_info["memory_total"],
14915 "reserved_memory": remote_info["memory_dom0"],
14916 "free_memory": remote_info["memory_free"],
14917 "total_disk": remote_info["vg_size"],
14918 "free_disk": remote_info["vg_free"],
14919 "total_cpus": remote_info["cpu_total"],
14920 "i_pri_memory": i_p_mem,
14921 "i_pri_up_memory": i_p_up_mem,
14923 pnr_dyn.update(node_results[nname])
14924 node_results[nname] = pnr_dyn
14926 return node_results
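# Illustrative sketch, not part of the original module: the adjustment above
# lowers a node's reported free memory by the amount each primary instance
# reported by the hypervisor could still grow into (configured maximum minus
# current usage, never below zero).  A standalone version of that arithmetic
# with hypothetical names:
def _example_adjusted_free_memory(free_memory, instances):
  """instances: list of (configured_max_mem, currently_used_mem) tuples."""
  for max_mem, used_mem in instances:
    free_memory -= max(0, max_mem - used_mem)
  return free_memory

# Example: a node reporting 8192 MB free with one instance configured for
# 4096 MB but currently using 1024 MB is treated as having 5120 MB free.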
14929 def _ComputeInstanceData(cluster_info, i_list):
14930 """Compute global instance data.
14934 for iinfo, beinfo in i_list:
14936 for nic in iinfo.nics:
14937 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14941 "mode": filled_params[constants.NIC_MODE],
14942 "link": filled_params[constants.NIC_LINK],
14944 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14945 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14946 nic_data.append(nic_dict)
14948 "tags": list(iinfo.GetTags()),
14949 "admin_state": iinfo.admin_state,
14950 "vcpus": beinfo[constants.BE_VCPUS],
14951 "memory": beinfo[constants.BE_MAXMEM],
14952 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14954 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14956 "disks": [{constants.IDISK_SIZE: dsk.size,
14957 constants.IDISK_MODE: dsk.mode}
14958 for dsk in iinfo.disks],
14959 "disk_template": iinfo.disk_template,
14960 "hypervisor": iinfo.hypervisor,
14962 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14964 instance_data[iinfo.name] = pir
14966 return instance_data
14968 def _AddNewInstance(self):
14969 """Add new instance data to allocator structure.
14971 This in combination with _ComputeClusterData will create the
14972 correct structure needed as input for the allocator.
14974 The checks for the completeness of the opcode must have already been done.
14978 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14980 if self.disk_template in constants.DTS_INT_MIRROR:
14981 self.required_nodes = 2
14983 self.required_nodes = 1
14987 "disk_template": self.disk_template,
14990 "vcpus": self.vcpus,
14991 "memory": self.memory,
14992 "spindle_use": self.spindle_use,
14993 "disks": self.disks,
14994 "disk_space_total": disk_space,
14996 "required_nodes": self.required_nodes,
14997 "hypervisor": self.hypervisor,
15002 def _AddRelocateInstance(self):
15003 """Add relocate instance data to allocator structure.
15005 This in combination with _ComputeClusterData will create the
15006 correct structure needed as input for the allocator.
15008 The checks for the completeness of the opcode must have already been done.
15012 instance = self.cfg.GetInstanceInfo(self.name)
15013 if instance is None:
15014 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15015 " IAllocator" % self.name)
15017 if instance.disk_template not in constants.DTS_MIRRORED:
15018 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15019 errors.ECODE_INVAL)
15021 if instance.disk_template in constants.DTS_INT_MIRROR and \
15022 len(instance.secondary_nodes) != 1:
15023 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
15024 errors.ECODE_STATE)
15026 self.required_nodes = 1
15027 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15028 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15032 "disk_space_total": disk_space,
15033 "required_nodes": self.required_nodes,
15034 "relocate_from": self.relocate_from,
15038 def _AddNodeEvacuate(self):
15039 """Get data for node-evacuate requests.
15043 "instances": self.instances,
15044 "evac_mode": self.evac_mode,
15047 def _AddChangeGroup(self):
15048 """Get data for change-group requests.
15052 "instances": self.instances,
15053 "target_groups": self.target_groups,
15056 def _BuildInputData(self, fn, keydata):
15057 """Build input data structures.
15060 self._ComputeClusterData()
15063 request["type"] = self.mode
15064 for keyname, keytype in keydata:
15065 if keyname not in request:
15066 raise errors.ProgrammerError("Request parameter %s is missing" %
15068 val = request[keyname]
15069 if not keytype(val):
15070 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15071 " validation, value %s, expected"
15072 " type %s" % (keyname, val, keytype))
15073 self.in_data["request"] = request
15075 self.in_text = serializer.Dump(self.in_data)
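# Illustrative sketch, not part of the original module: the loop above
# validates the request against (name, check) pairs where each check is a
# callable returning a boolean, in the style of the ht.T* validators.  The
# same pattern with plain callables instead of ht:
def _example_validate_request(request, keydata):
  """Raise ValueError if a key is missing or fails its check."""
  for keyname, keytype in keydata:
    if keyname not in request:
      raise ValueError("Request parameter %s is missing" % keyname)
    if not keytype(request[keyname]):
      raise ValueError("Request parameter %s failed validation" % keyname)

# Example:
#   _example_validate_request({"name": "inst1", "vcpus": 2},
#                             [("name", lambda v: isinstance(v, str)),
#                              ("vcpus", lambda v: isinstance(v, int))])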
15077 _STRING_LIST = ht.TListOf(ht.TString)
15078 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15079 # pylint: disable=E1101
15080 # Class '...' has no 'OP_ID' member
15081 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15082 opcodes.OpInstanceMigrate.OP_ID,
15083 opcodes.OpInstanceReplaceDisks.OP_ID])
15087 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15088 ht.TItems([ht.TNonEmptyString,
15089 ht.TNonEmptyString,
15090 ht.TListOf(ht.TNonEmptyString),
15093 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15094 ht.TItems([ht.TNonEmptyString,
15097 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15098 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
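# Illustrative sketch, not part of the original module: a value accepted by
# the _NEVAC_RESULT check above is a three-element list of (moved, failed,
# jobs).  Placeholder data; a real allocator would fill in the remaining
# opcode parameters besides OP_ID:
_EXAMPLE_NEVAC_RESULT = [
  # moved: (instance name, target group, new node names)
  [["inst1.example.com", "group2", ["node3.example.com"]]],
  # failed: (instance name, reason)
  [["inst2.example.com", "instance cannot be moved"]],
  # jobs: lists of opcodes, restricted to failover/migrate/replace-disks
  [[{"OP_ID": "OP_INSTANCE_MIGRATE"}]],  # OP_ID of opcodes.OpInstanceMigrate
]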
15101 constants.IALLOCATOR_MODE_ALLOC:
15104 ("name", ht.TString),
15105 ("memory", ht.TInt),
15106 ("spindle_use", ht.TInt),
15107 ("disks", ht.TListOf(ht.TDict)),
15108 ("disk_template", ht.TString),
15109 ("os", ht.TString),
15110 ("tags", _STRING_LIST),
15111 ("nics", ht.TListOf(ht.TDict)),
15112 ("vcpus", ht.TInt),
15113 ("hypervisor", ht.TString),
15115 constants.IALLOCATOR_MODE_RELOC:
15116 (_AddRelocateInstance,
15117 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15119 constants.IALLOCATOR_MODE_NODE_EVAC:
15120 (_AddNodeEvacuate, [
15121 ("instances", _STRING_LIST),
15122 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15124 constants.IALLOCATOR_MODE_CHG_GROUP:
15125 (_AddChangeGroup, [
15126 ("instances", _STRING_LIST),
15127 ("target_groups", _STRING_LIST),
15131 def Run(self, name, validate=True, call_fn=None):
15132 """Run an instance allocator and return the results.
15135 if call_fn is None:
15136 call_fn = self.rpc.call_iallocator_runner
15138 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15139 result.Raise("Failure while running the iallocator script")
15141 self.out_text = result.payload
15143 self._ValidateResult()
15145 def _ValidateResult(self):
15146 """Process the allocator results.
15148 This will process and if successful save the result in
15149 self.out_data and the other parameters.
15153 rdict = serializer.Load(self.out_text)
15154 except Exception, err:
15155 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15157 if not isinstance(rdict, dict):
15158 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15160 # TODO: remove backwards compatibility in later versions
15161 if "nodes" in rdict and "result" not in rdict:
15162 rdict["result"] = rdict["nodes"]
15165 for key in "success", "info", "result":
15166 if key not in rdict:
15167 raise errors.OpExecError("Can't parse iallocator results:"
15168 " missing key '%s'" % key)
15169 setattr(self, key, rdict[key])
15171 if not self._result_check(self.result):
15172 raise errors.OpExecError("Iallocator returned invalid result,"
15173 " expected %s, got %s" %
15174 (self._result_check, self.result),
15175 errors.ECODE_INVAL)
15177 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15178 assert self.relocate_from is not None
15179 assert self.required_nodes == 1
15181 node2group = dict((name, ndata["group"])
15182 for (name, ndata) in self.in_data["nodes"].items())
15184 fn = compat.partial(self._NodesToGroups, node2group,
15185 self.in_data["nodegroups"])
15187 instance = self.cfg.GetInstanceInfo(self.name)
15188 request_groups = fn(self.relocate_from + [instance.primary_node])
15189 result_groups = fn(rdict["result"] + [instance.primary_node])
15191 if self.success and not set(result_groups).issubset(request_groups):
15192 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15193 " differ from original groups (%s)" %
15194 (utils.CommaJoin(result_groups),
15195 utils.CommaJoin(request_groups)))
15197 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15198 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15200 self.out_data = rdict
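# Illustrative sketch, not part of the original module: the smallest reply the
# validation above accepts carries the three mandatory keys; for an allocation
# request the "result" is typically the list of chosen node names (legacy
# scripts that still return it under "nodes" are remapped above).  Placeholder
# data:
_EXAMPLE_IALLOCATOR_REPLY = {
  "success": True,
  "info": "allocation successful",
  "result": ["node2.example.com", "node4.example.com"],
}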
15203 def _NodesToGroups(node2group, groups, nodes):
15204 """Returns a list of unique group names for a list of nodes.
15206 @type node2group: dict
15207 @param node2group: Map from node name to group UUID
15209 @param groups: Group information
15211 @param nodes: Node names
15218 group_uuid = node2group[node]
15220 # Ignore unknown node
15224 group = groups[group_uuid]
15226 # Can't find group, let's use UUID
15227 group_name = group_uuid
15229 group_name = group["name"]
15231 result.add(group_name)
15233 return sorted(result)
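# Illustrative sketch, not part of the original module: _NodesToGroups resolves
# node names to group names, keeps the UUID when the group itself is unknown
# and silently drops unknown nodes.  A standalone version of that fallback
# logic, assuming the behaviour sketched above:
def _example_nodes_to_groups(node2group, groups, nodes):
  """Return the sorted, de-duplicated group names for the given nodes."""
  result = set()
  for node in nodes:
    if node not in node2group:
      continue                            # unknown node, ignore
    group_uuid = node2group[node]
    group = groups.get(group_uuid)
    result.add(group["name"] if group else group_uuid)
  return sorted(result)

# Example (placeholder data):
#   _example_nodes_to_groups({"node1": "uuid-a", "node3": "uuid-b"},
#                            {"uuid-a": {"name": "default"}},
#                            ["node1", "node3", "node9"])
#   ==> ["default", "uuid-b"]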
15236 class LUTestAllocator(NoHooksLU):
15237 """Run allocator tests.
15239 This LU runs the allocator tests
15242 def CheckPrereq(self):
15243 """Check prerequisites.
15245 This checks the opcode parameters depending on the direction and mode test.
15248 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15249 for attr in ["memory", "disks", "disk_template",
15250 "os", "tags", "nics", "vcpus"]:
15251 if not hasattr(self.op, attr):
15252 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15253 attr, errors.ECODE_INVAL)
15254 iname = self.cfg.ExpandInstanceName(self.op.name)
15255 if iname is not None:
15256 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15257 iname, errors.ECODE_EXISTS)
15258 if not isinstance(self.op.nics, list):
15259 raise errors.OpPrereqError("Invalid parameter 'nics'",
15260 errors.ECODE_INVAL)
15261 if not isinstance(self.op.disks, list):
15262 raise errors.OpPrereqError("Invalid parameter 'disks'",
15263 errors.ECODE_INVAL)
15264 for row in self.op.disks:
15265 if (not isinstance(row, dict) or
15266 constants.IDISK_SIZE not in row or
15267 not isinstance(row[constants.IDISK_SIZE], int) or
15268 constants.IDISK_MODE not in row or
15269 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15270 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15271 " parameter", errors.ECODE_INVAL)
15272 if self.op.hypervisor is None:
15273 self.op.hypervisor = self.cfg.GetHypervisorType()
15274 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15275 fname = _ExpandInstanceName(self.cfg, self.op.name)
15276 self.op.name = fname
15277 self.relocate_from = \
15278 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15279 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15280 constants.IALLOCATOR_MODE_NODE_EVAC):
15281 if not self.op.instances:
15282 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15283 self.op.instances = _GetWantedInstances(self, self.op.instances)
15285 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15286 self.op.mode, errors.ECODE_INVAL)
15288 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15289 if self.op.allocator is None:
15290 raise errors.OpPrereqError("Missing allocator name",
15291 errors.ECODE_INVAL)
15292 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15293 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15294 self.op.direction, errors.ECODE_INVAL)
15296 def Exec(self, feedback_fn):
15297 """Run the allocator test.
15300 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15301 ial = IAllocator(self.cfg, self.rpc,
15304 memory=self.op.memory,
15305 disks=self.op.disks,
15306 disk_template=self.op.disk_template,
15310 vcpus=self.op.vcpus,
15311 hypervisor=self.op.hypervisor,
15312 spindle_use=self.op.spindle_use,
15314 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15315 ial = IAllocator(self.cfg, self.rpc,
15318 relocate_from=list(self.relocate_from),
15320 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15321 ial = IAllocator(self.cfg, self.rpc,
15323 instances=self.op.instances,
15324 target_groups=self.op.target_groups)
15325 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15326 ial = IAllocator(self.cfg, self.rpc,
15328 instances=self.op.instances,
15329 evac_mode=self.op.evac_mode)
15331 raise errors.ProgrammerError("Unhandled mode %s in"
15332 " LUTestAllocator.Exec" % self.op.mode)
15334 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15335 result = ial.in_text
15337 ial.Run(self.op.allocator, validate=False)
15338 result = ial.out_text
15342 #: Query type implementations
15344 constants.QR_CLUSTER: _ClusterQuery,
15345 constants.QR_INSTANCE: _InstanceQuery,
15346 constants.QR_NODE: _NodeQuery,
15347 constants.QR_GROUP: _GroupQuery,
15348 constants.QR_OS: _OsQuery,
15349 constants.QR_EXPORT: _ExportQuery,
15352 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15355 def _GetQueryImplementation(name):
15356 """Returns the implementation for a query type.
15358 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15362 return _QUERY_IMPL[name]
15364 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15365 errors.ECODE_INVAL)
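# Illustrative sketch, not part of the original module: the assert above keeps
# the dispatch table and the set of opcode-queryable resources in lock-step,
# while _GetQueryImplementation turns a bad resource name into an
# OpPrereqError instead of a bare KeyError.  The consistency check as a
# standalone helper with hypothetical names:
def _example_check_dispatch_table(table, expected_keys):
  """Return True iff the table handles exactly the expected resources."""
  return set(table.keys()) == set(expected_keys)

# Example: _example_check_dispatch_table({"node": object, "instance": object},
#                                        ["node", "instance"]) is True.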