4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
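A minimal usage sketch (the OpTestDelay opcode and the extra keyword
shown here are purely illustrative)::

  # from an LU's Exec(): submit one follow-up job and return extra data
  return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]],
                        warning="follow-up job submitted")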
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op validity.
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing them separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
180 The function is allowed to change the self.op attribute so that
181 later methods need no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same time.
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
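A sketch of the usual pattern, mirroring L{_LockInstancesNodes} (the
level chosen here is just an example)::

  # in ExpandNames: node locks cannot be computed yet
  self.needed_locks[locking.LEVEL_NODE] = []
  self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  # in DeclareLocks: instance locks are held, so nodes can be derived
  if level == locking.LEVEL_NODE:
    self._LockInstancesNodes()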
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are allowed.
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no such nodes, an
318 empty list must be returned (not None).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
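A hedged sketch of an overriding LU (purely illustrative)::

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    if phase == constants.HOOKS_PHASE_POST:
      feedback_fn("* post-hooks have run")
    # keep the previous Exec result unchanged
    return lu_result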
344 # API must be kept, thus we ignore the "unused argument" and
345 # "could be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called from DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we really have been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @type use_default: boolean
755 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @type use_none: boolean
758 @param use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
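Illustrative behaviour (parameter names and values are made up)::

  old = {"mem": 128, "vcpus": 2}
  upd = {"mem": constants.VALUE_DEFAULT, "vcpus": 4}
  # with use_default=True, "mem" reverts to its default (the key is
  # removed), while "vcpus" is overridden
  _GetUpdatedParams(old, upd)  # -> {"vcpus": 4}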
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of a instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster'" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
835 new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846 """Combines the hv state from an opcode with the one of the object
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868 """Combines the disk state from an opcode with the one of the object
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
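Typical call, as a sketch (which locks to keep depends on the LU)::

  # keep only the instance's primary node lock, release all other
  # node locks owned by this LU
  _ReleaseLocks(self, locking.LEVEL_NODE, keep=[instance.primary_node])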
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905 should_release = names.__contains__
907 should_release = lambda name: name not in keep
909 should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 # pylint: disable=W0702
962 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of global parameters.
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node is not supporting the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
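Example invocation, as a sketch (the message is illustrative)::

  # fail unless the instance is administratively down
  _CheckInstanceState(self, instance, INSTANCE_DOWN,
                      msg="cannot reinstall")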
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
1120 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1121 """Computes if value is in the desired range.
1123 @param name: name of the parameter for which we perform the check
1124 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1126 @param ipolicy: dictionary containing min, max and std values
1127 @param value: actual value that we want to use
1128 @return: None or element not meeting the criteria
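A hedged example (the policy limits shown are invented)::

  # with a policy whose min/max for constants.ISPEC_MEM_SIZE are 128/512,
  # an out-of-range value yields a violation message:
  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 1024)
  # -> "... value 1024 is not in range [128, 512]"
  # while an in-range value (or VALUE_AUTO) returns None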
1132 if value in [None, constants.VALUE_AUTO]:
1134 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1135 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1136 if value > max_v or min_v > value:
1138 fqn = "%s/%s" % (name, qualifier)
1141 return ("%s value %s is not in range [%s, %s]" %
1142 (fqn, value, min_v, max_v))
1146 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1147 nic_count, disk_sizes, spindle_use,
1148 _compute_fn=_ComputeMinMaxSpec):
1149 """Verifies ipolicy against provided specs.
1152 @param ipolicy: The ipolicy
1154 @param mem_size: The memory size
1155 @type cpu_count: int
1156 @param cpu_count: Used cpu cores
1157 @type disk_count: int
1158 @param disk_count: Number of disks used
1159 @type nic_count: int
1160 @param nic_count: Number of nics used
1161 @type disk_sizes: list of ints
1162 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1163 @type spindle_use: int
1164 @param spindle_use: The number of spindles this instance uses
1165 @param _compute_fn: The compute function (unittest only)
1166 @return: A list of violations, or an empty list if no violations are found
1169 assert disk_count == len(disk_sizes)
1172 (constants.ISPEC_MEM_SIZE, "", mem_size),
1173 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1174 (constants.ISPEC_DISK_COUNT, "", disk_count),
1175 (constants.ISPEC_NIC_COUNT, "", nic_count),
1176 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1177 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1178 for idx, d in enumerate(disk_sizes)]
1181 (_compute_fn(name, qualifier, ipolicy, value)
1182 for (name, qualifier, value) in test_settings))
1185 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1186 _compute_fn=_ComputeIPolicySpecViolation):
1187 """Compute if instance meets the specs of ipolicy.
1190 @param ipolicy: The ipolicy to verify against
1191 @type instance: L{objects.Instance}
1192 @param instance: The instance to verify
1193 @param _compute_fn: The function to verify ipolicy (unittest only)
1194 @see: L{_ComputeIPolicySpecViolation}
1197 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1198 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1199 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1200 disk_count = len(instance.disks)
1201 disk_sizes = [disk.size for disk in instance.disks]
1202 nic_count = len(instance.nics)
1204 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1205 disk_sizes, spindle_use)
1208 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1209 _compute_fn=_ComputeIPolicySpecViolation):
1210 """Compute if instance specs meets the specs of ipolicy.
1213 @param ipolicy: The ipolicy to verify against
1214 @type instance_spec: dict
1215 @param instance_spec: The instance spec to verify
1216 @param _compute_fn: The function to verify ipolicy (unittest only)
1217 @see: L{_ComputeIPolicySpecViolation}
1220 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1221 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1222 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1223 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1224 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1225 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1227 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1228 disk_sizes, spindle_use)
1231 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1233 _compute_fn=_ComputeIPolicyInstanceViolation):
1234 """Compute if instance meets the specs of the new target group.
1236 @param ipolicy: The ipolicy to verify
1237 @param instance: The instance object to verify
1238 @param current_group: The current group of the instance
1239 @param target_group: The new group of the instance
1240 @param _compute_fn: The function to verify ipolicy (unittest only)
1241 @see: L{_ComputeIPolicySpecViolation}
1244 if current_group == target_group:
1247 return _compute_fn(ipolicy, instance)
1250 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1251 _compute_fn=_ComputeIPolicyNodeViolation):
1252 """Checks that the target node is correct in terms of instance policy.
1254 @param ipolicy: The ipolicy to verify
1255 @param instance: The instance object to verify
1256 @param node: The new node to relocate
1257 @param ignore: Ignore violations of the ipolicy
1258 @param _compute_fn: The function to verify ipolicy (unittest only)
1259 @see: L{_ComputeIPolicySpecViolation}
1262 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1263 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1266 msg = ("Instance does not meet target node group's (%s) instance"
1267 " policy: %s") % (node.group, utils.CommaJoin(res))
1271 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1274 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1275 """Computes a set of any instances that would violate the new ipolicy.
1277 @param old_ipolicy: The current (still in-place) ipolicy
1278 @param new_ipolicy: The new (to become) ipolicy
1279 @param instances: List of instances to verify
1280 @return: A list of instances which violate the new ipolicy but did not before
1284 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1285 _ComputeViolatingInstances(old_ipolicy, instances))
1288 def _ExpandItemName(fn, name, kind):
1289 """Expand an item name.
1291 @param fn: the function to use for expansion
1292 @param name: requested item name
1293 @param kind: text description ('Node' or 'Instance')
1294 @return: the resolved (full) name
1295 @raise errors.OpPrereqError: if the item is not found
1298 full_name = fn(name)
1299 if full_name is None:
1300 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1305 def _ExpandNodeName(cfg, name):
1306 """Wrapper over L{_ExpandItemName} for nodes."""
1307 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1310 def _ExpandInstanceName(cfg, name):
1311 """Wrapper over L{_ExpandItemName} for instance."""
1312 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1315 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1316 minmem, maxmem, vcpus, nics, disk_template, disks,
1317 bep, hvp, hypervisor_name, tags):
1318 """Builds instance related env variables for hooks
1320 This builds the hook environment from individual variables.
1323 @param name: the name of the instance
1324 @type primary_node: string
1325 @param primary_node: the name of the instance's primary node
1326 @type secondary_nodes: list
1327 @param secondary_nodes: list of secondary nodes as strings
1328 @type os_type: string
1329 @param os_type: the name of the instance's OS
1330 @type status: string
1331 @param status: the desired status of the instance
1332 @type minmem: string
1333 @param minmem: the minimum memory size of the instance
1334 @type maxmem: string
1335 @param maxmem: the maximum memory size of the instance
1337 @param vcpus: the count of VCPUs the instance has
1339 @param nics: list of tuples (ip, mac, mode, link) representing
1340 the NICs the instance has
1341 @type disk_template: string
1342 @param disk_template: the disk template of the instance
1344 @param disks: the list of (size, mode) pairs
1346 @param bep: the backend parameters for the instance
1348 @param hvp: the hypervisor parameters for the instance
1349 @type hypervisor_name: string
1350 @param hypervisor_name: the hypervisor for the instance
1352 @param tags: list of instance tags as strings
1354 @return: the hook environment for this instance
1359 "INSTANCE_NAME": name,
1360 "INSTANCE_PRIMARY": primary_node,
1361 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1362 "INSTANCE_OS_TYPE": os_type,
1363 "INSTANCE_STATUS": status,
1364 "INSTANCE_MINMEM": minmem,
1365 "INSTANCE_MAXMEM": maxmem,
1366 # TODO(2.7) remove deprecated "memory" value
1367 "INSTANCE_MEMORY": maxmem,
1368 "INSTANCE_VCPUS": vcpus,
1369 "INSTANCE_DISK_TEMPLATE": disk_template,
1370 "INSTANCE_HYPERVISOR": hypervisor_name,
1373 nic_count = len(nics)
1374 for idx, (ip, mac, mode, link) in enumerate(nics):
1377 env["INSTANCE_NIC%d_IP" % idx] = ip
1378 env["INSTANCE_NIC%d_MAC" % idx] = mac
1379 env["INSTANCE_NIC%d_MODE" % idx] = mode
1380 env["INSTANCE_NIC%d_LINK" % idx] = link
1381 if mode == constants.NIC_MODE_BRIDGED:
1382 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1386 env["INSTANCE_NIC_COUNT"] = nic_count
1389 disk_count = len(disks)
1390 for idx, (size, mode) in enumerate(disks):
1391 env["INSTANCE_DISK%d_SIZE" % idx] = size
1392 env["INSTANCE_DISK%d_MODE" % idx] = mode
1396 env["INSTANCE_DISK_COUNT"] = disk_count
1401 env["INSTANCE_TAGS"] = " ".join(tags)
1403 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1404 for key, value in source.items():
1405 env["INSTANCE_%s_%s" % (kind, key)] = value
1410 def _NICListToTuple(lu, nics):
1411 """Build a list of nic information tuples.
1413 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1414 value in LUInstanceQueryData.
1416 @type lu: L{LogicalUnit}
1417 @param lu: the logical unit on whose behalf we execute
1418 @type nics: list of L{objects.NIC}
1419 @param nics: list of nics to convert to hooks tuples
1423 cluster = lu.cfg.GetClusterInfo()
1427 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1428 mode = filled_params[constants.NIC_MODE]
1429 link = filled_params[constants.NIC_LINK]
1430 hooks_nics.append((ip, mac, mode, link))
1434 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1435 """Builds instance related env variables for hooks from an object.
1437 @type lu: L{LogicalUnit}
1438 @param lu: the logical unit on whose behalf we execute
1439 @type instance: L{objects.Instance}
1440 @param instance: the instance for which we should build the
1442 @type override: dict
1443 @param override: dictionary with key/values that will override
1446 @return: the hook environment dictionary
1449 cluster = lu.cfg.GetClusterInfo()
1450 bep = cluster.FillBE(instance)
1451 hvp = cluster.FillHV(instance)
1453 "name": instance.name,
1454 "primary_node": instance.primary_node,
1455 "secondary_nodes": instance.secondary_nodes,
1456 "os_type": instance.os,
1457 "status": instance.admin_state,
1458 "maxmem": bep[constants.BE_MAXMEM],
1459 "minmem": bep[constants.BE_MINMEM],
1460 "vcpus": bep[constants.BE_VCPUS],
1461 "nics": _NICListToTuple(lu, instance.nics),
1462 "disk_template": instance.disk_template,
1463 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1466 "hypervisor_name": instance.hypervisor,
1467 "tags": instance.tags,
1470 args.update(override)
1471 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1474 def _AdjustCandidatePool(lu, exceptions):
1475 """Adjust the candidate pool after node operations.
1478 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1480 lu.LogInfo("Promoted nodes to master candidate role: %s",
1481 utils.CommaJoin(node.name for node in mod_list))
1482 for name in mod_list:
1483 lu.context.ReaddNode(name)
1484 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1486 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490 def _DecideSelfPromotion(lu, exceptions=None):
1491 """Decide whether I should promote myself as a master candidate.
1494 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1495 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496 # the new node will increase mc_max with one, so:
1497 mc_should = min(mc_should + 1, cp_size)
1498 return mc_now < mc_should
1501 def _CalculateGroupIPolicy(cluster, group):
1502 """Calculate instance policy for group.
1505 return cluster.SimpleFillIPolicy(group.ipolicy)
1508 def _ComputeViolatingInstances(ipolicy, instances):
1509 """Computes a set of instances who violates given ipolicy.
1511 @param ipolicy: The ipolicy to verify
1512 @type instances: list of L{objects.Instance}
1513 @param instances: List of instances to verify
1514 @return: A frozenset of instance names violating the ipolicy
1517 return frozenset([inst.name for inst in instances
1518 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1521 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1522 """Check that the brigdes needed by a list of nics exist.
1525 cluster = lu.cfg.GetClusterInfo()
1526 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1527 brlist = [params[constants.NIC_LINK] for params in paramslist
1528 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1530 result = lu.rpc.call_bridges_exist(target_node, brlist)
1531 result.Raise("Error checking bridges on destination node '%s'" %
1532 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1535 def _CheckInstanceBridgesExist(lu, instance, node=None):
1536 """Check that the brigdes needed by an instance exist.
1540 node = instance.primary_node
1541 _CheckNicsBridgesExist(lu, instance.nics, node)
1544 def _CheckOSVariant(os_obj, name):
1545 """Check whether an OS name conforms to the os variants specification.
1547 @type os_obj: L{objects.OS}
1548 @param os_obj: OS object to check
1550 @param name: OS name passed by the user, to check for validity
1553 variant = objects.OS.GetVariant(name)
1554 if not os_obj.supported_variants:
1556 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1557 " passed)" % (os_obj.name, variant),
1561 raise errors.OpPrereqError("OS name must include a variant",
1564 if variant not in os_obj.supported_variants:
1565 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1568 def _GetNodeInstancesInner(cfg, fn):
1569 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1572 def _GetNodeInstances(cfg, node_name):
1573 """Returns a list of all primary and secondary instances on a node.
1577 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1580 def _GetNodePrimaryInstances(cfg, node_name):
1581 """Returns primary instances on a node.
1584 return _GetNodeInstancesInner(cfg,
1585 lambda inst: node_name == inst.primary_node)
1588 def _GetNodeSecondaryInstances(cfg, node_name):
1589 """Returns secondary instances on a node.
1592 return _GetNodeInstancesInner(cfg,
1593 lambda inst: node_name in inst.secondary_nodes)
1596 def _GetStorageTypeArgs(cfg, storage_type):
1597 """Returns the arguments for a storage type.
1600 # Special case for file storage
1601 if storage_type == constants.ST_FILE:
1602 # storage.FileStorage wants a list of storage directories
1603 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1608 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1611 for dev in instance.disks:
1612 cfg.SetDiskID(dev, node_name)
1614 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1616 result.Raise("Failed to get disk status from node %s" % node_name,
1617 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1619 for idx, bdev_status in enumerate(result.payload):
1620 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1626 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1627 """Check the sanity of iallocator and node arguments and use the
1628 cluster-wide iallocator if appropriate.
1630 Check that at most one of (iallocator, node) is specified. If none is
1631 specified, then the LU's opcode's iallocator slot is filled with the
1632 cluster-wide default iallocator.
1634 @type iallocator_slot: string
1635 @param iallocator_slot: the name of the opcode iallocator slot
1636 @type node_slot: string
1637 @param node_slot: the name of the opcode target node slot
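Typical use from an LU's CheckArguments, as a sketch (the slot names
depend on the opcode; "iallocator" and "pnode" are just examples)::

  _CheckIAllocatorOrNode(self, "iallocator", "pnode")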
1640 node = getattr(lu.op, node_slot, None)
1641 iallocator = getattr(lu.op, iallocator_slot, None)
1643 if node is not None and iallocator is not None:
1644 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1646 elif node is None and iallocator is None:
1647 default_iallocator = lu.cfg.GetDefaultIAllocator()
1648 if default_iallocator:
1649 setattr(lu.op, iallocator_slot, default_iallocator)
1651 raise errors.OpPrereqError("No iallocator or node given and no"
1652 " cluster-wide default iallocator found;"
1653 " please specify either an iallocator or a"
1654 " node, or set a cluster-wide default"
1658 def _GetDefaultIAllocator(cfg, iallocator):
1659 """Decides on which iallocator to use.
1661 @type cfg: L{config.ConfigWriter}
1662 @param cfg: Cluster configuration object
1663 @type iallocator: string or None
1664 @param iallocator: Iallocator specified in opcode
1666 @return: Iallocator name
1670 # Use default iallocator
1671 iallocator = cfg.GetDefaultIAllocator()
1674 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1675 " opcode nor as a cluster-wide default",
1681 class LUClusterPostInit(LogicalUnit):
1682 """Logical unit for running hooks after cluster initialization.
1685 HPATH = "cluster-init"
1686 HTYPE = constants.HTYPE_CLUSTER
1688 def BuildHooksEnv(self):
1693 "OP_TARGET": self.cfg.GetClusterName(),
1696 def BuildHooksNodes(self):
1697 """Build hooks nodes.
1700 return ([], [self.cfg.GetMasterNode()])
1702 def Exec(self, feedback_fn):
1709 class LUClusterDestroy(LogicalUnit):
1710 """Logical unit for destroying the cluster.
1713 HPATH = "cluster-destroy"
1714 HTYPE = constants.HTYPE_CLUSTER
1716 def BuildHooksEnv(self):
1721 "OP_TARGET": self.cfg.GetClusterName(),
1724 def BuildHooksNodes(self):
1725 """Build hooks nodes.
1730 def CheckPrereq(self):
1731 """Check prerequisites.
1733 This checks whether the cluster is empty.
1735 Any errors are signaled by raising errors.OpPrereqError.
1738 master = self.cfg.GetMasterNode()
1740 nodelist = self.cfg.GetNodeList()
1741 if len(nodelist) != 1 or nodelist[0] != master:
1742 raise errors.OpPrereqError("There are still %d node(s) in"
1743 " this cluster." % (len(nodelist) - 1),
1745 instancelist = self.cfg.GetInstanceList()
1747 raise errors.OpPrereqError("There are still %d instance(s) in"
1748 " this cluster." % len(instancelist),
1751 def Exec(self, feedback_fn):
1752 """Destroys the cluster.
1755 master_params = self.cfg.GetMasterNetworkParameters()
1757 # Run post hooks on master node before it's removed
1758 _RunPostHook(self, master_params.name)
1760 ems = self.cfg.GetUseExternalMipScript()
1761 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1764 self.LogWarning("Error disabling the master IP address: %s",
1767 return master_params.name
1770 def _VerifyCertificate(filename):
1771 """Verifies a certificate for L{LUClusterVerifyConfig}.
1773 @type filename: string
1774 @param filename: Path to PEM file
1778 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1779 utils.ReadFile(filename))
1780 except Exception, err: # pylint: disable=W0703
1781 return (LUClusterVerifyConfig.ETYPE_ERROR,
1782 "Failed to load X509 certificate %s: %s" % (filename, err))
1785 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1786 constants.SSL_CERT_EXPIRATION_ERROR)
1789 fnamemsg = "While verifying %s: %s" % (filename, msg)
1794 return (None, fnamemsg)
1795 elif errcode == utils.CERT_WARNING:
1796 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1797 elif errcode == utils.CERT_ERROR:
1798 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1800 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1803 def _GetAllHypervisorParameters(cluster, instances):
1804 """Compute the set of all hypervisor parameters.
1806 @type cluster: L{objects.Cluster}
1807 @param cluster: the cluster object
1808 @param instances: list of L{objects.Instance}
1809 @param instances: additional instances from which to obtain parameters
1810 @rtype: list of (origin, hypervisor, parameters)
1811 @return: a list with all parameters found, indicating the hypervisor they
1812 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1817 for hv_name in cluster.enabled_hypervisors:
1818 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1820 for os_name, os_hvp in cluster.os_hvp.items():
1821 for hv_name, hv_params in os_hvp.items():
1823 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1824 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1826 # TODO: collapse identical parameter values in a single one
1827 for instance in instances:
1828 if instance.hvparams:
1829 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1830 cluster.FillHV(instance)))
1835 class _VerifyErrors(object):
1836 """Mix-in for cluster/group verify LUs.
1838 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1839 self.op and self._feedback_fn to be available.)
1843 ETYPE_FIELD = "code"
1844 ETYPE_ERROR = "ERROR"
1845 ETYPE_WARNING = "WARNING"
1847 def _Error(self, ecode, item, msg, *args, **kwargs):
1848 """Format an error message.
1850 Based on the opcode's error_codes parameter, either format a
1851 parseable error code, or a simpler error string.
1853 This must be called only from Exec and functions called from Exec.
1856 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1857 itype, etxt, _ = ecode
1858 # first complete the msg
1861 # then format the whole message
1862 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1863 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1869 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1870 # and finally report it via the feedback_fn
1871 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1873 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1874 """Log an error message if the passed condition is True.
1878 or self.op.debug_simulate_errors) # pylint: disable=E1101
1880 # If the error code is in the list of ignored errors, demote the error to a warning
1882 (_, etxt, _) = ecode
1883 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1884 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1887 self._Error(ecode, *args, **kwargs)
1889 # do not mark the operation as failed for WARN cases only
1890 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1891 self.bad = self.bad or cond
1894 class LUClusterVerify(NoHooksLU):
1895 """Submits all jobs necessary to verify the cluster.
1900 def ExpandNames(self):
1901 self.needed_locks = {}
1903 def Exec(self, feedback_fn):
1906 if self.op.group_name:
1907 groups = [self.op.group_name]
1908 depends_fn = lambda: None
1910 groups = self.cfg.GetNodeGroupList()
1912 # Verify global configuration
1914 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1917 # Always depend on global verification
1918 depends_fn = lambda: [(-len(jobs), [])]
1920 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1921 ignore_errors=self.op.ignore_errors,
1922 depends=depends_fn())]
1923 for group in groups)
1925 # Fix up all parameters
1926 for op in itertools.chain(*jobs): # pylint: disable=W0142
1927 op.debug_simulate_errors = self.op.debug_simulate_errors
1928 op.verbose = self.op.verbose
1929 op.error_codes = self.op.error_codes
1931 op.skip_checks = self.op.skip_checks
1932 except AttributeError:
1933 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1935 return ResultWithJobs(jobs)
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939 """Verifies the cluster config.
1944 def _VerifyHVP(self, hvp_data):
1945 """Verifies locally the syntax of the hypervisor parameters.
1948 for item, hv_name, hv_params in hvp_data:
1949 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1952 hv_class = hypervisor.GetHypervisor(hv_name)
1953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954 hv_class.CheckParameterSyntax(hv_params)
1955 except errors.GenericError, err:
1956 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958 def ExpandNames(self):
1959 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960 self.share_locks = _ShareAll()
1962 def CheckPrereq(self):
1963 """Check prerequisites.
1966 # Retrieve all information
1967 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968 self.all_node_info = self.cfg.GetAllNodesInfo()
1969 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971 def Exec(self, feedback_fn):
1972     """Verify integrity of cluster, performing various tests on nodes.
1976 self._feedback_fn = feedback_fn
1978 feedback_fn("* Verifying cluster config")
1980 for msg in self.cfg.VerifyConfig():
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983 feedback_fn("* Verifying cluster certificate files")
1985 for cert_filename in constants.ALL_CERT_FILES:
1986 (errcode, msg) = _VerifyCertificate(cert_filename)
1987 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989 feedback_fn("* Verifying hypervisor parameters")
1991 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992 self.all_inst_info.values()))
1994 feedback_fn("* Verifying all nodes belong to an existing group")
1996 # We do this verification here because, should this bogus circumstance
1997 # occur, it would never be caught by VerifyGroup, which only acts on
1998 # nodes/instances reachable from existing node groups.
2000 dangling_nodes = set(node.name for node in self.all_node_info.values()
2001 if node.group not in self.all_group_info)
2003 dangling_instances = {}
2004 no_node_instances = []
2006 for inst in self.all_inst_info.values():
2007 if inst.primary_node in dangling_nodes:
2008 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009 elif inst.primary_node not in self.all_node_info:
2010 no_node_instances.append(inst.name)
2015 utils.CommaJoin(dangling_instances.get(node.name,
2017 for node in dangling_nodes]
2019 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2021 "the following nodes (and their instances) belong to a non"
2022 " existing group: %s", utils.CommaJoin(pretty_dangling))
2024 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2026 "the following instances have a non-existing primary-node:"
2027 " %s", utils.CommaJoin(no_node_instances))
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033 """Verifies the status of a node group.
2036 HPATH = "cluster-verify"
2037 HTYPE = constants.HTYPE_CLUSTER
2040 _HOOKS_INDENT_RE = re.compile("^", re.M)
2042 class NodeImage(object):
2043 """A class representing the logical and physical status of a node.
2046 @ivar name: the node name to which this object refers
2047 @ivar volumes: a structure as returned from
2048 L{ganeti.backend.GetVolumeList} (runtime)
2049 @ivar instances: a list of running instances (runtime)
2050 @ivar pinst: list of configured primary instances (config)
2051 @ivar sinst: list of configured secondary instances (config)
2052 @ivar sbp: dictionary of {primary-node: list of instances} for all
2053 instances for which this node is secondary (config)
2054 @ivar mfree: free memory, as reported by hypervisor (runtime)
2055 @ivar dfree: free disk, as reported by the node (runtime)
2056 @ivar offline: the offline status (config)
2057 @type rpc_fail: boolean
2058     @ivar rpc_fail: whether the RPC verify call was successful (overall,
2059 not whether the individual keys were correct) (runtime)
2060 @type lvm_fail: boolean
2061 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062 @type hyp_fail: boolean
2063 @ivar hyp_fail: whether the RPC call didn't return the instance list
2064 @type ghost: boolean
2065 @ivar ghost: whether this is a known node or not (config)
2066 @type os_fail: boolean
2067 @ivar os_fail: whether the RPC call didn't return valid OS data
2069 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070 @type vm_capable: boolean
2071 @ivar vm_capable: whether the node can host instances
2074 def __init__(self, offline=False, name=None, vm_capable=True):
2083 self.offline = offline
2084 self.vm_capable = vm_capable
2085 self.rpc_fail = False
2086 self.lvm_fail = False
2087 self.hyp_fail = False
2089 self.os_fail = False
2092 def ExpandNames(self):
2093 # This raises errors.OpPrereqError on its own:
2094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2096 # Get instances in node group; this is unsafe and needs verification later
2098 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2100 self.needed_locks = {
2101 locking.LEVEL_INSTANCE: inst_names,
2102 locking.LEVEL_NODEGROUP: [self.group_uuid],
2103 locking.LEVEL_NODE: [],
2106 self.share_locks = _ShareAll()
2108 def DeclareLocks(self, level):
2109 if level == locking.LEVEL_NODE:
2110 # Get members of node group; this is unsafe and needs verification later
2111 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2113 all_inst_info = self.cfg.GetAllInstancesInfo()
2115 # In Exec(), we warn about mirrored instances that have primary and
2116 # secondary living in separate node groups. To fully verify that
2117 # volumes for these instances are healthy, we will need to do an
2118       # extra call to their secondaries. We ensure here those nodes will be locked.
2120 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121 # Important: access only the instances whose lock is owned
2122 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123 nodes.update(all_inst_info[inst].secondary_nodes)
2125 self.needed_locks[locking.LEVEL_NODE] = nodes
2127 def CheckPrereq(self):
2128 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2129 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2131 group_nodes = set(self.group_info.members)
2133 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2136 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2138 unlocked_instances = \
2139 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2142 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2143 utils.CommaJoin(unlocked_nodes),
2146 if unlocked_instances:
2147 raise errors.OpPrereqError("Missing lock for instances: %s" %
2148 utils.CommaJoin(unlocked_instances),
2151 self.all_node_info = self.cfg.GetAllNodesInfo()
2152 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2154 self.my_node_names = utils.NiceSort(group_nodes)
2155 self.my_inst_names = utils.NiceSort(group_instances)
2157 self.my_node_info = dict((name, self.all_node_info[name])
2158 for name in self.my_node_names)
2160 self.my_inst_info = dict((name, self.all_inst_info[name])
2161 for name in self.my_inst_names)
2163 # We detect here the nodes that will need the extra RPC calls for verifying
2164 # split LV volumes; they should be locked.
2165 extra_lv_nodes = set()
2167 for inst in self.my_inst_info.values():
2168 if inst.disk_template in constants.DTS_INT_MIRROR:
2169 for nname in inst.all_nodes:
2170 if self.all_node_info[nname].group != self.group_uuid:
2171 extra_lv_nodes.add(nname)
2173 unlocked_lv_nodes = \
2174 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2176 if unlocked_lv_nodes:
2177 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2178 utils.CommaJoin(unlocked_lv_nodes),
2180 self.extra_lv_nodes = list(extra_lv_nodes)
2182 def _VerifyNode(self, ninfo, nresult):
2183 """Perform some basic validation on data returned from a node.
2185     - check the result data structure is well formed and has all the mandatory fields
2187 - check ganeti version
2189 @type ninfo: L{objects.Node}
2190 @param ninfo: the node to check
2191 @param nresult: the results from the node
2193 @return: whether overall this call was successful (and we can expect
2194         reasonable values in the response)
2198 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2200 # main result, nresult should be a non-empty dict
2201 test = not nresult or not isinstance(nresult, dict)
2202 _ErrorIf(test, constants.CV_ENODERPC, node,
2203 "unable to verify node: no data returned")
2207 # compares ganeti version
2208 local_version = constants.PROTOCOL_VERSION
2209 remote_version = nresult.get("version", None)
2210 test = not (remote_version and
2211 isinstance(remote_version, (list, tuple)) and
2212 len(remote_version) == 2)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "connection to node returned invalid data")
2218 test = local_version != remote_version[0]
2219 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2220 "incompatible protocol versions: master %s,"
2221 " node %s", local_version, remote_version[0])
2225 # node seems compatible, we can actually try to look into its results
2227 # full package version
2228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2229 constants.CV_ENODEVERSION, node,
2230 "software version mismatch: master %s, node %s",
2231 constants.RELEASE_VERSION, remote_version[1],
2232 code=self.ETYPE_WARNING)
2234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2235 if ninfo.vm_capable and isinstance(hyp_result, dict):
2236 for hv_name, hv_result in hyp_result.iteritems():
2237 test = hv_result is not None
2238 _ErrorIf(test, constants.CV_ENODEHV, node,
2239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2241 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2242 if ninfo.vm_capable and isinstance(hvp_result, list):
2243 for item, hv_name, hv_result in hvp_result:
2244 _ErrorIf(True, constants.CV_ENODEHV, node,
2245 "hypervisor %s parameter verify failure (source %s): %s",
2246 hv_name, item, hv_result)
2248 test = nresult.get(constants.NV_NODESETUP,
2249 ["Missing NODESETUP results"])
2250 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2255 def _VerifyNodeTime(self, ninfo, nresult,
2256 nvinfo_starttime, nvinfo_endtime):
2257 """Check the node time.
2259 @type ninfo: L{objects.Node}
2260 @param ninfo: the node to check
2261 @param nresult: the remote results for the node
2262 @param nvinfo_starttime: the start time of the RPC call
2263 @param nvinfo_endtime: the end time of the RPC call
2267 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2269 ntime = nresult.get(constants.NV_TIME, None)
2271 ntime_merged = utils.MergeTime(ntime)
2272 except (ValueError, TypeError):
2273 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2276 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2278 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2279 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2283 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2284 "Node time diverges by at least %s from master node time",
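  # Illustrative numeric sketch for the check above (all values invented,
  # assuming a NODE_MAX_CLOCK_SKEW of 150 seconds): with an RPC window of
  # [1000.0, 1002.5], a node reporting a merged time of 1100.0 lies within
  # [1000.0 - 150, 1002.5 + 150] and passes, while a node reporting 700.0 is
  # flagged with CV_ENODETIME and ntime_diff = abs(1000.0 - 700.0) = "300.0s".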
2287 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2288 """Check the node LVM results.
2290 @type ninfo: L{objects.Node}
2291 @param ninfo: the node to check
2292 @param nresult: the remote results for the node
2293 @param vg_name: the configured VG name
2300 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2302 # checks vg existence and size > 20G
2303 vglist = nresult.get(constants.NV_VGLIST, None)
2305 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2307 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2308 constants.MIN_VG_SIZE)
2309 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2312 pvlist = nresult.get(constants.NV_PVLIST, None)
2313 test = pvlist is None
2314 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2316 # check that ':' is not present in PV names, since it's a
2317       # special character for lvcreate (denotes the range of PEs to allocate on)
2319 for _, pvname, owner_vg in pvlist:
2320 test = ":" in pvname
2321 _ErrorIf(test, constants.CV_ENODELVM, node,
2322 "Invalid character ':' in PV '%s' of VG '%s'",
2325 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2326 """Check the node bridges.
2328 @type ninfo: L{objects.Node}
2329 @param ninfo: the node to check
2330 @param nresult: the remote results for the node
2331 @param bridges: the expected list of bridges
2338 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 missing = nresult.get(constants.NV_BRIDGES, None)
2341 test = not isinstance(missing, list)
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "did not return valid bridge information")
2345 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2348 def _VerifyNodeUserScripts(self, ninfo, nresult):
2349     """Check the presence and executability of user scripts on the node.
2351 @type ninfo: L{objects.Node}
2352 @param ninfo: the node to check
2353 @param nresult: the remote results for the node
2358 test = not constants.NV_USERSCRIPTS in nresult
2359 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2360 "did not return user scripts information")
2362 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2364 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2365 "user scripts not present or not executable: %s" %
2366 utils.CommaJoin(sorted(broken_scripts)))
2368 def _VerifyNodeNetwork(self, ninfo, nresult):
2369 """Check the node network connectivity results.
2371 @type ninfo: L{objects.Node}
2372 @param ninfo: the node to check
2373 @param nresult: the remote results for the node
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 test = constants.NV_NODELIST not in nresult
2380 _ErrorIf(test, constants.CV_ENODESSH, node,
2381 "node hasn't returned node ssh connectivity data")
2383 if nresult[constants.NV_NODELIST]:
2384 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2385 _ErrorIf(True, constants.CV_ENODESSH, node,
2386 "ssh communication with node '%s': %s", a_node, a_msg)
2388 test = constants.NV_NODENETTEST not in nresult
2389 _ErrorIf(test, constants.CV_ENODENET, node,
2390 "node hasn't returned node tcp connectivity data")
2392 if nresult[constants.NV_NODENETTEST]:
2393 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2395 _ErrorIf(True, constants.CV_ENODENET, node,
2396 "tcp communication with node '%s': %s",
2397 anode, nresult[constants.NV_NODENETTEST][anode])
2399 test = constants.NV_MASTERIP not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node master IP reachability data")
2403 if not nresult[constants.NV_MASTERIP]:
2404 if node == self.master_node:
2405 msg = "the master node cannot reach the master IP (not configured?)"
2407 msg = "cannot reach the master IP"
2408 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2410 def _VerifyInstance(self, instance, instanceconfig, node_image,
2412 """Verify an instance.
2414 This function checks to see if the required block devices are
2415 available on the instance's node.
2418 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2419 node_current = instanceconfig.primary_node
2421 node_vol_should = {}
2422 instanceconfig.MapLVsByNode(node_vol_should)
2424 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2425 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2426 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2428 for node in node_vol_should:
2429 n_img = node_image[node]
2430 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2431 # ignore missing volumes on offline or broken nodes
2433 for volume in node_vol_should[node]:
2434 test = volume not in n_img.volumes
2435 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2436 "volume %s missing on node %s", volume, node)
2438 if instanceconfig.admin_state == constants.ADMINST_UP:
2439 pri_img = node_image[node_current]
2440 test = instance not in pri_img.instances and not pri_img.offline
2441 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2442 "instance not running on its primary node %s",
2445 diskdata = [(nname, success, status, idx)
2446 for (nname, disks) in diskstatus.items()
2447 for idx, (success, status) in enumerate(disks)]
2449 for nname, success, bdev_status, idx in diskdata:
2450       # the 'ghost node' construction in Exec() ensures that we have a node here
2452 snode = node_image[nname]
2453 bad_snode = snode.ghost or snode.offline
2454 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2455 not success and not bad_snode,
2456 constants.CV_EINSTANCEFAULTYDISK, instance,
2457 "couldn't retrieve status for disk/%s on %s: %s",
2458 idx, nname, bdev_status)
2459 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2460 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2461 constants.CV_EINSTANCEFAULTYDISK, instance,
2462 "disk/%s on %s is faulty", idx, nname)
2464 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2465 """Verify if there are any unknown volumes in the cluster.
2467 The .os, .swap and backup volumes are ignored. All other volumes are
2468 reported as unknown.
2470 @type reserved: L{ganeti.utils.FieldSet}
2471 @param reserved: a FieldSet of reserved volume names
2474 for node, n_img in node_image.items():
2475 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2476 self.all_node_info[node].group != self.group_uuid):
2477 # skip non-healthy nodes
2479 for volume in n_img.volumes:
2480 test = ((node not in node_vol_should or
2481 volume not in node_vol_should[node]) and
2482 not reserved.Matches(volume))
2483 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2484 "volume %s is unknown", volume)
2486 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2487 """Verify N+1 Memory Resilience.
2489 Check that if one single node dies we can still start all the
2490 instances it was primary for.
2493 cluster_info = self.cfg.GetClusterInfo()
2494 for node, n_img in node_image.items():
2495 # This code checks that every node which is now listed as
2496 # secondary has enough memory to host all instances it is
2497       # supposed to, should a single other node in the cluster fail.
2498 # FIXME: not ready for failover to an arbitrary node
2499 # FIXME: does not support file-backed instances
2500 # WARNING: we currently take into account down instances as well
2501 # as up ones, considering that even if they're down someone
2502 # might want to start them even in the event of a node failure.
2503 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2504 # we're skipping nodes marked offline and nodes in other groups from
2505 # the N+1 warning, since most likely we don't have good memory
2506         # information from them; we already list instances living on such
2507 # nodes, and that's enough warning
2509 #TODO(dynmem): also consider ballooning out other instances
2510 for prinode, instances in n_img.sbp.items():
2512 for instance in instances:
2513 bep = cluster_info.FillBE(instance_cfg[instance])
2514 if bep[constants.BE_AUTO_BALANCE]:
2515 needed_mem += bep[constants.BE_MINMEM]
2516 test = n_img.mfree < needed_mem
2517 self._ErrorIf(test, constants.CV_ENODEN1, node,
2518                       "not enough memory to accommodate instance failovers"
2519 " should node %s fail (%dMiB needed, %dMiB available)",
2520 prinode, needed_mem, n_img.mfree)
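  # Worked example for the N+1 check above (numbers invented): if this node is
  # secondary for inst1 (BE_MINMEM=1024) and inst2 (BE_MINMEM=2048), both
  # auto-balanced and both with primary node A, then needed_mem for the
  # prinode=A entry is 3072 MiB; a reported mfree of 2500 MiB triggers
  # CV_ENODEN1, since this node could not host A's instances should A fail.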
2523 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2524 (files_all, files_opt, files_mc, files_vm)):
2525 """Verifies file checksums collected from all nodes.
2527 @param errorif: Callback for reporting errors
2528 @param nodeinfo: List of L{objects.Node} objects
2529 @param master_node: Name of master node
2530 @param all_nvinfo: RPC results
2533 # Define functions determining which nodes to consider for a file
2536 (files_mc, lambda node: (node.master_candidate or
2537 node.name == master_node)),
2538 (files_vm, lambda node: node.vm_capable),
2541 # Build mapping from filename to list of nodes which should have the file
2543 for (files, fn) in files2nodefn:
2545 filenodes = nodeinfo
2547 filenodes = filter(fn, nodeinfo)
2548 nodefiles.update((filename,
2549 frozenset(map(operator.attrgetter("name"), filenodes)))
2550 for filename in files)
2552 assert set(nodefiles) == (files_all | files_mc | files_vm)
2554 fileinfo = dict((filename, {}) for filename in nodefiles)
2555 ignore_nodes = set()
2557 for node in nodeinfo:
2559 ignore_nodes.add(node.name)
2562 nresult = all_nvinfo[node.name]
2564 if nresult.fail_msg or not nresult.payload:
2567 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2569 test = not (node_files and isinstance(node_files, dict))
2570 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2571 "Node did not return file checksum data")
2573 ignore_nodes.add(node.name)
2576 # Build per-checksum mapping from filename to nodes having it
2577 for (filename, checksum) in node_files.items():
2578 assert filename in nodefiles
2579 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2581 for (filename, checksums) in fileinfo.items():
2582 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2584 # Nodes having the file
2585 with_file = frozenset(node_name
2586 for nodes in fileinfo[filename].values()
2587 for node_name in nodes) - ignore_nodes
2589 expected_nodes = nodefiles[filename] - ignore_nodes
2591 # Nodes missing file
2592 missing_file = expected_nodes - with_file
2594 if filename in files_opt:
2596 errorif(missing_file and missing_file != expected_nodes,
2597 constants.CV_ECLUSTERFILECHECK, None,
2598 "File %s is optional, but it must exist on all or no"
2599 " nodes (not found on %s)",
2600 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2602 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2603 "File %s is missing from node(s) %s", filename,
2604 utils.CommaJoin(utils.NiceSort(missing_file)))
2606 # Warn if a node has a file it shouldn't
2607 unexpected = with_file - expected_nodes
2609 constants.CV_ECLUSTERFILECHECK, None,
2610 "File %s should not exist on node(s) %s",
2611 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2613 # See if there are multiple versions of the file
2614 test = len(checksums) > 1
2616 variants = ["variant %s on %s" %
2617 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2618 for (idx, (checksum, nodes)) in
2619 enumerate(sorted(checksums.items()))]
2623 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s found with %s different checksums (%s)",
2625 filename, len(checksums), "; ".join(variants))
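  # Illustrative sketch of the decisions above (hypothetical data): with
  #   nodefiles["/etc/hosts"] == frozenset(["node1", "node2", "node3"])
  # and fileinfo["/etc/hosts"] == {"<csum-a>": set(["node1", "node2"]),
  #                                "<csum-b>": set(["node3"])},
  # no node is missing the file, but the two distinct checksums raise
  # CV_ECLUSTERFILECHECK, listing which nodes carry which variant.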
2627 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2629     """Verifies the node DRBD status.
2631 @type ninfo: L{objects.Node}
2632 @param ninfo: the node to check
2633 @param nresult: the remote results for the node
2634 @param instanceinfo: the dict of instances
2635 @param drbd_helper: the configured DRBD usermode helper
2636 @param drbd_map: the DRBD map as returned by
2637 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2641 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2645       test = (helper_result is None)
2646 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2647 "no drbd usermode helper returned")
2649 status, payload = helper_result
2651 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2652 "drbd usermode helper check unsuccessful: %s", payload)
2653 test = status and (payload != drbd_helper)
2654 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655 "wrong drbd usermode helper: %s", payload)
2657 # compute the DRBD minors
2659 for minor, instance in drbd_map[node].items():
2660 test = instance not in instanceinfo
2661 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2662 "ghost instance '%s' in temporary DRBD map", instance)
2663 # ghost instance should not be running, but otherwise we
2664 # don't give double warnings (both ghost instance and
2665 # unallocated minor in use)
2667 node_drbd[minor] = (instance, False)
2669 instance = instanceinfo[instance]
2670 node_drbd[minor] = (instance.name,
2671 instance.admin_state == constants.ADMINST_UP)
2673 # and now check them
2674 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2675 test = not isinstance(used_minors, (tuple, list))
2676 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2677 "cannot parse drbd status file: %s", str(used_minors))
2679 # we cannot check drbd status
2682 for minor, (iname, must_exist) in node_drbd.items():
2683 test = minor not in used_minors and must_exist
2684 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2685 "drbd minor %d of instance %s is not active", minor, iname)
2686 for minor in used_minors:
2687 test = minor not in node_drbd
2688 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2689 "unallocated drbd minor %d is in use", minor)
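  # Illustrative sketch of the DRBD checks above (minors and names invented):
  # if drbd_map for this node is {0: "inst1", 1: "inst2"}, both instances are
  # administratively up, and the node reports used minors [0, 5], then minor 1
  # is reported as "drbd minor 1 of instance inst2 is not active" and minor 5
  # as "unallocated drbd minor 5 is in use".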
2691 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2692 """Builds the node OS structures.
2694 @type ninfo: L{objects.Node}
2695 @param ninfo: the node to check
2696 @param nresult: the remote results for the node
2697 @param nimg: the node image object
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 remote_os = nresult.get(constants.NV_OSLIST, None)
2704 test = (not isinstance(remote_os, list) or
2705 not compat.all(isinstance(v, list) and len(v) == 7
2706 for v in remote_os))
2708 _ErrorIf(test, constants.CV_ENODEOS, node,
2709 "node hasn't returned valid OS data")
2718 for (name, os_path, status, diagnose,
2719 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2721 if name not in os_dict:
2724 # parameters is a list of lists instead of list of tuples due to
2725 # JSON lacking a real tuple type, fix it:
2726 parameters = [tuple(v) for v in parameters]
2727 os_dict[name].append((os_path, status, diagnose,
2728 set(variants), set(parameters), set(api_ver)))
2730 nimg.oslist = os_dict
2732 def _VerifyNodeOS(self, ninfo, nimg, base):
2733 """Verifies the node OS list.
2735 @type ninfo: L{objects.Node}
2736 @param ninfo: the node to check
2737 @param nimg: the node image object
2738 @param base: the 'template' node we match against (e.g. from the master)
2742 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2744 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2746 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2747 for os_name, os_data in nimg.oslist.items():
2748 assert os_data, "Empty OS status for OS %s?!" % os_name
2749 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2750 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2751 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2752 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2753 "OS '%s' has multiple entries (first one shadows the rest): %s",
2754 os_name, utils.CommaJoin([v[0] for v in os_data]))
2755 # comparisons with the 'base' image
2756 test = os_name not in base.oslist
2757 _ErrorIf(test, constants.CV_ENODEOS, node,
2758 "Extra OS %s not present on reference node (%s)",
2762 assert base.oslist[os_name], "Base node has empty OS status?"
2763 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2765 # base OS is invalid, skipping
2767 for kind, a, b in [("API version", f_api, b_api),
2768 ("variants list", f_var, b_var),
2769 ("parameters", beautify_params(f_param),
2770 beautify_params(b_param))]:
2771 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2772 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2773 kind, os_name, base.name,
2774 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2776 # check any missing OSes
2777 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2778 _ErrorIf(missing, constants.CV_ENODEOS, node,
2779 "OSes present on reference node %s but missing on this node: %s",
2780 base.name, utils.CommaJoin(missing))
2782 def _VerifyOob(self, ninfo, nresult):
2783 """Verifies out of band functionality of a node.
2785 @type ninfo: L{objects.Node}
2786 @param ninfo: the node to check
2787 @param nresult: the remote results for the node
2791 # We just have to verify the paths on master and/or master candidates
2792 # as the oob helper is invoked on the master
2793 if ((ninfo.master_candidate or ninfo.master_capable) and
2794 constants.NV_OOB_PATHS in nresult):
2795 for path_result in nresult[constants.NV_OOB_PATHS]:
2796 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2798 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2799 """Verifies and updates the node volume data.
2801 This function will update a L{NodeImage}'s internal structures
2802 with data from the remote call.
2804 @type ninfo: L{objects.Node}
2805 @param ninfo: the node to check
2806 @param nresult: the remote results for the node
2807 @param nimg: the node image object
2808 @param vg_name: the configured VG name
2812 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2814 nimg.lvm_fail = True
2815 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2818 elif isinstance(lvdata, basestring):
2819 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2820 utils.SafeEncode(lvdata))
2821 elif not isinstance(lvdata, dict):
2822 _ErrorIf(True, constants.CV_ENODELVM, node,
2823 "rpc call to node failed (lvlist)")
2825 nimg.volumes = lvdata
2826 nimg.lvm_fail = False
2828 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2829 """Verifies and updates the node instance list.
2831 If the listing was successful, then updates this node's instance
2832     list. Otherwise, it marks the RPC call as failed for the instance list.
2835 @type ninfo: L{objects.Node}
2836 @param ninfo: the node to check
2837 @param nresult: the remote results for the node
2838 @param nimg: the node image object
2841 idata = nresult.get(constants.NV_INSTANCELIST, None)
2842 test = not isinstance(idata, list)
2843 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2844 "rpc call to node failed (instancelist): %s",
2845 utils.SafeEncode(str(idata)))
2847 nimg.hyp_fail = True
2849 nimg.instances = idata
2851 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2852 """Verifies and computes a node information map
2854 @type ninfo: L{objects.Node}
2855 @param ninfo: the node to check
2856 @param nresult: the remote results for the node
2857 @param nimg: the node image object
2858 @param vg_name: the configured VG name
2862 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2864 # try to read free memory (from the hypervisor)
2865 hv_info = nresult.get(constants.NV_HVINFO, None)
2866 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2867 _ErrorIf(test, constants.CV_ENODEHV, node,
2868 "rpc call to node failed (hvinfo)")
2871 nimg.mfree = int(hv_info["memory_free"])
2872 except (ValueError, TypeError):
2873 _ErrorIf(True, constants.CV_ENODERPC, node,
2874 "node returned invalid nodeinfo, check hypervisor")
2876 # FIXME: devise a free space model for file based instances as well
2877 if vg_name is not None:
2878 test = (constants.NV_VGLIST not in nresult or
2879 vg_name not in nresult[constants.NV_VGLIST])
2880 _ErrorIf(test, constants.CV_ENODELVM, node,
2881 "node didn't return data for the volume group '%s'"
2882 " - it is either missing or broken", vg_name)
2885 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2886 except (ValueError, TypeError):
2887 _ErrorIf(True, constants.CV_ENODERPC, node,
2888 "node returned invalid LVM info, check LVM status")
2890 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2891 """Gets per-disk status information for all instances.
2893 @type nodelist: list of strings
2894 @param nodelist: Node names
2895     @type node_image: dict of (name, L{NodeImage})
2896     @param node_image: Node image objects
2897 @type instanceinfo: dict of (name, L{objects.Instance})
2898 @param instanceinfo: Instance objects
2899     @rtype: {instance: {node: [(success, payload)]}}
2900 @return: a dictionary of per-instance dictionaries with nodes as
2901 keys and disk information as values; the disk information is a
2902 list of tuples (success, payload)
2905 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2908 node_disks_devonly = {}
2909 diskless_instances = set()
2910 diskless = constants.DT_DISKLESS
2912 for nname in nodelist:
2913 node_instances = list(itertools.chain(node_image[nname].pinst,
2914 node_image[nname].sinst))
2915 diskless_instances.update(inst for inst in node_instances
2916 if instanceinfo[inst].disk_template == diskless)
2917 disks = [(inst, disk)
2918 for inst in node_instances
2919 for disk in instanceinfo[inst].disks]
2922 # No need to collect data
2925 node_disks[nname] = disks
2927       # _AnnotateDiskParams already makes copies of the disks
2929 for (inst, dev) in disks:
2930 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2931 self.cfg.SetDiskID(anno_disk, nname)
2932 devonly.append(anno_disk)
2934 node_disks_devonly[nname] = devonly
2936 assert len(node_disks) == len(node_disks_devonly)
2938 # Collect data from all nodes with disks
2939 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2942 assert len(result) == len(node_disks)
2946 for (nname, nres) in result.items():
2947 disks = node_disks[nname]
2950 # No data from this node
2951 data = len(disks) * [(False, "node offline")]
2954 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2955 "while getting disk information: %s", msg)
2957 # No data from this node
2958 data = len(disks) * [(False, msg)]
2961 for idx, i in enumerate(nres.payload):
2962 if isinstance(i, (tuple, list)) and len(i) == 2:
2965 logging.warning("Invalid result from node %s, entry %d: %s",
2967 data.append((False, "Invalid result from the remote node"))
2969 for ((inst, _), status) in zip(disks, data):
2970 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2972 # Add empty entries for diskless instances.
2973 for inst in diskless_instances:
2974 assert inst not in instdisk
2977 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2978 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2979 compat.all(isinstance(s, (tuple, list)) and
2980 len(s) == 2 for s in statuses)
2981 for inst, nnames in instdisk.items()
2982 for nname, statuses in nnames.items())
2983 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
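  # Illustrative sketch of the instdisk structure computed above (names are
  # invented): an instance with one DRBD disk on node1/node2 contributes
  #   {"inst1": {"node1": [(True, <blockdev status>)],
  #              "node2": [(True, <blockdev status>)]}}
  # a diskless instance gets an empty dict, and a node whose RPC failed
  # contributes (False, "<error message>") entries for each of its disks.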
2988 def _SshNodeSelector(group_uuid, all_nodes):
2989 """Create endless iterators for all potential SSH check hosts.
2992 nodes = [node for node in all_nodes
2993 if (node.group != group_uuid and
2995 keyfunc = operator.attrgetter("group")
2997 return map(itertools.cycle,
2998 [sorted(map(operator.attrgetter("name"), names))
2999 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3003 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3004 """Choose which nodes should talk to which other nodes.
3006     We will make nodes contact all nodes in their group, and one node from every other group.
3009 @warning: This algorithm has a known issue if one node group is much
3010 smaller than others (e.g. just one node). In such a case all other
3011 nodes will talk to the single node.
3014 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3015 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3017 return (online_nodes,
3018 dict((name, sorted([i.next() for i in sel]))
3019 for name in online_nodes))
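  # Illustrative sketch (hypothetical groups): when verifying group A with
  # online nodes {a1, a2} in a cluster that also has group B = {b1, b2, b3},
  # the method above returns roughly
  #   (["a1", "a2"], {"a1": ["b1"], "a2": ["b2"]})
  # i.e. the group's online nodes plus, for each of them, one node cycled from
  # every other group, so cross-group SSH reachability is sampled cheaply.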
3021 def BuildHooksEnv(self):
3024     Cluster-Verify hooks are only run in the post phase; if they fail, their
3025     output is logged in the verify output and the verification fails.
3029 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3032 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3033 for node in self.my_node_info.values())
3037 def BuildHooksNodes(self):
3038 """Build hooks nodes.
3041 return ([], self.my_node_names)
3043 def Exec(self, feedback_fn):
3044     """Verify integrity of the node group, performing various tests on nodes.
3047 # This method has too many local variables. pylint: disable=R0914
3048 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3050 if not self.my_node_names:
3052 feedback_fn("* Empty node group, skipping verification")
3056 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3057 verbose = self.op.verbose
3058 self._feedback_fn = feedback_fn
3060 vg_name = self.cfg.GetVGName()
3061 drbd_helper = self.cfg.GetDRBDHelper()
3062 cluster = self.cfg.GetClusterInfo()
3063 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3064 hypervisors = cluster.enabled_hypervisors
3065 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3067 i_non_redundant = [] # Non redundant instances
3068 i_non_a_balanced = [] # Non auto-balanced instances
3069 i_offline = 0 # Count of offline instances
3070 n_offline = 0 # Count of offline nodes
3071 n_drained = 0 # Count of nodes being drained
3072 node_vol_should = {}
3074 # FIXME: verify OS list
3077 filemap = _ComputeAncillaryFiles(cluster, False)
3079 # do local checksums
3080 master_node = self.master_node = self.cfg.GetMasterNode()
3081 master_ip = self.cfg.GetMasterIP()
3083 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3086 if self.cfg.GetUseExternalMipScript():
3087 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3089 node_verify_param = {
3090 constants.NV_FILELIST:
3091 utils.UniqueSequence(filename
3092 for files in filemap
3093 for filename in files),
3094 constants.NV_NODELIST:
3095 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3096 self.all_node_info.values()),
3097 constants.NV_HYPERVISOR: hypervisors,
3098 constants.NV_HVPARAMS:
3099 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3100 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3101 for node in node_data_list
3102 if not node.offline],
3103 constants.NV_INSTANCELIST: hypervisors,
3104 constants.NV_VERSION: None,
3105 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3106 constants.NV_NODESETUP: None,
3107 constants.NV_TIME: None,
3108 constants.NV_MASTERIP: (master_node, master_ip),
3109 constants.NV_OSLIST: None,
3110 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3111 constants.NV_USERSCRIPTS: user_scripts,
3114 if vg_name is not None:
3115 node_verify_param[constants.NV_VGLIST] = None
3116 node_verify_param[constants.NV_LVLIST] = vg_name
3117 node_verify_param[constants.NV_PVLIST] = [vg_name]
3118 node_verify_param[constants.NV_DRBDLIST] = None
3121 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3124 # FIXME: this needs to be changed per node-group, not cluster-wide
3126 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3127 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3128 bridges.add(default_nicpp[constants.NIC_LINK])
3129 for instance in self.my_inst_info.values():
3130 for nic in instance.nics:
3131 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3132 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3133 bridges.add(full_nic[constants.NIC_LINK])
3136 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3138 # Build our expected cluster state
3139 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3141 vm_capable=node.vm_capable))
3142 for node in node_data_list)
3146 for node in self.all_node_info.values():
3147 path = _SupportsOob(self.cfg, node)
3148 if path and path not in oob_paths:
3149 oob_paths.append(path)
3152 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3154 for instance in self.my_inst_names:
3155 inst_config = self.my_inst_info[instance]
3156 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3159 for nname in inst_config.all_nodes:
3160 if nname not in node_image:
3161 gnode = self.NodeImage(name=nname)
3162 gnode.ghost = (nname not in self.all_node_info)
3163 node_image[nname] = gnode
3165 inst_config.MapLVsByNode(node_vol_should)
3167 pnode = inst_config.primary_node
3168 node_image[pnode].pinst.append(instance)
3170 for snode in inst_config.secondary_nodes:
3171 nimg = node_image[snode]
3172 nimg.sinst.append(instance)
3173 if pnode not in nimg.sbp:
3174 nimg.sbp[pnode] = []
3175 nimg.sbp[pnode].append(instance)
3177 # At this point, we have the in-memory data structures complete,
3178 # except for the runtime information, which we'll gather next
3180 # Due to the way our RPC system works, exact response times cannot be
3181 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3182     # time before and after executing the request, we can at least have a time window.
3184 nvinfo_starttime = time.time()
3185 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3187 self.cfg.GetClusterName())
3188 nvinfo_endtime = time.time()
3190 if self.extra_lv_nodes and vg_name is not None:
3192 self.rpc.call_node_verify(self.extra_lv_nodes,
3193 {constants.NV_LVLIST: vg_name},
3194 self.cfg.GetClusterName())
3196 extra_lv_nvinfo = {}
3198 all_drbd_map = self.cfg.ComputeDRBDMap()
3200 feedback_fn("* Gathering disk information (%s nodes)" %
3201 len(self.my_node_names))
3202 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3205 feedback_fn("* Verifying configuration file consistency")
3207 # If not all nodes are being checked, we need to make sure the master node
3208 # and a non-checked vm_capable node are in the list.
3209 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3211 vf_nvinfo = all_nvinfo.copy()
3212 vf_node_info = list(self.my_node_info.values())
3213 additional_nodes = []
3214 if master_node not in self.my_node_info:
3215 additional_nodes.append(master_node)
3216 vf_node_info.append(self.all_node_info[master_node])
3217 # Add the first vm_capable node we find which is not included
3218 for node in absent_nodes:
3219 nodeinfo = self.all_node_info[node]
3220 if nodeinfo.vm_capable and not nodeinfo.offline:
3221 additional_nodes.append(node)
3222 vf_node_info.append(self.all_node_info[node])
3224 key = constants.NV_FILELIST
3225 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3226 {key: node_verify_param[key]},
3227 self.cfg.GetClusterName()))
3229 vf_nvinfo = all_nvinfo
3230 vf_node_info = self.my_node_info.values()
3232 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3234 feedback_fn("* Verifying node status")
3238 for node_i in node_data_list:
3240 nimg = node_image[node]
3244 feedback_fn("* Skipping offline node %s" % (node,))
3248 if node == master_node:
3250 elif node_i.master_candidate:
3251 ntype = "master candidate"
3252 elif node_i.drained:
3258 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3260 msg = all_nvinfo[node].fail_msg
3261 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3264 nimg.rpc_fail = True
3267 nresult = all_nvinfo[node].payload
3269 nimg.call_ok = self._VerifyNode(node_i, nresult)
3270 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3271 self._VerifyNodeNetwork(node_i, nresult)
3272 self._VerifyNodeUserScripts(node_i, nresult)
3273 self._VerifyOob(node_i, nresult)
3276 self._VerifyNodeLVM(node_i, nresult, vg_name)
3277 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3280 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3281 self._UpdateNodeInstances(node_i, nresult, nimg)
3282 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3283 self._UpdateNodeOS(node_i, nresult, nimg)
3285 if not nimg.os_fail:
3286 if refos_img is None:
3288 self._VerifyNodeOS(node_i, nimg, refos_img)
3289 self._VerifyNodeBridges(node_i, nresult, bridges)
3291       # Check whether all running instances are primary for the node. (This
3292 # can no longer be done from _VerifyInstance below, since some of the
3293 # wrong instances could be from other node groups.)
3294 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3296 for inst in non_primary_inst:
3297 test = inst in self.all_inst_info
3298 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3299 "instance should not run on node %s", node_i.name)
3300 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3301 "node is running unknown instance %s", inst)
3303 for node, result in extra_lv_nvinfo.items():
3304 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3305 node_image[node], vg_name)
3307 feedback_fn("* Verifying instance status")
3308 for instance in self.my_inst_names:
3310 feedback_fn("* Verifying instance %s" % instance)
3311 inst_config = self.my_inst_info[instance]
3312 self._VerifyInstance(instance, inst_config, node_image,
3314 inst_nodes_offline = []
3316 pnode = inst_config.primary_node
3317 pnode_img = node_image[pnode]
3318 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3319 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3320 " primary node failed", instance)
3322 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3324 constants.CV_EINSTANCEBADNODE, instance,
3325 "instance is marked as running and lives on offline node %s",
3326 inst_config.primary_node)
3328 # If the instance is non-redundant we cannot survive losing its primary
3329 # node, so we are not N+1 compliant. On the other hand we have no disk
3330       # templates with more than one secondary, so that situation is not well supported either.
3332 # FIXME: does not support file-backed instances
3333 if not inst_config.secondary_nodes:
3334 i_non_redundant.append(instance)
3336 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3337 constants.CV_EINSTANCELAYOUT,
3338 instance, "instance has multiple secondary nodes: %s",
3339 utils.CommaJoin(inst_config.secondary_nodes),
3340 code=self.ETYPE_WARNING)
3342 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3343 pnode = inst_config.primary_node
3344 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3345 instance_groups = {}
3347 for node in instance_nodes:
3348 instance_groups.setdefault(self.all_node_info[node].group,
3352 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3353 # Sort so that we always list the primary node first.
3354 for group, nodes in sorted(instance_groups.items(),
3355 key=lambda (_, nodes): pnode in nodes,
3358 self._ErrorIf(len(instance_groups) > 1,
3359 constants.CV_EINSTANCESPLITGROUPS,
3360 instance, "instance has primary and secondary nodes in"
3361 " different groups: %s", utils.CommaJoin(pretty_list),
3362 code=self.ETYPE_WARNING)
3364 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3365 i_non_a_balanced.append(instance)
3367 for snode in inst_config.secondary_nodes:
3368 s_img = node_image[snode]
3369 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3370 snode, "instance %s, connection to secondary node failed",
3374 inst_nodes_offline.append(snode)
3376 # warn that the instance lives on offline nodes
3377 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3378 "instance has offline secondary node(s) %s",
3379 utils.CommaJoin(inst_nodes_offline))
3380 # ... or ghost/non-vm_capable nodes
3381 for node in inst_config.all_nodes:
3382 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3383 instance, "instance lives on ghost node %s", node)
3384 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3385 instance, "instance lives on non-vm_capable node %s", node)
3387 feedback_fn("* Verifying orphan volumes")
3388 reserved = utils.FieldSet(*cluster.reserved_lvs)
3390 # We will get spurious "unknown volume" warnings if any node of this group
3391 # is secondary for an instance whose primary is in another group. To avoid
3392 # them, we find these instances and add their volumes to node_vol_should.
3393 for inst in self.all_inst_info.values():
3394 for secondary in inst.secondary_nodes:
3395 if (secondary in self.my_node_info
3396 and inst.name not in self.my_inst_info):
3397 inst.MapLVsByNode(node_vol_should)
3400 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3402 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3403 feedback_fn("* Verifying N+1 Memory redundancy")
3404 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3406 feedback_fn("* Other Notes")
3408 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3409 % len(i_non_redundant))
3411 if i_non_a_balanced:
3412 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3413 % len(i_non_a_balanced))
3416 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3419 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3422 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3426 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3427 """Analyze the post-hooks' result
3429 This method analyses the hook result, handles it, and sends some
3430 nicely-formatted feedback back to the user.
3432 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3433 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3434 @param hooks_results: the results of the multi-node hooks rpc call
3435     @param feedback_fn: function used to send feedback back to the caller
3436 @param lu_result: previous Exec result
3437 @return: the new Exec result, based on the previous result
3441 # We only really run POST phase hooks, only for non-empty groups,
3442 # and are only interested in their results
3443 if not self.my_node_names:
3446 elif phase == constants.HOOKS_PHASE_POST:
3447 # Used to change hooks' output to proper indentation
3448 feedback_fn("* Hooks Results")
3449 assert hooks_results, "invalid result from hooks"
3451 for node_name in hooks_results:
3452 res = hooks_results[node_name]
3454 test = msg and not res.offline
3455 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3456 "Communication failure in hooks execution: %s", msg)
3457 if res.offline or msg:
3458           # No need to investigate payload if node is offline or gave an error.
3461 for script, hkr, output in res.payload:
3462 test = hkr == constants.HKR_FAIL
3463 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3464 "Script %s failed, output:", script)
3466 output = self._HOOKS_INDENT_RE.sub(" ", output)
3467 feedback_fn("%s" % output)
3473 class LUClusterVerifyDisks(NoHooksLU):
3474 """Verifies the cluster disks status.
3479 def ExpandNames(self):
3480 self.share_locks = _ShareAll()
3481 self.needed_locks = {
3482 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3485 def Exec(self, feedback_fn):
3486 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3488 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3489 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3490 for group in group_names])
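  # Illustrative sketch (group names invented): for node groups "default" and
  # "storage" the Exec() above returns ResultWithJobs with one single-opcode
  # job per group:
  #   [[OpGroupVerifyDisks(group_name="default")],
  #    [OpGroupVerifyDisks(group_name="storage")]]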
3493 class LUGroupVerifyDisks(NoHooksLU):
3494 """Verifies the status of all disks in a node group.
3499 def ExpandNames(self):
3500 # Raises errors.OpPrereqError on its own if group can't be found
3501 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3503 self.share_locks = _ShareAll()
3504 self.needed_locks = {
3505 locking.LEVEL_INSTANCE: [],
3506 locking.LEVEL_NODEGROUP: [],
3507 locking.LEVEL_NODE: [],
3510 def DeclareLocks(self, level):
3511 if level == locking.LEVEL_INSTANCE:
3512 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3514 # Lock instances optimistically, needs verification once node and group
3515 # locks have been acquired
3516 self.needed_locks[locking.LEVEL_INSTANCE] = \
3517 self.cfg.GetNodeGroupInstances(self.group_uuid)
3519 elif level == locking.LEVEL_NODEGROUP:
3520 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3522 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3523 set([self.group_uuid] +
3524 # Lock all groups used by instances optimistically; this requires
3525 # going via the node before it's locked, requiring verification
3528 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3529 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3531 elif level == locking.LEVEL_NODE:
3532       # This will only lock the nodes in the group to be verified which contain actual instances
3534 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3535 self._LockInstancesNodes()
3537 # Lock all nodes in group to be verified
3538 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3539 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3540 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3542 def CheckPrereq(self):
3543 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3544 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3545 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3547 assert self.group_uuid in owned_groups
3549 # Check if locked instances are still correct
3550 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3552 # Get instance information
3553 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3555 # Check if node groups for locked instances are still correct
3556 _CheckInstancesNodeGroups(self.cfg, self.instances,
3557 owned_groups, owned_nodes, self.group_uuid)
3559 def Exec(self, feedback_fn):
3560 """Verify integrity of cluster disks.
3562 @rtype: tuple of three items
3563 @return: a tuple of (dict of node-to-node_error, list of instances
3564 which need activate-disks, dict of instance: (node, volume) for
3569 res_instances = set()
3572 nv_dict = _MapInstanceDisksToNodes([inst
3573 for inst in self.instances.values()
3574 if inst.admin_state == constants.ADMINST_UP])
3577 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3578 set(self.cfg.GetVmCapableNodeList()))
3580 node_lvs = self.rpc.call_lv_list(nodes, [])
3582 for (node, node_res) in node_lvs.items():
3583 if node_res.offline:
3586 msg = node_res.fail_msg
3588 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3589 res_nodes[node] = msg
3592 for lv_name, (_, _, lv_online) in node_res.payload.items():
3593 inst = nv_dict.pop((node, lv_name), None)
3594 if not (lv_online or inst is None):
3595 res_instances.add(inst)
3597     # any leftover items in nv_dict are missing LVs, let's arrange the data better
3599 for key, inst in nv_dict.iteritems():
3600 res_missing.setdefault(inst, []).append(list(key))
3602 return (res_nodes, list(res_instances), res_missing)
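  # Illustrative sketch of the return value above (all names invented):
  #   ({"node3": "<RPC error message>"},            # node -> node_error
  #    ["inst2"],                                   # need activate-disks
  #    {"inst5": [["node1", "xenvg/<lv name>"]]})   # missing volumes
  # matching the (res_nodes, res_instances, res_missing) tuple described in
  # the docstring.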
3605 class LUClusterRepairDiskSizes(NoHooksLU):
3606 """Verifies the cluster disks sizes.
3611 def ExpandNames(self):
3612 if self.op.instances:
3613 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3614 self.needed_locks = {
3615 locking.LEVEL_NODE_RES: [],
3616 locking.LEVEL_INSTANCE: self.wanted_names,
3618 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3620 self.wanted_names = None
3621 self.needed_locks = {
3622 locking.LEVEL_NODE_RES: locking.ALL_SET,
3623 locking.LEVEL_INSTANCE: locking.ALL_SET,
3625 self.share_locks = {
3626 locking.LEVEL_NODE_RES: 1,
3627 locking.LEVEL_INSTANCE: 0,
3630 def DeclareLocks(self, level):
3631 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3632 self._LockInstancesNodes(primary_only=True, level=level)
3634 def CheckPrereq(self):
3635 """Check prerequisites.
3637 This only checks the optional instance list against the existing names.
3640 if self.wanted_names is None:
3641 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3643 self.wanted_instances = \
3644 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3646 def _EnsureChildSizes(self, disk):
3647 """Ensure children of the disk have the needed disk size.
3649 This is valid mainly for DRBD8 and fixes an issue where the
3650 children have smaller disk size.
3652 @param disk: an L{ganeti.objects.Disk} object
3655 if disk.dev_type == constants.LD_DRBD8:
3656 assert disk.children, "Empty children for DRBD8?"
3657 fchild = disk.children[0]
3658 mismatch = fchild.size < disk.size
3660 self.LogInfo("Child disk has size %d, parent %d, fixing",
3661 fchild.size, disk.size)
3662 fchild.size = disk.size
3664 # and we recurse on this child only, not on the metadev
3665 return self._EnsureChildSizes(fchild) or mismatch
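  # Illustrative sketch for the fix above (sizes invented): a 10240 MiB DRBD8
  # disk whose data child LV is recorded with size 10236 MiB gets the child
  # bumped to 10240 and the method returns True, so Exec() below knows the
  # configuration must be updated; the metadata child is deliberately not
  # touched.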
3669 def Exec(self, feedback_fn):
3670 """Verify the size of cluster disks.
3673 # TODO: check child disks too
3674 # TODO: check differences in size between primary/secondary nodes
3676 for instance in self.wanted_instances:
3677 pnode = instance.primary_node
3678 if pnode not in per_node_disks:
3679 per_node_disks[pnode] = []
3680 for idx, disk in enumerate(instance.disks):
3681 per_node_disks[pnode].append((instance, idx, disk))
3683 assert not (frozenset(per_node_disks.keys()) -
3684 self.owned_locks(locking.LEVEL_NODE_RES)), \
3685 "Not owning correct locks"
3686 assert not self.owned_locks(locking.LEVEL_NODE)
3689 for node, dskl in per_node_disks.items():
3690 newl = [v[2].Copy() for v in dskl]
3692 self.cfg.SetDiskID(dsk, node)
3693 result = self.rpc.call_blockdev_getsize(node, newl)
3695 self.LogWarning("Failure in blockdev_getsize call to node"
3696 " %s, ignoring", node)
3698 if len(result.payload) != len(dskl):
3699         logging.warning("Invalid result from node %s: len(dskl)=%d,"
3700 " result.payload=%s", node, len(dskl), result.payload)
3701 self.LogWarning("Invalid result from node %s, ignoring node results",
3704 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3706 self.LogWarning("Disk %d of instance %s did not return size"
3707 " information, ignoring", idx, instance.name)
3709 if not isinstance(size, (int, long)):
3710 self.LogWarning("Disk %d of instance %s did not return valid"
3711 " size information, ignoring", idx, instance.name)
3714 if size != disk.size:
3715 self.LogInfo("Disk %d of instance %s has mismatched size,"
3716 " correcting: recorded %d, actual %d", idx,
3717 instance.name, disk.size, size)
3719 self.cfg.Update(instance, feedback_fn)
3720 changed.append((instance.name, idx, size))
3721 if self._EnsureChildSizes(disk):
3722 self.cfg.Update(instance, feedback_fn)
3723 changed.append((instance.name, idx, disk.size))
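# Illustrative only: entries appended to 'changed' above are
# (instance_name, disk_index, new_size) tuples, e.g. (hypothetical values)
#   ("instance1.example.com", 0, 10240)
# with sizes expressed in the same unit as disk.size (MiB).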
3727 class LUClusterRename(LogicalUnit):
3728 """Rename the cluster.
3731 HPATH = "cluster-rename"
3732 HTYPE = constants.HTYPE_CLUSTER
3734 def BuildHooksEnv(self):
3739 "OP_TARGET": self.cfg.GetClusterName(),
3740 "NEW_NAME": self.op.name,
3743 def BuildHooksNodes(self):
3744 """Build hooks nodes.
3747 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3749 def CheckPrereq(self):
3750 """Verify that the passed name is a valid one.
3753 hostname = netutils.GetHostname(name=self.op.name,
3754 family=self.cfg.GetPrimaryIPFamily())
3756 new_name = hostname.name
3757 self.ip = new_ip = hostname.ip
3758 old_name = self.cfg.GetClusterName()
3759 old_ip = self.cfg.GetMasterIP()
3760 if new_name == old_name and new_ip == old_ip:
3761 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3762 " cluster has changed",
3764 if new_ip != old_ip:
3765 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3766 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3767 " reachable on the network" %
3768 new_ip, errors.ECODE_NOTUNIQUE)
3770 self.op.name = new_name
3772 def Exec(self, feedback_fn):
3773 """Rename the cluster.
3776 clustername = self.op.name
3779 # shutdown the master IP
3780 master_params = self.cfg.GetMasterNetworkParameters()
3781 ems = self.cfg.GetUseExternalMipScript()
3782 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3784 result.Raise("Could not disable the master role")
3787 cluster = self.cfg.GetClusterInfo()
3788 cluster.cluster_name = clustername
3789 cluster.master_ip = new_ip
3790 self.cfg.Update(cluster, feedback_fn)
3792 # update the known hosts file
3793 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3794 node_list = self.cfg.GetOnlineNodeList()
3796 node_list.remove(master_params.name)
3799 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3801 master_params.ip = new_ip
3802 result = self.rpc.call_node_activate_master_ip(master_params.name,
3804 msg = result.fail_msg
3806 self.LogWarning("Could not re-enable the master role on"
3807 " the master, please restart manually: %s", msg)
3812 def _ValidateNetmask(cfg, netmask):
3813 """Checks if a netmask is valid.
3815 @type cfg: L{config.ConfigWriter}
3816 @param cfg: The cluster configuration
3818 @param netmask: the netmask to be verified
3819 @raise errors.OpPrereqError: if the validation fails
3822 ip_family = cfg.GetPrimaryIPFamily()
3824 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3825 except errors.ProgrammerError:
3826 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3828 if not ipcls.ValidateNetmask(netmask):
3829 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
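# Usage sketch (assumes an IPv4 cluster): the netmask argument is the CIDR
# prefix length as an integer, e.g.
#   _ValidateNetmask(self.cfg, 24)   # accepted on an IPv4 cluster
#   _ValidateNetmask(self.cfg, 64)   # rejected with OpPrereqError
# The exact range check is delegated to the address class returned by
# netutils.IPAddress.GetClassFromIpFamily for the cluster's primary IP family.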
3833 class LUClusterSetParams(LogicalUnit):
3834 """Change the parameters of the cluster.
3837 HPATH = "cluster-modify"
3838 HTYPE = constants.HTYPE_CLUSTER
3841 def CheckArguments(self):
3845 if self.op.uid_pool:
3846 uidpool.CheckUidPool(self.op.uid_pool)
3848 if self.op.add_uids:
3849 uidpool.CheckUidPool(self.op.add_uids)
3851 if self.op.remove_uids:
3852 uidpool.CheckUidPool(self.op.remove_uids)
3854 if self.op.master_netmask is not None:
3855 _ValidateNetmask(self.cfg, self.op.master_netmask)
3857 if self.op.diskparams:
3858 for dt_params in self.op.diskparams.values():
3859 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3861 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3862 except errors.OpPrereqError, err:
3863 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3866 def ExpandNames(self):
3867 # FIXME: in the future maybe other cluster params won't require checking on
3868 # all nodes to be modified.
3869 self.needed_locks = {
3870 locking.LEVEL_NODE: locking.ALL_SET,
3871 locking.LEVEL_INSTANCE: locking.ALL_SET,
3872 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3874 self.share_locks = {
3875 locking.LEVEL_NODE: 1,
3876 locking.LEVEL_INSTANCE: 1,
3877 locking.LEVEL_NODEGROUP: 1,
3880 def BuildHooksEnv(self):
3885 "OP_TARGET": self.cfg.GetClusterName(),
3886 "NEW_VG_NAME": self.op.vg_name,
3889 def BuildHooksNodes(self):
3890 """Build hooks nodes.
3893 mn = self.cfg.GetMasterNode()
3896 def CheckPrereq(self):
3897 """Check prerequisites.
3899 This checks that the given parameters don't conflict and that
3900 the given volume group is valid.
3903 if self.op.vg_name is not None and not self.op.vg_name:
3904 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3905 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3906 " instances exist", errors.ECODE_INVAL)
3908 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3909 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3910 raise errors.OpPrereqError("Cannot disable drbd helper while"
3911 " drbd-based instances exist",
3914 node_list = self.owned_locks(locking.LEVEL_NODE)
3916 # if vg_name not None, checks given volume group on all nodes
3918 vglist = self.rpc.call_vg_list(node_list)
3919 for node in node_list:
3920 msg = vglist[node].fail_msg
3922 # ignoring down node
3923 self.LogWarning("Error while gathering data on node %s"
3924 " (ignoring node): %s", node, msg)
3926 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3928 constants.MIN_VG_SIZE)
3930 raise errors.OpPrereqError("Error on node '%s': %s" %
3931 (node, vgstatus), errors.ECODE_ENVIRON)
3933 if self.op.drbd_helper:
3934 # checks given drbd helper on all nodes
3935 helpers = self.rpc.call_drbd_helper(node_list)
3936 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3938 self.LogInfo("Not checking drbd helper on offline node %s", node)
3940 msg = helpers[node].fail_msg
3942 raise errors.OpPrereqError("Error checking drbd helper on node"
3943 " '%s': %s" % (node, msg),
3944 errors.ECODE_ENVIRON)
3945 node_helper = helpers[node].payload
3946 if node_helper != self.op.drbd_helper:
3947 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3948 (node, node_helper), errors.ECODE_ENVIRON)
3950 self.cluster = cluster = self.cfg.GetClusterInfo()
3951 # validate params changes
3952 if self.op.beparams:
3953 objects.UpgradeBeParams(self.op.beparams)
3954 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3955 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3957 if self.op.ndparams:
3958 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3959 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3961 # TODO: we need a more general way to handle resetting
3962 # cluster-level parameters to default values
3963 if self.new_ndparams["oob_program"] == "":
3964 self.new_ndparams["oob_program"] = \
3965 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3967 if self.op.hv_state:
3968 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3969 self.cluster.hv_state_static)
3970 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3971 for hv, values in new_hv_state.items())
3973 if self.op.disk_state:
3974 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3975 self.cluster.disk_state_static)
3976 self.new_disk_state = \
3977 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3978 for name, values in svalues.items()))
3979 for storage, svalues in new_disk_state.items())
3982 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3985 all_instances = self.cfg.GetAllInstancesInfo().values()
3987 for group in self.cfg.GetAllNodeGroupsInfo().values():
3988 instances = frozenset([inst for inst in all_instances
3989 if compat.any(node in group.members
3990 for node in inst.all_nodes)])
3991 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3992 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3994 new_ipolicy, instances)
3996 violations.update(new)
3999 self.LogWarning("After the ipolicy change the following instances"
4000 " violate it: %s",
4001 utils.CommaJoin(utils.NiceSort(violations)))
4003 if self.op.nicparams:
4004 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4005 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4006 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4009 # check all instances for consistency
4010 for instance in self.cfg.GetAllInstancesInfo().values():
4011 for nic_idx, nic in enumerate(instance.nics):
4012 params_copy = copy.deepcopy(nic.nicparams)
4013 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4015 # check parameter syntax
4017 objects.NIC.CheckParameterSyntax(params_filled)
4018 except errors.ConfigurationError, err:
4019 nic_errors.append("Instance %s, nic/%d: %s" %
4020 (instance.name, nic_idx, err))
4022 # if we're moving instances to routed, check that they have an ip
4023 target_mode = params_filled[constants.NIC_MODE]
4024 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4025 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4026 " address" % (instance.name, nic_idx))
4028 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4029 "\n".join(nic_errors))
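# Illustrative only: a cluster-wide NIC parameter change such as switching the
# default mode to routed arrives here as
#   self.op.nicparams = {constants.NIC_MODE: constants.NIC_MODE_ROUTED}
# and the loop above re-fills every existing NIC with the new defaults,
# rejecting any NIC that would become routed without an IP address.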
4031 # hypervisor list/parameters
4032 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4033 if self.op.hvparams:
4034 for hv_name, hv_dict in self.op.hvparams.items():
4035 if hv_name not in self.new_hvparams:
4036 self.new_hvparams[hv_name] = hv_dict
4038 self.new_hvparams[hv_name].update(hv_dict)
4040 # disk template parameters
4041 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4042 if self.op.diskparams:
4043 for dt_name, dt_params in self.op.diskparams.items():
4044 if dt_name not in self.new_diskparams:
4045 self.new_diskparams[dt_name] = dt_params
4047 self.new_diskparams[dt_name].update(dt_params)
4049 # os hypervisor parameters
4050 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4052 for os_name, hvs in self.op.os_hvp.items():
4053 if os_name not in self.new_os_hvp:
4054 self.new_os_hvp[os_name] = hvs
4056 for hv_name, hv_dict in hvs.items():
4057 if hv_name not in self.new_os_hvp[os_name]:
4058 self.new_os_hvp[os_name][hv_name] = hv_dict
4060 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4063 self.new_osp = objects.FillDict(cluster.osparams, {})
4064 if self.op.osparams:
4065 for os_name, osp in self.op.osparams.items():
4066 if os_name not in self.new_osp:
4067 self.new_osp[os_name] = {}
4069 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4072 if not self.new_osp[os_name]:
4073 # we removed all parameters
4074 del self.new_osp[os_name]
4076 # check the parameter validity (remote check)
4077 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4078 os_name, self.new_osp[os_name])
4080 # changes to the hypervisor list
4081 if self.op.enabled_hypervisors is not None:
4082 self.hv_list = self.op.enabled_hypervisors
4083 for hv in self.hv_list:
4084 # if the hypervisor doesn't already exist in the cluster
4085 # hvparams, we initialize it to empty, and then (in both
4086 # cases) we make sure to fill the defaults, as we might not
4087 # have a complete defaults list if the hypervisor wasn't
4089 if hv not in new_hvp:
4091 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4092 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4094 self.hv_list = cluster.enabled_hypervisors
4096 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4097 # either the enabled list has changed, or the parameters have, validate
4098 for hv_name, hv_params in self.new_hvparams.items():
4099 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4100 (self.op.enabled_hypervisors and
4101 hv_name in self.op.enabled_hypervisors)):
4102 # either this is a new hypervisor, or its parameters have changed
4103 hv_class = hypervisor.GetHypervisor(hv_name)
4104 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4105 hv_class.CheckParameterSyntax(hv_params)
4106 _CheckHVParams(self, node_list, hv_name, hv_params)
4109 # no need to check any newly-enabled hypervisors, since the
4110 # defaults have already been checked in the above code-block
4111 for os_name, os_hvp in self.new_os_hvp.items():
4112 for hv_name, hv_params in os_hvp.items():
4113 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4114 # we need to fill in the new os_hvp on top of the actual hv_params
4115 cluster_defaults = self.new_hvparams.get(hv_name, {})
4116 new_osp = objects.FillDict(cluster_defaults, hv_params)
4117 hv_class = hypervisor.GetHypervisor(hv_name)
4118 hv_class.CheckParameterSyntax(new_osp)
4119 _CheckHVParams(self, node_list, hv_name, new_osp)
4121 if self.op.default_iallocator:
4122 alloc_script = utils.FindFile(self.op.default_iallocator,
4123 constants.IALLOCATOR_SEARCH_PATH,
4125 if alloc_script is None:
4126 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4127 " specified" % self.op.default_iallocator,
4130 def Exec(self, feedback_fn):
4131 """Change the parameters of the cluster.
4134 if self.op.vg_name is not None:
4135 new_volume = self.op.vg_name
4138 if new_volume != self.cfg.GetVGName():
4139 self.cfg.SetVGName(new_volume)
4141 feedback_fn("Cluster LVM configuration already in desired"
4142 " state, not changing")
4143 if self.op.drbd_helper is not None:
4144 new_helper = self.op.drbd_helper
4147 if new_helper != self.cfg.GetDRBDHelper():
4148 self.cfg.SetDRBDHelper(new_helper)
4150 feedback_fn("Cluster DRBD helper already in desired state,"
4152 if self.op.hvparams:
4153 self.cluster.hvparams = self.new_hvparams
4155 self.cluster.os_hvp = self.new_os_hvp
4156 if self.op.enabled_hypervisors is not None:
4157 self.cluster.hvparams = self.new_hvparams
4158 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4159 if self.op.beparams:
4160 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4161 if self.op.nicparams:
4162 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4164 self.cluster.ipolicy = self.new_ipolicy
4165 if self.op.osparams:
4166 self.cluster.osparams = self.new_osp
4167 if self.op.ndparams:
4168 self.cluster.ndparams = self.new_ndparams
4169 if self.op.diskparams:
4170 self.cluster.diskparams = self.new_diskparams
4171 if self.op.hv_state:
4172 self.cluster.hv_state_static = self.new_hv_state
4173 if self.op.disk_state:
4174 self.cluster.disk_state_static = self.new_disk_state
4176 if self.op.candidate_pool_size is not None:
4177 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4178 # we need to update the pool size here, otherwise the save will fail
4179 _AdjustCandidatePool(self, [])
4181 if self.op.maintain_node_health is not None:
4182 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4183 feedback_fn("Note: CONFD was disabled at build time, node health"
4184 " maintenance is not useful (still enabling it)")
4185 self.cluster.maintain_node_health = self.op.maintain_node_health
4187 if self.op.prealloc_wipe_disks is not None:
4188 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4190 if self.op.add_uids is not None:
4191 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4193 if self.op.remove_uids is not None:
4194 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4196 if self.op.uid_pool is not None:
4197 self.cluster.uid_pool = self.op.uid_pool
4199 if self.op.default_iallocator is not None:
4200 self.cluster.default_iallocator = self.op.default_iallocator
4202 if self.op.reserved_lvs is not None:
4203 self.cluster.reserved_lvs = self.op.reserved_lvs
4205 if self.op.use_external_mip_script is not None:
4206 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4208 def helper_os(aname, mods, desc):
4210 lst = getattr(self.cluster, aname)
4211 for key, val in mods:
4212 if key == constants.DDM_ADD:
4214 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4217 elif key == constants.DDM_REMOVE:
4221 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4223 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4225 if self.op.hidden_os:
4226 helper_os("hidden_os", self.op.hidden_os, "hidden")
4228 if self.op.blacklisted_os:
4229 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
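# Illustrative only (hypothetical OS names): the hidden_os/blacklisted_os
# opcode parameters are lists of (operation, os_name) pairs, e.g.
#   self.op.hidden_os = [(constants.DDM_ADD, "debootstrap+secure"),
#                        (constants.DDM_REMOVE, "old-image")]
# helper_os() applies each pair to the corresponding cluster list, reporting
# duplicates or missing entries via feedback_fn instead of failing.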
4231 if self.op.master_netdev:
4232 master_params = self.cfg.GetMasterNetworkParameters()
4233 ems = self.cfg.GetUseExternalMipScript()
4234 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4235 self.cluster.master_netdev)
4236 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4238 result.Raise("Could not disable the master ip")
4239 feedback_fn("Changing master_netdev from %s to %s" %
4240 (master_params.netdev, self.op.master_netdev))
4241 self.cluster.master_netdev = self.op.master_netdev
4243 if self.op.master_netmask:
4244 master_params = self.cfg.GetMasterNetworkParameters()
4245 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4246 result = self.rpc.call_node_change_master_netmask(master_params.name,
4247 master_params.netmask,
4248 self.op.master_netmask,
4250 master_params.netdev)
4252 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4255 self.cluster.master_netmask = self.op.master_netmask
4257 self.cfg.Update(self.cluster, feedback_fn)
4259 if self.op.master_netdev:
4260 master_params = self.cfg.GetMasterNetworkParameters()
4261 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4262 self.op.master_netdev)
4263 ems = self.cfg.GetUseExternalMipScript()
4264 result = self.rpc.call_node_activate_master_ip(master_params.name,
4267 self.LogWarning("Could not re-enable the master ip on"
4268 " the master, please restart manually: %s",
4272 def _UploadHelper(lu, nodes, fname):
4273 """Helper for uploading a file and showing warnings.
4276 if os.path.exists(fname):
4277 result = lu.rpc.call_upload_file(nodes, fname)
4278 for to_node, to_result in result.items():
4279 msg = to_result.fail_msg
4281 msg = ("Copy of file %s to node %s failed: %s" %
4282 (fname, to_node, msg))
4283 lu.proc.LogWarning(msg)
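# Usage sketch: callers pass a list of node names and an absolute path that is
# expected to exist on the master, for example (as done in LUClusterRename):
#   _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
# A file missing on the master is silently skipped (os.path.exists above) and
# per-node copy failures are only warned about, never fatal.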
4286 def _ComputeAncillaryFiles(cluster, redist):
4287 """Compute files external to Ganeti which need to be consistent.
4289 @type redist: boolean
4290 @param redist: Whether to include files which need to be redistributed
4293 # Compute files for all nodes
4295 constants.SSH_KNOWN_HOSTS_FILE,
4296 constants.CONFD_HMAC_KEY,
4297 constants.CLUSTER_DOMAIN_SECRET_FILE,
4298 constants.SPICE_CERT_FILE,
4299 constants.SPICE_CACERT_FILE,
4300 constants.RAPI_USERS_FILE,
4304 files_all.update(constants.ALL_CERT_FILES)
4305 files_all.update(ssconf.SimpleStore().GetFileList())
4307 # we need to ship at least the RAPI certificate
4308 files_all.add(constants.RAPI_CERT_FILE)
4310 if cluster.modify_etc_hosts:
4311 files_all.add(constants.ETC_HOSTS)
4313 if cluster.use_external_mip_script:
4314 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4316 # Files which are optional, these must:
4317 # - be present in one other category as well
4318 # - either exist or not exist on all nodes of that category (mc, vm all)
4320 constants.RAPI_USERS_FILE,
4323 # Files which should only be on master candidates
4327 files_mc.add(constants.CLUSTER_CONF_FILE)
4329 # Files which should only be on VM-capable nodes
4330 files_vm = set(filename
4331 for hv_name in cluster.enabled_hypervisors
4332 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4334 files_opt |= set(filename
4335 for hv_name in cluster.enabled_hypervisors
4336 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4338 # Filenames in each category must be unique
4339 all_files_set = files_all | files_mc | files_vm
4340 assert (len(all_files_set) ==
4341 sum(map(len, [files_all, files_mc, files_vm]))), \
4342 "Found file listed in more than one file list"
4344 # Optional files must be present in one other category
4345 assert all_files_set.issuperset(files_opt), \
4346 "Optional file not in a different required list"
4348 return (files_all, files_opt, files_mc, files_vm)
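# Sketch of how the returned tuple is consumed (see _RedistributeAncillaryFiles
# below):
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
#   # files_all -> every node, files_mc -> master candidates only,
#   # files_vm -> vm_capable nodes only; files_opt lists files that may be
#   # absent but must still belong to one of the other categories.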
4351 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4352 """Distribute additional files which are part of the cluster configuration.
4354 ConfigWriter takes care of distributing the config and ssconf files, but
4355 there are more files which should be distributed to all nodes. This function
4356 makes sure those are copied.
4358 @param lu: calling logical unit
4359 @param additional_nodes: list of nodes not in the config to distribute to
4360 @type additional_vm: boolean
4361 @param additional_vm: whether the additional nodes are vm-capable or not
4364 # Gather target nodes
4365 cluster = lu.cfg.GetClusterInfo()
4366 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4368 online_nodes = lu.cfg.GetOnlineNodeList()
4369 online_set = frozenset(online_nodes)
4370 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4372 if additional_nodes is not None:
4373 online_nodes.extend(additional_nodes)
4375 vm_nodes.extend(additional_nodes)
4377 # Never distribute to master node
4378 for nodelist in [online_nodes, vm_nodes]:
4379 if master_info.name in nodelist:
4380 nodelist.remove(master_info.name)
4383 (files_all, _, files_mc, files_vm) = \
4384 _ComputeAncillaryFiles(cluster, True)
4386 # Never re-distribute configuration file from here
4387 assert not (constants.CLUSTER_CONF_FILE in files_all or
4388 constants.CLUSTER_CONF_FILE in files_vm)
4389 assert not files_mc, "Master candidates not handled in this function"
4392 (online_nodes, files_all),
4393 (vm_nodes, files_vm),
4397 for (node_list, files) in filemap:
4399 _UploadHelper(lu, node_list, fname)
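# Usage sketch: the parameterless form redistributes to all known online nodes
# (as used by LUClusterRedistConf below); a hypothetical call for a node that
# is not yet in the configuration would pass it explicitly:
#   _RedistributeAncillaryFiles(self, additional_nodes=[new_node.name],
#                               additional_vm=new_node.vm_capable)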
4402 class LUClusterRedistConf(NoHooksLU):
4403 """Force the redistribution of cluster configuration.
4405 This is a very simple LU.
4410 def ExpandNames(self):
4411 self.needed_locks = {
4412 locking.LEVEL_NODE: locking.ALL_SET,
4414 self.share_locks[locking.LEVEL_NODE] = 1
4416 def Exec(self, feedback_fn):
4417 """Redistribute the configuration.
4420 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4421 _RedistributeAncillaryFiles(self)
4424 class LUClusterActivateMasterIp(NoHooksLU):
4425 """Activate the master IP on the master node.
4428 def Exec(self, feedback_fn):
4429 """Activate the master IP.
4432 master_params = self.cfg.GetMasterNetworkParameters()
4433 ems = self.cfg.GetUseExternalMipScript()
4434 result = self.rpc.call_node_activate_master_ip(master_params.name,
4436 result.Raise("Could not activate the master IP")
4439 class LUClusterDeactivateMasterIp(NoHooksLU):
4440 """Deactivate the master IP on the master node.
4443 def Exec(self, feedback_fn):
4444 """Deactivate the master IP.
4447 master_params = self.cfg.GetMasterNetworkParameters()
4448 ems = self.cfg.GetUseExternalMipScript()
4449 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4451 result.Raise("Could not deactivate the master IP")
4454 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4455 """Sleep and poll for an instance's disks to sync.
4458 if not instance.disks or disks is not None and not disks:
4461 disks = _ExpandCheckDisks(instance, disks)
4464 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4466 node = instance.primary_node
4469 lu.cfg.SetDiskID(dev, node)
4471 # TODO: Convert to utils.Retry
4474 degr_retries = 10 # in seconds, as we sleep 1 second each time
4478 cumul_degraded = False
4479 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4480 msg = rstats.fail_msg
4482 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4485 raise errors.RemoteError("Can't contact node %s for mirror data,"
4486 " aborting." % node)
4489 rstats = rstats.payload
4491 for i, mstat in enumerate(rstats):
4493 lu.LogWarning("Can't compute data for node %s/%s",
4494 node, disks[i].iv_name)
4497 cumul_degraded = (cumul_degraded or
4498 (mstat.is_degraded and mstat.sync_percent is None))
4499 if mstat.sync_percent is not None:
4501 if mstat.estimated_time is not None:
4502 rem_time = ("%s remaining (estimated)" %
4503 utils.FormatSeconds(mstat.estimated_time))
4504 max_time = mstat.estimated_time
4506 rem_time = "no time estimate"
4507 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4508 (disks[i].iv_name, mstat.sync_percent, rem_time))
4510 # if we're done but degraded, let's do a few small retries, to
4511 # make sure we see a stable and not transient situation; therefore
4512 # we force restart of the loop
4513 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4514 logging.info("Degraded disks found, %d retries left", degr_retries)
4522 time.sleep(min(60, max_time))
4525 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4526 return not cumul_degraded
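# Usage sketch: callers either block until the disks are in sync or request a
# single status pass, e.g.
#   _WaitForSync(self, instance)                # wait for full sync
#   _WaitForSync(self, instance, oneshot=True)  # one status report only
# The return value is True when no disk is left degraded.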
4529 def _BlockdevFind(lu, node, dev, instance):
4530 """Wrapper around call_blockdev_find to annotate diskparams.
4532 @param lu: A reference to the lu object
4533 @param node: The node to call out to
4534 @param dev: The device to find
4535 @param instance: The instance object the device belongs to
4536 @return: The result of the rpc call
4539 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4540 return lu.rpc.call_blockdev_find(node, disk)
4543 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4544 """Wrapper around L{_CheckDiskConsistencyInner}.
4547 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4548 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4552 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4554 """Check that mirrors are not degraded.
4556 @attention: The device has to be annotated already.
4558 The ldisk parameter, if True, will change the test from the
4559 is_degraded attribute (which represents overall non-ok status for
4560 the device(s)) to the ldisk (representing the local storage status).
4563 lu.cfg.SetDiskID(dev, node)
4567 if on_primary or dev.AssembleOnSecondary():
4568 rstats = lu.rpc.call_blockdev_find(node, dev)
4569 msg = rstats.fail_msg
4571 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4573 elif not rstats.payload:
4574 lu.LogWarning("Can't find disk on node %s", node)
4578 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4580 result = result and not rstats.payload.is_degraded
4583 for child in dev.children:
4584 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
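# Illustrative only: the two entry points differ in whether the disk object has
# already been annotated with disk parameters:
#   _CheckDiskConsistency(lu, instance, dev, node, on_primary)      # annotates
#   _CheckDiskConsistencyInner(lu, instance, anno_dev, node,
#                              on_primary, ldisk=True)              # pre-annotated
# With ldisk=True only the local storage status (LDS_OKAY) is considered
# instead of the overall is_degraded flag.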
4590 class LUOobCommand(NoHooksLU):
4591 """Logical unit for OOB handling.
4595 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4597 def ExpandNames(self):
4598 """Gather locks we need.
4601 if self.op.node_names:
4602 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4603 lock_names = self.op.node_names
4605 lock_names = locking.ALL_SET
4607 self.needed_locks = {
4608 locking.LEVEL_NODE: lock_names,
4611 def CheckPrereq(self):
4612 """Check prerequisites.
4615 - the node exists in the configuration
4618 Any errors are signaled by raising errors.OpPrereqError.
4622 self.master_node = self.cfg.GetMasterNode()
4624 assert self.op.power_delay >= 0.0
4626 if self.op.node_names:
4627 if (self.op.command in self._SKIP_MASTER and
4628 self.master_node in self.op.node_names):
4629 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4630 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4632 if master_oob_handler:
4633 additional_text = ("run '%s %s %s' if you want to operate on the"
4634 " master regardless") % (master_oob_handler,
4638 additional_text = "it does not support out-of-band operations"
4640 raise errors.OpPrereqError(("Operating on the master node %s is not"
4641 " allowed for %s; %s") %
4642 (self.master_node, self.op.command,
4643 additional_text), errors.ECODE_INVAL)
4645 self.op.node_names = self.cfg.GetNodeList()
4646 if self.op.command in self._SKIP_MASTER:
4647 self.op.node_names.remove(self.master_node)
4649 if self.op.command in self._SKIP_MASTER:
4650 assert self.master_node not in self.op.node_names
4652 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4654 raise errors.OpPrereqError("Node %s not found" % node_name,
4657 self.nodes.append(node)
4659 if (not self.op.ignore_status and
4660 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4661 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4662 " not marked offline") % node_name,
4665 def Exec(self, feedback_fn):
4666 """Execute OOB and return result if we expect any.
4669 master_node = self.master_node
4672 for idx, node in enumerate(utils.NiceSort(self.nodes,
4673 key=lambda node: node.name)):
4674 node_entry = [(constants.RS_NORMAL, node.name)]
4675 ret.append(node_entry)
4677 oob_program = _SupportsOob(self.cfg, node)
4680 node_entry.append((constants.RS_UNAVAIL, None))
4683 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4684 self.op.command, oob_program, node.name)
4685 result = self.rpc.call_run_oob(master_node, oob_program,
4686 self.op.command, node.name,
4690 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4691 node.name, result.fail_msg)
4692 node_entry.append((constants.RS_NODATA, None))
4695 self._CheckPayload(result)
4696 except errors.OpExecError, err:
4697 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4699 node_entry.append((constants.RS_NODATA, None))
4701 if self.op.command == constants.OOB_HEALTH:
4702 # For health we should log important events
4703 for item, status in result.payload:
4704 if status in [constants.OOB_STATUS_WARNING,
4705 constants.OOB_STATUS_CRITICAL]:
4706 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4707 item, node.name, status)
4709 if self.op.command == constants.OOB_POWER_ON:
4711 elif self.op.command == constants.OOB_POWER_OFF:
4712 node.powered = False
4713 elif self.op.command == constants.OOB_POWER_STATUS:
4714 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4715 if powered != node.powered:
4716 logging.warning(("Recorded power state (%s) of node '%s' does not"
4717 " match actual power state (%s)"), node.powered,
4720 # For configuration changing commands we should update the node
4721 if self.op.command in (constants.OOB_POWER_ON,
4722 constants.OOB_POWER_OFF):
4723 self.cfg.Update(node, feedback_fn)
4725 node_entry.append((constants.RS_NORMAL, result.payload))
4727 if (self.op.command == constants.OOB_POWER_ON and
4728 idx < len(self.nodes) - 1):
4729 time.sleep(self.op.power_delay)
4733 def _CheckPayload(self, result):
4734 """Checks if the payload is valid.
4736 @param result: RPC result
4737 @raises errors.OpExecError: If payload is not valid
4741 if self.op.command == constants.OOB_HEALTH:
4742 if not isinstance(result.payload, list):
4743 errs.append("command 'health' is expected to return a list but got %s" %
4744 type(result.payload))
4746 for item, status in result.payload:
4747 if status not in constants.OOB_STATUSES:
4748 errs.append("health item '%s' has invalid status '%s'" %
4751 if self.op.command == constants.OOB_POWER_STATUS:
4752 if not isinstance(result.payload, dict):
4753 errs.append("power-status is expected to return a dict but got %s" %
4754 type(result.payload))
4756 if self.op.command in [
4757 constants.OOB_POWER_ON,
4758 constants.OOB_POWER_OFF,
4759 constants.OOB_POWER_CYCLE,
4761 if result.payload is not None:
4762 errs.append("%s is expected to not return payload but got '%s'" %
4763 (self.op.command, result.payload))
4766 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4767 utils.CommaJoin(errs))
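# Sketch of the payload shapes accepted by _CheckPayload (values hypothetical):
#   OOB_HEALTH        -> a list of [item, status] pairs, e.g.
#                        [["disk0", constants.OOB_STATUS_WARNING]]
#   OOB_POWER_STATUS  -> a dict, e.g. {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -> no payload (None) expected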
4770 class _OsQuery(_QueryBase):
4771 FIELDS = query.OS_FIELDS
4773 def ExpandNames(self, lu):
4774 # Lock all nodes in shared mode
4775 # Temporary removal of locks, should be reverted later
4776 # TODO: reintroduce locks when they are lighter-weight
4777 lu.needed_locks = {}
4778 #self.share_locks[locking.LEVEL_NODE] = 1
4779 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4781 # The following variables interact with _QueryBase._GetNames
4783 self.wanted = self.names
4785 self.wanted = locking.ALL_SET
4787 self.do_locking = self.use_locking
4789 def DeclareLocks(self, lu, level):
4793 def _DiagnoseByOS(rlist):
4794 """Remaps a per-node return list into a per-os per-node dictionary
4796 @param rlist: a map with node names as keys and OS objects as values
4799 @return: a dictionary with osnames as keys and as value another
4800 map, with nodes as keys and tuples of (path, status, diagnose,
4801 variants, parameters, api_versions) as values, eg::
4803 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4804 (/srv/..., False, "invalid api")],
4805 "node2": [(/srv/..., True, "", [], [])]}
4810 # we build here the list of nodes that didn't fail the RPC (at RPC
4811 # level), so that nodes with a non-responding node daemon don't
4812 # make all OSes invalid
4813 good_nodes = [node_name for node_name in rlist
4814 if not rlist[node_name].fail_msg]
4815 for node_name, nr in rlist.items():
4816 if nr.fail_msg or not nr.payload:
4818 for (name, path, status, diagnose, variants,
4819 params, api_versions) in nr.payload:
4820 if name not in all_os:
4821 # build a list of nodes for this os containing empty lists
4822 # for each node in node_list
4824 for nname in good_nodes:
4825 all_os[name][nname] = []
4826 # convert params from [name, help] to (name, help)
4827 params = [tuple(v) for v in params]
4828 all_os[name][node_name].append((path, status, diagnose,
4829 variants, params, api_versions))
4832 def _GetQueryData(self, lu):
4833 """Computes the list of nodes and their attributes.
4836 # Locking is not used
4837 assert not (compat.any(lu.glm.is_owned(level)
4838 for level in locking.LEVELS
4839 if level != locking.LEVEL_CLUSTER) or
4840 self.do_locking or self.use_locking)
4842 valid_nodes = [node.name
4843 for node in lu.cfg.GetAllNodesInfo().values()
4844 if not node.offline and node.vm_capable]
4845 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4846 cluster = lu.cfg.GetClusterInfo()
4850 for (os_name, os_data) in pol.items():
4851 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4852 hidden=(os_name in cluster.hidden_os),
4853 blacklisted=(os_name in cluster.blacklisted_os))
4857 api_versions = set()
4859 for idx, osl in enumerate(os_data.values()):
4860 info.valid = bool(info.valid and osl and osl[0][1])
4864 (node_variants, node_params, node_api) = osl[0][3:6]
4867 variants.update(node_variants)
4868 parameters.update(node_params)
4869 api_versions.update(node_api)
4871 # Filter out inconsistent values
4872 variants.intersection_update(node_variants)
4873 parameters.intersection_update(node_params)
4874 api_versions.intersection_update(node_api)
4876 info.variants = list(variants)
4877 info.parameters = list(parameters)
4878 info.api_versions = list(api_versions)
4880 data[os_name] = info
4882 # Prepare data in requested order
4883 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4887 class LUOsDiagnose(NoHooksLU):
4888 """Logical unit for OS diagnose/query.
4894 def _BuildFilter(fields, names):
4895 """Builds a filter for querying OSes.
4898 name_filter = qlang.MakeSimpleFilter("name", names)
4900 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4901 # respective field is not requested
4902 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4903 for fname in ["hidden", "blacklisted"]
4904 if fname not in fields]
4905 if "valid" not in fields:
4906 status_filter.append([qlang.OP_TRUE, "valid"])
4909 status_filter.insert(0, qlang.OP_AND)
4911 status_filter = None
4913 if name_filter and status_filter:
4914 return [qlang.OP_AND, name_filter, status_filter]
4918 return status_filter
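# Illustrative example (hypothetical OS name): for fields=["name", "valid"] and
# names=["debian-image"] the generated filter is roughly
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-image"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]]]]
# i.e. hidden/blacklisted OSes are filtered out because those fields were not
# requested, while "valid" is left alone because it was requested.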
4920 def CheckArguments(self):
4921 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4922 self.op.output_fields, False)
4924 def ExpandNames(self):
4925 self.oq.ExpandNames(self)
4927 def Exec(self, feedback_fn):
4928 return self.oq.OldStyleQuery(self)
4931 class LUNodeRemove(LogicalUnit):
4932 """Logical unit for removing a node.
4935 HPATH = "node-remove"
4936 HTYPE = constants.HTYPE_NODE
4938 def BuildHooksEnv(self):
4943 "OP_TARGET": self.op.node_name,
4944 "NODE_NAME": self.op.node_name,
4947 def BuildHooksNodes(self):
4948 """Build hooks nodes.
4950 This doesn't run on the target node in the pre phase as a failed
4951 node would then be impossible to remove.
4954 all_nodes = self.cfg.GetNodeList()
4956 all_nodes.remove(self.op.node_name)
4959 return (all_nodes, all_nodes)
4961 def CheckPrereq(self):
4962 """Check prerequisites.
4965 - the node exists in the configuration
4966 - it does not have primary or secondary instances
4967 - it's not the master
4969 Any errors are signaled by raising errors.OpPrereqError.
4972 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4973 node = self.cfg.GetNodeInfo(self.op.node_name)
4974 assert node is not None
4976 masternode = self.cfg.GetMasterNode()
4977 if node.name == masternode:
4978 raise errors.OpPrereqError("Node is the master node, failover to another"
4979 " node is required", errors.ECODE_INVAL)
4981 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4982 if node.name in instance.all_nodes:
4983 raise errors.OpPrereqError("Instance %s is still running on the node,"
4984 " please remove first" % instance_name,
4986 self.op.node_name = node.name
4989 def Exec(self, feedback_fn):
4990 """Removes the node from the cluster.
4994 logging.info("Stopping the node daemon and removing configs from node %s",
4997 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4999 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5002 # Promote nodes to master candidate as needed
5003 _AdjustCandidatePool(self, exceptions=[node.name])
5004 self.context.RemoveNode(node.name)
5006 # Run post hooks on the node before it's removed
5007 _RunPostHook(self, node.name)
5009 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5010 msg = result.fail_msg
5012 self.LogWarning("Errors encountered on the remote node while leaving"
5013 " the cluster: %s", msg)
5015 # Remove node from our /etc/hosts
5016 if self.cfg.GetClusterInfo().modify_etc_hosts:
5017 master_node = self.cfg.GetMasterNode()
5018 result = self.rpc.call_etc_hosts_modify(master_node,
5019 constants.ETC_HOSTS_REMOVE,
5021 result.Raise("Can't update hosts file with new host data")
5022 _RedistributeAncillaryFiles(self)
5025 class _NodeQuery(_QueryBase):
5026 FIELDS = query.NODE_FIELDS
5028 def ExpandNames(self, lu):
5029 lu.needed_locks = {}
5030 lu.share_locks = _ShareAll()
5033 self.wanted = _GetWantedNodes(lu, self.names)
5035 self.wanted = locking.ALL_SET
5037 self.do_locking = (self.use_locking and
5038 query.NQ_LIVE in self.requested_data)
5041 # If any non-static field is requested we need to lock the nodes
5042 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5044 def DeclareLocks(self, lu, level):
5047 def _GetQueryData(self, lu):
5048 """Computes the list of nodes and their attributes.
5051 all_info = lu.cfg.GetAllNodesInfo()
5053 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5055 # Gather data as requested
5056 if query.NQ_LIVE in self.requested_data:
5057 # filter out non-vm_capable nodes
5058 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5060 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5061 [lu.cfg.GetHypervisorType()])
5062 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5063 for (name, nresult) in node_data.items()
5064 if not nresult.fail_msg and nresult.payload)
5068 if query.NQ_INST in self.requested_data:
5069 node_to_primary = dict([(name, set()) for name in nodenames])
5070 node_to_secondary = dict([(name, set()) for name in nodenames])
5072 inst_data = lu.cfg.GetAllInstancesInfo()
5074 for inst in inst_data.values():
5075 if inst.primary_node in node_to_primary:
5076 node_to_primary[inst.primary_node].add(inst.name)
5077 for secnode in inst.secondary_nodes:
5078 if secnode in node_to_secondary:
5079 node_to_secondary[secnode].add(inst.name)
5081 node_to_primary = None
5082 node_to_secondary = None
5084 if query.NQ_OOB in self.requested_data:
5085 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5086 for name, node in all_info.iteritems())
5090 if query.NQ_GROUP in self.requested_data:
5091 groups = lu.cfg.GetAllNodeGroupsInfo()
5095 return query.NodeQueryData([all_info[name] for name in nodenames],
5096 live_data, lu.cfg.GetMasterNode(),
5097 node_to_primary, node_to_secondary, groups,
5098 oob_support, lu.cfg.GetClusterInfo())
5101 class LUNodeQuery(NoHooksLU):
5102 """Logical unit for querying nodes.
5105 # pylint: disable=W0142
5108 def CheckArguments(self):
5109 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5110 self.op.output_fields, self.op.use_locking)
5112 def ExpandNames(self):
5113 self.nq.ExpandNames(self)
5115 def DeclareLocks(self, level):
5116 self.nq.DeclareLocks(self, level)
5118 def Exec(self, feedback_fn):
5119 return self.nq.OldStyleQuery(self)
5122 class LUNodeQueryvols(NoHooksLU):
5123 """Logical unit for getting volumes on node(s).
5127 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5128 _FIELDS_STATIC = utils.FieldSet("node")
5130 def CheckArguments(self):
5131 _CheckOutputFields(static=self._FIELDS_STATIC,
5132 dynamic=self._FIELDS_DYNAMIC,
5133 selected=self.op.output_fields)
5135 def ExpandNames(self):
5136 self.share_locks = _ShareAll()
5137 self.needed_locks = {}
5139 if not self.op.nodes:
5140 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5142 self.needed_locks[locking.LEVEL_NODE] = \
5143 _GetWantedNodes(self, self.op.nodes)
5145 def Exec(self, feedback_fn):
5146 """Computes the list of volumes on the nodes and their attributes.
5149 nodenames = self.owned_locks(locking.LEVEL_NODE)
5150 volumes = self.rpc.call_node_volumes(nodenames)
5152 ilist = self.cfg.GetAllInstancesInfo()
5153 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5156 for node in nodenames:
5157 nresult = volumes[node]
5160 msg = nresult.fail_msg
5162 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5165 node_vols = sorted(nresult.payload,
5166 key=operator.itemgetter("dev"))
5168 for vol in node_vols:
5170 for field in self.op.output_fields:
5173 elif field == "phys":
5177 elif field == "name":
5179 elif field == "size":
5180 val = int(float(vol["size"]))
5181 elif field == "instance":
5182 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5184 raise errors.ParameterError(field)
5185 node_output.append(str(val))
5187 output.append(node_output)
5192 class LUNodeQueryStorage(NoHooksLU):
5193 """Logical unit for getting information on storage units on node(s).
5196 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5199 def CheckArguments(self):
5200 _CheckOutputFields(static=self._FIELDS_STATIC,
5201 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5202 selected=self.op.output_fields)
5204 def ExpandNames(self):
5205 self.share_locks = _ShareAll()
5206 self.needed_locks = {}
5209 self.needed_locks[locking.LEVEL_NODE] = \
5210 _GetWantedNodes(self, self.op.nodes)
5212 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5214 def Exec(self, feedback_fn):
5215 """Computes the list of storage units on the nodes and their attributes.
5218 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5220 # Always get name to sort by
5221 if constants.SF_NAME in self.op.output_fields:
5222 fields = self.op.output_fields[:]
5224 fields = [constants.SF_NAME] + self.op.output_fields
5226 # Never ask for node or type as it's only known to the LU
5227 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5228 while extra in fields:
5229 fields.remove(extra)
5231 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5232 name_idx = field_idx[constants.SF_NAME]
5234 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5235 data = self.rpc.call_storage_list(self.nodes,
5236 self.op.storage_type, st_args,
5237 self.op.name, fields)
5241 for node in utils.NiceSort(self.nodes):
5242 nresult = data[node]
5246 msg = nresult.fail_msg
5248 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5251 rows = dict([(row[name_idx], row) for row in nresult.payload])
5253 for name in utils.NiceSort(rows.keys()):
5258 for field in self.op.output_fields:
5259 if field == constants.SF_NODE:
5261 elif field == constants.SF_TYPE:
5262 val = self.op.storage_type
5263 elif field in field_idx:
5264 val = row[field_idx[field]]
5266 raise errors.ParameterError(field)
5275 class _InstanceQuery(_QueryBase):
5276 FIELDS = query.INSTANCE_FIELDS
5278 def ExpandNames(self, lu):
5279 lu.needed_locks = {}
5280 lu.share_locks = _ShareAll()
5283 self.wanted = _GetWantedInstances(lu, self.names)
5285 self.wanted = locking.ALL_SET
5287 self.do_locking = (self.use_locking and
5288 query.IQ_LIVE in self.requested_data)
5290 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5291 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5292 lu.needed_locks[locking.LEVEL_NODE] = []
5293 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5295 self.do_grouplocks = (self.do_locking and
5296 query.IQ_NODES in self.requested_data)
5298 def DeclareLocks(self, lu, level):
5300 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5301 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5303 # Lock all groups used by instances optimistically; this requires going
5304 # via the node before it's locked, requiring verification later on
5305 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5307 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5308 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5309 elif level == locking.LEVEL_NODE:
5310 lu._LockInstancesNodes() # pylint: disable=W0212
5313 def _CheckGroupLocks(lu):
5314 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5315 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5317 # Check if node groups for locked instances are still correct
5318 for instance_name in owned_instances:
5319 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5321 def _GetQueryData(self, lu):
5322 """Computes the list of instances and their attributes.
5325 if self.do_grouplocks:
5326 self._CheckGroupLocks(lu)
5328 cluster = lu.cfg.GetClusterInfo()
5329 all_info = lu.cfg.GetAllInstancesInfo()
5331 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5333 instance_list = [all_info[name] for name in instance_names]
5334 nodes = frozenset(itertools.chain(*(inst.all_nodes
5335 for inst in instance_list)))
5336 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5339 wrongnode_inst = set()
5341 # Gather data as requested
5342 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5344 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5346 result = node_data[name]
5348 # offline nodes will be in both lists
5349 assert result.fail_msg
5350 offline_nodes.append(name)
5352 bad_nodes.append(name)
5353 elif result.payload:
5354 for inst in result.payload:
5355 if inst in all_info:
5356 if all_info[inst].primary_node == name:
5357 live_data.update(result.payload)
5359 wrongnode_inst.add(inst)
5361 # orphan instance; we don't list it here as we don't
5362 # handle this case yet in the output of instance listing
5363 logging.warning("Orphan instance '%s' found on node %s",
5365 # else no instance is alive
5369 if query.IQ_DISKUSAGE in self.requested_data:
5370 disk_usage = dict((inst.name,
5371 _ComputeDiskSize(inst.disk_template,
5372 [{constants.IDISK_SIZE: disk.size}
5373 for disk in inst.disks]))
5374 for inst in instance_list)
5378 if query.IQ_CONSOLE in self.requested_data:
5380 for inst in instance_list:
5381 if inst.name in live_data:
5382 # Instance is running
5383 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5385 consinfo[inst.name] = None
5386 assert set(consinfo.keys()) == set(instance_names)
5390 if query.IQ_NODES in self.requested_data:
5391 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5393 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5394 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5395 for uuid in set(map(operator.attrgetter("group"),
5401 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5402 disk_usage, offline_nodes, bad_nodes,
5403 live_data, wrongnode_inst, consinfo,
5407 class LUQuery(NoHooksLU):
5408 """Query for resources/items of a certain kind.
5411 # pylint: disable=W0142
5414 def CheckArguments(self):
5415 qcls = _GetQueryImplementation(self.op.what)
5417 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5419 def ExpandNames(self):
5420 self.impl.ExpandNames(self)
5422 def DeclareLocks(self, level):
5423 self.impl.DeclareLocks(self, level)
5425 def Exec(self, feedback_fn):
5426 return self.impl.NewStyleQuery(self)
5429 class LUQueryFields(NoHooksLU):
5430 """Query for resources/items of a certain kind.
5433 # pylint: disable=W0142
5436 def CheckArguments(self):
5437 self.qcls = _GetQueryImplementation(self.op.what)
5439 def ExpandNames(self):
5440 self.needed_locks = {}
5442 def Exec(self, feedback_fn):
5443 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5446 class LUNodeModifyStorage(NoHooksLU):
5447 """Logical unit for modifying a storage volume on a node.
5452 def CheckArguments(self):
5453 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5455 storage_type = self.op.storage_type
5458 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5460 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5461 " modified" % storage_type,
5464 diff = set(self.op.changes.keys()) - modifiable
5466 raise errors.OpPrereqError("The following fields can not be modified for"
5467 " storage units of type '%s': %r" %
5468 (storage_type, list(diff)),
5471 def ExpandNames(self):
5472 self.needed_locks = {
5473 locking.LEVEL_NODE: self.op.node_name,
5476 def Exec(self, feedback_fn):
5477 """Modifies a storage volume on the target node.
5480 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5481 result = self.rpc.call_storage_modify(self.op.node_name,
5482 self.op.storage_type, st_args,
5483 self.op.name, self.op.changes)
5484 result.Raise("Failed to modify storage unit '%s' on %s" %
5485 (self.op.name, self.op.node_name))
5488 class LUNodeAdd(LogicalUnit):
5489 """Logical unit for adding node to the cluster.
5493 HTYPE = constants.HTYPE_NODE
5494 _NFLAGS = ["master_capable", "vm_capable"]
5496 def CheckArguments(self):
5497 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5498 # validate/normalize the node name
5499 self.hostname = netutils.GetHostname(name=self.op.node_name,
5500 family=self.primary_ip_family)
5501 self.op.node_name = self.hostname.name
5503 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5504 raise errors.OpPrereqError("Cannot readd the master node",
5507 if self.op.readd and self.op.group:
5508 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5509 " being readded", errors.ECODE_INVAL)
5511 def BuildHooksEnv(self):
5514 This will run on all nodes before, and on all nodes + the new node after.
5518 "OP_TARGET": self.op.node_name,
5519 "NODE_NAME": self.op.node_name,
5520 "NODE_PIP": self.op.primary_ip,
5521 "NODE_SIP": self.op.secondary_ip,
5522 "MASTER_CAPABLE": str(self.op.master_capable),
5523 "VM_CAPABLE": str(self.op.vm_capable),
5526 def BuildHooksNodes(self):
5527 """Build hooks nodes.
5530 # Exclude added node
5531 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5532 post_nodes = pre_nodes + [self.op.node_name, ]
5534 return (pre_nodes, post_nodes)
5536 def CheckPrereq(self):
5537 """Check prerequisites.
5540 - the new node is not already in the config
5542 - its parameters (single/dual homed) matches the cluster
5544 Any errors are signaled by raising errors.OpPrereqError.
5548 hostname = self.hostname
5549 node = hostname.name
5550 primary_ip = self.op.primary_ip = hostname.ip
5551 if self.op.secondary_ip is None:
5552 if self.primary_ip_family == netutils.IP6Address.family:
5553 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5554 " IPv4 address must be given as secondary",
5556 self.op.secondary_ip = primary_ip
5558 secondary_ip = self.op.secondary_ip
5559 if not netutils.IP4Address.IsValid(secondary_ip):
5560 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5561 " address" % secondary_ip, errors.ECODE_INVAL)
5563 node_list = cfg.GetNodeList()
5564 if not self.op.readd and node in node_list:
5565 raise errors.OpPrereqError("Node %s is already in the configuration" %
5566 node, errors.ECODE_EXISTS)
5567 elif self.op.readd and node not in node_list:
5568 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5571 self.changed_primary_ip = False
5573 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5574 if self.op.readd and node == existing_node_name:
5575 if existing_node.secondary_ip != secondary_ip:
5576 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5577 " address configuration as before",
5579 if existing_node.primary_ip != primary_ip:
5580 self.changed_primary_ip = True
5584 if (existing_node.primary_ip == primary_ip or
5585 existing_node.secondary_ip == primary_ip or
5586 existing_node.primary_ip == secondary_ip or
5587 existing_node.secondary_ip == secondary_ip):
5588 raise errors.OpPrereqError("New node ip address(es) conflict with"
5589 " existing node %s" % existing_node.name,
5590 errors.ECODE_NOTUNIQUE)
5592 # After this 'if' block, None is no longer a valid value for the
5593 # _capable op attributes
5595 old_node = self.cfg.GetNodeInfo(node)
5596 assert old_node is not None, "Can't retrieve locked node %s" % node
5597 for attr in self._NFLAGS:
5598 if getattr(self.op, attr) is None:
5599 setattr(self.op, attr, getattr(old_node, attr))
5601 for attr in self._NFLAGS:
5602 if getattr(self.op, attr) is None:
5603 setattr(self.op, attr, True)
5605 if self.op.readd and not self.op.vm_capable:
5606 pri, sec = cfg.GetNodeInstances(node)
5608 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5609 " flag set to false, but it already holds"
5610 " instances" % node,
5613 # check that the type of the node (single versus dual homed) is the
5614 # same as for the master
5615 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5616 master_singlehomed = myself.secondary_ip == myself.primary_ip
5617 newbie_singlehomed = secondary_ip == primary_ip
5618 if master_singlehomed != newbie_singlehomed:
5619 if master_singlehomed:
5620 raise errors.OpPrereqError("The master has no secondary ip but the"
5621 " new node has one",
5624 raise errors.OpPrereqError("The master has a secondary ip but the"
5625 " new node doesn't have one",
5628 # checks reachability
5629 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5630 raise errors.OpPrereqError("Node not reachable by ping",
5631 errors.ECODE_ENVIRON)
5633 if not newbie_singlehomed:
5634 # check reachability from my secondary ip to newbie's secondary ip
5635 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5636 source=myself.secondary_ip):
5637 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5638 " based ping to node daemon port",
5639 errors.ECODE_ENVIRON)
5646 if self.op.master_capable:
5647 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5649 self.master_candidate = False
5652 self.new_node = old_node
5654 node_group = cfg.LookupNodeGroup(self.op.group)
5655 self.new_node = objects.Node(name=node,
5656 primary_ip=primary_ip,
5657 secondary_ip=secondary_ip,
5658 master_candidate=self.master_candidate,
5659 offline=False, drained=False,
5662 if self.op.ndparams:
5663 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5665 if self.op.hv_state:
5666 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5668 if self.op.disk_state:
5669 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5671 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5672 # it a property on the base class.
5673 result = rpc.DnsOnlyRunner().call_version([node])[node]
5674 result.Raise("Can't get version information from node %s" % node)
5675 if constants.PROTOCOL_VERSION == result.payload:
5676 logging.info("Communication to node %s fine, sw version %s match",
5677 node, result.payload)
5679 raise errors.OpPrereqError("Version mismatch master version %s,"
5680 " node version %s" %
5681 (constants.PROTOCOL_VERSION, result.payload),
5682 errors.ECODE_ENVIRON)
5684 def Exec(self, feedback_fn):
5685 """Adds the new node to the cluster.
5688 new_node = self.new_node
5689 node = new_node.name
5691 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5694 # We are adding a new node, so we assume it's powered
5695 new_node.powered = True
5697 # for re-adds, reset the offline/drained/master-candidate flags;
5698 # we need to reset here, otherwise offline would prevent RPC calls
5699 # later in the procedure; this also means that if the re-add
5700 # fails, we are left with a non-offlined, broken node
5702 new_node.drained = new_node.offline = False # pylint: disable=W0201
5703 self.LogInfo("Readding a node, the offline/drained flags were reset")
5704 # if we demote the node, we do cleanup later in the procedure
5705 new_node.master_candidate = self.master_candidate
5706 if self.changed_primary_ip:
5707 new_node.primary_ip = self.op.primary_ip
5709 # copy the master/vm_capable flags
5710 for attr in self._NFLAGS:
5711 setattr(new_node, attr, getattr(self.op, attr))
5713 # notify the user about any possible mc promotion
5714 if new_node.master_candidate:
5715 self.LogInfo("Node will be a master candidate")
5717 if self.op.ndparams:
5718 new_node.ndparams = self.op.ndparams
5720 new_node.ndparams = {}
5722 if self.op.hv_state:
5723 new_node.hv_state_static = self.new_hv_state
5725 if self.op.disk_state:
5726 new_node.disk_state_static = self.new_disk_state
5728 # Add node to our /etc/hosts, and add key to known_hosts
5729 if self.cfg.GetClusterInfo().modify_etc_hosts:
5730 master_node = self.cfg.GetMasterNode()
5731 result = self.rpc.call_etc_hosts_modify(master_node,
5732 constants.ETC_HOSTS_ADD,
5735 result.Raise("Can't update hosts file with new host data")
5737 if new_node.secondary_ip != new_node.primary_ip:
5738 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5741 node_verify_list = [self.cfg.GetMasterNode()]
5742 node_verify_param = {
5743 constants.NV_NODELIST: ([node], {}),
5744 # TODO: do a node-net-test as well?
5747 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5748 self.cfg.GetClusterName())
5749 for verifier in node_verify_list:
5750 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5751 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5753 for failed in nl_payload:
5754 feedback_fn("ssh/hostname verification failed"
5755 " (checking from %s): %s" %
5756 (verifier, nl_payload[failed]))
5757 raise errors.OpExecError("ssh/hostname verification failed")
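# The node_verify RPC above is restricted to NV_NODELIST: it only confirms
# that the master can reach the new node over SSH under its hostname before
# the configuration is distributed to it.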
5760 _RedistributeAncillaryFiles(self)
5761 self.context.ReaddNode(new_node)
5762 # make sure we redistribute the config
5763 self.cfg.Update(new_node, feedback_fn)
5764 # and make sure the new node will not have old files around
5765 if not new_node.master_candidate:
5766 result = self.rpc.call_node_demote_from_mc(new_node.name)
5767 msg = result.fail_msg
5769 self.LogWarning("Node failed to demote itself from master"
5770 " candidate status: %s" % msg)
5772 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5773 additional_vm=self.op.vm_capable)
5774 self.context.AddNode(new_node, self.proc.GetECId())
5777 class LUNodeSetParams(LogicalUnit):
5778 """Modifies the parameters of a node.
5780 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5781 to the node role (as _ROLE_*)
5782 @cvar _R2F: a dictionary from node role to tuples of flags
5783 @cvar _FLAGS: a list of attribute names corresponding to the flags
5786 HPATH = "node-modify"
5787 HTYPE = constants.HTYPE_NODE
5789 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5791 (True, False, False): _ROLE_CANDIDATE,
5792 (False, True, False): _ROLE_DRAINED,
5793 (False, False, True): _ROLE_OFFLINE,
5794 (False, False, False): _ROLE_REGULAR,
5796 _R2F = dict((v, k) for k, v in _F2R.items())
5797 _FLAGS = ["master_candidate", "drained", "offline"]
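# The three flags are mutually exclusive, so each valid combination maps to
# exactly one role; e.g. _F2R[(True, False, False)] == _ROLE_CANDIDATE and
# _R2F[_ROLE_OFFLINE] == (False, False, True).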
5799 def CheckArguments(self):
5800 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5801 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5802 self.op.master_capable, self.op.vm_capable,
5803 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5805 if all_mods.count(None) == len(all_mods):
5806 raise errors.OpPrereqError("Please pass at least one modification",
5808 if all_mods.count(True) > 1:
5809 raise errors.OpPrereqError("Can't set the node into more than one"
5810 " state at the same time",
5813 # Boolean value that tells us whether we might be demoting from MC
5814 self.might_demote = (self.op.master_candidate == False or
5815 self.op.offline == True or
5816 self.op.drained == True or
5817 self.op.master_capable == False)
5819 if self.op.secondary_ip:
5820 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5821 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5822 " address" % self.op.secondary_ip,
5825 self.lock_all = self.op.auto_promote and self.might_demote
5826 self.lock_instances = self.op.secondary_ip is not None
5828 def _InstanceFilter(self, instance):
5829 """Filter for getting affected instances.
5832 return (instance.disk_template in constants.DTS_INT_MIRROR and
5833 self.op.node_name in instance.all_nodes)
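# Only instances with an internally mirrored disk template (e.g. DRBD) that
# use this node are affected by a secondary IP change, so they are the only
# ones that need to be locked and checked.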
5835 def ExpandNames(self):
5837 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5839 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5841 # Since modifying a node can have severe effects on currently running
5842 # operations, the resource lock is at least acquired in shared mode
5843 self.needed_locks[locking.LEVEL_NODE_RES] = \
5844 self.needed_locks[locking.LEVEL_NODE]
5846 # Get node resource and instance locks in shared mode; they are not used
5847 # for anything but read-only access
5848 self.share_locks[locking.LEVEL_NODE_RES] = 1
5849 self.share_locks[locking.LEVEL_INSTANCE] = 1
5851 if self.lock_instances:
5852 self.needed_locks[locking.LEVEL_INSTANCE] = \
5853 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
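# The affected instances are computed from the configuration here and
# re-evaluated in CheckPrereq; if the two sets differ, the operation is
# aborted and has to be retried (see the check below).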
5855 def BuildHooksEnv(self):
5858 This runs on the master node.
5862 "OP_TARGET": self.op.node_name,
5863 "MASTER_CANDIDATE": str(self.op.master_candidate),
5864 "OFFLINE": str(self.op.offline),
5865 "DRAINED": str(self.op.drained),
5866 "MASTER_CAPABLE": str(self.op.master_capable),
5867 "VM_CAPABLE": str(self.op.vm_capable),
5870 def BuildHooksNodes(self):
5871 """Build hooks nodes.
5874 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5877 def CheckPrereq(self):
5878 """Check prerequisites.
5880 This only checks the instance list against the existing names.
5883 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5885 if self.lock_instances:
5886 affected_instances = \
5887 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5889 # Verify instance locks
5890 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5891 wanted_instances = frozenset(affected_instances.keys())
5892 if wanted_instances - owned_instances:
5893 raise errors.OpPrereqError("Instances affected by changing node %s's"
5894 " secondary IP address have changed since"
5895 " locks were acquired, wanted '%s', have"
5896 " '%s'; retry the operation" %
5898 utils.CommaJoin(wanted_instances),
5899 utils.CommaJoin(owned_instances)),
5902 affected_instances = None
5904 if (self.op.master_candidate is not None or
5905 self.op.drained is not None or
5906 self.op.offline is not None):
5907 # we can't change the master's node flags
5908 if self.op.node_name == self.cfg.GetMasterNode():
5909 raise errors.OpPrereqError("The master role can be changed"
5910 " only via master-failover",
5913 if self.op.master_candidate and not node.master_capable:
5914 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5915 " it a master candidate" % node.name,
5918 if self.op.vm_capable == False:
5919 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5921 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5922 " the vm_capable flag" % node.name,
5925 if node.master_candidate and self.might_demote and not self.lock_all:
5926 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5927 # check if, after removing the current node, we're missing master candidates
5929 (mc_remaining, mc_should, _) = \
5930 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5931 if mc_remaining < mc_should:
5932 raise errors.OpPrereqError("Not enough master candidates, please"
5933 " pass auto promote option to allow"
5934 " promotion (--auto-promote or RAPI"
5935 " auto_promote=True)", errors.ECODE_STATE)
5937 self.old_flags = old_flags = (node.master_candidate,
5938 node.drained, node.offline)
5939 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5940 self.old_role = old_role = self._F2R[old_flags]
5942 # Check for ineffective changes
5943 for attr in self._FLAGS:
5944 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5945 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5946 setattr(self.op, attr, None)
5948 # Past this point, any flag change to False means a transition
5949 # away from the respective state, as only real changes are kept
5951 # TODO: We might query the real power state if it supports OOB
5952 if _SupportsOob(self.cfg, node):
5953 if self.op.offline is False and not (node.powered or
5954 self.op.powered == True):
5955 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5956 " offline status can be reset") %
5958 elif self.op.powered is not None:
5959 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5960 " as it does not support out-of-band"
5961 " handling") % self.op.node_name)
5963 # If we're being de-offlined or un-drained, we'll promote ourselves to master candidate if needed
5964 if (self.op.drained == False or self.op.offline == False or
5965 (self.op.master_capable and not node.master_capable)):
5966 if _DecideSelfPromotion(self):
5967 self.op.master_candidate = True
5968 self.LogInfo("Auto-promoting node to master candidate")
5970 # If we're no longer master capable, we'll demote ourselves from MC
5971 if self.op.master_capable == False and node.master_candidate:
5972 self.LogInfo("Demoting from master candidate")
5973 self.op.master_candidate = False
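# Compute the new node role from whichever (at most one) flag is set; if
# only False values remain we are un-setting a flag and the node becomes a
# regular node, otherwise the old role is kept.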
5976 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5977 if self.op.master_candidate:
5978 new_role = self._ROLE_CANDIDATE
5979 elif self.op.drained:
5980 new_role = self._ROLE_DRAINED
5981 elif self.op.offline:
5982 new_role = self._ROLE_OFFLINE
5983 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5984 # False is still in new flags, which means we're un-setting (the offline/drained/master-candidate) flag
5986 new_role = self._ROLE_REGULAR
5987 else: # no new flags, nothing, keep old role
5990 self.new_role = new_role
5992 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5993 # Trying to transition out of offline status
5994 result = self.rpc.call_version([node.name])[node.name]
5996 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5997 " to report its version: %s" %
5998 (node.name, result.fail_msg),
6001 self.LogWarning("Transitioning node from offline to online state"
6002 " without using re-add. Please make sure the node"
6005 if self.op.secondary_ip:
6006 # Ok even without locking, because this can't be changed by any LU
6007 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6008 master_singlehomed = master.secondary_ip == master.primary_ip
6009 if master_singlehomed and self.op.secondary_ip:
6010 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6011 " homed cluster", errors.ECODE_INVAL)
6013 assert not (frozenset(affected_instances) -
6014 self.owned_locks(locking.LEVEL_INSTANCE))
6017 if affected_instances:
6018 raise errors.OpPrereqError("Cannot change secondary IP address:"
6019 " offline node has instances (%s)"
6020 " configured to use it" %
6021 utils.CommaJoin(affected_instances.keys()))
6023 # On online nodes, check that no instances are running, and that
6024 # the node has the new ip and we can reach it.
6025 for instance in affected_instances.values():
6026 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6027 msg="cannot change secondary ip")
6029 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6030 if master.name != node.name:
6031 # check reachability from master secondary ip to new secondary ip
6032 if not netutils.TcpPing(self.op.secondary_ip,
6033 constants.DEFAULT_NODED_PORT,
6034 source=master.secondary_ip):
6035 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6036 " based ping to node daemon port",
6037 errors.ECODE_ENVIRON)
6039 if self.op.ndparams:
6040 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6041 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6042 self.new_ndparams = new_ndparams
6044 if self.op.hv_state:
6045 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6046 self.node.hv_state_static)
6048 if self.op.disk_state:
6049 self.new_disk_state = \
6050 _MergeAndVerifyDiskState(self.op.disk_state,
6051 self.node.disk_state_static)
6053 def Exec(self, feedback_fn):
6058 old_role = self.old_role
6059 new_role = self.new_role
6063 if self.op.ndparams:
6064 node.ndparams = self.new_ndparams
6066 if self.op.powered is not None:
6067 node.powered = self.op.powered
6069 if self.op.hv_state:
6070 node.hv_state_static = self.new_hv_state
6072 if self.op.disk_state:
6073 node.disk_state_static = self.new_disk_state
6075 for attr in ["master_capable", "vm_capable"]:
6076 val = getattr(self.op, attr)
6078 setattr(node, attr, val)
6079 result.append((attr, str(val)))
6081 if new_role != old_role:
6082 # Tell the node to demote itself, if no longer MC and not offline
6083 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6084 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6086 self.LogWarning("Node failed to demote itself: %s", msg)
6088 new_flags = self._R2F[new_role]
6089 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6091 result.append((desc, str(nf)))
6092 (node.master_candidate, node.drained, node.offline) = new_flags
6094 # we locked all nodes, so adjust the candidate pool before updating this node
6096 _AdjustCandidatePool(self, [node.name])
6098 if self.op.secondary_ip:
6099 node.secondary_ip = self.op.secondary_ip
6100 result.append(("secondary_ip", self.op.secondary_ip))
6102 # this will trigger configuration file update, if needed
6103 self.cfg.Update(node, feedback_fn)
6105 # this will trigger job queue propagation or cleanup if the mc flag changed
6107 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6108 self.context.ReaddNode(node)
6113 class LUNodePowercycle(NoHooksLU):
6114 """Powercycles a node.
6119 def CheckArguments(self):
6120 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6121 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6122 raise errors.OpPrereqError("The node is the master and the force"
6123 " parameter was not set",
6126 def ExpandNames(self):
6127 """Locking for PowercycleNode.
6129 This is a last-resort option and shouldn't block on other
6130 jobs. Therefore, we grab no locks.
6133 self.needed_locks = {}
6135 def Exec(self, feedback_fn):
6139 result = self.rpc.call_node_powercycle(self.op.node_name,
6140 self.cfg.GetHypervisorType())
6141 result.Raise("Failed to schedule the reboot")
6142 return result.payload
6145 class LUClusterQuery(NoHooksLU):
6146 """Query cluster configuration.
6151 def ExpandNames(self):
6152 self.needed_locks = {}
6154 def Exec(self, feedback_fn):
6155 """Return cluster config.
6158 cluster = self.cfg.GetClusterInfo()
6161 # Filter just for enabled hypervisors
6162 for os_name, hv_dict in cluster.os_hvp.items():
6163 os_hvp[os_name] = {}
6164 for hv_name, hv_params in hv_dict.items():
6165 if hv_name in cluster.enabled_hypervisors:
6166 os_hvp[os_name][hv_name] = hv_params
6168 # Convert ip_family to ip_version
6169 primary_ip_version = constants.IP4_VERSION
6170 if cluster.primary_ip_family == netutils.IP6Address.family:
6171 primary_ip_version = constants.IP6_VERSION
6174 "software_version": constants.RELEASE_VERSION,
6175 "protocol_version": constants.PROTOCOL_VERSION,
6176 "config_version": constants.CONFIG_VERSION,
6177 "os_api_version": max(constants.OS_API_VERSIONS),
6178 "export_version": constants.EXPORT_VERSION,
6179 "architecture": runtime.GetArchInfo(),
6180 "name": cluster.cluster_name,
6181 "master": cluster.master_node,
6182 "default_hypervisor": cluster.primary_hypervisor,
6183 "enabled_hypervisors": cluster.enabled_hypervisors,
6184 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6185 for hypervisor_name in cluster.enabled_hypervisors]),
6187 "beparams": cluster.beparams,
6188 "osparams": cluster.osparams,
6189 "ipolicy": cluster.ipolicy,
6190 "nicparams": cluster.nicparams,
6191 "ndparams": cluster.ndparams,
6192 "diskparams": cluster.diskparams,
6193 "candidate_pool_size": cluster.candidate_pool_size,
6194 "master_netdev": cluster.master_netdev,
6195 "master_netmask": cluster.master_netmask,
6196 "use_external_mip_script": cluster.use_external_mip_script,
6197 "volume_group_name": cluster.volume_group_name,
6198 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6199 "file_storage_dir": cluster.file_storage_dir,
6200 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6201 "maintain_node_health": cluster.maintain_node_health,
6202 "ctime": cluster.ctime,
6203 "mtime": cluster.mtime,
6204 "uuid": cluster.uuid,
6205 "tags": list(cluster.GetTags()),
6206 "uid_pool": cluster.uid_pool,
6207 "default_iallocator": cluster.default_iallocator,
6208 "reserved_lvs": cluster.reserved_lvs,
6209 "primary_ip_version": primary_ip_version,
6210 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6211 "hidden_os": cluster.hidden_os,
6212 "blacklisted_os": cluster.blacklisted_os,
6218 class LUClusterConfigQuery(NoHooksLU):
6219 """Return configuration values.
6224 def CheckArguments(self):
6225 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6227 def ExpandNames(self):
6228 self.cq.ExpandNames(self)
6230 def DeclareLocks(self, level):
6231 self.cq.DeclareLocks(self, level)
6233 def Exec(self, feedback_fn):
6234 result = self.cq.OldStyleQuery(self)
6236 assert len(result) == 1
6241 class _ClusterQuery(_QueryBase):
6242 FIELDS = query.CLUSTER_FIELDS
6244 #: Do not sort (there is only one item)
6247 def ExpandNames(self, lu):
6248 lu.needed_locks = {}
6250 # The following variables interact with _QueryBase._GetNames
6251 self.wanted = locking.ALL_SET
6252 self.do_locking = self.use_locking
6255 raise errors.OpPrereqError("Can not use locking for cluster queries",
6258 def DeclareLocks(self, lu, level):
6261 def _GetQueryData(self, lu):
6262 """Computes the list of nodes and their attributes.
6265 # Locking is not used
6266 assert not (compat.any(lu.glm.is_owned(level)
6267 for level in locking.LEVELS
6268 if level != locking.LEVEL_CLUSTER) or
6269 self.do_locking or self.use_locking)
6271 if query.CQ_CONFIG in self.requested_data:
6272 cluster = lu.cfg.GetClusterInfo()
6274 cluster = NotImplemented
6276 if query.CQ_QUEUE_DRAINED in self.requested_data:
6277 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6279 drain_flag = NotImplemented
6281 if query.CQ_WATCHER_PAUSE in self.requested_data:
6282 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6284 watcher_pause = NotImplemented
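# Fields that were not requested are filled with NotImplemented as a
# sentinel, presumably so the query layer can tell "not computed" apart
# from a real value such as None.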
6286 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6289 class LUInstanceActivateDisks(NoHooksLU):
6290 """Bring up an instance's disks.
6295 def ExpandNames(self):
6296 self._ExpandAndLockInstance()
6297 self.needed_locks[locking.LEVEL_NODE] = []
6298 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6300 def DeclareLocks(self, level):
6301 if level == locking.LEVEL_NODE:
6302 self._LockInstancesNodes()
6304 def CheckPrereq(self):
6305 """Check prerequisites.
6307 This checks that the instance is in the cluster.
6310 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6311 assert self.instance is not None, \
6312 "Cannot retrieve locked instance %s" % self.op.instance_name
6313 _CheckNodeOnline(self, self.instance.primary_node)
6315 def Exec(self, feedback_fn):
6316 """Activate the disks.
6319 disks_ok, disks_info = \
6320 _AssembleInstanceDisks(self, self.instance,
6321 ignore_size=self.op.ignore_size)
6323 raise errors.OpExecError("Cannot activate block devices")
6328 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6330 """Prepare the block devices for an instance.
6332 This sets up the block devices on all nodes.
6334 @type lu: L{LogicalUnit}
6335 @param lu: the logical unit on whose behalf we execute
6336 @type instance: L{objects.Instance}
6337 @param instance: the instance for whose disks we assemble
6338 @type disks: list of L{objects.Disk} or None
6339 @param disks: which disks to assemble (or all, if None)
6340 @type ignore_secondaries: boolean
6341 @param ignore_secondaries: if true, errors on secondary nodes
6342 won't result in an error return from the function
6343 @type ignore_size: boolean
6344 @param ignore_size: if true, the current known size of the disk
6345 will not be used during the disk activation, useful for cases
6346 when the size is wrong
6347 @return: a tuple (disks_ok, device_info); disks_ok is False if the
6348 operation failed, and device_info is a list of
6349 (host, instance_visible_name, node_visible_name) tuples mapping node devices to instance devices
6354 iname = instance.name
6355 disks = _ExpandCheckDisks(instance, disks)
6357 # With the two-pass mechanism we try to reduce the window of
6358 # opportunity for the race condition of switching DRBD to primary
6359 # before handshaking occurred, but we do not eliminate it
6361 # The proper fix would be to wait (with some limits) until the
6362 # connection has been made and drbd transitions from WFConnection
6363 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6366 # 1st pass, assemble on all nodes in secondary mode
6367 for idx, inst_disk in enumerate(disks):
6368 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6370 node_disk = node_disk.Copy()
6371 node_disk.UnsetSize()
6372 lu.cfg.SetDiskID(node_disk, node)
6373 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6375 msg = result.fail_msg
6377 is_offline_secondary = (node in instance.secondary_nodes and
6379 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6380 " (is_primary=False, pass=1): %s",
6381 inst_disk.iv_name, node, msg)
6382 if not (ignore_secondaries or is_offline_secondary):
6385 # FIXME: race condition on drbd migration to primary
6387 # 2nd pass, do only the primary node
6388 for idx, inst_disk in enumerate(disks):
6391 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6392 if node != instance.primary_node:
6395 node_disk = node_disk.Copy()
6396 node_disk.UnsetSize()
6397 lu.cfg.SetDiskID(node_disk, node)
6398 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6400 msg = result.fail_msg
6402 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6403 " (is_primary=True, pass=2): %s",
6404 inst_disk.iv_name, node, msg)
6407 dev_path = result.payload
6409 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6411 # leave the disks configured for the primary node
6412 # this is a workaround that would be better fixed by
6413 # improving the logical/physical id handling
6415 lu.cfg.SetDiskID(disk, instance.primary_node)
6417 return disks_ok, device_info
6420 def _StartInstanceDisks(lu, instance, force):
6421 """Start the disks of an instance.
6424 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6425 ignore_secondaries=force)
6427 _ShutdownInstanceDisks(lu, instance)
6428 if force is not None and not force:
6429 lu.proc.LogWarning("", hint="If the message above refers to a"
6431 " you can retry the operation using '--force'.")
6432 raise errors.OpExecError("Disk consistency error")
6435 class LUInstanceDeactivateDisks(NoHooksLU):
6436 """Shutdown an instance's disks.
6441 def ExpandNames(self):
6442 self._ExpandAndLockInstance()
6443 self.needed_locks[locking.LEVEL_NODE] = []
6444 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6446 def DeclareLocks(self, level):
6447 if level == locking.LEVEL_NODE:
6448 self._LockInstancesNodes()
6450 def CheckPrereq(self):
6451 """Check prerequisites.
6453 This checks that the instance is in the cluster.
6456 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6457 assert self.instance is not None, \
6458 "Cannot retrieve locked instance %s" % self.op.instance_name
6460 def Exec(self, feedback_fn):
6461 """Deactivate the disks
6464 instance = self.instance
6466 _ShutdownInstanceDisks(self, instance)
6468 _SafeShutdownInstanceDisks(self, instance)
6471 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6472 """Shutdown block devices of an instance.
6474 This function checks if an instance is running, before calling
6475 _ShutdownInstanceDisks.
6478 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6479 _ShutdownInstanceDisks(lu, instance, disks=disks)
6482 def _ExpandCheckDisks(instance, disks):
6483 """Return the instance disks selected by the disks list
6485 @type disks: list of L{objects.Disk} or None
6486 @param disks: selected disks
6487 @rtype: list of L{objects.Disk}
6488 @return: selected instance disks to act on
6492 return instance.disks
6494 if not set(disks).issubset(instance.disks):
6495 raise errors.ProgrammerError("Can only act on disks belonging to the"
6500 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6501 """Shutdown block devices of an instance.
6503 This does the shutdown on all nodes of the instance.
6505 If ignore_primary is false, errors on the primary node are not ignored; errors on offline secondary nodes always are.
6510 disks = _ExpandCheckDisks(instance, disks)
6513 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6514 lu.cfg.SetDiskID(top_disk, node)
6515 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6516 msg = result.fail_msg
6518 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6519 disk.iv_name, node, msg)
6520 if ((node == instance.primary_node and not ignore_primary) or
6521 (node != instance.primary_node and not result.offline)):
6526 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6527 """Checks if a node has enough free memory.
6529 This function checks if a given node has the needed amount of free
6530 memory. In case the node has less memory or we cannot get the
6531 information from the node, this function raises an OpPrereqError exception.
6534 @type lu: C{LogicalUnit}
6535 @param lu: a logical unit from which we get configuration data
6537 @param node: the node to check
6538 @type reason: C{str}
6539 @param reason: string to use in the error message
6540 @type requested: C{int}
6541 @param requested: the amount of memory in MiB to check for
6542 @type hypervisor_name: C{str}
6543 @param hypervisor_name: the hypervisor to ask for memory stats
6545 @return: the node's current free memory, in MiB
6546 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6547 we cannot check the node
6550 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6551 nodeinfo[node].Raise("Can't get data from node %s" % node,
6552 prereq=True, ecode=errors.ECODE_ENVIRON)
6553 (_, _, (hv_info, )) = nodeinfo[node].payload
6555 free_mem = hv_info.get("memory_free", None)
6556 if not isinstance(free_mem, int):
6557 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6558 " was '%s'" % (node, free_mem),
6559 errors.ECODE_ENVIRON)
6560 if requested > free_mem:
6561 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6562 " needed %s MiB, available %s MiB" %
6563 (node, reason, requested, free_mem),
6568 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6569 """Checks if nodes have enough free disk space in the all VGs.
6571 This function check if all given nodes have the needed amount of
6572 free disk. In case any node has less disk or we cannot get the
6573 information from the node, this function raise an OpPrereqError
6576 @type lu: C{LogicalUnit}
6577 @param lu: a logical unit from which we get configuration data
6578 @type nodenames: C{list}
6579 @param nodenames: the list of node names to check
6580 @type req_sizes: C{dict}
6581 @param req_sizes: the hash of vg and corresponding amount of disk in
6583 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6584 or we cannot check the node
6587 for vg, req_size in req_sizes.items():
6588 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
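# Illustrative example (hypothetical values): req_sizes = {"xenvg": 10240}
# would require 10 GiB of free space in volume group "xenvg" on every node
# in nodenames.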
6591 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6592 """Checks if nodes have enough free disk space in the specified VG.
6594 This function checks if all given nodes have the needed amount of
6595 free disk. In case any node has less disk or we cannot get the
6596 information from the node, this function raises an OpPrereqError exception.
6599 @type lu: C{LogicalUnit}
6600 @param lu: a logical unit from which we get configuration data
6601 @type nodenames: C{list}
6602 @param nodenames: the list of node names to check
6604 @param vg: the volume group to check
6605 @type requested: C{int}
6606 @param requested: the amount of disk in MiB to check for
6607 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6608 or we cannot check the node
6611 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6612 for node in nodenames:
6613 info = nodeinfo[node]
6614 info.Raise("Cannot get current information from node %s" % node,
6615 prereq=True, ecode=errors.ECODE_ENVIRON)
6616 (_, (vg_info, ), _) = info.payload
6617 vg_free = vg_info.get("vg_free", None)
6618 if not isinstance(vg_free, int):
6619 raise errors.OpPrereqError("Can't compute free disk space on node"
6620 " %s for vg %s, result was '%s'" %
6621 (node, vg, vg_free), errors.ECODE_ENVIRON)
6622 if requested > vg_free:
6623 raise errors.OpPrereqError("Not enough disk space on target node %s"
6624 " vg %s: required %d MiB, available %d MiB" %
6625 (node, vg, requested, vg_free),
6629 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6630 """Checks if nodes have enough physical CPUs
6632 This function checks if all given nodes have the needed number of
6633 physical CPUs. In case any node has fewer CPUs or we cannot get the
6634 information from the node, this function raises an OpPrereqError exception.
6637 @type lu: C{LogicalUnit}
6638 @param lu: a logical unit from which we get configuration data
6639 @type nodenames: C{list}
6640 @param nodenames: the list of node names to check
6641 @type requested: C{int}
6642 @param requested: the minimum acceptable number of physical CPUs
6643 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6644 or we cannot check the node
6647 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6648 for node in nodenames:
6649 info = nodeinfo[node]
6650 info.Raise("Cannot get current information from node %s" % node,
6651 prereq=True, ecode=errors.ECODE_ENVIRON)
6652 (_, _, (hv_info, )) = info.payload
6653 num_cpus = hv_info.get("cpu_total", None)
6654 if not isinstance(num_cpus, int):
6655 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6656 " on node %s, result was '%s'" %
6657 (node, num_cpus), errors.ECODE_ENVIRON)
6658 if requested > num_cpus:
6659 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6660 "required" % (node, num_cpus, requested),
6664 class LUInstanceStartup(LogicalUnit):
6665 """Starts an instance.
6668 HPATH = "instance-start"
6669 HTYPE = constants.HTYPE_INSTANCE
6672 def CheckArguments(self):
6674 if self.op.beparams:
6675 # fill the beparams dict
6676 objects.UpgradeBeParams(self.op.beparams)
6677 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6679 def ExpandNames(self):
6680 self._ExpandAndLockInstance()
6681 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6683 def DeclareLocks(self, level):
6684 if level == locking.LEVEL_NODE_RES:
6685 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6687 def BuildHooksEnv(self):
6690 This runs on master, primary and secondary nodes of the instance.
6694 "FORCE": self.op.force,
6697 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6701 def BuildHooksNodes(self):
6702 """Build hooks nodes.
6705 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6708 def CheckPrereq(self):
6709 """Check prerequisites.
6711 This checks that the instance is in the cluster.
6714 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6715 assert self.instance is not None, \
6716 "Cannot retrieve locked instance %s" % self.op.instance_name
6719 if self.op.hvparams:
6720 # check hypervisor parameter syntax (locally)
6721 cluster = self.cfg.GetClusterInfo()
6722 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6723 filled_hvp = cluster.FillHV(instance)
6724 filled_hvp.update(self.op.hvparams)
6725 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6726 hv_type.CheckParameterSyntax(filled_hvp)
6727 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
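# The hvparams given to the start opcode are only a temporary override for
# this start: they are merged with the cluster/instance defaults, syntax-
# checked locally and verified on all nodes, but not written back to the
# configuration here.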
6729 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6731 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6733 if self.primary_offline and self.op.ignore_offline_nodes:
6734 self.proc.LogWarning("Ignoring offline primary node")
6736 if self.op.hvparams or self.op.beparams:
6737 self.proc.LogWarning("Overridden parameters are ignored")
6739 _CheckNodeOnline(self, instance.primary_node)
6741 bep = self.cfg.GetClusterInfo().FillBE(instance)
6742 bep.update(self.op.beparams)
6744 # check that the instance's bridges exist
6745 _CheckInstanceBridgesExist(self, instance)
6747 remote_info = self.rpc.call_instance_info(instance.primary_node,
6749 instance.hypervisor)
6750 remote_info.Raise("Error checking node %s" % instance.primary_node,
6751 prereq=True, ecode=errors.ECODE_ENVIRON)
6752 if not remote_info.payload: # not running already
6753 _CheckNodeFreeMemory(self, instance.primary_node,
6754 "starting instance %s" % instance.name,
6755 bep[constants.BE_MINMEM], instance.hypervisor)
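# Free memory is only checked when the instance is not already running on
# its primary node; a running instance already owns its memory.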
6757 def Exec(self, feedback_fn):
6758 """Start the instance.
6761 instance = self.instance
6762 force = self.op.force
6764 if not self.op.no_remember:
6765 self.cfg.MarkInstanceUp(instance.name)
6767 if self.primary_offline:
6768 assert self.op.ignore_offline_nodes
6769 self.proc.LogInfo("Primary node offline, marked instance as started")
6771 node_current = instance.primary_node
6773 _StartInstanceDisks(self, instance, force)
6776 self.rpc.call_instance_start(node_current,
6777 (instance, self.op.hvparams,
6779 self.op.startup_paused)
6780 msg = result.fail_msg
6782 _ShutdownInstanceDisks(self, instance)
6783 raise errors.OpExecError("Could not start instance: %s" % msg)
6786 class LUInstanceReboot(LogicalUnit):
6787 """Reboot an instance.
6790 HPATH = "instance-reboot"
6791 HTYPE = constants.HTYPE_INSTANCE
6794 def ExpandNames(self):
6795 self._ExpandAndLockInstance()
6797 def BuildHooksEnv(self):
6800 This runs on master, primary and secondary nodes of the instance.
6804 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6805 "REBOOT_TYPE": self.op.reboot_type,
6806 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6809 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6813 def BuildHooksNodes(self):
6814 """Build hooks nodes.
6817 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6820 def CheckPrereq(self):
6821 """Check prerequisites.
6823 This checks that the instance is in the cluster.
6826 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6827 assert self.instance is not None, \
6828 "Cannot retrieve locked instance %s" % self.op.instance_name
6829 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6830 _CheckNodeOnline(self, instance.primary_node)
6832 # check that the instance's bridges exist
6833 _CheckInstanceBridgesExist(self, instance)
6835 def Exec(self, feedback_fn):
6836 """Reboot the instance.
6839 instance = self.instance
6840 ignore_secondaries = self.op.ignore_secondaries
6841 reboot_type = self.op.reboot_type
6843 remote_info = self.rpc.call_instance_info(instance.primary_node,
6845 instance.hypervisor)
6846 remote_info.Raise("Error checking node %s" % instance.primary_node)
6847 instance_running = bool(remote_info.payload)
6849 node_current = instance.primary_node
6851 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6852 constants.INSTANCE_REBOOT_HARD]:
6853 for disk in instance.disks:
6854 self.cfg.SetDiskID(disk, node_current)
6855 result = self.rpc.call_instance_reboot(node_current, instance,
6857 self.op.shutdown_timeout)
6858 result.Raise("Could not reboot instance")
6860 if instance_running:
6861 result = self.rpc.call_instance_shutdown(node_current, instance,
6862 self.op.shutdown_timeout)
6863 result.Raise("Could not shutdown instance for full reboot")
6864 _ShutdownInstanceDisks(self, instance)
6866 self.LogInfo("Instance %s was already stopped, starting now",
6868 _StartInstanceDisks(self, instance, ignore_secondaries)
6869 result = self.rpc.call_instance_start(node_current,
6870 (instance, None, None), False)
6871 msg = result.fail_msg
6873 _ShutdownInstanceDisks(self, instance)
6874 raise errors.OpExecError("Could not start instance for"
6875 " full reboot: %s" % msg)
6877 self.cfg.MarkInstanceUp(instance.name)
6880 class LUInstanceShutdown(LogicalUnit):
6881 """Shutdown an instance.
6884 HPATH = "instance-stop"
6885 HTYPE = constants.HTYPE_INSTANCE
6888 def ExpandNames(self):
6889 self._ExpandAndLockInstance()
6891 def BuildHooksEnv(self):
6894 This runs on master, primary and secondary nodes of the instance.
6897 env = _BuildInstanceHookEnvByObject(self, self.instance)
6898 env["TIMEOUT"] = self.op.timeout
6901 def BuildHooksNodes(self):
6902 """Build hooks nodes.
6905 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6908 def CheckPrereq(self):
6909 """Check prerequisites.
6911 This checks that the instance is in the cluster.
6914 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6915 assert self.instance is not None, \
6916 "Cannot retrieve locked instance %s" % self.op.instance_name
6918 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6920 self.primary_offline = \
6921 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6923 if self.primary_offline and self.op.ignore_offline_nodes:
6924 self.proc.LogWarning("Ignoring offline primary node")
6926 _CheckNodeOnline(self, self.instance.primary_node)
6928 def Exec(self, feedback_fn):
6929 """Shutdown the instance.
6932 instance = self.instance
6933 node_current = instance.primary_node
6934 timeout = self.op.timeout
6936 if not self.op.no_remember:
6937 self.cfg.MarkInstanceDown(instance.name)
6939 if self.primary_offline:
6940 assert self.op.ignore_offline_nodes
6941 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6943 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6944 msg = result.fail_msg
6946 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6948 _ShutdownInstanceDisks(self, instance)
6951 class LUInstanceReinstall(LogicalUnit):
6952 """Reinstall an instance.
6955 HPATH = "instance-reinstall"
6956 HTYPE = constants.HTYPE_INSTANCE
6959 def ExpandNames(self):
6960 self._ExpandAndLockInstance()
6962 def BuildHooksEnv(self):
6965 This runs on master, primary and secondary nodes of the instance.
6968 return _BuildInstanceHookEnvByObject(self, self.instance)
6970 def BuildHooksNodes(self):
6971 """Build hooks nodes.
6974 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6977 def CheckPrereq(self):
6978 """Check prerequisites.
6980 This checks that the instance is in the cluster and is not running.
6983 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6984 assert instance is not None, \
6985 "Cannot retrieve locked instance %s" % self.op.instance_name
6986 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6987 " offline, cannot reinstall")
6989 if instance.disk_template == constants.DT_DISKLESS:
6990 raise errors.OpPrereqError("Instance '%s' has no disks" %
6991 self.op.instance_name,
6993 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6995 if self.op.os_type is not None:
6997 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6998 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6999 instance_os = self.op.os_type
7001 instance_os = instance.os
7003 nodelist = list(instance.all_nodes)
7005 if self.op.osparams:
7006 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7007 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7008 self.os_inst = i_osdict # the new dict (without defaults)
7012 self.instance = instance
7014 def Exec(self, feedback_fn):
7015 """Reinstall the instance.
7018 inst = self.instance
7020 if self.op.os_type is not None:
7021 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7022 inst.os = self.op.os_type
7023 # Write to configuration
7024 self.cfg.Update(inst, feedback_fn)
7026 _StartInstanceDisks(self, inst, None)
7028 feedback_fn("Running the instance OS create scripts...")
7029 # FIXME: pass debug option from opcode to backend
7030 result = self.rpc.call_instance_os_add(inst.primary_node,
7031 (inst, self.os_inst), True,
7032 self.op.debug_level)
7033 result.Raise("Could not install OS for instance %s on node %s" %
7034 (inst.name, inst.primary_node))
7036 _ShutdownInstanceDisks(self, inst)
7039 class LUInstanceRecreateDisks(LogicalUnit):
7040 """Recreate an instance's missing disks.
7043 HPATH = "instance-recreate-disks"
7044 HTYPE = constants.HTYPE_INSTANCE
7047 _MODIFYABLE = frozenset([
7048 constants.IDISK_SIZE,
7049 constants.IDISK_MODE,
7052 # New or changed disk parameters may have different semantics
7053 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7054 constants.IDISK_ADOPT,
7056 # TODO: Implement support for changing the VG while recreating
7058 constants.IDISK_METAVG,
7061 def CheckArguments(self):
7062 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7063 # Normalize and convert deprecated list of disk indices
7064 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
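# Example of the normalization above: a deprecated plain list such as
# [2, 0] becomes [(0, {}), (2, {})], i.e. sorted (index, params) pairs with
# empty parameter dictionaries.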
7066 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7068 raise errors.OpPrereqError("Some disks have been specified more than"
7069 " once: %s" % utils.CommaJoin(duplicates),
7072 for (idx, params) in self.op.disks:
7073 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7074 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7076 raise errors.OpPrereqError("Parameters for disk %s try to change"
7077 " unmodifyable parameter(s): %s" %
7078 (idx, utils.CommaJoin(unsupported)),
7081 def ExpandNames(self):
7082 self._ExpandAndLockInstance()
7083 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7085 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7086 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7088 self.needed_locks[locking.LEVEL_NODE] = []
7089 self.needed_locks[locking.LEVEL_NODE_RES] = []
7091 def DeclareLocks(self, level):
7092 if level == locking.LEVEL_NODE:
7093 # if we replace the nodes, we only need to lock the old primary,
7094 # otherwise we need to lock all nodes for disk re-creation
7095 primary_only = bool(self.op.nodes)
7096 self._LockInstancesNodes(primary_only=primary_only)
7097 elif level == locking.LEVEL_NODE_RES:
7099 self.needed_locks[locking.LEVEL_NODE_RES] = \
7100 self.needed_locks[locking.LEVEL_NODE][:]
7102 def BuildHooksEnv(self):
7105 This runs on master, primary and secondary nodes of the instance.
7108 return _BuildInstanceHookEnvByObject(self, self.instance)
7110 def BuildHooksNodes(self):
7111 """Build hooks nodes.
7114 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7117 def CheckPrereq(self):
7118 """Check prerequisites.
7120 This checks that the instance is in the cluster and is not running.
7123 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7124 assert instance is not None, \
7125 "Cannot retrieve locked instance %s" % self.op.instance_name
7127 if len(self.op.nodes) != len(instance.all_nodes):
7128 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7129 " %d replacement nodes were specified" %
7130 (instance.name, len(instance.all_nodes),
7131 len(self.op.nodes)),
7133 assert instance.disk_template != constants.DT_DRBD8 or \
7134 len(self.op.nodes) == 2
7135 assert instance.disk_template != constants.DT_PLAIN or \
7136 len(self.op.nodes) == 1
7137 primary_node = self.op.nodes[0]
7139 primary_node = instance.primary_node
7140 _CheckNodeOnline(self, primary_node)
7142 if instance.disk_template == constants.DT_DISKLESS:
7143 raise errors.OpPrereqError("Instance '%s' has no disks" %
7144 self.op.instance_name, errors.ECODE_INVAL)
7146 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7148 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7149 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7150 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7151 if not (self.op.nodes and old_pnode.offline):
7152 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7153 msg="cannot recreate disks")
7156 self.disks = dict(self.op.disks)
7158 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7160 maxidx = max(self.disks.keys())
7161 if maxidx >= len(instance.disks):
7162 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7165 if (self.op.nodes and
7166 sorted(self.disks.keys()) != range(len(instance.disks))):
7167 raise errors.OpPrereqError("Can't recreate disks partially and"
7168 " change the nodes at the same time",
7171 self.instance = instance
7173 def Exec(self, feedback_fn):
7174 """Recreate the disks.
7177 instance = self.instance
7179 assert (self.owned_locks(locking.LEVEL_NODE) ==
7180 self.owned_locks(locking.LEVEL_NODE_RES))
7183 mods = [] # keeps track of needed changes
7185 for idx, disk in enumerate(instance.disks):
7187 changes = self.disks[idx]
7189 # Disk should not be recreated
7193 # update secondaries for disks, if needed
7194 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7195 # need to update the nodes and minors
7196 assert len(self.op.nodes) == 2
7197 assert len(disk.logical_id) == 6 # otherwise disk internals have changed
7199 (_, _, old_port, _, _, old_secret) = disk.logical_id
7200 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7201 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7202 new_minors[0], new_minors[1], old_secret)
7203 assert len(disk.logical_id) == len(new_id)
7207 mods.append((idx, new_id, changes))
7209 # now that we have passed all asserts above, we can apply the mods
7210 # in a single run (to avoid partial changes)
7211 for idx, new_id, changes in mods:
7212 disk = instance.disks[idx]
7213 if new_id is not None:
7214 assert disk.dev_type == constants.LD_DRBD8
7215 disk.logical_id = new_id
7217 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7218 mode=changes.get(constants.IDISK_MODE, None))
7220 # change primary node, if needed
7222 instance.primary_node = self.op.nodes[0]
7223 self.LogWarning("Changing the instance's nodes, you will have to"
7224 " remove any disks left on the older nodes manually")
7227 self.cfg.Update(instance, feedback_fn)
7229 _CreateDisks(self, instance, to_skip=to_skip)
7232 class LUInstanceRename(LogicalUnit):
7233 """Rename an instance.
7236 HPATH = "instance-rename"
7237 HTYPE = constants.HTYPE_INSTANCE
7239 def CheckArguments(self):
7243 if self.op.ip_check and not self.op.name_check:
7244 # TODO: make the ip check more flexible and not depend on the name check
7245 raise errors.OpPrereqError("IP address check requires a name check",
7248 def BuildHooksEnv(self):
7251 This runs on master, primary and secondary nodes of the instance.
7254 env = _BuildInstanceHookEnvByObject(self, self.instance)
7255 env["INSTANCE_NEW_NAME"] = self.op.new_name
7258 def BuildHooksNodes(self):
7259 """Build hooks nodes.
7262 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7265 def CheckPrereq(self):
7266 """Check prerequisites.
7268 This checks that the instance is in the cluster and is not running.
7271 self.op.instance_name = _ExpandInstanceName(self.cfg,
7272 self.op.instance_name)
7273 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7274 assert instance is not None
7275 _CheckNodeOnline(self, instance.primary_node)
7276 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7277 msg="cannot rename")
7278 self.instance = instance
7280 new_name = self.op.new_name
7281 if self.op.name_check:
7282 hostname = netutils.GetHostname(name=new_name)
7283 if hostname.name != new_name:
7284 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7286 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7287 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7288 " same as given hostname '%s'") %
7289 (hostname.name, self.op.new_name),
7291 new_name = self.op.new_name = hostname.name
7292 if (self.op.ip_check and
7293 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7294 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7295 (hostname.ip, new_name),
7296 errors.ECODE_NOTUNIQUE)
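# With name_check set, the new name is resolved first; the optional
# ip_check then refuses the rename if the resolved IP already answers on
# the node daemon port (i.e. it appears to be in use).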
7298 instance_list = self.cfg.GetInstanceList()
7299 if new_name in instance_list and new_name != instance.name:
7300 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7301 new_name, errors.ECODE_EXISTS)
7303 def Exec(self, feedback_fn):
7304 """Rename the instance.
7307 inst = self.instance
7308 old_name = inst.name
7310 rename_file_storage = False
7311 if (inst.disk_template in constants.DTS_FILEBASED and
7312 self.op.new_name != inst.name):
7313 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7314 rename_file_storage = True
7316 self.cfg.RenameInstance(inst.name, self.op.new_name)
7317 # Change the instance lock. This is definitely safe while we hold the BGL.
7318 # Otherwise the new lock would have to be added in acquired mode.
7320 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7321 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7323 # re-read the instance from the configuration after rename
7324 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7326 if rename_file_storage:
7327 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7328 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7329 old_file_storage_dir,
7330 new_file_storage_dir)
7331 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7332 " (but the instance has been renamed in Ganeti)" %
7333 (inst.primary_node, old_file_storage_dir,
7334 new_file_storage_dir))
7336 _StartInstanceDisks(self, inst, None)
7338 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7339 old_name, self.op.debug_level)
7340 msg = result.fail_msg
7342 msg = ("Could not run OS rename script for instance %s on node %s"
7343 " (but the instance has been renamed in Ganeti): %s" %
7344 (inst.name, inst.primary_node, msg))
7345 self.proc.LogWarning(msg)
7347 _ShutdownInstanceDisks(self, inst)
7352 class LUInstanceRemove(LogicalUnit):
7353 """Remove an instance.
7356 HPATH = "instance-remove"
7357 HTYPE = constants.HTYPE_INSTANCE
7360 def ExpandNames(self):
7361 self._ExpandAndLockInstance()
7362 self.needed_locks[locking.LEVEL_NODE] = []
7363 self.needed_locks[locking.LEVEL_NODE_RES] = []
7364 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7366 def DeclareLocks(self, level):
7367 if level == locking.LEVEL_NODE:
7368 self._LockInstancesNodes()
7369 elif level == locking.LEVEL_NODE_RES:
7371 self.needed_locks[locking.LEVEL_NODE_RES] = \
7372 self.needed_locks[locking.LEVEL_NODE][:]
7374 def BuildHooksEnv(self):
7377 This runs on master, primary and secondary nodes of the instance.
7380 env = _BuildInstanceHookEnvByObject(self, self.instance)
7381 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7384 def BuildHooksNodes(self):
7385 """Build hooks nodes.
7388 nl = [self.cfg.GetMasterNode()]
7389 nl_post = list(self.instance.all_nodes) + nl
7390 return (nl, nl_post)
7392 def CheckPrereq(self):
7393 """Check prerequisites.
7395 This checks that the instance is in the cluster.
7398 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7399 assert self.instance is not None, \
7400 "Cannot retrieve locked instance %s" % self.op.instance_name
7402 def Exec(self, feedback_fn):
7403 """Remove the instance.
7406 instance = self.instance
7407 logging.info("Shutting down instance %s on node %s",
7408 instance.name, instance.primary_node)
7410 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7411 self.op.shutdown_timeout)
7412 msg = result.fail_msg
7414 if self.op.ignore_failures:
7415 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7417 raise errors.OpExecError("Could not shutdown instance %s on"
7419 (instance.name, instance.primary_node, msg))
7421 assert (self.owned_locks(locking.LEVEL_NODE) ==
7422 self.owned_locks(locking.LEVEL_NODE_RES))
7423 assert not (set(instance.all_nodes) -
7424 self.owned_locks(locking.LEVEL_NODE)), \
7425 "Not owning correct locks"
7427 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7430 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7431 """Utility function to remove an instance.
7434 logging.info("Removing block devices for instance %s", instance.name)
7436 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7437 if not ignore_failures:
7438 raise errors.OpExecError("Can't remove instance's disks")
7439 feedback_fn("Warning: can't remove instance's disks")
7441 logging.info("Removing instance %s out of cluster config", instance.name)
7443 lu.cfg.RemoveInstance(instance.name)
7445 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7446 "Instance lock removal conflict"
7448 # Remove lock for the instance
7449 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7452 class LUInstanceQuery(NoHooksLU):
7453 """Logical unit for querying instances.
7456 # pylint: disable=W0142
7459 def CheckArguments(self):
7460 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7461 self.op.output_fields, self.op.use_locking)
7463 def ExpandNames(self):
7464 self.iq.ExpandNames(self)
7466 def DeclareLocks(self, level):
7467 self.iq.DeclareLocks(self, level)
7469 def Exec(self, feedback_fn):
7470 return self.iq.OldStyleQuery(self)
7473 class LUInstanceFailover(LogicalUnit):
7474 """Failover an instance.
7477 HPATH = "instance-failover"
7478 HTYPE = constants.HTYPE_INSTANCE
7481 def CheckArguments(self):
7482 """Check the arguments.
7485 self.iallocator = getattr(self.op, "iallocator", None)
7486 self.target_node = getattr(self.op, "target_node", None)
7488 def ExpandNames(self):
7489 self._ExpandAndLockInstance()
7491 if self.op.target_node is not None:
7492 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7494 self.needed_locks[locking.LEVEL_NODE] = []
7495 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7497 self.needed_locks[locking.LEVEL_NODE_RES] = []
7498 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7500 ignore_consistency = self.op.ignore_consistency
7501 shutdown_timeout = self.op.shutdown_timeout
7502 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7505 ignore_consistency=ignore_consistency,
7506 shutdown_timeout=shutdown_timeout,
7507 ignore_ipolicy=self.op.ignore_ipolicy)
7508 self.tasklets = [self._migrater]
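# Failover is implemented with the same TLMigrateInstance tasklet as live
# migration, parametrized (ignore_consistency, shutdown_timeout) for the
# shutdown-based code path.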
7510 def DeclareLocks(self, level):
7511 if level == locking.LEVEL_NODE:
7512 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7513 if instance.disk_template in constants.DTS_EXT_MIRROR:
7514 if self.op.target_node is None:
7515 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7517 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7518 self.op.target_node]
7519 del self.recalculate_locks[locking.LEVEL_NODE]
7521 self._LockInstancesNodes()
7522 elif level == locking.LEVEL_NODE_RES:
7524 self.needed_locks[locking.LEVEL_NODE_RES] = \
7525 self.needed_locks[locking.LEVEL_NODE][:]
7527 def BuildHooksEnv(self):
7530 This runs on master, primary and secondary nodes of the instance.
7533 instance = self._migrater.instance
7534 source_node = instance.primary_node
7535 target_node = self.op.target_node
7537 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7538 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7539 "OLD_PRIMARY": source_node,
7540 "NEW_PRIMARY": target_node,
7543 if instance.disk_template in constants.DTS_INT_MIRROR:
7544 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7545 env["NEW_SECONDARY"] = source_node
7547 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7549 env.update(_BuildInstanceHookEnvByObject(self, instance))
7553 def BuildHooksNodes(self):
7554 """Build hooks nodes.
7557 instance = self._migrater.instance
7558 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7559 return (nl, nl + [instance.primary_node])
7562 class LUInstanceMigrate(LogicalUnit):
7563 """Migrate an instance.
7565 This is migration without shutting down, compared to the failover,
7566 which is done with shutdown.
7569 HPATH = "instance-migrate"
7570 HTYPE = constants.HTYPE_INSTANCE
7573 def ExpandNames(self):
7574 self._ExpandAndLockInstance()
7576 if self.op.target_node is not None:
7577 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7579 self.needed_locks[locking.LEVEL_NODE] = []
7580 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7582 self.needed_locks[locking.LEVEL_NODE] = []
7583 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7586 TLMigrateInstance(self, self.op.instance_name,
7587 cleanup=self.op.cleanup,
7589 fallback=self.op.allow_failover,
7590 allow_runtime_changes=self.op.allow_runtime_changes,
7591 ignore_ipolicy=self.op.ignore_ipolicy)
7592 self.tasklets = [self._migrater]
7594 def DeclareLocks(self, level):
7595 if level == locking.LEVEL_NODE:
7596 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7597 if instance.disk_template in constants.DTS_EXT_MIRROR:
7598 if self.op.target_node is None:
7599 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7601 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7602 self.op.target_node]
7603 del self.recalculate_locks[locking.LEVEL_NODE]
7605 self._LockInstancesNodes()
7606 elif level == locking.LEVEL_NODE_RES:
7608 self.needed_locks[locking.LEVEL_NODE_RES] = \
7609 self.needed_locks[locking.LEVEL_NODE][:]
7611 def BuildHooksEnv(self):
7614 This runs on master, primary and secondary nodes of the instance.
7617 instance = self._migrater.instance
7618 source_node = instance.primary_node
7619 target_node = self.op.target_node
7620 env = _BuildInstanceHookEnvByObject(self, instance)
7622 "MIGRATE_LIVE": self._migrater.live,
7623 "MIGRATE_CLEANUP": self.op.cleanup,
7624 "OLD_PRIMARY": source_node,
7625 "NEW_PRIMARY": target_node,
7626 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7629 if instance.disk_template in constants.DTS_INT_MIRROR:
7630 env["OLD_SECONDARY"] = target_node
7631 env["NEW_SECONDARY"] = source_node
7633 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7637 def BuildHooksNodes(self):
7638 """Build hooks nodes.
7641 instance = self._migrater.instance
7642 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7643 return (nl, nl + [instance.primary_node])
7646 class LUInstanceMove(LogicalUnit):
7647 """Move an instance by data-copying.
7650 HPATH = "instance-move"
7651 HTYPE = constants.HTYPE_INSTANCE
7654 def ExpandNames(self):
7655 self._ExpandAndLockInstance()
7656 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7657 self.op.target_node = target_node
7658 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7659 self.needed_locks[locking.LEVEL_NODE_RES] = []
7660 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7662 def DeclareLocks(self, level):
7663 if level == locking.LEVEL_NODE:
7664 self._LockInstancesNodes(primary_only=True)
7665 elif level == locking.LEVEL_NODE_RES:
7667 self.needed_locks[locking.LEVEL_NODE_RES] = \
7668 self.needed_locks[locking.LEVEL_NODE][:]
7670 def BuildHooksEnv(self):
7673 This runs on master, primary and secondary nodes of the instance.
7677 "TARGET_NODE": self.op.target_node,
7678 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7680 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7683 def BuildHooksNodes(self):
7684 """Build hooks nodes.
7688 self.cfg.GetMasterNode(),
7689 self.instance.primary_node,
7690 self.op.target_node,
7694 def CheckPrereq(self):
7695 """Check prerequisites.
7697 This checks that the instance is in the cluster.
7700 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7701 assert self.instance is not None, \
7702 "Cannot retrieve locked instance %s" % self.op.instance_name
7704 node = self.cfg.GetNodeInfo(self.op.target_node)
7705 assert node is not None, \
7706 "Cannot retrieve locked node %s" % self.op.target_node
7708 self.target_node = target_node = node.name
7710 if target_node == instance.primary_node:
7711 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7712 (instance.name, target_node),
7715 bep = self.cfg.GetClusterInfo().FillBE(instance)
7717 for idx, dsk in enumerate(instance.disks):
7718 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7719 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7720 " cannot copy" % idx, errors.ECODE_STATE)
7722 _CheckNodeOnline(self, target_node)
7723 _CheckNodeNotDrained(self, target_node)
7724 _CheckNodeVmCapable(self, target_node)
7725 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7726 self.cfg.GetNodeGroup(node.group))
7727 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7728 ignore=self.op.ignore_ipolicy)
7730 if instance.admin_state == constants.ADMINST_UP:
7731 # check memory requirements on the target node
7732 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7733 instance.name, bep[constants.BE_MAXMEM],
7734 instance.hypervisor)
7736 self.LogInfo("Not checking memory on the secondary node as"
7737 " instance will not be started")
7739 # check bridge existence
7740 _CheckInstanceBridgesExist(self, instance, node=target_node)
7742 def Exec(self, feedback_fn):
7743 """Move an instance.
7745 The move is done by shutting it down on its present node, copying
7746 the data over (slow) and starting it on the new node.
7749 instance = self.instance
7751 source_node = instance.primary_node
7752 target_node = self.target_node
7754 self.LogInfo("Shutting down instance %s on source node %s",
7755 instance.name, source_node)
7757 assert (self.owned_locks(locking.LEVEL_NODE) ==
7758 self.owned_locks(locking.LEVEL_NODE_RES))
7760 result = self.rpc.call_instance_shutdown(source_node, instance,
7761 self.op.shutdown_timeout)
7762 msg = result.fail_msg
7764 if self.op.ignore_consistency:
7765 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7766 " Proceeding anyway. Please make sure node"
7767 " %s is down. Error details: %s",
7768 instance.name, source_node, source_node, msg)
7770 raise errors.OpExecError("Could not shutdown instance %s on"
7772 (instance.name, source_node, msg))
7774 # create the target disks
7776 _CreateDisks(self, instance, target_node=target_node)
7777 except errors.OpExecError:
7778 self.LogWarning("Device creation failed, reverting...")
7780 _RemoveDisks(self, instance, target_node=target_node)
7782 self.cfg.ReleaseDRBDMinors(instance.name)
7785 cluster_name = self.cfg.GetClusterInfo().cluster_name
7788 # activate, get path, copy the data over
7789 for idx, disk in enumerate(instance.disks):
7790 self.LogInfo("Copying data for disk %d", idx)
7791 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7792 instance.name, True, idx)
7794 self.LogWarning("Can't assemble newly created disk %d: %s",
7795 idx, result.fail_msg)
7796 errs.append(result.fail_msg)
7798 dev_path = result.payload
7799 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7800 target_node, dev_path,
7803 self.LogWarning("Can't copy data over for disk %d: %s",
7804 idx, result.fail_msg)
7805 errs.append(result.fail_msg)
7809 self.LogWarning("Some disks failed to copy, aborting")
7811 _RemoveDisks(self, instance, target_node=target_node)
7813 self.cfg.ReleaseDRBDMinors(instance.name)
7814 raise errors.OpExecError("Errors during disk copy: %s" %
7817 instance.primary_node = target_node
7818 self.cfg.Update(instance, feedback_fn)
7820 self.LogInfo("Removing the disks on the original node")
7821 _RemoveDisks(self, instance, target_node=source_node)
7823 # Only start the instance if it's marked as up
7824 if instance.admin_state == constants.ADMINST_UP:
7825 self.LogInfo("Starting instance %s on node %s",
7826 instance.name, target_node)
7828 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7829 ignore_secondaries=True)
7831 _ShutdownInstanceDisks(self, instance)
7832 raise errors.OpExecError("Can't activate the instance's disks")
7834 result = self.rpc.call_instance_start(target_node,
7835 (instance, None, None), False)
7836 msg = result.fail_msg
7838 _ShutdownInstanceDisks(self, instance)
7839 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7840 (instance.name, target_node, msg))
7843 class LUNodeMigrate(LogicalUnit):
7844 """Migrate all instances from a node.
7847 HPATH = "node-migrate"
7848 HTYPE = constants.HTYPE_NODE
7851 def CheckArguments(self):
7854 def ExpandNames(self):
7855 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7857 self.share_locks = _ShareAll()
7858 self.needed_locks = {
7859 locking.LEVEL_NODE: [self.op.node_name],
7862 def BuildHooksEnv(self):
7865 This runs on the master, the primary and all the secondaries.
7869 "NODE_NAME": self.op.node_name,
7870 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7873 def BuildHooksNodes(self):
7874 """Build hooks nodes.
7877 nl = [self.cfg.GetMasterNode()]
7880 def CheckPrereq(self):
7883 def Exec(self, feedback_fn):
7884 # Prepare jobs for migration instances
7885 allow_runtime_changes = self.op.allow_runtime_changes
7887 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7890 iallocator=self.op.iallocator,
7891 target_node=self.op.target_node,
7892 allow_runtime_changes=allow_runtime_changes,
7893 ignore_ipolicy=self.op.ignore_ipolicy)]
7894 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7897 # TODO: Run iallocator in this opcode and pass correct placement options to
7898 # OpInstanceMigrate. Since other jobs can modify the cluster between
7899 # running the iallocator and the actual migration, a good consistency model
7900 # will have to be found.
7902 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7903 frozenset([self.op.node_name]))
7905 return ResultWithJobs(jobs)
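# Illustrative sketch (not part of the original code): for a node with two
# hypothetical primary instances "inst1" and "inst2", the "jobs" list built
# above contains one single-opcode inner list per instance, roughly:
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2", ...)],
#   ]
#
# Each inner list is submitted as a separate migration job.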
7908 class TLMigrateInstance(Tasklet):
7909 """Tasklet class for instance migration.
7912 @ivar live: whether the migration will be done live or non-live;
7913 this variable is initialized only after CheckPrereq has run
7914 @type cleanup: boolean
7915 @ivar cleanup: Whether we are cleaning up after a failed migration
7916 @type iallocator: string
7917 @ivar iallocator: The iallocator used to determine target_node
7918 @type target_node: string
7919 @ivar target_node: If given, the target_node to reallocate the instance to
7920 @type failover: boolean
7921 @ivar failover: Whether operation results in failover or migration
7922 @type fallback: boolean
7923 @ivar fallback: Whether fallback to failover is allowed if migration not
7925 @type ignore_consistency: boolean
7926 @ivar ignore_consistency: Whether we should ignore consistency between source
7928 @type shutdown_timeout: int
7929 @ivar shutdown_timeout: the timeout to use for the shutdown in case of failover
7930 @type ignore_ipolicy: bool
7931 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7936 _MIGRATION_POLL_INTERVAL = 1 # seconds
7937 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7939 def __init__(self, lu, instance_name, cleanup=False,
7940 failover=False, fallback=False,
7941 ignore_consistency=False,
7942 allow_runtime_changes=True,
7943 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7944 ignore_ipolicy=False):
7945 """Initializes this class.
7948 Tasklet.__init__(self, lu)
7951 self.instance_name = instance_name
7952 self.cleanup = cleanup
7953 self.live = False # will be overridden later
7954 self.failover = failover
7955 self.fallback = fallback
7956 self.ignore_consistency = ignore_consistency
7957 self.shutdown_timeout = shutdown_timeout
7958 self.ignore_ipolicy = ignore_ipolicy
7959 self.allow_runtime_changes = allow_runtime_changes
7961 def CheckPrereq(self):
7962 """Check prerequisites.
7964 This checks that the instance is in the cluster.
7967 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7968 instance = self.cfg.GetInstanceInfo(instance_name)
7969 assert instance is not None
7970 self.instance = instance
7971 cluster = self.cfg.GetClusterInfo()
7973 if (not self.cleanup and
7974 not instance.admin_state == constants.ADMINST_UP and
7975 not self.failover and self.fallback):
7976 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7977 " switching to failover")
7978 self.failover = True
7980 if instance.disk_template not in constants.DTS_MIRRORED:
7985 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7986 " %s" % (instance.disk_template, text),
7989 if instance.disk_template in constants.DTS_EXT_MIRROR:
7990 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7992 if self.lu.op.iallocator:
7993 self._RunAllocator()
7995 # We set self.target_node as it is required by
7997 self.target_node = self.lu.op.target_node
7999 # Check that the target node is correct in terms of instance policy
8000 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8001 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8002 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8003 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8004 ignore=self.ignore_ipolicy)
8006 # self.target_node is already populated, either directly or by the
8008 target_node = self.target_node
8009 if self.target_node == instance.primary_node:
8010 raise errors.OpPrereqError("Cannot migrate instance %s"
8011 " to its primary (%s)" %
8012 (instance.name, instance.primary_node))
8014 if len(self.lu.tasklets) == 1:
8015 # It is safe to release locks only when we're the only tasklet
8017 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8018 keep=[instance.primary_node, self.target_node])
8021 secondary_nodes = instance.secondary_nodes
8022 if not secondary_nodes:
8023 raise errors.ConfigurationError("No secondary node but using"
8024 " %s disk template" %
8025 instance.disk_template)
8026 target_node = secondary_nodes[0]
8027 if self.lu.op.iallocator or (self.lu.op.target_node and
8028 self.lu.op.target_node != target_node):
8030 text = "failed over"
8033 raise errors.OpPrereqError("Instances with disk template %s cannot"
8034 " be %s to arbitrary nodes"
8035 " (neither an iallocator nor a target"
8036 " node can be passed)" %
8037 (instance.disk_template, text),
8039 nodeinfo = self.cfg.GetNodeInfo(target_node)
8040 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8041 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8042 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8043 ignore=self.ignore_ipolicy)
8045 i_be = cluster.FillBE(instance)
8047 # check memory requirements on the secondary node
8048 if (not self.cleanup and
8049 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8050 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8051 "migrating instance %s" %
8053 i_be[constants.BE_MINMEM],
8054 instance.hypervisor)
8056 self.lu.LogInfo("Not checking memory on the secondary node as"
8057 " instance will not be started")
8059 # check if failover must be forced instead of migration
8060 if (not self.cleanup and not self.failover and
8061 i_be[constants.BE_ALWAYS_FAILOVER]):
8063 self.lu.LogInfo("Instance configured to always failover; fallback"
8065 self.failover = True
8067 raise errors.OpPrereqError("This instance has been configured to"
8068 " always failover, please allow failover",
8071 # check bridge existence
8072 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8074 if not self.cleanup:
8075 _CheckNodeNotDrained(self.lu, target_node)
8076 if not self.failover:
8077 result = self.rpc.call_instance_migratable(instance.primary_node,
8079 if result.fail_msg and self.fallback:
8080 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8082 self.failover = True
8084 result.Raise("Can't migrate, please use failover",
8085 prereq=True, ecode=errors.ECODE_STATE)
8087 assert not (self.failover and self.cleanup)
8089 if not self.failover:
8090 if self.lu.op.live is not None and self.lu.op.mode is not None:
8091 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8092 " parameters are accepted",
8094 if self.lu.op.live is not None:
8096 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8098 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8099 # reset the 'live' parameter to None so that repeated
8100 # invocations of CheckPrereq do not raise an exception
8101 self.lu.op.live = None
8102 elif self.lu.op.mode is None:
8103 # read the default value from the hypervisor
8104 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8105 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8107 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
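# Illustrative summary (not in the original source) of how the live/mode
# parameters are resolved above, assuming the opcode attributes "live" and
# "mode":
#
#   live=True,  mode not given  -> mode = HT_MIGRATION_LIVE
#   live=False, mode not given  -> mode = HT_MIGRATION_NONLIVE
#   live given, mode given      -> OpPrereqError
#   neither given               -> mode = hypervisor's HV_MIGRATION_MODE default
#
# self.live is then simply (mode == HT_MIGRATION_LIVE); for a failover it is
# never set to True (see "Failover is never live" below).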
8109 # Failover is never live
8112 if not (self.failover or self.cleanup):
8113 remote_info = self.rpc.call_instance_info(instance.primary_node,
8115 instance.hypervisor)
8116 remote_info.Raise("Error checking instance on node %s" %
8117 instance.primary_node)
8118 instance_running = bool(remote_info.payload)
8119 if instance_running:
8120 self.current_mem = int(remote_info.payload["memory"])
8122 def _RunAllocator(self):
8123 """Run the allocator based on input opcode.
8126 # FIXME: add a self.ignore_ipolicy option
8127 ial = IAllocator(self.cfg, self.rpc,
8128 mode=constants.IALLOCATOR_MODE_RELOC,
8129 name=self.instance_name,
8130 relocate_from=[self.instance.primary_node],
8133 ial.Run(self.lu.op.iallocator)
8136 raise errors.OpPrereqError("Can't compute nodes using"
8137 " iallocator '%s': %s" %
8138 (self.lu.op.iallocator, ial.info),
8140 if len(ial.result) != ial.required_nodes:
8141 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8142 " of nodes (%s), required %s" %
8143 (self.lu.op.iallocator, len(ial.result),
8144 ial.required_nodes), errors.ECODE_FAULT)
8145 self.target_node = ial.result[0]
8146 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8147 self.instance_name, self.lu.op.iallocator,
8148 utils.CommaJoin(ial.result))
8150 def _WaitUntilSync(self):
8151 """Poll with custom rpc for disk sync.
8153 This uses our own step-based rpc call.
8156 self.feedback_fn("* wait until resync is done")
8160 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8162 (self.instance.disks,
8165 for node, nres in result.items():
8166 nres.Raise("Cannot resync disks on node %s" % node)
8167 node_done, node_percent = nres.payload
8168 all_done = all_done and node_done
8169 if node_percent is not None:
8170 min_percent = min(min_percent, node_percent)
8172 if min_percent < 100:
8173 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8176 def _EnsureSecondary(self, node):
8177 """Demote a node to secondary.
8180 self.feedback_fn("* switching node %s to secondary mode" % node)
8182 for dev in self.instance.disks:
8183 self.cfg.SetDiskID(dev, node)
8185 result = self.rpc.call_blockdev_close(node, self.instance.name,
8186 self.instance.disks)
8187 result.Raise("Cannot change disk to secondary on node %s" % node)
8189 def _GoStandalone(self):
8190 """Disconnect from the network.
8193 self.feedback_fn("* changing into standalone mode")
8194 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8195 self.instance.disks)
8196 for node, nres in result.items():
8197 nres.Raise("Cannot disconnect disks node %s" % node)
8199 def _GoReconnect(self, multimaster):
8200 """Reconnect to the network.
8206 msg = "single-master"
8207 self.feedback_fn("* changing disks into %s mode" % msg)
8208 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8209 (self.instance.disks, self.instance),
8210 self.instance.name, multimaster)
8211 for node, nres in result.items():
8212 nres.Raise("Cannot change disks config on node %s" % node)
8214 def _ExecCleanup(self):
8215 """Try to cleanup after a failed migration.
8217 The cleanup is done by:
8218 - check that the instance is running only on one node
8219 (and update the config if needed)
8220 - change disks on its secondary node to secondary
8221 - wait until disks are fully synchronized
8222 - disconnect from the network
8223 - change disks into single-master mode
8224 - wait again until disks are fully synchronized
8227 instance = self.instance
8228 target_node = self.target_node
8229 source_node = self.source_node
8231 # check running on only one node
8232 self.feedback_fn("* checking where the instance actually runs"
8233 " (if this hangs, the hypervisor might be in"
8235 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8236 for node, result in ins_l.items():
8237 result.Raise("Can't contact node %s" % node)
8239 runningon_source = instance.name in ins_l[source_node].payload
8240 runningon_target = instance.name in ins_l[target_node].payload
8242 if runningon_source and runningon_target:
8243 raise errors.OpExecError("Instance seems to be running on two nodes,"
8244 " or the hypervisor is confused; you will have"
8245 " to ensure manually that it runs only on one"
8246 " and restart this operation")
8248 if not (runningon_source or runningon_target):
8249 raise errors.OpExecError("Instance does not seem to be running at all;"
8250 " in this case it's safer to repair by"
8251 " running 'gnt-instance stop' to ensure disk"
8252 " shutdown, and then restarting it")
8254 if runningon_target:
8255 # the migration has actually succeeded, we need to update the config
8256 self.feedback_fn("* instance running on secondary node (%s),"
8257 " updating config" % target_node)
8258 instance.primary_node = target_node
8259 self.cfg.Update(instance, self.feedback_fn)
8260 demoted_node = source_node
8262 self.feedback_fn("* instance confirmed to be running on its"
8263 " primary node (%s)" % source_node)
8264 demoted_node = target_node
8266 if instance.disk_template in constants.DTS_INT_MIRROR:
8267 self._EnsureSecondary(demoted_node)
8269 self._WaitUntilSync()
8270 except errors.OpExecError:
8271 # we ignore errors here, since if the device is standalone, it
8272 # won't be able to sync
8274 self._GoStandalone()
8275 self._GoReconnect(False)
8276 self._WaitUntilSync()
8278 self.feedback_fn("* done")
8280 def _RevertDiskStatus(self):
8281 """Try to revert the disk status after a failed migration.
8284 target_node = self.target_node
8285 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8289 self._EnsureSecondary(target_node)
8290 self._GoStandalone()
8291 self._GoReconnect(False)
8292 self._WaitUntilSync()
8293 except errors.OpExecError, err:
8294 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8295 " please try to recover the instance manually;"
8296 " error '%s'" % str(err))
8298 def _AbortMigration(self):
8299 """Call the hypervisor code to abort a started migration.
8302 instance = self.instance
8303 target_node = self.target_node
8304 source_node = self.source_node
8305 migration_info = self.migration_info
8307 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8311 abort_msg = abort_result.fail_msg
8313 logging.error("Aborting migration failed on target node %s: %s",
8314 target_node, abort_msg)
8315 # Don't raise an exception here, as we still have to try to revert the
8316 # disk status, even if this step failed.
8318 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8319 instance, False, self.live)
8320 abort_msg = abort_result.fail_msg
8322 logging.error("Aborting migration failed on source node %s: %s",
8323 source_node, abort_msg)
8325 def _ExecMigration(self):
8326 """Migrate an instance.
8328 The migrate is done by:
8329 - change the disks into dual-master mode
8330 - wait until disks are fully synchronized again
8331 - migrate the instance
8332 - change disks on the new secondary node (the old primary) to secondary
8333 - wait until disks are fully synchronized
8334 - change disks into single-master mode
8337 instance = self.instance
8338 target_node = self.target_node
8339 source_node = self.source_node
8341 # Check for hypervisor version mismatch and warn the user.
8342 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8343 None, [self.instance.hypervisor])
8344 for ninfo in nodeinfo.values():
8345 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8347 (_, _, (src_info, )) = nodeinfo[source_node].payload
8348 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8350 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8351 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8352 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8353 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8354 if src_version != dst_version:
8355 self.feedback_fn("* warning: hypervisor version mismatch between"
8356 " source (%s) and target (%s) node" %
8357 (src_version, dst_version))
8359 self.feedback_fn("* checking disk consistency between source and target")
8360 for (idx, dev) in enumerate(instance.disks):
8361 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8362 raise errors.OpExecError("Disk %s is degraded or not fully"
8363 " synchronized on target node,"
8364 " aborting migration" % idx)
8366 if self.current_mem > self.tgt_free_mem:
8367 if not self.allow_runtime_changes:
8368 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8369 " free memory to fit instance %s on target"
8370 " node %s (have %dMB, need %dMB)" %
8371 (instance.name, target_node,
8372 self.tgt_free_mem, self.current_mem))
8373 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8374 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8377 rpcres.Raise("Cannot modify instance runtime memory")
8379 # First get the migration information from the remote node
8380 result = self.rpc.call_migration_info(source_node, instance)
8381 msg = result.fail_msg
8383 log_err = ("Failed fetching source migration information from %s: %s" %
8385 logging.error(log_err)
8386 raise errors.OpExecError(log_err)
8388 self.migration_info = migration_info = result.payload
8390 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8391 # Then switch the disks to master/master mode
8392 self._EnsureSecondary(target_node)
8393 self._GoStandalone()
8394 self._GoReconnect(True)
8395 self._WaitUntilSync()
8397 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8398 result = self.rpc.call_accept_instance(target_node,
8401 self.nodes_ip[target_node])
8403 msg = result.fail_msg
8405 logging.error("Instance pre-migration failed, trying to revert"
8406 " disk status: %s", msg)
8407 self.feedback_fn("Pre-migration failed, aborting")
8408 self._AbortMigration()
8409 self._RevertDiskStatus()
8410 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8411 (instance.name, msg))
8413 self.feedback_fn("* migrating instance to %s" % target_node)
8414 result = self.rpc.call_instance_migrate(source_node, instance,
8415 self.nodes_ip[target_node],
8417 msg = result.fail_msg
8419 logging.error("Instance migration failed, trying to revert"
8420 " disk status: %s", msg)
8421 self.feedback_fn("Migration failed, aborting")
8422 self._AbortMigration()
8423 self._RevertDiskStatus()
8424 raise errors.OpExecError("Could not migrate instance %s: %s" %
8425 (instance.name, msg))
8427 self.feedback_fn("* starting memory transfer")
8428 last_feedback = time.time()
8430 result = self.rpc.call_instance_get_migration_status(source_node,
8432 msg = result.fail_msg
8433 ms = result.payload # MigrationStatus instance
8434 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8435 logging.error("Instance migration failed, trying to revert"
8436 " disk status: %s", msg)
8437 self.feedback_fn("Migration failed, aborting")
8438 self._AbortMigration()
8439 self._RevertDiskStatus()
8440 raise errors.OpExecError("Could not migrate instance %s: %s" %
8441 (instance.name, msg))
8443 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8444 self.feedback_fn("* memory transfer complete")
8447 if (utils.TimeoutExpired(last_feedback,
8448 self._MIGRATION_FEEDBACK_INTERVAL) and
8449 ms.transferred_ram is not None):
8450 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8451 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8452 last_feedback = time.time()
8454 time.sleep(self._MIGRATION_POLL_INTERVAL)
8456 result = self.rpc.call_instance_finalize_migration_src(source_node,
8460 msg = result.fail_msg
8462 logging.error("Instance migration succeeded, but finalization failed"
8463 " on the source node: %s", msg)
8464 raise errors.OpExecError("Could not finalize instance migration: %s" %
8467 instance.primary_node = target_node
8469 # distribute new instance config to the other nodes
8470 self.cfg.Update(instance, self.feedback_fn)
8472 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8476 msg = result.fail_msg
8478 logging.error("Instance migration succeeded, but finalization failed"
8479 " on the target node: %s", msg)
8480 raise errors.OpExecError("Could not finalize instance migration: %s" %
8483 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8484 self._EnsureSecondary(source_node)
8485 self._WaitUntilSync()
8486 self._GoStandalone()
8487 self._GoReconnect(False)
8488 self._WaitUntilSync()
8490 # If the instance's disk template is `rbd' and there was a successful
8491 # migration, unmap the device from the source node.
8492 if self.instance.disk_template == constants.DT_RBD:
8493 disks = _ExpandCheckDisks(instance, instance.disks)
8494 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8496 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8497 msg = result.fail_msg
8499 logging.error("Migration was successful, but couldn't unmap the"
8500 " block device %s on source node %s: %s",
8501 disk.iv_name, source_node, msg)
8502 logging.error("You need to unmap the device %s manually on %s",
8503 disk.iv_name, source_node)
8505 self.feedback_fn("* done")
8507 def _ExecFailover(self):
8508 """Failover an instance.
8510 The failover is done by shutting it down on its present node and
8511 starting it on the secondary.
8514 instance = self.instance
8515 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8517 source_node = instance.primary_node
8518 target_node = self.target_node
8520 if instance.admin_state == constants.ADMINST_UP:
8521 self.feedback_fn("* checking disk consistency between source and target")
8522 for (idx, dev) in enumerate(instance.disks):
8523 # for drbd, these are drbd over lvm
8524 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8526 if primary_node.offline:
8527 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8529 (primary_node.name, idx, target_node))
8530 elif not self.ignore_consistency:
8531 raise errors.OpExecError("Disk %s is degraded on target node,"
8532 " aborting failover" % idx)
8534 self.feedback_fn("* not checking disk consistency as instance is not"
8537 self.feedback_fn("* shutting down instance on source node")
8538 logging.info("Shutting down instance %s on node %s",
8539 instance.name, source_node)
8541 result = self.rpc.call_instance_shutdown(source_node, instance,
8542 self.shutdown_timeout)
8543 msg = result.fail_msg
8545 if self.ignore_consistency or primary_node.offline:
8546 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8547 " proceeding anyway; please make sure node"
8548 " %s is down; error details: %s",
8549 instance.name, source_node, source_node, msg)
8551 raise errors.OpExecError("Could not shutdown instance %s on"
8553 (instance.name, source_node, msg))
8555 self.feedback_fn("* deactivating the instance's disks on source node")
8556 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8557 raise errors.OpExecError("Can't shut down the instance's disks")
8559 instance.primary_node = target_node
8560 # distribute new instance config to the other nodes
8561 self.cfg.Update(instance, self.feedback_fn)
8563 # Only start the instance if it's marked as up
8564 if instance.admin_state == constants.ADMINST_UP:
8565 self.feedback_fn("* activating the instance's disks on target node %s" %
8567 logging.info("Starting instance %s on node %s",
8568 instance.name, target_node)
8570 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8571 ignore_secondaries=True)
8573 _ShutdownInstanceDisks(self.lu, instance)
8574 raise errors.OpExecError("Can't activate the instance's disks")
8576 self.feedback_fn("* starting the instance on the target node %s" %
8578 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8580 msg = result.fail_msg
8582 _ShutdownInstanceDisks(self.lu, instance)
8583 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8584 (instance.name, target_node, msg))
8586 def Exec(self, feedback_fn):
8587 """Perform the migration.
8590 self.feedback_fn = feedback_fn
8591 self.source_node = self.instance.primary_node
8593 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8594 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8595 self.target_node = self.instance.secondary_nodes[0]
8596 # Otherwise self.target_node has been populated either
8597 # directly, or through an iallocator.
8599 self.all_nodes = [self.source_node, self.target_node]
8600 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8601 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8604 feedback_fn("Failover instance %s" % self.instance.name)
8605 self._ExecFailover()
8607 feedback_fn("Migrating instance %s" % self.instance.name)
8610 return self._ExecCleanup()
8612 return self._ExecMigration()
8615 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8617 """Wrapper around L{_CreateBlockDevInner}.
8619 This method annotates the root device first.
8622 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8623 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8627 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8629 """Create a tree of block devices on a given node.
8631 If this device type has to be created on secondaries, create it and
8634 If not, just recurse to children keeping the same 'force' value.
8636 @attention: The device has to be annotated already.
8638 @param lu: the lu on whose behalf we execute
8639 @param node: the node on which to create the device
8640 @type instance: L{objects.Instance}
8641 @param instance: the instance which owns the device
8642 @type device: L{objects.Disk}
8643 @param device: the device to create
8644 @type force_create: boolean
8645 @param force_create: whether to force creation of this device; this
8646 will be changed to True whenever we find a device which has
8647 CreateOnSecondary() attribute
8648 @param info: the extra 'metadata' we should attach to the device
8649 (this will be represented as a LVM tag)
8650 @type force_open: boolean
8651 @param force_open: this parameter will be passed to the
8652 L{backend.BlockdevCreate} function where it specifies
8653 whether we run on primary or not, and it affects both
8654 the child assembly and the device's own Open() execution
8657 if device.CreateOnSecondary():
8661 for child in device.children:
8662 _CreateBlockDevInner(lu, node, instance, child, force_create,
8665 if not force_create:
8668 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
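# Illustrative sketch (not part of the original module): for a DRBD8 disk
# with two LV children (data and metadata) created on the secondary node,
# the recursion above behaves roughly as follows:
#
#   _CreateBlockDevInner(drbd_dev, force_create=False)
#     # drbd_dev.CreateOnSecondary() is True -> force_create becomes True
#     _CreateBlockDevInner(lv_data, force_create=True)   # children first
#     _CreateBlockDevInner(lv_meta, force_create=True)
#     _CreateSingleBlockDev(drbd_dev)                     # then the device itself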
8671 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8672 """Create a single block device on a given node.
8674 This will not recurse over children of the device, so they must be
8677 @param lu: the lu on whose behalf we execute
8678 @param node: the node on which to create the device
8679 @type instance: L{objects.Instance}
8680 @param instance: the instance which owns the device
8681 @type device: L{objects.Disk}
8682 @param device: the device to create
8683 @param info: the extra 'metadata' we should attach to the device
8684 (this will be represented as a LVM tag)
8685 @type force_open: boolean
8686 @param force_open: this parameter will be passed to the
8687 L{backend.BlockdevCreate} function where it specifies
8688 whether we run on primary or not, and it affects both
8689 the child assembly and the device's own Open() execution
8692 lu.cfg.SetDiskID(device, node)
8693 result = lu.rpc.call_blockdev_create(node, device, device.size,
8694 instance.name, force_open, info)
8695 result.Raise("Can't create block device %s on"
8696 " node %s for instance %s" % (device, node, instance.name))
8697 if device.physical_id is None:
8698 device.physical_id = result.payload
8701 def _GenerateUniqueNames(lu, exts):
8702 """Generate a suitable LV name.
8704 This will generate a logical volume name for the given instance.
8709 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8710 results.append("%s%s" % (new_id, val))
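# Illustrative example (not part of the original code): each result is simply
# the freshly generated unique ID with the requested extension appended, e.g.
# with a hypothetical UUID:
#
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
#   -> ["d2b8a5c2-<...>.disk0_data", "d2b8a5c2-<...>.disk0_meta"]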
8714 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8715 iv_name, p_minor, s_minor):
8716 """Generate a drbd8 device complete with its children.
8719 assert len(vgnames) == len(names) == 2
8720 port = lu.cfg.AllocatePort()
8721 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8723 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8724 logical_id=(vgnames[0], names[0]),
8726 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8727 logical_id=(vgnames[1], names[1]),
8729 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8730 logical_id=(primary, secondary, port,
8733 children=[dev_data, dev_meta],
8734 iv_name=iv_name, params={})
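# Illustrative sketch (not part of the original source): the object tree built
# above for one DRBD8 disk looks roughly like this:
#
#   Disk(LD_DRBD8, size=size,
#        logical_id=(primary, secondary, port, p_minor, s_minor, shared_secret))
#   +-- Disk(LD_LV, size=size,           logical_id=(vgnames[0], names[0]))
#   +-- Disk(LD_LV, size=DRBD_META_SIZE, logical_id=(vgnames[1], names[1]))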
8738 _DISK_TEMPLATE_NAME_PREFIX = {
8739 constants.DT_PLAIN: "",
8740 constants.DT_RBD: ".rbd",
8744 _DISK_TEMPLATE_DEVICE_TYPE = {
8745 constants.DT_PLAIN: constants.LD_LV,
8746 constants.DT_FILE: constants.LD_FILE,
8747 constants.DT_SHARED_FILE: constants.LD_FILE,
8748 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8749 constants.DT_RBD: constants.LD_RBD,
8753 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8754 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8755 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8756 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8757 """Generate the entire disk layout for a given template type.
8760 #TODO: compute space requirements
8762 vgname = lu.cfg.GetVGName()
8763 disk_count = len(disk_info)
8766 if template_name == constants.DT_DISKLESS:
8768 elif template_name == constants.DT_DRBD8:
8769 if len(secondary_nodes) != 1:
8770 raise errors.ProgrammerError("Wrong template configuration")
8771 remote_node = secondary_nodes[0]
8772 minors = lu.cfg.AllocateDRBDMinor(
8773 [primary_node, remote_node] * len(disk_info), instance_name)
8775 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8777 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8780 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8781 for i in range(disk_count)]):
8782 names.append(lv_prefix + "_data")
8783 names.append(lv_prefix + "_meta")
8784 for idx, disk in enumerate(disk_info):
8785 disk_index = idx + base_index
8786 data_vg = disk.get(constants.IDISK_VG, vgname)
8787 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8788 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8789 disk[constants.IDISK_SIZE],
8791 names[idx * 2:idx * 2 + 2],
8792 "disk/%d" % disk_index,
8793 minors[idx * 2], minors[idx * 2 + 1])
8794 disk_dev.mode = disk[constants.IDISK_MODE]
8795 disks.append(disk_dev)
8798 raise errors.ProgrammerError("Wrong template configuration")
8800 if template_name == constants.DT_FILE:
8802 elif template_name == constants.DT_SHARED_FILE:
8803 _req_shr_file_storage()
8805 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8806 if name_prefix is None:
8809 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8810 (name_prefix, base_index + i)
8811 for i in range(disk_count)])
8813 if template_name == constants.DT_PLAIN:
8814 def logical_id_fn(idx, _, disk):
8815 vg = disk.get(constants.IDISK_VG, vgname)
8816 return (vg, names[idx])
8817 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8819 lambda _, disk_index, disk: (file_driver,
8820 "%s/disk%d" % (file_storage_dir,
8822 elif template_name == constants.DT_BLOCK:
8824 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8825 disk[constants.IDISK_ADOPT])
8826 elif template_name == constants.DT_RBD:
8827 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8829 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8831 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8833 for idx, disk in enumerate(disk_info):
8834 disk_index = idx + base_index
8835 size = disk[constants.IDISK_SIZE]
8836 feedback_fn("* disk %s, size %s" %
8837 (disk_index, utils.FormatUnit(size, "h")))
8838 disks.append(objects.Disk(dev_type=dev_type, size=size,
8839 logical_id=logical_id_fn(idx, disk_index, disk),
8840 iv_name="disk/%d" % disk_index,
8841 mode=disk[constants.IDISK_MODE],
8847 def _GetInstanceInfoText(instance):
8848 """Compute that text that should be added to the disk's metadata.
8851 return "originstname+%s" % instance.name
8854 def _CalcEta(time_taken, written, total_size):
8855 """Calculates the ETA based on size written and total size.
8857 @param time_taken: The time taken so far
8858 @param written: amount written so far
8859 @param total_size: The total size of data to be written
8860 @return: The remaining time in seconds
8863 avg_time = time_taken / float(written)
8864 return (total_size - written) * avg_time
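# Illustrative doctest-style example (not in the original source):
#
#   >>> _CalcEta(30.0, 1024, 4096)   # 1024 units written in 30 seconds
#   90.0                             # (4096 - 1024) * (30.0 / 1024)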
8867 def _WipeDisks(lu, instance):
8868 """Wipes instance disks.
8870 @type lu: L{LogicalUnit}
8871 @param lu: the logical unit on whose behalf we execute
8872 @type instance: L{objects.Instance}
8873 @param instance: the instance whose disks we should wipe
8874 @return: the success of the wipe
8877 node = instance.primary_node
8879 for device in instance.disks:
8880 lu.cfg.SetDiskID(device, node)
8882 logging.info("Pause sync of instance %s disks", instance.name)
8883 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8884 (instance.disks, instance),
8887 for idx, success in enumerate(result.payload):
8889 logging.warn("pause-sync of instance %s for disks %d failed",
8893 for idx, device in enumerate(instance.disks):
8894 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8895 # but at most MAX_WIPE_CHUNK
8896 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8897 constants.MIN_WIPE_CHUNK_PERCENT)
8898 # we _must_ make this an int, otherwise rounding errors will occur
8900 wipe_chunk_size = int(wipe_chunk_size)
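# Illustrative example (not part of the original code), assuming the usual
# values MAX_WIPE_CHUNK = 1024 (MiB) and MIN_WIPE_CHUNK_PERCENT = 10:
#
#   device.size = 102400 (100 GiB) -> min(1024, 102400 / 100.0 * 10) = 1024
#   device.size =   5120 (  5 GiB) -> min(1024,   5120 / 100.0 * 10) =  512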
8902 lu.LogInfo("* Wiping disk %d", idx)
8903 logging.info("Wiping disk %d for instance %s, node %s using"
8904 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8909 start_time = time.time()
8911 while offset < size:
8912 wipe_size = min(wipe_chunk_size, size - offset)
8913 logging.debug("Wiping disk %d, offset %s, chunk %s",
8914 idx, offset, wipe_size)
8915 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8917 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8918 (idx, offset, wipe_size))
8921 if now - last_output >= 60:
8922 eta = _CalcEta(now - start_time, offset, size)
8923 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8924 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8927 logging.info("Resume sync of instance %s disks", instance.name)
8929 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8930 (instance.disks, instance),
8933 for idx, success in enumerate(result.payload):
8935 lu.LogWarning("Resume sync of disk %d failed, please have a"
8936 " look at the status and troubleshoot the issue", idx)
8937 logging.warn("resume-sync of instance %s for disks %d failed",
8941 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8942 """Create all disks for an instance.
8944 This abstracts away some work from AddInstance.
8946 @type lu: L{LogicalUnit}
8947 @param lu: the logical unit on whose behalf we execute
8948 @type instance: L{objects.Instance}
8949 @param instance: the instance whose disks we should create
8951 @param to_skip: list of indices to skip
8952 @type target_node: string
8953 @param target_node: if passed, overrides the target node for creation
8955 @return: the success of the creation
8958 info = _GetInstanceInfoText(instance)
8959 if target_node is None:
8960 pnode = instance.primary_node
8961 all_nodes = instance.all_nodes
8966 if instance.disk_template in constants.DTS_FILEBASED:
8967 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8968 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8970 result.Raise("Failed to create directory '%s' on"
8971 " node %s" % (file_storage_dir, pnode))
8973 # Note: this needs to be kept in sync with adding of disks in
8974 # LUInstanceSetParams
8975 for idx, device in enumerate(instance.disks):
8976 if to_skip and idx in to_skip:
8978 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8980 for node in all_nodes:
8981 f_create = node == pnode
8982 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8985 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8986 """Remove all disks for an instance.
8988 This abstracts away some work from `AddInstance()` and
8989 `RemoveInstance()`. Note that in case some of the devices couldn't
8990 be removed, the removal will continue with the other ones (compare
8991 with `_CreateDisks()`).
8993 @type lu: L{LogicalUnit}
8994 @param lu: the logical unit on whose behalf we execute
8995 @type instance: L{objects.Instance}
8996 @param instance: the instance whose disks we should remove
8997 @type target_node: string
8998 @param target_node: used to override the node on which to remove the disks
9000 @return: the success of the removal
9003 logging.info("Removing block devices for instance %s", instance.name)
9006 ports_to_release = set()
9007 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9008 for (idx, device) in enumerate(anno_disks):
9010 edata = [(target_node, device)]
9012 edata = device.ComputeNodeTree(instance.primary_node)
9013 for node, disk in edata:
9014 lu.cfg.SetDiskID(disk, node)
9015 result = lu.rpc.call_blockdev_remove(node, disk)
9017 lu.LogWarning("Could not remove disk %s on node %s,"
9018 " continuing anyway: %s", idx, node, result.fail_msg)
9019 if not (result.offline and node != instance.primary_node):
9022 # if this is a DRBD disk, return its port to the pool
9023 if device.dev_type in constants.LDS_DRBD:
9024 ports_to_release.add(device.logical_id[2])
9026 if all_result or ignore_failures:
9027 for port in ports_to_release:
9028 lu.cfg.AddTcpUdpPort(port)
9030 if instance.disk_template == constants.DT_FILE:
9031 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9035 tgt = instance.primary_node
9036 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9038 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9039 file_storage_dir, instance.primary_node, result.fail_msg)
9045 def _ComputeDiskSizePerVG(disk_template, disks):
9046 """Compute disk size requirements in the volume group
9049 def _compute(disks, payload):
9050 """Universal algorithm.
9055 vgs[disk[constants.IDISK_VG]] = \
9056 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9060 # Required free disk space as a function of disk and swap space
9062 constants.DT_DISKLESS: {},
9063 constants.DT_PLAIN: _compute(disks, 0),
9064 # 128 MB are added for drbd metadata for each disk
9065 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9066 constants.DT_FILE: {},
9067 constants.DT_SHARED_FILE: {},
9070 if disk_template not in req_size_dict:
9071 raise errors.ProgrammerError("Disk template '%s' size requirement"
9072 " is unknown" % disk_template)
9074 return req_size_dict[disk_template]
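# Illustrative example (not part of the original module): for a DRBD8 instance
# with two disks of 10240 MiB and 2048 MiB, both on a hypothetical volume
# group "xenvg", and with DRBD_META_SIZE = 128, the per-VG requirement is
#
#   {"xenvg": (10240 + 128) + (2048 + 128)}  ==  {"xenvg": 12544}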
9077 def _ComputeDiskSize(disk_template, disks):
9078 """Compute disk size requirements in the volume group
9081 # Required free disk space as a function of disk and swap space
9083 constants.DT_DISKLESS: None,
9084 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9085 # 128 MB are added for drbd metadata for each disk
9087 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9088 constants.DT_FILE: None,
9089 constants.DT_SHARED_FILE: 0,
9090 constants.DT_BLOCK: 0,
9091 constants.DT_RBD: 0,
9094 if disk_template not in req_size_dict:
9095 raise errors.ProgrammerError("Disk template '%s' size requirement"
9096 " is unknown" % disk_template)
9098 return req_size_dict[disk_template]
9101 def _FilterVmNodes(lu, nodenames):
9102 """Filters out non-vm_capable nodes from a list.
9104 @type lu: L{LogicalUnit}
9105 @param lu: the logical unit for which we check
9106 @type nodenames: list
9107 @param nodenames: the list of nodes on which we should check
9109 @return: the list of vm-capable nodes
9112 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9113 return [name for name in nodenames if name not in vm_nodes]
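# Illustrative example (not part of the original code): if hypothetical node
# "node3" is the only node marked as not vm_capable, then
# _FilterVmNodes(lu, ["node1", "node2", "node3"]) returns ["node1", "node2"].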
9116 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9117 """Hypervisor parameter validation.
9119 This function abstracts the hypervisor parameter validation to be
9120 used in both instance create and instance modify.
9122 @type lu: L{LogicalUnit}
9123 @param lu: the logical unit for which we check
9124 @type nodenames: list
9125 @param nodenames: the list of nodes on which we should check
9126 @type hvname: string
9127 @param hvname: the name of the hypervisor we should use
9128 @type hvparams: dict
9129 @param hvparams: the parameters which we need to check
9130 @raise errors.OpPrereqError: if the parameters are not valid
9133 nodenames = _FilterVmNodes(lu, nodenames)
9135 cluster = lu.cfg.GetClusterInfo()
9136 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9138 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9139 for node in nodenames:
9143 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9146 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9147 """OS parameters validation.
9149 @type lu: L{LogicalUnit}
9150 @param lu: the logical unit for which we check
9151 @type required: boolean
9152 @param required: whether the validation should fail if the OS is not
9154 @type nodenames: list
9155 @param nodenames: the list of nodes on which we should check
9156 @type osname: string
9157 @param osname: the name of the OS we should use
9158 @type osparams: dict
9159 @param osparams: the parameters which we need to check
9160 @raise errors.OpPrereqError: if the parameters are not valid
9163 nodenames = _FilterVmNodes(lu, nodenames)
9164 result = lu.rpc.call_os_validate(nodenames, required, osname,
9165 [constants.OS_VALIDATE_PARAMETERS],
9167 for node, nres in result.items():
9168 # we don't check for offline cases since this should be run only
9169 # against the master node and/or an instance's nodes
9170 nres.Raise("OS Parameters validation failed on node %s" % node)
9171 if not nres.payload:
9172 lu.LogInfo("OS %s not found on node %s, validation skipped",
9176 class LUInstanceCreate(LogicalUnit):
9177 """Create an instance.
9180 HPATH = "instance-add"
9181 HTYPE = constants.HTYPE_INSTANCE
9184 def CheckArguments(self):
9188 # do not require name_check to ease forward/backward compatibility
9190 if self.op.no_install and self.op.start:
9191 self.LogInfo("No-installation mode selected, disabling startup")
9192 self.op.start = False
9193 # validate/normalize the instance name
9194 self.op.instance_name = \
9195 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9197 if self.op.ip_check and not self.op.name_check:
9198 # TODO: make the ip check more flexible and not depend on the name check
9199 raise errors.OpPrereqError("Cannot do IP address check without a name"
9200 " check", errors.ECODE_INVAL)
9202 # check nics' parameter names
9203 for nic in self.op.nics:
9204 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9206 # check disks: parameter names and consistent adopt/no-adopt strategy
9207 has_adopt = has_no_adopt = False
9208 for disk in self.op.disks:
9209 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9210 if constants.IDISK_ADOPT in disk:
9214 if has_adopt and has_no_adopt:
9215 raise errors.OpPrereqError("Either all disks are adopted or none is",
9218 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9219 raise errors.OpPrereqError("Disk adoption is not supported for the"
9220 " '%s' disk template" %
9221 self.op.disk_template,
9223 if self.op.iallocator is not None:
9224 raise errors.OpPrereqError("Disk adoption not allowed with an"
9225 " iallocator script", errors.ECODE_INVAL)
9226 if self.op.mode == constants.INSTANCE_IMPORT:
9227 raise errors.OpPrereqError("Disk adoption not allowed for"
9228 " instance import", errors.ECODE_INVAL)
9230 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9231 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9232 " but no 'adopt' parameter given" %
9233 self.op.disk_template,
9236 self.adopt_disks = has_adopt
9238 # instance name verification
9239 if self.op.name_check:
9240 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9241 self.op.instance_name = self.hostname1.name
9242 # used in CheckPrereq for ip ping check
9243 self.check_ip = self.hostname1.ip
9245 self.check_ip = None
9247 # file storage checks
9248 if (self.op.file_driver and
9249 not self.op.file_driver in constants.FILE_DRIVER):
9250 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9251 self.op.file_driver, errors.ECODE_INVAL)
9253 if self.op.disk_template == constants.DT_FILE:
9254 opcodes.RequireFileStorage()
9255 elif self.op.disk_template == constants.DT_SHARED_FILE:
9256 opcodes.RequireSharedFileStorage()
9258 ### Node/iallocator related checks
9259 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9261 if self.op.pnode is not None:
9262 if self.op.disk_template in constants.DTS_INT_MIRROR:
9263 if self.op.snode is None:
9264 raise errors.OpPrereqError("The networked disk templates need"
9265 " a mirror node", errors.ECODE_INVAL)
9267 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9269 self.op.snode = None
9271 self._cds = _GetClusterDomainSecret()
9273 if self.op.mode == constants.INSTANCE_IMPORT:
9274 # On import force_variant must be True, because if we forced it at
9275 # initial install, our only chance when importing it back is that it
9277 self.op.force_variant = True
9279 if self.op.no_install:
9280 self.LogInfo("No-installation mode has no effect during import")
9282 elif self.op.mode == constants.INSTANCE_CREATE:
9283 if self.op.os_type is None:
9284 raise errors.OpPrereqError("No guest OS specified",
9286 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9287 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9288 " installation" % self.op.os_type,
9290 if self.op.disk_template is None:
9291 raise errors.OpPrereqError("No disk template specified",
9294 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9295 # Check handshake to ensure both clusters have the same domain secret
9296 src_handshake = self.op.source_handshake
9297 if not src_handshake:
9298 raise errors.OpPrereqError("Missing source handshake",
9301 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9304 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9307 # Load and check source CA
9308 self.source_x509_ca_pem = self.op.source_x509_ca
9309 if not self.source_x509_ca_pem:
9310 raise errors.OpPrereqError("Missing source X509 CA",
9314 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9316 except OpenSSL.crypto.Error, err:
9317 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9318 (err, ), errors.ECODE_INVAL)
9320 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9321 if errcode is not None:
9322 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9325 self.source_x509_ca = cert
9327 src_instance_name = self.op.source_instance_name
9328 if not src_instance_name:
9329 raise errors.OpPrereqError("Missing source instance name",
9332 self.source_instance_name = \
9333 netutils.GetHostname(name=src_instance_name).name
9336 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9337 self.op.mode, errors.ECODE_INVAL)
9339 def ExpandNames(self):
9340 """ExpandNames for CreateInstance.
9342 Figure out the right locks for instance creation.
9345 self.needed_locks = {}
9347 instance_name = self.op.instance_name
9348 # this is just a preventive check, but someone might still add this
9349 # instance in the meantime, and creation will fail at lock-add time
9350 if instance_name in self.cfg.GetInstanceList():
9351 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9352 instance_name, errors.ECODE_EXISTS)
9354 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9356 if self.op.iallocator:
9357 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9358 # specifying a group on instance creation and then selecting nodes from
9360 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9361 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9363 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9364 nodelist = [self.op.pnode]
9365 if self.op.snode is not None:
9366 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9367 nodelist.append(self.op.snode)
9368 self.needed_locks[locking.LEVEL_NODE] = nodelist
9369 # Lock resources of instance's primary and secondary nodes (copy to
9370 # prevent accidental modification)
9371 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9373 # in case of import lock the source node too
9374 if self.op.mode == constants.INSTANCE_IMPORT:
9375 src_node = self.op.src_node
9376 src_path = self.op.src_path
9378 if src_path is None:
9379 self.op.src_path = src_path = self.op.instance_name
9381 if src_node is None:
9382 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9383 self.op.src_node = None
9384 if os.path.isabs(src_path):
9385 raise errors.OpPrereqError("Importing an instance from a path"
9386 " requires a source node option",
9389 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9390 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9391 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9392 if not os.path.isabs(src_path):
9393 self.op.src_path = src_path = \
9394 utils.PathJoin(constants.EXPORT_DIR, src_path)
9396 def _RunAllocator(self):
9397 """Run the allocator based on input opcode.
9400 nics = [n.ToDict() for n in self.nics]
9401 ial = IAllocator(self.cfg, self.rpc,
9402 mode=constants.IALLOCATOR_MODE_ALLOC,
9403 name=self.op.instance_name,
9404 disk_template=self.op.disk_template,
9407 vcpus=self.be_full[constants.BE_VCPUS],
9408 memory=self.be_full[constants.BE_MAXMEM],
9409 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9412 hypervisor=self.op.hypervisor,
9415 ial.Run(self.op.iallocator)
9418 raise errors.OpPrereqError("Can't compute nodes using"
9419 " iallocator '%s': %s" %
9420 (self.op.iallocator, ial.info),
9422 if len(ial.result) != ial.required_nodes:
9423 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9424 " of nodes (%s), required %s" %
9425 (self.op.iallocator, len(ial.result),
9426 ial.required_nodes), errors.ECODE_FAULT)
9427 self.op.pnode = ial.result[0]
9428 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9429 self.op.instance_name, self.op.iallocator,
9430 utils.CommaJoin(ial.result))
9431 if ial.required_nodes == 2:
9432 self.op.snode = ial.result[1]
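# Illustrative note (not part of the control flow): for a mirrored disk
# template the allocator is expected to return two node names, e.g.
#   ial.result == ["node1.example.com", "node2.example.com"]
# so pnode/snode are taken from result[0]/result[1]; for non-mirrored
# templates required_nodes is 1 and only the primary node is set here.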
9434 def BuildHooksEnv(self):
9437 This runs on master, primary and secondary nodes of the instance.
9441 "ADD_MODE": self.op.mode,
9443 if self.op.mode == constants.INSTANCE_IMPORT:
9444 env["SRC_NODE"] = self.op.src_node
9445 env["SRC_PATH"] = self.op.src_path
9446 env["SRC_IMAGES"] = self.src_images
9448 env.update(_BuildInstanceHookEnv(
9449 name=self.op.instance_name,
9450 primary_node=self.op.pnode,
9451 secondary_nodes=self.secondaries,
9452 status=self.op.start,
9453 os_type=self.op.os_type,
9454 minmem=self.be_full[constants.BE_MINMEM],
9455 maxmem=self.be_full[constants.BE_MAXMEM],
9456 vcpus=self.be_full[constants.BE_VCPUS],
9457 nics=_NICListToTuple(self, self.nics),
9458 disk_template=self.op.disk_template,
9459 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9460 for d in self.disks],
9463 hypervisor_name=self.op.hypervisor,
9469 def BuildHooksNodes(self):
9470 """Build hooks nodes.
9473 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9476 def _ReadExportInfo(self):
9477 """Reads the export information from disk.
9479 It will override the opcode source node and path with the actual
9480 information, if these two were not specified before.
9482 @return: the export information
9485 assert self.op.mode == constants.INSTANCE_IMPORT
9487 src_node = self.op.src_node
9488 src_path = self.op.src_path
9490 if src_node is None:
9491 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9492 exp_list = self.rpc.call_export_list(locked_nodes)
9494 for node in exp_list:
9495 if exp_list[node].fail_msg:
9497 if src_path in exp_list[node].payload:
9499 self.op.src_node = src_node = node
9500 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9504 raise errors.OpPrereqError("No export found for relative path %s" %
9505 src_path, errors.ECODE_INVAL)
9507 _CheckNodeOnline(self, src_node)
9508 result = self.rpc.call_export_info(src_node, src_path)
9509 result.Raise("No export or invalid export found in dir %s" % src_path)
9511 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9512 if not export_info.has_section(constants.INISECT_EXP):
9513 raise errors.ProgrammerError("Corrupted export config",
9514 errors.ECODE_ENVIRON)
9516 ei_version = export_info.get(constants.INISECT_EXP, "version")
9517 if int(ei_version) != constants.EXPORT_VERSION:
9518 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9519 (ei_version, constants.EXPORT_VERSION),
9520 errors.ECODE_ENVIRON)
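# Rough sketch of the export info layout read here (section and key names
# taken from the accesses in this class; the concrete values are examples
# only, not a guaranteed format):
#   [export]
#   version = 0
#   [instance]
#   name = inst1.example.com
#   disk0_size = 10240
#   nic0_mac = aa:00:00:12:34:56
#   hypervisor = ...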
9523 def _ReadExportParams(self, einfo):
9524 """Use export parameters as defaults.
9526 In case the opcode doesn't specify (as in override) some instance
9527 parameters, then try to use them from the export information, if that declares them.
9531 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9533 if self.op.disk_template is None:
9534 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9535 self.op.disk_template = einfo.get(constants.INISECT_INS,
9537 if self.op.disk_template not in constants.DISK_TEMPLATES:
9538 raise errors.OpPrereqError("Disk template specified in configuration"
9539 " file is not one of the allowed values:"
9540 " %s" % " ".join(constants.DISK_TEMPLATES))
9542 raise errors.OpPrereqError("No disk template specified and the export"
9543 " is missing the disk_template information",
9546 if not self.op.disks:
9548 # TODO: import the disk iv_name too
9549 for idx in range(constants.MAX_DISKS):
9550 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9551 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9552 disks.append({constants.IDISK_SIZE: disk_sz})
9553 self.op.disks = disks
9554 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9555 raise errors.OpPrereqError("No disk info specified and the export"
9556 " is missing the disk information",
9559 if not self.op.nics:
9561 for idx in range(constants.MAX_NICS):
9562 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9564 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9565 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9572 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9573 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9575 if (self.op.hypervisor is None and
9576 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9577 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9579 if einfo.has_section(constants.INISECT_HYP):
9580 # use the export parameters but do not override the ones
9581 # specified by the user
9582 for name, value in einfo.items(constants.INISECT_HYP):
9583 if name not in self.op.hvparams:
9584 self.op.hvparams[name] = value
9586 if einfo.has_section(constants.INISECT_BEP):
9587 # use the parameters, without overriding
9588 for name, value in einfo.items(constants.INISECT_BEP):
9589 if name not in self.op.beparams:
9590 self.op.beparams[name] = value
9591 # Compatibility for the old "memory" be param
9592 if name == constants.BE_MEMORY:
9593 if constants.BE_MAXMEM not in self.op.beparams:
9594 self.op.beparams[constants.BE_MAXMEM] = value
9595 if constants.BE_MINMEM not in self.op.beparams:
9596 self.op.beparams[constants.BE_MINMEM] = value
9598 # try to read the parameters old style, from the main section
9599 for name in constants.BES_PARAMETERS:
9600 if (name not in self.op.beparams and
9601 einfo.has_option(constants.INISECT_INS, name)):
9602 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9604 if einfo.has_section(constants.INISECT_OSP):
9605 # use the parameters, without overriding
9606 for name, value in einfo.items(constants.INISECT_OSP):
9607 if name not in self.op.osparams:
9608 self.op.osparams[name] = value
9610 def _RevertToDefaults(self, cluster):
9611 """Revert the instance parameters to the default values.
9615 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9616 for name in self.op.hvparams.keys():
9617 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9618 del self.op.hvparams[name]
9620 be_defs = cluster.SimpleFillBE({})
9621 for name in self.op.beparams.keys():
9622 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9623 del self.op.beparams[name]
9625 nic_defs = cluster.SimpleFillNIC({})
9626 for nic in self.op.nics:
9627 for name in constants.NICS_PARAMETERS:
9628 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9631 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9632 for name in self.op.osparams.keys():
9633 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9634 del self.op.osparams[name]
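# Example of the effect (values illustrative): if the cluster default for
# the 'vcpus' beparam is 1 and the opcode also specified 1, the explicit
# value is dropped above so the instance keeps tracking the cluster
# default; only values differing from the defaults survive identify_defaults.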
9636 def _CalculateFileStorageDir(self):
9637 """Calculate final instance file storage dir.
9640 # file storage dir calculation/check
9641 self.instance_file_storage_dir = None
9642 if self.op.disk_template in constants.DTS_FILEBASED:
9643 # build the full file storage dir path
9646 if self.op.disk_template == constants.DT_SHARED_FILE:
9647 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9649 get_fsd_fn = self.cfg.GetFileStorageDir
9651 cfg_storagedir = get_fsd_fn()
9652 if not cfg_storagedir:
9653 raise errors.OpPrereqError("Cluster file storage dir not defined")
9654 joinargs.append(cfg_storagedir)
9656 if self.op.file_storage_dir is not None:
9657 joinargs.append(self.op.file_storage_dir)
9659 joinargs.append(self.op.instance_name)
9661 # pylint: disable=W0142
9662 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
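# Illustrative result (paths are examples, not a required layout): with a
# cluster file storage dir of /srv/ganeti/file-storage, --file-storage-dir
# set to "web" and an instance named inst1, the joined path would be
# /srv/ganeti/file-storage/web/inst1.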
9664 def CheckPrereq(self): # pylint: disable=R0914
9665 """Check prerequisites.
9668 self._CalculateFileStorageDir()
9670 if self.op.mode == constants.INSTANCE_IMPORT:
9671 export_info = self._ReadExportInfo()
9672 self._ReadExportParams(export_info)
9673 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9675 self._old_instance_name = None
9677 if (not self.cfg.GetVGName() and
9678 self.op.disk_template not in constants.DTS_NOT_LVM):
9679 raise errors.OpPrereqError("Cluster does not support lvm-based"
9680 " instances", errors.ECODE_STATE)
9682 if (self.op.hypervisor is None or
9683 self.op.hypervisor == constants.VALUE_AUTO):
9684 self.op.hypervisor = self.cfg.GetHypervisorType()
9686 cluster = self.cfg.GetClusterInfo()
9687 enabled_hvs = cluster.enabled_hypervisors
9688 if self.op.hypervisor not in enabled_hvs:
9689 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9690 " cluster (%s)" % (self.op.hypervisor,
9691 ",".join(enabled_hvs)),
9694 # Check tag validity
9695 for tag in self.op.tags:
9696 objects.TaggableObject.ValidateTag(tag)
9698 # check hypervisor parameter syntax (locally)
9699 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9700 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9702 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9703 hv_type.CheckParameterSyntax(filled_hvp)
9704 self.hv_full = filled_hvp
9705 # check that we don't specify global parameters on an instance
9706 _CheckGlobalHvParams(self.op.hvparams)
9708 # fill and remember the beparams dict
9709 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9710 for param, value in self.op.beparams.iteritems():
9711 if value == constants.VALUE_AUTO:
9712 self.op.beparams[param] = default_beparams[param]
9713 objects.UpgradeBeParams(self.op.beparams)
9714 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9715 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9717 # build os parameters
9718 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9720 # now that hvp/bep are in final format, let's reset to defaults,
9722 if self.op.identify_defaults:
9723 self._RevertToDefaults(cluster)
9727 for idx, nic in enumerate(self.op.nics):
9728 nic_mode_req = nic.get(constants.INIC_MODE, None)
9729 nic_mode = nic_mode_req
9730 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9731 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9733 # in routed mode, for the first nic, the default ip is 'auto'
9734 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9735 default_ip_mode = constants.VALUE_AUTO
9737 default_ip_mode = constants.VALUE_NONE
9739 # ip validity checks
9740 ip = nic.get(constants.INIC_IP, default_ip_mode)
9741 if ip is None or ip.lower() == constants.VALUE_NONE:
9743 elif ip.lower() == constants.VALUE_AUTO:
9744 if not self.op.name_check:
9745 raise errors.OpPrereqError("IP address set to auto but name checks"
9746 " have been skipped",
9748 nic_ip = self.hostname1.ip
9750 if not netutils.IPAddress.IsValid(ip):
9751 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9755 # TODO: check the ip address for uniqueness
9756 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9757 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9760 # MAC address verification
9761 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9762 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9763 mac = utils.NormalizeAndValidateMac(mac)
9766 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9767 except errors.ReservationError:
9768 raise errors.OpPrereqError("MAC address %s already in use"
9769 " in cluster" % mac,
9770 errors.ECODE_NOTUNIQUE)
9772 # Build nic parameters
9773 link = nic.get(constants.INIC_LINK, None)
9774 if link == constants.VALUE_AUTO:
9775 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9778 nicparams[constants.NIC_MODE] = nic_mode
9780 nicparams[constants.NIC_LINK] = link
9782 check_params = cluster.SimpleFillNIC(nicparams)
9783 objects.NIC.CheckParameterSyntax(check_params)
9784 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9786 # disk checks/pre-build
9787 default_vg = self.cfg.GetVGName()
9789 for disk in self.op.disks:
9790 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9791 if mode not in constants.DISK_ACCESS_SET:
9792 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9793 mode, errors.ECODE_INVAL)
9794 size = disk.get(constants.IDISK_SIZE, None)
9796 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9799 except (TypeError, ValueError):
9800 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9803 data_vg = disk.get(constants.IDISK_VG, default_vg)
9805 constants.IDISK_SIZE: size,
9806 constants.IDISK_MODE: mode,
9807 constants.IDISK_VG: data_vg,
9809 if constants.IDISK_METAVG in disk:
9810 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9811 if constants.IDISK_ADOPT in disk:
9812 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9813 self.disks.append(new_disk)
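# Example of one resulting entry (sizes are in MiB, values illustrative):
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# plus the optional IDISK_METAVG/IDISK_ADOPT keys when they were given.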
9815 if self.op.mode == constants.INSTANCE_IMPORT:
9817 for idx in range(len(self.disks)):
9818 option = "disk%d_dump" % idx
9819 if export_info.has_option(constants.INISECT_INS, option):
9820 # FIXME: are the old os-es, disk sizes, etc. useful?
9821 export_name = export_info.get(constants.INISECT_INS, option)
9822 image = utils.PathJoin(self.op.src_path, export_name)
9823 disk_images.append(image)
9825 disk_images.append(False)
9827 self.src_images = disk_images
9829 if self.op.instance_name == self._old_instance_name:
9830 for idx, nic in enumerate(self.nics):
9831 if nic.mac == constants.VALUE_AUTO:
9832 nic_mac_ini = "nic%d_mac" % idx
9833 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9835 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9837 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9838 if self.op.ip_check:
9839 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9840 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9841 (self.check_ip, self.op.instance_name),
9842 errors.ECODE_NOTUNIQUE)
9844 #### mac address generation
9845 # By generating the MAC address here, both the allocator and the hooks get
9846 # the real final MAC address rather than the 'auto' or 'generate' value.
9847 # There is a race condition between the generation and the instance object
9848 # creation, which means that we know the mac is valid now, but we're not
9849 # sure it will be when we actually add the instance. If things go bad
9850 # adding the instance will abort because of a duplicate mac, and the
9851 # creation job will fail.
9852 for nic in self.nics:
9853 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9854 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
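# Assumed behaviour sketch: GenerateMAC combines the cluster's MAC prefix
# with random trailing octets and reserves the result against this
# execution context (ec_id), so the later AddInstance call can commit the
# same reservation instead of racing with other jobs.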
9858 if self.op.iallocator is not None:
9859 self._RunAllocator()
9861 # Release all unneeded node locks
9862 _ReleaseLocks(self, locking.LEVEL_NODE,
9863 keep=filter(None, [self.op.pnode, self.op.snode,
9865 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9866 keep=filter(None, [self.op.pnode, self.op.snode,
9869 #### node related checks
9871 # check primary node
9872 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9873 assert self.pnode is not None, \
9874 "Cannot retrieve locked node %s" % self.op.pnode
9876 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9877 pnode.name, errors.ECODE_STATE)
9879 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9880 pnode.name, errors.ECODE_STATE)
9881 if not pnode.vm_capable:
9882 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9883 " '%s'" % pnode.name, errors.ECODE_STATE)
9885 self.secondaries = []
9887 # mirror node verification
9888 if self.op.disk_template in constants.DTS_INT_MIRROR:
9889 if self.op.snode == pnode.name:
9890 raise errors.OpPrereqError("The secondary node cannot be the"
9891 " primary node", errors.ECODE_INVAL)
9892 _CheckNodeOnline(self, self.op.snode)
9893 _CheckNodeNotDrained(self, self.op.snode)
9894 _CheckNodeVmCapable(self, self.op.snode)
9895 self.secondaries.append(self.op.snode)
9897 snode = self.cfg.GetNodeInfo(self.op.snode)
9898 if pnode.group != snode.group:
9899 self.LogWarning("The primary and secondary nodes are in two"
9900 " different node groups; the disk parameters"
9901 " from the first disk's node group will be"
9904 nodenames = [pnode.name] + self.secondaries
9906 # Verify instance specs
9907 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9909 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9910 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9911 constants.ISPEC_DISK_COUNT: len(self.disks),
9912 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9913 constants.ISPEC_NIC_COUNT: len(self.nics),
9914 constants.ISPEC_SPINDLE_USE: spindle_use,
9917 group_info = self.cfg.GetNodeGroup(pnode.group)
9918 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9919 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9920 if not self.op.ignore_ipolicy and res:
9921 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9922 " policy: %s") % (pnode.group,
9923 utils.CommaJoin(res)),
9926 if not self.adopt_disks:
9927 if self.op.disk_template == constants.DT_RBD:
9928 # _CheckRADOSFreeSpace() is just a placeholder.
9929 # Any function that checks prerequisites can be placed here.
9930 # Check if there is enough space on the RADOS cluster.
9931 _CheckRADOSFreeSpace()
9933 # Check lv size requirements, if not adopting
9934 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9935 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9937 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9938 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9939 disk[constants.IDISK_ADOPT])
9940 for disk in self.disks])
9941 if len(all_lvs) != len(self.disks):
9942 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9944 for lv_name in all_lvs:
9946 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9947 # to ReserveLV use the same syntax
9948 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9949 except errors.ReservationError:
9950 raise errors.OpPrereqError("LV named %s used by another instance" %
9951 lv_name, errors.ECODE_NOTUNIQUE)
9953 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9954 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9956 node_lvs = self.rpc.call_lv_list([pnode.name],
9957 vg_names.payload.keys())[pnode.name]
9958 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9959 node_lvs = node_lvs.payload
9961 delta = all_lvs.difference(node_lvs.keys())
9963 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9964 utils.CommaJoin(delta),
9966 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9968 raise errors.OpPrereqError("Online logical volumes found, cannot"
9969 " adopt: %s" % utils.CommaJoin(online_lvs),
9971 # update the size of disk based on what is found
9972 for dsk in self.disks:
9973 dsk[constants.IDISK_SIZE] = \
9974 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9975 dsk[constants.IDISK_ADOPT])][0]))
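# node_lvs maps "vg/lv" names to tuples whose first element is the size
# (assumed to be in MiB, as elsewhere in this module) and whose third
# element is the "online" flag tested above; only the size is copied into
# the disk definition here.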
9977 elif self.op.disk_template == constants.DT_BLOCK:
9978 # Normalize and de-duplicate device paths
9979 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9980 for disk in self.disks])
9981 if len(all_disks) != len(self.disks):
9982 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9984 baddisks = [d for d in all_disks
9985 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9987 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9988 " cannot be adopted" %
9989 (", ".join(baddisks),
9990 constants.ADOPTABLE_BLOCKDEV_ROOT),
9993 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9994 list(all_disks))[pnode.name]
9995 node_disks.Raise("Cannot get block device information from node %s" %
9997 node_disks = node_disks.payload
9998 delta = all_disks.difference(node_disks.keys())
10000 raise errors.OpPrereqError("Missing block device(s): %s" %
10001 utils.CommaJoin(delta),
10002 errors.ECODE_INVAL)
10003 for dsk in self.disks:
10004 dsk[constants.IDISK_SIZE] = \
10005 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10007 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10009 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10010 # check OS parameters (remotely)
10011 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10013 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10015 # memory check on primary node
10016 #TODO(dynmem): use MINMEM for checking
10018 _CheckNodeFreeMemory(self, self.pnode.name,
10019 "creating instance %s" % self.op.instance_name,
10020 self.be_full[constants.BE_MAXMEM],
10021 self.op.hypervisor)
10023 self.dry_run_result = list(nodenames)
10025 def Exec(self, feedback_fn):
10026 """Create and add the instance to the cluster.
10029 instance = self.op.instance_name
10030 pnode_name = self.pnode.name
10032 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10033 self.owned_locks(locking.LEVEL_NODE)), \
10034 "Node locks differ from node resource locks"
10036 ht_kind = self.op.hypervisor
10037 if ht_kind in constants.HTS_REQ_PORT:
10038 network_port = self.cfg.AllocatePort()
10040 network_port = None
10042 # This is ugly, but we have a chicken-and-egg problem here
10043 # We can only take the group disk parameters, as the instance
10044 # has no disks yet (we are generating them right here).
10045 node = self.cfg.GetNodeInfo(pnode_name)
10046 nodegroup = self.cfg.GetNodeGroup(node.group)
10047 disks = _GenerateDiskTemplate(self,
10048 self.op.disk_template,
10049 instance, pnode_name,
10052 self.instance_file_storage_dir,
10053 self.op.file_driver,
10056 self.cfg.GetGroupDiskParams(nodegroup))
10058 iobj = objects.Instance(name=instance, os=self.op.os_type,
10059 primary_node=pnode_name,
10060 nics=self.nics, disks=disks,
10061 disk_template=self.op.disk_template,
10062 admin_state=constants.ADMINST_DOWN,
10063 network_port=network_port,
10064 beparams=self.op.beparams,
10065 hvparams=self.op.hvparams,
10066 hypervisor=self.op.hypervisor,
10067 osparams=self.op.osparams,
10071 for tag in self.op.tags:
10074 if self.adopt_disks:
10075 if self.op.disk_template == constants.DT_PLAIN:
10076 # rename LVs to the newly-generated names; we need to construct
10077 # 'fake' LV disks with the old data, plus the new unique_id
10078 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10080 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10081 rename_to.append(t_dsk.logical_id)
10082 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10083 self.cfg.SetDiskID(t_dsk, pnode_name)
10084 result = self.rpc.call_blockdev_rename(pnode_name,
10085 zip(tmp_disks, rename_to))
10086 result.Raise("Failed to rename adopted LVs")
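# At this point the adopted LVs carry the generated names: rename_to held
# the newly generated logical_ids, while the temporary 'fake' disks pointed
# at the user-supplied LVs, so e.g. xenvg/my-old-lv ends up as
# xenvg/<uuid>.disk0 (names purely illustrative).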
10088 feedback_fn("* creating instance disks...")
10090 _CreateDisks(self, iobj)
10091 except errors.OpExecError:
10092 self.LogWarning("Device creation failed, reverting...")
10094 _RemoveDisks(self, iobj)
10096 self.cfg.ReleaseDRBDMinors(instance)
10099 feedback_fn("adding instance %s to cluster config" % instance)
10101 self.cfg.AddInstance(iobj, self.proc.GetECId())
10103 # Declare that we don't want to remove the instance lock anymore, as we've
10104 # added the instance to the config
10105 del self.remove_locks[locking.LEVEL_INSTANCE]
10107 if self.op.mode == constants.INSTANCE_IMPORT:
10108 # Release unused nodes
10109 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10111 # Release all nodes
10112 _ReleaseLocks(self, locking.LEVEL_NODE)
10115 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10116 feedback_fn("* wiping instance disks...")
10118 _WipeDisks(self, iobj)
10119 except errors.OpExecError, err:
10120 logging.exception("Wiping disks failed")
10121 self.LogWarning("Wiping instance disks failed (%s)", err)
10125 # Something is already wrong with the disks, don't do anything else
10127 elif self.op.wait_for_sync:
10128 disk_abort = not _WaitForSync(self, iobj)
10129 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10130 # make sure the disks are not degraded (still sync-ing is ok)
10131 feedback_fn("* checking mirrors status")
10132 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10137 _RemoveDisks(self, iobj)
10138 self.cfg.RemoveInstance(iobj.name)
10139 # Make sure the instance lock gets removed
10140 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10141 raise errors.OpExecError("There are some degraded disks for"
10144 # Release all node resource locks
10145 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10147 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10148 # we need to set the disks ID to the primary node, since the
10149 # preceding code might or might not have done it, depending on
10150 # disk template and other options
10151 for disk in iobj.disks:
10152 self.cfg.SetDiskID(disk, pnode_name)
10153 if self.op.mode == constants.INSTANCE_CREATE:
10154 if not self.op.no_install:
10155 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10156 not self.op.wait_for_sync)
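# Rationale (comment only): pausing the DRBD sync keeps the initial resync
# from competing with the OS create scripts for disk bandwidth; the sync is
# resumed right after the scripts finish below.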
10158 feedback_fn("* pausing disk sync to install instance OS")
10159 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10162 for idx, success in enumerate(result.payload):
10164 logging.warn("pause-sync of instance %s for disk %d failed",
10167 feedback_fn("* running the instance OS create scripts...")
10168 # FIXME: pass debug option from opcode to backend
10170 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10171 self.op.debug_level)
10173 feedback_fn("* resuming disk sync")
10174 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10177 for idx, success in enumerate(result.payload):
10179 logging.warn("resume-sync of instance %s for disk %d failed",
10182 os_add_result.Raise("Could not add os for instance %s"
10183 " on node %s" % (instance, pnode_name))
10186 if self.op.mode == constants.INSTANCE_IMPORT:
10187 feedback_fn("* running the instance OS import scripts...")
10191 for idx, image in enumerate(self.src_images):
10195 # FIXME: pass debug option from opcode to backend
10196 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10197 constants.IEIO_FILE, (image, ),
10198 constants.IEIO_SCRIPT,
10199 (iobj.disks[idx], idx),
10201 transfers.append(dt)
10204 masterd.instance.TransferInstanceData(self, feedback_fn,
10205 self.op.src_node, pnode_name,
10206 self.pnode.secondary_ip,
10208 if not compat.all(import_result):
10209 self.LogWarning("Some disks for instance %s on node %s were not"
10210 " imported successfully" % (instance, pnode_name))
10212 rename_from = self._old_instance_name
10214 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10215 feedback_fn("* preparing remote import...")
10216 # The source cluster will stop the instance before attempting to make
10217 # a connection. In some cases stopping an instance can take a long
10218 # time, hence the shutdown timeout is added to the connection timeout.
10220 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10221 self.op.source_shutdown_timeout)
10222 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10224 assert iobj.primary_node == self.pnode.name
10226 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10227 self.source_x509_ca,
10228 self._cds, timeouts)
10229 if not compat.all(disk_results):
10230 # TODO: Should the instance still be started, even if some disks
10231 # failed to import (valid for local imports, too)?
10232 self.LogWarning("Some disks for instance %s on node %s were not"
10233 " imported successfully" % (instance, pnode_name))
10235 rename_from = self.source_instance_name
10238 # also checked in the prereq part
10239 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10242 # Run rename script on newly imported instance
10243 assert iobj.name == instance
10244 feedback_fn("Running rename script for %s" % instance)
10245 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10247 self.op.debug_level)
10248 if result.fail_msg:
10249 self.LogWarning("Failed to run rename script for %s on node"
10250 " %s: %s" % (instance, pnode_name, result.fail_msg))
10252 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10255 iobj.admin_state = constants.ADMINST_UP
10256 self.cfg.Update(iobj, feedback_fn)
10257 logging.info("Starting instance %s on node %s", instance, pnode_name)
10258 feedback_fn("* starting instance...")
10259 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10261 result.Raise("Could not start instance")
10263 return list(iobj.all_nodes)
10266 def _CheckRADOSFreeSpace():
10267 """Compute disk size requirements inside the RADOS cluster.
10270 # For the RADOS cluster we assume there is always enough space.
10274 class LUInstanceConsole(NoHooksLU):
10275 """Connect to an instance's console.
10277 This is somewhat special in that it returns the command line that
10278 you need to run on the master node in order to connect to the console.
10284 def ExpandNames(self):
10285 self.share_locks = _ShareAll()
10286 self._ExpandAndLockInstance()
10288 def CheckPrereq(self):
10289 """Check prerequisites.
10291 This checks that the instance is in the cluster.
10294 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10295 assert self.instance is not None, \
10296 "Cannot retrieve locked instance %s" % self.op.instance_name
10297 _CheckNodeOnline(self, self.instance.primary_node)
10299 def Exec(self, feedback_fn):
10300 """Connect to the console of an instance
10303 instance = self.instance
10304 node = instance.primary_node
10306 node_insts = self.rpc.call_instance_list([node],
10307 [instance.hypervisor])[node]
10308 node_insts.Raise("Can't get node information from %s" % node)
10310 if instance.name not in node_insts.payload:
10311 if instance.admin_state == constants.ADMINST_UP:
10312 state = constants.INSTST_ERRORDOWN
10313 elif instance.admin_state == constants.ADMINST_DOWN:
10314 state = constants.INSTST_ADMINDOWN
10316 state = constants.INSTST_ADMINOFFLINE
10317 raise errors.OpExecError("Instance %s is not running (state %s)" %
10318 (instance.name, state))
10320 logging.debug("Connecting to console of %s on %s", instance.name, node)
10322 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10325 def _GetInstanceConsole(cluster, instance):
10326 """Returns console information for an instance.
10328 @type cluster: L{objects.Cluster}
10329 @type instance: L{objects.Instance}
10333 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10334 # beparams and hvparams are passed separately, to avoid editing the
10335 # instance and then saving the defaults in the instance itself.
10336 hvparams = cluster.FillHV(instance)
10337 beparams = cluster.FillBE(instance)
10338 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10340 assert console.instance == instance.name
10341 assert console.Validate()
10343 return console.ToDict()
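# The returned dictionary is the serialized console description built by the
# hypervisor; depending on the hypervisor type it typically describes either
# a command to run (e.g. an SSH-based serial console) or a host/port style
# endpoint such as VNC (the exact fields depend on objects.InstanceConsole).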
10346 class LUInstanceReplaceDisks(LogicalUnit):
10347 """Replace the disks of an instance.
10350 HPATH = "mirrors-replace"
10351 HTYPE = constants.HTYPE_INSTANCE
10354 def CheckArguments(self):
10355 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10356 self.op.iallocator)
10358 def ExpandNames(self):
10359 self._ExpandAndLockInstance()
10361 assert locking.LEVEL_NODE not in self.needed_locks
10362 assert locking.LEVEL_NODE_RES not in self.needed_locks
10363 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10365 assert self.op.iallocator is None or self.op.remote_node is None, \
10366 "Conflicting options"
10368 if self.op.remote_node is not None:
10369 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10371 # Warning: do not remove the locking of the new secondary here
10372 # unless DRBD8.AddChildren is changed to work in parallel;
10373 # currently it doesn't since parallel invocations of
10374 # FindUnusedMinor will conflict
10375 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10376 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10378 self.needed_locks[locking.LEVEL_NODE] = []
10379 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10381 if self.op.iallocator is not None:
10382 # iallocator will select a new node in the same group
10383 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10385 self.needed_locks[locking.LEVEL_NODE_RES] = []
10387 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10388 self.op.iallocator, self.op.remote_node,
10389 self.op.disks, False, self.op.early_release,
10390 self.op.ignore_ipolicy)
10392 self.tasklets = [self.replacer]
10394 def DeclareLocks(self, level):
10395 if level == locking.LEVEL_NODEGROUP:
10396 assert self.op.remote_node is None
10397 assert self.op.iallocator is not None
10398 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10400 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10401 # Lock all groups used by instance optimistically; this requires going
10402 # via the node before it's locked, requiring verification later on
10403 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10404 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10406 elif level == locking.LEVEL_NODE:
10407 if self.op.iallocator is not None:
10408 assert self.op.remote_node is None
10409 assert not self.needed_locks[locking.LEVEL_NODE]
10411 # Lock member nodes of all locked groups
10412 self.needed_locks[locking.LEVEL_NODE] = [node_name
10413 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10414 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10416 self._LockInstancesNodes()
10417 elif level == locking.LEVEL_NODE_RES:
10419 self.needed_locks[locking.LEVEL_NODE_RES] = \
10420 self.needed_locks[locking.LEVEL_NODE]
10422 def BuildHooksEnv(self):
10423 """Build hooks env.
10425 This runs on the master, the primary and all the secondaries.
10428 instance = self.replacer.instance
10430 "MODE": self.op.mode,
10431 "NEW_SECONDARY": self.op.remote_node,
10432 "OLD_SECONDARY": instance.secondary_nodes[0],
10434 env.update(_BuildInstanceHookEnvByObject(self, instance))
10437 def BuildHooksNodes(self):
10438 """Build hooks nodes.
10441 instance = self.replacer.instance
10443 self.cfg.GetMasterNode(),
10444 instance.primary_node,
10446 if self.op.remote_node is not None:
10447 nl.append(self.op.remote_node)
10450 def CheckPrereq(self):
10451 """Check prerequisites.
10454 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10455 self.op.iallocator is None)
10457 # Verify if node group locks are still correct
10458 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10460 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10462 return LogicalUnit.CheckPrereq(self)
10465 class TLReplaceDisks(Tasklet):
10466 """Replaces disks for an instance.
10468 Note: Locking is not within the scope of this class.
10471 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10472 disks, delay_iallocator, early_release, ignore_ipolicy):
10473 """Initializes this class.
10476 Tasklet.__init__(self, lu)
10479 self.instance_name = instance_name
10481 self.iallocator_name = iallocator_name
10482 self.remote_node = remote_node
10484 self.delay_iallocator = delay_iallocator
10485 self.early_release = early_release
10486 self.ignore_ipolicy = ignore_ipolicy
10489 self.instance = None
10490 self.new_node = None
10491 self.target_node = None
10492 self.other_node = None
10493 self.remote_node_info = None
10494 self.node_secondary_ip = None
10497 def CheckArguments(mode, remote_node, iallocator):
10498 """Helper function for users of this class.
10501 # check for valid parameter combination
10502 if mode == constants.REPLACE_DISK_CHG:
10503 if remote_node is None and iallocator is None:
10504 raise errors.OpPrereqError("When changing the secondary either an"
10505 " iallocator script must be used or the"
10506 " new node given", errors.ECODE_INVAL)
10508 if remote_node is not None and iallocator is not None:
10509 raise errors.OpPrereqError("Give either the iallocator or the new"
10510 " secondary, not both", errors.ECODE_INVAL)
10512 elif remote_node is not None or iallocator is not None:
10513 # Not replacing the secondary
10514 raise errors.OpPrereqError("The iallocator and new node options can"
10515 " only be used when changing the"
10516 " secondary node", errors.ECODE_INVAL)
10519 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10520 """Compute a new secondary node using an IAllocator.
10523 ial = IAllocator(lu.cfg, lu.rpc,
10524 mode=constants.IALLOCATOR_MODE_RELOC,
10525 name=instance_name,
10526 relocate_from=list(relocate_from))
10528 ial.Run(iallocator_name)
10530 if not ial.success:
10531 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10532 " %s" % (iallocator_name, ial.info),
10533 errors.ECODE_NORES)
10535 if len(ial.result) != ial.required_nodes:
10536 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10537 " of nodes (%s), required %s" %
10539 len(ial.result), ial.required_nodes),
10540 errors.ECODE_FAULT)
10542 remote_node_name = ial.result[0]
10544 lu.LogInfo("Selected new secondary for instance '%s': %s",
10545 instance_name, remote_node_name)
10547 return remote_node_name
10549 def _FindFaultyDisks(self, node_name):
10550 """Wrapper for L{_FindFaultyInstanceDisks}.
10553 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10556 def _CheckDisksActivated(self, instance):
10557 """Checks if the instance disks are activated.
10559 @param instance: The instance to check disks
10560 @return: True if they are activated, False otherwise
10563 nodes = instance.all_nodes
10565 for idx, dev in enumerate(instance.disks):
10567 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10568 self.cfg.SetDiskID(dev, node)
10570 result = _BlockdevFind(self, node, dev, instance)
10574 elif result.fail_msg or not result.payload:
10579 def CheckPrereq(self):
10580 """Check prerequisites.
10582 This checks that the instance is in the cluster.
10585 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10586 assert instance is not None, \
10587 "Cannot retrieve locked instance %s" % self.instance_name
10589 if instance.disk_template != constants.DT_DRBD8:
10590 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10591 " instances", errors.ECODE_INVAL)
10593 if len(instance.secondary_nodes) != 1:
10594 raise errors.OpPrereqError("The instance has a strange layout,"
10595 " expected one secondary but found %d" %
10596 len(instance.secondary_nodes),
10597 errors.ECODE_FAULT)
10599 if not self.delay_iallocator:
10600 self._CheckPrereq2()
10602 def _CheckPrereq2(self):
10603 """Check prerequisites, second part.
10605 This function should always be part of CheckPrereq. It was separated and is
10606 now called from Exec because during node evacuation iallocator was only
10607 called with an unmodified cluster model, not taking planned changes into account.
10611 instance = self.instance
10612 secondary_node = instance.secondary_nodes[0]
10614 if self.iallocator_name is None:
10615 remote_node = self.remote_node
10617 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10618 instance.name, instance.secondary_nodes)
10620 if remote_node is None:
10621 self.remote_node_info = None
10623 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10624 "Remote node '%s' is not locked" % remote_node
10626 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10627 assert self.remote_node_info is not None, \
10628 "Cannot retrieve locked node %s" % remote_node
10630 if remote_node == self.instance.primary_node:
10631 raise errors.OpPrereqError("The specified node is the primary node of"
10632 " the instance", errors.ECODE_INVAL)
10634 if remote_node == secondary_node:
10635 raise errors.OpPrereqError("The specified node is already the"
10636 " secondary node of the instance",
10637 errors.ECODE_INVAL)
10639 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10640 constants.REPLACE_DISK_CHG):
10641 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10642 errors.ECODE_INVAL)
10644 if self.mode == constants.REPLACE_DISK_AUTO:
10645 if not self._CheckDisksActivated(instance):
10646 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10647 " first" % self.instance_name,
10648 errors.ECODE_STATE)
10649 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10650 faulty_secondary = self._FindFaultyDisks(secondary_node)
10652 if faulty_primary and faulty_secondary:
10653 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10654 " one node and can not be repaired"
10655 " automatically" % self.instance_name,
10656 errors.ECODE_STATE)
10659 self.disks = faulty_primary
10660 self.target_node = instance.primary_node
10661 self.other_node = secondary_node
10662 check_nodes = [self.target_node, self.other_node]
10663 elif faulty_secondary:
10664 self.disks = faulty_secondary
10665 self.target_node = secondary_node
10666 self.other_node = instance.primary_node
10667 check_nodes = [self.target_node, self.other_node]
10673 # Non-automatic modes
10674 if self.mode == constants.REPLACE_DISK_PRI:
10675 self.target_node = instance.primary_node
10676 self.other_node = secondary_node
10677 check_nodes = [self.target_node, self.other_node]
10679 elif self.mode == constants.REPLACE_DISK_SEC:
10680 self.target_node = secondary_node
10681 self.other_node = instance.primary_node
10682 check_nodes = [self.target_node, self.other_node]
10684 elif self.mode == constants.REPLACE_DISK_CHG:
10685 self.new_node = remote_node
10686 self.other_node = instance.primary_node
10687 self.target_node = secondary_node
10688 check_nodes = [self.new_node, self.other_node]
10690 _CheckNodeNotDrained(self.lu, remote_node)
10691 _CheckNodeVmCapable(self.lu, remote_node)
10693 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10694 assert old_node_info is not None
10695 if old_node_info.offline and not self.early_release:
10696 # doesn't make sense to delay the release
10697 self.early_release = True
10698 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10699 " early-release mode", secondary_node)
10702 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10705 # If not specified all disks should be replaced
10707 self.disks = range(len(self.instance.disks))
10709 # TODO: This is ugly, but right now we can't distinguish between an internally
10710 # submitted opcode and an external one. We should fix that.
10711 if self.remote_node_info:
10712 # We change the node, lets verify it still meets instance policy
10713 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10714 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10716 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10717 ignore=self.ignore_ipolicy)
10719 for node in check_nodes:
10720 _CheckNodeOnline(self.lu, node)
10722 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10725 if node_name is not None)
10727 # Release unneeded node and node resource locks
10728 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10729 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10731 # Release any owned node group
10732 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10733 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10735 # Check whether disks are valid
10736 for disk_idx in self.disks:
10737 instance.FindDisk(disk_idx)
10739 # Get secondary node IP addresses
10740 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10741 in self.cfg.GetMultiNodeInfo(touched_nodes))
10743 def Exec(self, feedback_fn):
10744 """Execute disk replacement.
10746 This dispatches the disk replacement to the appropriate handler.
10749 if self.delay_iallocator:
10750 self._CheckPrereq2()
10753 # Verify owned locks before starting operation
10754 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10755 assert set(owned_nodes) == set(self.node_secondary_ip), \
10756 ("Incorrect node locks, owning %s, expected %s" %
10757 (owned_nodes, self.node_secondary_ip.keys()))
10758 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10759 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10761 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10762 assert list(owned_instances) == [self.instance_name], \
10763 "Instance '%s' not locked" % self.instance_name
10765 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10766 "Should not own any node group lock at this point"
10769 feedback_fn("No disks need replacement")
10772 feedback_fn("Replacing disk(s) %s for %s" %
10773 (utils.CommaJoin(self.disks), self.instance.name))
10775 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10777 # Activate the instance disks if we're replacing them on a down instance
10779 _StartInstanceDisks(self.lu, self.instance, True)
10782 # Should we replace the secondary node?
10783 if self.new_node is not None:
10784 fn = self._ExecDrbd8Secondary
10786 fn = self._ExecDrbd8DiskOnly
10788 result = fn(feedback_fn)
10790 # Deactivate the instance disks if we're replacing them on a
10793 _SafeShutdownInstanceDisks(self.lu, self.instance)
10795 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10798 # Verify owned locks
10799 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10800 nodes = frozenset(self.node_secondary_ip)
10801 assert ((self.early_release and not owned_nodes) or
10802 (not self.early_release and not (set(owned_nodes) - nodes))), \
10803 ("Not owning the correct locks, early_release=%s, owned=%r,"
10804 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10808 def _CheckVolumeGroup(self, nodes):
10809 self.lu.LogInfo("Checking volume groups")
10811 vgname = self.cfg.GetVGName()
10813 # Make sure volume group exists on all involved nodes
10814 results = self.rpc.call_vg_list(nodes)
10816 raise errors.OpExecError("Can't list volume groups on the nodes")
10819 res = results[node]
10820 res.Raise("Error checking node %s" % node)
10821 if vgname not in res.payload:
10822 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10825 def _CheckDisksExistence(self, nodes):
10826 # Check disk existence
10827 for idx, dev in enumerate(self.instance.disks):
10828 if idx not in self.disks:
10832 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10833 self.cfg.SetDiskID(dev, node)
10835 result = _BlockdevFind(self, node, dev, self.instance)
10837 msg = result.fail_msg
10838 if msg or not result.payload:
10840 msg = "disk not found"
10841 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10844 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10845 for idx, dev in enumerate(self.instance.disks):
10846 if idx not in self.disks:
10849 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10852 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10853 on_primary, ldisk=ldisk):
10854 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10855 " replace disks for instance %s" %
10856 (node_name, self.instance.name))
10858 def _CreateNewStorage(self, node_name):
10859 """Create new storage on the primary or secondary node.
10861 This is only used for same-node replaces, not for changing the
10862 secondary node, hence we don't want to modify the existing disk.
10867 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10868 for idx, dev in enumerate(disks):
10869 if idx not in self.disks:
10872 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10874 self.cfg.SetDiskID(dev, node_name)
10876 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10877 names = _GenerateUniqueNames(self.lu, lv_names)
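# _GenerateUniqueNames is assumed to prepend a cluster-unique identifier to
# the ".diskN_data"/".diskN_meta" suffixes, so the LVs created here never
# clash with the ones still attached to the DRBD device.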
10879 (data_disk, meta_disk) = dev.children
10880 vg_data = data_disk.logical_id[0]
10881 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10882 logical_id=(vg_data, names[0]),
10883 params=data_disk.params)
10884 vg_meta = meta_disk.logical_id[0]
10885 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10886 logical_id=(vg_meta, names[1]),
10887 params=meta_disk.params)
10889 new_lvs = [lv_data, lv_meta]
10890 old_lvs = [child.Copy() for child in dev.children]
10891 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10893 # we pass force_create=True to force the LVM creation
10894 for new_lv in new_lvs:
10895 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10896 _GetInstanceInfoText(self.instance), False)
10900 def _CheckDevices(self, node_name, iv_names):
10901 for name, (dev, _, _) in iv_names.iteritems():
10902 self.cfg.SetDiskID(dev, node_name)
10904 result = _BlockdevFind(self, node_name, dev, self.instance)
10906 msg = result.fail_msg
10907 if msg or not result.payload:
10909 msg = "disk not found"
10910 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10913 if result.payload.is_degraded:
10914 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10916 def _RemoveOldStorage(self, node_name, iv_names):
10917 for name, (_, old_lvs, _) in iv_names.iteritems():
10918 self.lu.LogInfo("Remove logical volumes for %s" % name)
10921 self.cfg.SetDiskID(lv, node_name)
10923 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10925 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10926 hint="remove unused LVs manually")
10928 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10929 """Replace a disk on the primary or secondary for DRBD 8.
10931 The algorithm for replace is quite complicated:
10933 1. for each disk to be replaced:
10935 1. create new LVs on the target node with unique names
10936 1. detach old LVs from the drbd device
10937 1. rename old LVs to name_replaced.<time_t>
10938 1. rename new LVs to old LVs
10939 1. attach the new LVs (with the old names now) to the drbd device
10941 1. wait for sync across all devices
10943 1. for each modified disk:
10945 1. remove old LVs (which have the name name_replaced.<time_t>)
10947 Failures are not very well handled.
10952 # Step: check device activation
10953 self.lu.LogStep(1, steps_total, "Check device existence")
10954 self._CheckDisksExistence([self.other_node, self.target_node])
10955 self._CheckVolumeGroup([self.target_node, self.other_node])
10957 # Step: check other node consistency
10958 self.lu.LogStep(2, steps_total, "Check peer consistency")
10959 self._CheckDisksConsistency(self.other_node,
10960 self.other_node == self.instance.primary_node,
10963 # Step: create new storage
10964 self.lu.LogStep(3, steps_total, "Allocate new storage")
10965 iv_names = self._CreateNewStorage(self.target_node)
10967 # Step: for each lv, detach+rename*2+attach
10968 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10969 for dev, old_lvs, new_lvs in iv_names.itervalues():
10970 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10972 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10974 result.Raise("Can't detach drbd from local storage on node"
10975 " %s for device %s" % (self.target_node, dev.iv_name))
10977 #cfg.Update(instance)
10979 # ok, we created the new LVs, so now we know we have the needed
10980 # storage; as such, we proceed on the target node to rename
10981 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10982 # using the assumption that logical_id == physical_id (which in
10983 # turn is the unique_id on that node)
10985 # FIXME(iustin): use a better name for the replaced LVs
10986 temp_suffix = int(time.time())
10987 ren_fn = lambda d, suff: (d.physical_id[0],
10988 d.physical_id[1] + "_replaced-%s" % suff)
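# Example (illustrative only): an LV "<uuid>.disk0_data" would be renamed to
# "<uuid>.disk0_data_replaced-1381234567", where the numeric suffix is the
# current Unix timestamp computed above.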
10990 # Build the rename list based on what LVs exist on the node
10991 rename_old_to_new = []
10992 for to_ren in old_lvs:
10993 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10994 if not result.fail_msg and result.payload:
10996 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10998 self.lu.LogInfo("Renaming the old LVs on the target node")
10999 result = self.rpc.call_blockdev_rename(self.target_node,
11001 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11003 # Now we rename the new LVs to the old LVs
11004 self.lu.LogInfo("Renaming the new LVs on the target node")
11005 rename_new_to_old = [(new, old.physical_id)
11006 for old, new in zip(old_lvs, new_lvs)]
11007 result = self.rpc.call_blockdev_rename(self.target_node,
11009 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11011 # Intermediate steps of in memory modifications
11012 for old, new in zip(old_lvs, new_lvs):
11013 new.logical_id = old.logical_id
11014 self.cfg.SetDiskID(new, self.target_node)
11016 # We need to modify old_lvs so that removal later removes the
11017 # right LVs, not the newly added ones; note that old_lvs is a copy here
11019 for disk in old_lvs:
11020 disk.logical_id = ren_fn(disk, temp_suffix)
11021 self.cfg.SetDiskID(disk, self.target_node)
11023 # Now that the new lvs have the old name, we can add them to the device
11024 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11025 result = self.rpc.call_blockdev_addchildren(self.target_node,
11026 (dev, self.instance), new_lvs)
11027 msg = result.fail_msg
11029 for new_lv in new_lvs:
11030 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11033 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11034 hint=("clean up the unused logical volumes manually"))
11036 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11038 cstep = itertools.count(5)
11040 if self.early_release:
11041 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11042 self._RemoveOldStorage(self.target_node, iv_names)
11043 # TODO: Check if releasing locks early still makes sense
11044 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11046 # Release all resource locks except those used by the instance
11047 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11048 keep=self.node_secondary_ip.keys())
11050 # Release all node locks while waiting for sync
11051 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11053 # TODO: Can the instance lock be downgraded here? Take the optional disk
11054 # shutdown in the caller into consideration.
11057 # This can fail as the old devices are degraded and _WaitForSync
11058 # does a combined result over all disks, so we don't check its return value
11059 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11060 _WaitForSync(self.lu, self.instance)
11062 # Check all devices manually
11063 self._CheckDevices(self.instance.primary_node, iv_names)
11065 # Step: remove old storage
11066 if not self.early_release:
11067 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11068 self._RemoveOldStorage(self.target_node, iv_names)
11070 def _ExecDrbd8Secondary(self, feedback_fn):
11071 """Replace the secondary node for DRBD 8.
11073 The algorithm for replace is quite complicated:
11074 - for all disks of the instance:
11075 - create new LVs on the new node with same names
11076 - shutdown the drbd device on the old secondary
11077 - disconnect the drbd network on the primary
11078 - create the drbd device on the new secondary
11079 - network attach the drbd on the primary, using an artifice:
11080 the drbd code for Attach() will connect to the network if it
11081 finds a device which is connected to the good local disks but
11082 not network enabled
11083 - wait for sync across all devices
11084 - remove all disks from the old secondary
11086 Failures are not very well handled.
11091 pnode = self.instance.primary_node
11093 # Step: check device activation
11094 self.lu.LogStep(1, steps_total, "Check device existence")
11095 self._CheckDisksExistence([self.instance.primary_node])
11096 self._CheckVolumeGroup([self.instance.primary_node])
11098 # Step: check other node consistency
11099 self.lu.LogStep(2, steps_total, "Check peer consistency")
11100 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11102 # Step: create new storage
11103 self.lu.LogStep(3, steps_total, "Allocate new storage")
11104 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11105 for idx, dev in enumerate(disks):
11106 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11107 (self.new_node, idx))
11108 # we pass force_create=True to force LVM creation
11109 for new_lv in dev.children:
11110 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11111 True, _GetInstanceInfoText(self.instance), False)
11113 # Step 4: drbd minors and drbd setup changes
11114 # after this, we must manually remove the drbd minors on both the
11115 # error and the success paths
11116 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11117 minors = self.cfg.AllocateDRBDMinor([self.new_node
11118 for dev in self.instance.disks],
11119 self.instance.name)
11120 logging.debug("Allocated minors %r", minors)
11123 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11124 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11125 (self.new_node, idx))
11126 # create new devices on new_node; note that we create two IDs:
11127 # one without port, so the drbd will be activated without
11128 # networking information on the new node at this stage, and one
11129 # with network, for the later activation in step 4
11130 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11131 if self.instance.primary_node == o_node1:
11134 assert self.instance.primary_node == o_node2, "Three-node instance?"
11137 new_alone_id = (self.instance.primary_node, self.new_node, None,
11138 p_minor, new_minor, o_secret)
11139 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11140 p_minor, new_minor, o_secret)
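# For reference: a DRBD8 logical_id is the 6-tuple
# (node_A, node_B, tcp_port, minor_A, minor_B, shared_secret), as unpacked
# above; new_alone_id deliberately carries no port so the device is first
# brought up without networking, while new_net_id is kept for the later
# network attach.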
11142 iv_names[idx] = (dev, dev.children, new_net_id)
11143 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11145 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11146 logical_id=new_alone_id,
11147 children=dev.children,
11150 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11153 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11155 _GetInstanceInfoText(self.instance), False)
11156 except errors.GenericError:
11157 self.cfg.ReleaseDRBDMinors(self.instance.name)
11160 # We have new devices, shutdown the drbd on the old secondary
11161 for idx, dev in enumerate(self.instance.disks):
11162 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11163 self.cfg.SetDiskID(dev, self.target_node)
11164 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11165 (dev, self.instance)).fail_msg
11167 self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
11168 " node: %s" % (idx, msg),
11169 hint=("Please cleanup this device manually as"
11170 " soon as possible"))
11172 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11173 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11174 self.instance.disks)[pnode]
11176 msg = result.fail_msg
11178 # detaches didn't succeed (unlikely)
11179 self.cfg.ReleaseDRBDMinors(self.instance.name)
11180 raise errors.OpExecError("Can't detach the disks from the network on"
11181 " old node: %s" % (msg,))
11183 # if we managed to detach at least one, we update all the disks of
11184 # the instance to point to the new secondary
11185 self.lu.LogInfo("Updating instance configuration")
11186 for dev, _, new_logical_id in iv_names.itervalues():
11187 dev.logical_id = new_logical_id
11188 self.cfg.SetDiskID(dev, self.instance.primary_node)
11190 self.cfg.Update(self.instance, feedback_fn)
11192 # Release all node locks (the configuration has been updated)
11193 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11195 # and now perform the drbd attach
11196 self.lu.LogInfo("Attaching primary drbds to new secondary"
11197 " (standalone => connected)")
11198 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11200 self.node_secondary_ip,
11201 (self.instance.disks, self.instance),
11202 self.instance.name,
11204 for to_node, to_result in result.items():
11205 msg = to_result.fail_msg
11207 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11209 hint=("please do a gnt-instance info to see the"
11210 " status of disks"))
11212 cstep = itertools.count(5)
11214 if self.early_release:
11215 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11216 self._RemoveOldStorage(self.target_node, iv_names)
11217 # TODO: Check if releasing locks early still makes sense
11218 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11220 # Release all resource locks except those used by the instance
11221 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11222 keep=self.node_secondary_ip.keys())
11224 # TODO: Can the instance lock be downgraded here? Take the optional disk
11225 # shutdown in the caller into consideration.
11228 # This can fail as the old devices are degraded and _WaitForSync
11229 # does a combined result over all disks, so we don't check its return value
11230 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11231 _WaitForSync(self.lu, self.instance)
11233 # Check all devices manually
11234 self._CheckDevices(self.instance.primary_node, iv_names)
11236 # Step: remove old storage
11237 if not self.early_release:
11238 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11239 self._RemoveOldStorage(self.target_node, iv_names)
11242 class LURepairNodeStorage(NoHooksLU):
11243 """Repairs the volume group on a node.
11248 def CheckArguments(self):
11249 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11251 storage_type = self.op.storage_type
11253 if (constants.SO_FIX_CONSISTENCY not in
11254 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11255 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11256 " repaired" % storage_type,
11257 errors.ECODE_INVAL)
11259 def ExpandNames(self):
11260 self.needed_locks = {
11261 locking.LEVEL_NODE: [self.op.node_name],
11264 def _CheckFaultyDisks(self, instance, node_name):
11265 """Ensure faulty disks abort the opcode or at least warn."""
11267 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11269 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11270 " node '%s'" % (instance.name, node_name),
11271 errors.ECODE_STATE)
11272 except errors.OpPrereqError, err:
11273 if self.op.ignore_consistency:
11274 self.proc.LogWarning(str(err.args[0]))
11278 def CheckPrereq(self):
11279 """Check prerequisites.
11282 # Check whether any instance on this node has faulty disks
11283 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11284 if inst.admin_state != constants.ADMINST_UP:
11286 check_nodes = set(inst.all_nodes)
11287 check_nodes.discard(self.op.node_name)
11288 for inst_node_name in check_nodes:
11289 self._CheckFaultyDisks(inst, inst_node_name)
11291 def Exec(self, feedback_fn):
11292 feedback_fn("Repairing storage unit '%s' on %s ..." %
11293 (self.op.name, self.op.node_name))
11295 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11296 result = self.rpc.call_storage_execute(self.op.node_name,
11297 self.op.storage_type, st_args,
11299 constants.SO_FIX_CONSISTENCY)
11300 result.Raise("Failed to repair storage unit '%s' on %s" %
11301 (self.op.name, self.op.node_name))
11304 class LUNodeEvacuate(NoHooksLU):
11305 """Evacuates instances off a list of nodes.
11310 _MODE2IALLOCATOR = {
11311 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11312 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11313 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11315 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11316 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11317 constants.IALLOCATOR_NEVAC_MODES)
11319 def CheckArguments(self):
11320 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11322 def ExpandNames(self):
11323 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11325 if self.op.remote_node is not None:
11326 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11327 assert self.op.remote_node
11329 if self.op.remote_node == self.op.node_name:
11330 raise errors.OpPrereqError("Can not use evacuated node as a new"
11331 " secondary node", errors.ECODE_INVAL)
11333 if self.op.mode != constants.NODE_EVAC_SEC:
11334 raise errors.OpPrereqError("Without the use of an iallocator only"
11335 " secondary instances can be evacuated",
11336 errors.ECODE_INVAL)
11339 self.share_locks = _ShareAll()
11340 self.needed_locks = {
11341 locking.LEVEL_INSTANCE: [],
11342 locking.LEVEL_NODEGROUP: [],
11343 locking.LEVEL_NODE: [],
11346 # Determine nodes (via group) optimistically, needs verification once locks
11347 # have been acquired
11348 self.lock_nodes = self._DetermineNodes()
11350 def _DetermineNodes(self):
11351 """Gets the list of nodes to operate on.
11354 if self.op.remote_node is None:
11355 # Iallocator will choose any node(s) in the same group
11356 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11358 group_nodes = frozenset([self.op.remote_node])
11360 # Determine nodes to be locked
11361 return set([self.op.node_name]) | group_nodes
11363 def _DetermineInstances(self):
11364 """Builds list of instances to operate on.
11367 assert self.op.mode in constants.NODE_EVAC_MODES
11369 if self.op.mode == constants.NODE_EVAC_PRI:
11370 # Primary instances only
11371 inst_fn = _GetNodePrimaryInstances
11372 assert self.op.remote_node is None, \
11373 "Evacuating primary instances requires iallocator"
11374 elif self.op.mode == constants.NODE_EVAC_SEC:
11375 # Secondary instances only
11376 inst_fn = _GetNodeSecondaryInstances
11379 assert self.op.mode == constants.NODE_EVAC_ALL
11380 inst_fn = _GetNodeInstances
11381 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11383 raise errors.OpPrereqError("Due to an issue with the iallocator"
11384 " interface it is not possible to evacuate"
11385 " all instances at once; specify explicitly"
11386 " whether to evacuate primary or secondary"
11388 errors.ECODE_INVAL)
11390 return inst_fn(self.cfg, self.op.node_name)
11392 def DeclareLocks(self, level):
11393 if level == locking.LEVEL_INSTANCE:
11394 # Lock instances optimistically, needs verification once node and group
11395 # locks have been acquired
11396 self.needed_locks[locking.LEVEL_INSTANCE] = \
11397 set(i.name for i in self._DetermineInstances())
11399 elif level == locking.LEVEL_NODEGROUP:
11400 # Lock node groups for all potential target nodes optimistically, needs
11401 # verification once nodes have been acquired
11402 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11403 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11405 elif level == locking.LEVEL_NODE:
11406 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11408 def CheckPrereq(self):
11410 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11411 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11412 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11414 need_nodes = self._DetermineNodes()
11416 if not owned_nodes.issuperset(need_nodes):
11417 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11418 " locks were acquired, current nodes are"
11419 " now '%s', used to be '%s'; retry the"
11421 (self.op.node_name,
11422 utils.CommaJoin(need_nodes),
11423 utils.CommaJoin(owned_nodes)),
11424 errors.ECODE_STATE)
11426 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11427 if owned_groups != wanted_groups:
11428 raise errors.OpExecError("Node groups changed since locks were acquired,"
11429 " current groups are '%s', used to be '%s';"
11430 " retry the operation" %
11431 (utils.CommaJoin(wanted_groups),
11432 utils.CommaJoin(owned_groups)))
11434 # Determine affected instances
11435 self.instances = self._DetermineInstances()
11436 self.instance_names = [i.name for i in self.instances]
11438 if set(self.instance_names) != owned_instances:
11439 raise errors.OpExecError("Instances on node '%s' changed since locks"
11440 " were acquired, current instances are '%s',"
11441 " used to be '%s'; retry the operation" %
11442 (self.op.node_name,
11443 utils.CommaJoin(self.instance_names),
11444 utils.CommaJoin(owned_instances)))
11446 if self.instance_names:
11447 self.LogInfo("Evacuating instances from node '%s': %s",
11449 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11451 self.LogInfo("No instances to evacuate from node '%s'",
11454 if self.op.remote_node is not None:
11455 for i in self.instances:
11456 if i.primary_node == self.op.remote_node:
11457 raise errors.OpPrereqError("Node %s is the primary node of"
11458 " instance %s, cannot use it as"
11460 (self.op.remote_node, i.name),
11461 errors.ECODE_INVAL)
11463 def Exec(self, feedback_fn):
11464 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
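# Exactly one of iallocator and remote_node must be set (hence the XOR
# above); CheckArguments enforces this via _CheckIAllocatorOrNode.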
11466 if not self.instance_names:
11467 # No instances to evacuate
11470 elif self.op.iallocator is not None:
11471 # TODO: Implement relocation to other group
11472 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11473 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11474 instances=list(self.instance_names))
11476 ial.Run(self.op.iallocator)
11478 if not ial.success:
11479 raise errors.OpPrereqError("Can't compute node evacuation using"
11480 " iallocator '%s': %s" %
11481 (self.op.iallocator, ial.info),
11482 errors.ECODE_NORES)
11484 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11486 elif self.op.remote_node is not None:
11487 assert self.op.mode == constants.NODE_EVAC_SEC
11489 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11490 remote_node=self.op.remote_node,
11492 mode=constants.REPLACE_DISK_CHG,
11493 early_release=self.op.early_release)]
11494 for instance_name in self.instance_names
11498 raise errors.ProgrammerError("No iallocator or remote node")
11500 return ResultWithJobs(jobs)
11503 def _SetOpEarlyRelease(early_release, op):
11504 """Sets C{early_release} flag on opcodes if available.
11508 op.early_release = early_release
11509 except AttributeError:
11510 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11515 def _NodeEvacDest(use_nodes, group, nodes):
11516 """Returns group or nodes depending on caller's choice.
11520 return utils.CommaJoin(nodes)
11525 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11526 """Unpacks the result of change-group and node-evacuate iallocator requests.
11528 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11529 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11531 @type lu: L{LogicalUnit}
11532 @param lu: Logical unit instance
11533 @type alloc_result: tuple/list
11534 @param alloc_result: Result from iallocator
11535 @type early_release: bool
11536 @param early_release: Whether to release locks early if possible
11537 @type use_nodes: bool
11538 @param use_nodes: Whether to display node names instead of groups
11541 (moved, failed, jobs) = alloc_result
11544 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11545 for (name, reason) in failed)
11546 lu.LogWarning("Unable to evacuate instances %s", failreason)
11547 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11550 lu.LogInfo("Instances to be moved: %s",
11551 utils.CommaJoin("%s (to %s)" %
11552 (name, _NodeEvacDest(use_nodes, group, nodes))
11553 for (name, group, nodes) in moved))
11555 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11556 map(opcodes.OpCode.LoadOpCode, ops))
11560 class LUInstanceGrowDisk(LogicalUnit):
11561 """Grow a disk of an instance.
11564 HPATH = "disk-grow"
11565 HTYPE = constants.HTYPE_INSTANCE
11568 def ExpandNames(self):
11569 self._ExpandAndLockInstance()
11570 self.needed_locks[locking.LEVEL_NODE] = []
11571 self.needed_locks[locking.LEVEL_NODE_RES] = []
11572 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11573 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11575 def DeclareLocks(self, level):
11576 if level == locking.LEVEL_NODE:
11577 self._LockInstancesNodes()
11578 elif level == locking.LEVEL_NODE_RES:
11580 self.needed_locks[locking.LEVEL_NODE_RES] = \
11581 self.needed_locks[locking.LEVEL_NODE][:]
11583 def BuildHooksEnv(self):
11584 """Build hooks env.
11586 This runs on the master, the primary and all the secondaries.
11590 "DISK": self.op.disk,
11591 "AMOUNT": self.op.amount,
11592 "ABSOLUTE": self.op.absolute,
11594 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11597 def BuildHooksNodes(self):
11598 """Build hooks nodes.
11601 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11604 def CheckPrereq(self):
11605 """Check prerequisites.
11607 This checks that the instance is in the cluster.
11610 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11611 assert instance is not None, \
11612 "Cannot retrieve locked instance %s" % self.op.instance_name
11613 nodenames = list(instance.all_nodes)
11614 for node in nodenames:
11615 _CheckNodeOnline(self, node)
11617 self.instance = instance
11619 if instance.disk_template not in constants.DTS_GROWABLE:
11620 raise errors.OpPrereqError("Instance's disk layout does not support"
11621 " growing", errors.ECODE_INVAL)
11623 self.disk = instance.FindDisk(self.op.disk)
11625 if self.op.absolute:
11626 self.target = self.op.amount
11627 self.delta = self.target - self.disk.size
11629 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11630 "current disk size (%s)" %
11631 (utils.FormatUnit(self.target, "h"),
11632 utils.FormatUnit(self.disk.size, "h")),
11633 errors.ECODE_STATE)
11635 self.delta = self.op.amount
11636 self.target = self.disk.size + self.delta
11638 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11639 utils.FormatUnit(self.delta, "h"),
11640 errors.ECODE_INVAL)
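# Worked example (illustrative values only): for a 10240 MB disk, an
# absolute amount of 15360 gives target=15360 and delta=5120, while a
# relative amount of 5120 gives delta=5120 and target=15360 -- both grow
# the disk by 5120 MB.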
11642 if instance.disk_template not in (constants.DT_FILE,
11643 constants.DT_SHARED_FILE,
11645 # TODO: check the free disk space for file, when that feature will be supported
11647 _CheckNodesFreeDiskPerVG(self, nodenames,
11648 self.disk.ComputeGrowth(self.delta))
11650 def Exec(self, feedback_fn):
11651 """Execute disk grow.
11654 instance = self.instance
11657 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11658 assert (self.owned_locks(locking.LEVEL_NODE) ==
11659 self.owned_locks(locking.LEVEL_NODE_RES))
11661 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11663 raise errors.OpExecError("Cannot activate block device to grow")
11665 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11666 (self.op.disk, instance.name,
11667 utils.FormatUnit(self.delta, "h"),
11668 utils.FormatUnit(self.target, "h")))
11670 # First run all grow ops in dry-run mode
11671 for node in instance.all_nodes:
11672 self.cfg.SetDiskID(disk, node)
11673 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11675 result.Raise("Grow request failed to node %s" % node)
11677 # We know that (as far as we can test) operations across different
11678 # nodes will succeed, time to run it for real
11679 for node in instance.all_nodes:
11680 self.cfg.SetDiskID(disk, node)
11681 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11683 result.Raise("Grow request failed to node %s" % node)
11685 # TODO: Rewrite code to work properly
11686 # DRBD goes into sync mode for a short amount of time after executing the
11687 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11688 # calling "resize" in sync mode fails. Sleeping for a short amount of
11689 # time is a work-around.
11692 disk.RecordGrow(self.delta)
11693 self.cfg.Update(instance, feedback_fn)
11695 # Changes have been recorded, release node lock
11696 _ReleaseLocks(self, locking.LEVEL_NODE)
11698 # Downgrade lock while waiting for sync
11699 self.glm.downgrade(locking.LEVEL_INSTANCE)
11701 if self.op.wait_for_sync:
11702 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11704 self.proc.LogWarning("Disk sync-ing has not returned a good"
11705 " status; please check the instance")
11706 if instance.admin_state != constants.ADMINST_UP:
11707 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11708 elif instance.admin_state != constants.ADMINST_UP:
11709 self.proc.LogWarning("Not shutting down the disk even though the"
11710 " instance is not supposed to be running, because"
11711 " wait_for_sync was not requested")
11713 assert self.owned_locks(locking.LEVEL_NODE_RES)
11714 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11717 class LUInstanceQueryData(NoHooksLU):
11718 """Query runtime instance data.
11723 def ExpandNames(self):
11724 self.needed_locks = {}
11726 # Use locking if requested or when non-static information is wanted
11727 if not (self.op.static or self.op.use_locking):
11728 self.LogWarning("Non-static data requested, locks need to be acquired")
11729 self.op.use_locking = True
11731 if self.op.instances or not self.op.use_locking:
11732 # Expand instance names right here
11733 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11735 # Will use acquired locks
11736 self.wanted_names = None
11738 if self.op.use_locking:
11739 self.share_locks = _ShareAll()
11741 if self.wanted_names is None:
11742 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11744 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11746 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11747 self.needed_locks[locking.LEVEL_NODE] = []
11748 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11750 def DeclareLocks(self, level):
11751 if self.op.use_locking:
11752 if level == locking.LEVEL_NODEGROUP:
11753 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11755 # Lock all groups used by instances optimistically; this requires going
11756 # via the node before it's locked, requiring verification later on
11757 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11758 frozenset(group_uuid
11759 for instance_name in owned_instances
11761 self.cfg.GetInstanceNodeGroups(instance_name))
11763 elif level == locking.LEVEL_NODE:
11764 self._LockInstancesNodes()
11766 def CheckPrereq(self):
11767 """Check prerequisites.
11769 This only checks the optional instance list against the existing names.
11772 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11773 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11774 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11776 if self.wanted_names is None:
11777 assert self.op.use_locking, "Locking was not used"
11778 self.wanted_names = owned_instances
11780 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11782 if self.op.use_locking:
11783 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11786 assert not (owned_instances or owned_groups or owned_nodes)
11788 self.wanted_instances = instances.values()
11790 def _ComputeBlockdevStatus(self, node, instance, dev):
11791 """Returns the status of a block device
11794 if self.op.static or not node:
11797 self.cfg.SetDiskID(dev, node)
11799 result = self.rpc.call_blockdev_find(node, dev)
11803 result.Raise("Can't compute disk status for %s" % instance.name)
11805 status = result.payload
11809 return (status.dev_path, status.major, status.minor,
11810 status.sync_percent, status.estimated_time,
11811 status.is_degraded, status.ldisk_status)
11813 def _ComputeDiskStatus(self, instance, snode, dev):
11814 """Compute block device status.
11817 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11819 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11821 def _ComputeDiskStatusInner(self, instance, snode, dev):
11822 """Compute block device status.
11824 @attention: The device has to be annotated already.
11827 if dev.dev_type in constants.LDS_DRBD:
11828 # we change the snode then (otherwise we use the one passed in)
11829 if dev.logical_id[0] == instance.primary_node:
11830 snode = dev.logical_id[1]
11832 snode = dev.logical_id[0]
11834 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11836 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11839 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11846 "iv_name": dev.iv_name,
11847 "dev_type": dev.dev_type,
11848 "logical_id": dev.logical_id,
11849 "physical_id": dev.physical_id,
11850 "pstatus": dev_pstatus,
11851 "sstatus": dev_sstatus,
11852 "children": dev_children,
11857 def Exec(self, feedback_fn):
11858 """Gather and return data"""
11861 cluster = self.cfg.GetClusterInfo()
11863 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11864 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11866 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11867 for node in nodes.values()))
11869 group2name_fn = lambda uuid: groups[uuid].name
11871 for instance in self.wanted_instances:
11872 pnode = nodes[instance.primary_node]
11874 if self.op.static or pnode.offline:
11875 remote_state = None
11877 self.LogWarning("Primary node %s is marked offline, returning static"
11878 " information only for instance %s" %
11879 (pnode.name, instance.name))
11881 remote_info = self.rpc.call_instance_info(instance.primary_node,
11883 instance.hypervisor)
11884 remote_info.Raise("Error checking node %s" % instance.primary_node)
11885 remote_info = remote_info.payload
11886 if remote_info and "state" in remote_info:
11887 remote_state = "up"
11889 if instance.admin_state == constants.ADMINST_UP:
11890 remote_state = "down"
11892 remote_state = instance.admin_state
11894 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11897 snodes_group_uuids = [nodes[snode_name].group
11898 for snode_name in instance.secondary_nodes]
11900 result[instance.name] = {
11901 "name": instance.name,
11902 "config_state": instance.admin_state,
11903 "run_state": remote_state,
11904 "pnode": instance.primary_node,
11905 "pnode_group_uuid": pnode.group,
11906 "pnode_group_name": group2name_fn(pnode.group),
11907 "snodes": instance.secondary_nodes,
11908 "snodes_group_uuids": snodes_group_uuids,
11909 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11911 # this happens to be the same format used for hooks
11912 "nics": _NICListToTuple(self, instance.nics),
11913 "disk_template": instance.disk_template,
11915 "hypervisor": instance.hypervisor,
11916 "network_port": instance.network_port,
11917 "hv_instance": instance.hvparams,
11918 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11919 "be_instance": instance.beparams,
11920 "be_actual": cluster.FillBE(instance),
11921 "os_instance": instance.osparams,
11922 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11923 "serial_no": instance.serial_no,
11924 "mtime": instance.mtime,
11925 "ctime": instance.ctime,
11926 "uuid": instance.uuid,
11932 def PrepareContainerMods(mods, private_fn):
11933 """Prepares a list of container modifications by adding a private data field.
11935 @type mods: list of tuples; (operation, index, parameters)
11936 @param mods: List of modifications
11937 @type private_fn: callable or None
11938 @param private_fn: Callable for constructing a private data field for a modification
11943 if private_fn is None:
11948 return [(op, idx, params, fn()) for (op, idx, params) in mods]
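# Illustrative example (hypothetical values): with private_fn=None,
# [(constants.DDM_ADD, -1, {"size": 1024})] is expected to become
# [(constants.DDM_ADD, -1, {"size": 1024}, None)]; with a private_fn such
# as _InstNicModPrivate, the fourth element is a fresh private object per
# modification.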
11951 #: Type description for changes as returned by L{ApplyContainerMods}'s callbacks
11953 _TApplyContModsCbChanges = \
11954 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11955 ht.TNonEmptyString,
11960 def ApplyContainerMods(kind, container, chgdesc, mods,
11961 create_fn, modify_fn, remove_fn):
11962 """Applies descriptions in C{mods} to C{container}.
11965 @param kind: One-word item description
11966 @type container: list
11967 @param container: Container to modify
11968 @type chgdesc: None or list
11969 @param chgdesc: List of applied changes
11971 @param mods: Modifications as returned by L{PrepareContainerMods}
11972 @type create_fn: callable
11973 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11974 receives absolute item index, parameters and private data object as added
11975 by L{PrepareContainerMods}, returns tuple containing new item and changes
11977 @type modify_fn: callable
11978 @param modify_fn: Callback for modifying an existing item
11979 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11980 and private data object as added by L{PrepareContainerMods}, returns
11982 @type remove_fn: callable
11983 @param remove_fn: Callback on removing item; receives absolute item index,
11984 item and private data object as added by L{PrepareContainerMods}
11987 for (op, idx, params, private) in mods:
11990 absidx = len(container) - 1
11992 raise IndexError("Not accepting negative indices other than -1")
11993 elif idx > len(container):
11994 raise IndexError("Got %s index %s, but there are only %s" %
11995 (kind, idx, len(container)))
12001 if op == constants.DDM_ADD:
12002 # Calculate where item will be added
12004 addidx = len(container)
12008 if create_fn is None:
12011 (item, changes) = create_fn(addidx, params, private)
12014 container.append(item)
12017 assert idx <= len(container)
12018 # list.insert does so before the specified index
12019 container.insert(idx, item)
12021 # Retrieve existing item
12023 item = container[absidx]
12025 raise IndexError("Invalid %s index %s" % (kind, idx))
12027 if op == constants.DDM_REMOVE:
12030 if remove_fn is not None:
12031 remove_fn(absidx, item, private)
12033 changes = [("%s/%s" % (kind, absidx), "remove")]
12035 assert container[absidx] == item
12036 del container[absidx]
12037 elif op == constants.DDM_MODIFY:
12038 if modify_fn is not None:
12039 changes = modify_fn(absidx, item, params, private)
12041 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12043 assert _TApplyContModsCbChanges(changes)
12045 if not (chgdesc is None or changes is None):
12046 chgdesc.extend(changes)
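# Example of the resulting change descriptions (hypothetical): removing the
# second disk records ("disk/1", "remove") in chgdesc, while an add at
# index -1 appends the new item and reports it under its absolute index.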
12049 def _UpdateIvNames(base_index, disks):
12050 """Updates the C{iv_name} attribute of disks.
12052 @type disks: list of L{objects.Disk}
12055 for (idx, disk) in enumerate(disks):
12056 disk.iv_name = "disk/%s" % (base_index + idx, )
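# For instance, _UpdateIvNames(2, disks) relabels the given disks as
# "disk/2", "disk/3", ... in order.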
12059 class _InstNicModPrivate:
12060 """Data structure for network interface modifications.
12062 Used by L{LUInstanceSetParams}.
12065 def __init__(self):
12070 class LUInstanceSetParams(LogicalUnit):
12071 """Modifies an instance's parameters.
12074 HPATH = "instance-modify"
12075 HTYPE = constants.HTYPE_INSTANCE
12079 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12080 assert ht.TList(mods)
12081 assert not mods or len(mods[0]) in (2, 3)
12083 if mods and len(mods[0]) == 2:
12087 for op, params in mods:
12088 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12089 result.append((op, -1, params))
12093 raise errors.OpPrereqError("Only one %s add or remove operation is"
12094 " supported at a time" % kind,
12095 errors.ECODE_INVAL)
12097 result.append((constants.DDM_MODIFY, op, params))
12099 assert verify_fn(result)
12106 def _CheckMods(kind, mods, key_types, item_fn):
12107 """Ensures requested disk/NIC modifications are valid.
12110 for (op, _, params) in mods:
12111 assert ht.TDict(params)
12113 utils.ForceDictType(params, key_types)
12115 if op == constants.DDM_REMOVE:
12117 raise errors.OpPrereqError("No settings should be passed when"
12118 " removing a %s" % kind,
12119 errors.ECODE_INVAL)
12120 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12121 item_fn(op, params)
12123 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12126 def _VerifyDiskModification(op, params):
12127 """Verifies a disk modification.
12130 if op == constants.DDM_ADD:
12131 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12132 if mode not in constants.DISK_ACCESS_SET:
12133 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12134 errors.ECODE_INVAL)
12136 size = params.get(constants.IDISK_SIZE, None)
12138 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12139 constants.IDISK_SIZE, errors.ECODE_INVAL)
12143 except (TypeError, ValueError), err:
12144 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12145 errors.ECODE_INVAL)
12147 params[constants.IDISK_SIZE] = size
12149 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12150 raise errors.OpPrereqError("Disk size change not possible, use"
12151 " grow-disk", errors.ECODE_INVAL)
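# A valid DDM_ADD parameter dict would therefore look roughly like
# {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR}
# (sizes in mebibytes); illustrative only.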
12154 def _VerifyNicModification(op, params):
12155 """Verifies a network interface modification.
12158 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12159 ip = params.get(constants.INIC_IP, None)
12162 elif ip.lower() == constants.VALUE_NONE:
12163 params[constants.INIC_IP] = None
12164 elif not netutils.IPAddress.IsValid(ip):
12165 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12166 errors.ECODE_INVAL)
12168 bridge = params.get("bridge", None)
12169 link = params.get(constants.INIC_LINK, None)
12170 if bridge and link:
12171 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12172 " at the same time", errors.ECODE_INVAL)
12173 elif bridge and bridge.lower() == constants.VALUE_NONE:
12174 params["bridge"] = None
12175 elif link and link.lower() == constants.VALUE_NONE:
12176 params[constants.INIC_LINK] = None
12178 if op == constants.DDM_ADD:
12179 macaddr = params.get(constants.INIC_MAC, None)
12180 if macaddr is None:
12181 params[constants.INIC_MAC] = constants.VALUE_AUTO
12183 if constants.INIC_MAC in params:
12184 macaddr = params[constants.INIC_MAC]
12185 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12186 macaddr = utils.NormalizeAndValidateMac(macaddr)
12188 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12189 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12190 " modifying an existing NIC",
12191 errors.ECODE_INVAL)
12193 def CheckArguments(self):
12194 if not (self.op.nics or self.op.disks or self.op.disk_template or
12195 self.op.hvparams or self.op.beparams or self.op.os_name or
12196 self.op.offline is not None or self.op.runtime_mem):
12197 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12199 if self.op.hvparams:
12200 _CheckGlobalHvParams(self.op.hvparams)
12203 self._UpgradeDiskNicMods("disk", self.op.disks,
12204 opcodes.OpInstanceSetParams.TestDiskModifications)
12206 self._UpgradeDiskNicMods("NIC", self.op.nics,
12207 opcodes.OpInstanceSetParams.TestNicModifications)
12209 # Check disk modifications
12210 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12211 self._VerifyDiskModification)
12213 if self.op.disks and self.op.disk_template is not None:
12214 raise errors.OpPrereqError("Disk template conversion and other disk"
12215 " changes not supported at the same time",
12216 errors.ECODE_INVAL)
12218 if (self.op.disk_template and
12219 self.op.disk_template in constants.DTS_INT_MIRROR and
12220 self.op.remote_node is None):
12221 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12222 " one requires specifying a secondary node",
12223 errors.ECODE_INVAL)
12225 # Check NIC modifications
12226 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12227 self._VerifyNicModification)
12229 def ExpandNames(self):
12230 self._ExpandAndLockInstance()
12231 # Can't even acquire node locks in shared mode as upcoming changes in
12232 # Ganeti 2.6 will start to modify the node object on disk conversion
12233 self.needed_locks[locking.LEVEL_NODE] = []
12234 self.needed_locks[locking.LEVEL_NODE_RES] = []
12235 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12237 def DeclareLocks(self, level):
12238 # TODO: Acquire group lock in shared mode (disk parameters)
12239 if level == locking.LEVEL_NODE:
12240 self._LockInstancesNodes()
12241 if self.op.disk_template and self.op.remote_node:
12242 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12243 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12244 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12246 self.needed_locks[locking.LEVEL_NODE_RES] = \
12247 self.needed_locks[locking.LEVEL_NODE][:]
12249 def BuildHooksEnv(self):
12250 """Build hooks env.
12252 This runs on the master, primary and secondaries.
12256 if constants.BE_MINMEM in self.be_new:
12257 args["minmem"] = self.be_new[constants.BE_MINMEM]
12258 if constants.BE_MAXMEM in self.be_new:
12259 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12260 if constants.BE_VCPUS in self.be_new:
12261 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12262 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12263 # information at all.
12265 if self._new_nics is not None:
12268 for nic in self._new_nics:
12269 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12270 mode = nicparams[constants.NIC_MODE]
12271 link = nicparams[constants.NIC_LINK]
12272 nics.append((nic.ip, nic.mac, mode, link))
12274 args["nics"] = nics
12276 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12277 if self.op.disk_template:
12278 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12279 if self.op.runtime_mem:
12280 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12284 def BuildHooksNodes(self):
12285 """Build hooks nodes.
12288 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12291 def _PrepareNicModification(self, params, private, old_ip, old_params,
12293 update_params_dict = dict([(key, params[key])
12294 for key in constants.NICS_PARAMETERS
12297 if "bridge" in params:
12298 update_params_dict[constants.NIC_LINK] = params["bridge"]
12300 new_params = _GetUpdatedParams(old_params, update_params_dict)
12301 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12303 new_filled_params = cluster.SimpleFillNIC(new_params)
12304 objects.NIC.CheckParameterSyntax(new_filled_params)
12306 new_mode = new_filled_params[constants.NIC_MODE]
12307 if new_mode == constants.NIC_MODE_BRIDGED:
12308 bridge = new_filled_params[constants.NIC_LINK]
12309 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12311 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12313 self.warn.append(msg)
12315 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12317 elif new_mode == constants.NIC_MODE_ROUTED:
12318 ip = params.get(constants.INIC_IP, old_ip)
12320 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12321 " on a routed NIC", errors.ECODE_INVAL)
12323 if constants.INIC_MAC in params:
12324 mac = params[constants.INIC_MAC]
12326 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12327 errors.ECODE_INVAL)
12328 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12329 # otherwise generate the MAC address
12330 params[constants.INIC_MAC] = \
12331 self.cfg.GenerateMAC(self.proc.GetECId())
12333 # or validate/reserve the current one
12335 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12336 except errors.ReservationError:
12337 raise errors.OpPrereqError("MAC address '%s' already in use"
12338 " in cluster" % mac,
12339 errors.ECODE_NOTUNIQUE)
12341 private.params = new_params
12342 private.filled = new_filled_params
12344 def CheckPrereq(self):
12345 """Check prerequisites.
12347 This only checks the instance list against the existing names.
12350 # checking the new params on the primary/secondary nodes
12352 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12353 cluster = self.cluster = self.cfg.GetClusterInfo()
12354 assert self.instance is not None, \
12355 "Cannot retrieve locked instance %s" % self.op.instance_name
12356 pnode = instance.primary_node
12357 nodelist = list(instance.all_nodes)
12358 pnode_info = self.cfg.GetNodeInfo(pnode)
12359 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12361 # Prepare disk/NIC modifications
12362 self.diskmod = PrepareContainerMods(self.op.disks, None)
12363 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12366 if self.op.os_name and not self.op.force:
12367 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12368 self.op.force_variant)
12369 instance_os = self.op.os_name
12371 instance_os = instance.os
12373 assert not (self.op.disk_template and self.op.disks), \
12374 "Can't modify disk template and apply disk changes at the same time"
12376 if self.op.disk_template:
12377 if instance.disk_template == self.op.disk_template:
12378 raise errors.OpPrereqError("Instance already has disk template %s" %
12379 instance.disk_template, errors.ECODE_INVAL)
12381 if (instance.disk_template,
12382 self.op.disk_template) not in self._DISK_CONVERSIONS:
12383 raise errors.OpPrereqError("Unsupported disk template conversion from"
12384 " %s to %s" % (instance.disk_template,
12385 self.op.disk_template),
12386 errors.ECODE_INVAL)
12387 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12388 msg="cannot change disk template")
12389 if self.op.disk_template in constants.DTS_INT_MIRROR:
12390 if self.op.remote_node == pnode:
12391 raise errors.OpPrereqError("Given new secondary node %s is the same"
12392 " as the primary node of the instance" %
12393 self.op.remote_node, errors.ECODE_STATE)
12394 _CheckNodeOnline(self, self.op.remote_node)
12395 _CheckNodeNotDrained(self, self.op.remote_node)
12396 # FIXME: here we assume that the old disk template is DT_PLAIN
12397 assert instance.disk_template == constants.DT_PLAIN
12398 disks = [{constants.IDISK_SIZE: d.size,
12399 constants.IDISK_VG: d.logical_id[0]}
12400 for d in instance.disks]
12401 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12402 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12404 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12405 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12406 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12407 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12408 ignore=self.op.ignore_ipolicy)
12409 if pnode_info.group != snode_info.group:
12410 self.LogWarning("The primary and secondary nodes are in two"
12411 " different node groups; the disk parameters"
12412 " from the first disk's node group will be used")
12415 # hvparams processing
12416 if self.op.hvparams:
12417 hv_type = instance.hypervisor
12418 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12419 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12420 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12423 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12424 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12425 self.hv_proposed = self.hv_new = hv_new # the new actual values
12426 self.hv_inst = i_hvdict # the new dict (without defaults)
12428 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12430 self.hv_new = self.hv_inst = {}
12432 # beparams processing
12433 if self.op.beparams:
12434 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12436 objects.UpgradeBeParams(i_bedict)
12437 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12438 be_new = cluster.SimpleFillBE(i_bedict)
12439 self.be_proposed = self.be_new = be_new # the new actual values
12440 self.be_inst = i_bedict # the new dict (without defaults)
12442 self.be_new = self.be_inst = {}
12443 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12444 be_old = cluster.FillBE(instance)
12446 # CPU param validation -- checking every time a parameter is
12447 # changed to cover all cases where either CPU mask or vcpus have
12449 if (constants.BE_VCPUS in self.be_proposed and
12450 constants.HV_CPU_MASK in self.hv_proposed):
12452 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12453 # Verify mask is consistent with number of vCPUs. Can skip this
12454 # test if only 1 entry in the CPU mask, which means same mask
12455 # is applied to all vCPUs.
12456 if (len(cpu_list) > 1 and
12457 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12458 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
" CPU mask [%s]" %
12460 (self.be_proposed[constants.BE_VCPUS],
12461 self.hv_proposed[constants.HV_CPU_MASK]),
12462 errors.ECODE_INVAL)
12464 # Only perform this test if a new CPU mask is given
12465 if constants.HV_CPU_MASK in self.hv_new:
12466 # Calculate the largest CPU number requested
12467 max_requested_cpu = max(map(max, cpu_list))
12468 # Check that all of the instance's nodes have enough physical CPUs to
12469 # satisfy the requested CPU mask
12470 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12471 max_requested_cpu + 1, instance.hypervisor)
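# Worked example (hypothetical values): a CPU mask of "1:3" parses into two
# per-vCPU entries, so BE_VCPUS must be 2; max_requested_cpu is 3, meaning
# every node needs at least 4 physical CPUs.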
12473 # osparams processing
12474 if self.op.osparams:
12475 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12476 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12477 self.os_inst = i_osdict # the new dict (without defaults)
12483 #TODO(dynmem): do the appropriate check involving MINMEM
12484 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12485 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12486 mem_check_list = [pnode]
12487 if be_new[constants.BE_AUTO_BALANCE]:
12488 # either we changed auto_balance to yes or it was from before
12489 mem_check_list.extend(instance.secondary_nodes)
12490 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12491 instance.hypervisor)
12492 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12493 [instance.hypervisor])
12494 pninfo = nodeinfo[pnode]
12495 msg = pninfo.fail_msg
12497 # Assume the primary node is unreachable and go ahead
12498 self.warn.append("Can't get info from primary node %s: %s" %
12501 (_, _, (pnhvinfo, )) = pninfo.payload
12502 if not isinstance(pnhvinfo.get("memory_free", None), int):
12503 self.warn.append("Node data from primary node %s doesn't contain"
12504 " free memory information" % pnode)
12505 elif instance_info.fail_msg:
12506 self.warn.append("Can't get instance runtime information: %s" %
12507 instance_info.fail_msg)
12509 if instance_info.payload:
12510 current_mem = int(instance_info.payload["memory"])
12512 # Assume instance not running
12513 # (there is a slight race condition here, but it's not very
12514 # probable, and we have no other way to check)
12515 # TODO: Describe race condition
12517 #TODO(dynmem): do the appropriate check involving MINMEM
12518 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12519 pnhvinfo["memory_free"])
12521 raise errors.OpPrereqError("This change will prevent the instance"
12522 " from starting, due to %d MB of memory"
12523 " missing on its primary node" % miss_mem,
12525 errors.ECODE_NORES)
12527 if be_new[constants.BE_AUTO_BALANCE]:
12528 for node, nres in nodeinfo.items():
12529 if node not in instance.secondary_nodes:
12531 nres.Raise("Can't get info from secondary node %s" % node,
12532 prereq=True, ecode=errors.ECODE_STATE)
12533 (_, _, (nhvinfo, )) = nres.payload
12534 if not isinstance(nhvinfo.get("memory_free", None), int):
12535 raise errors.OpPrereqError("Secondary node %s didn't return free"
12536 " memory information" % node,
12537 errors.ECODE_STATE)
12538 #TODO(dynmem): do the appropriate check involving MINMEM
12539 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12540 raise errors.OpPrereqError("This change will prevent the instance"
12541 " from failing over to its secondary node"
12542 " %s, due to not enough memory" % node,
12543 errors.ECODE_STATE)
12545 if self.op.runtime_mem:
12546 remote_info = self.rpc.call_instance_info(instance.primary_node,
12548 instance.hypervisor)
12549 remote_info.Raise("Error checking node %s" % instance.primary_node)
12550 if not remote_info.payload: # not running already
12551 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12552 errors.ECODE_STATE)
12554 current_memory = remote_info.payload["memory"]
12555 if (not self.op.force and
12556 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12557 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12558 raise errors.OpPrereqError("Instance %s must have between %d"
12559 " and %d MB of memory unless --force is"
12560 " given" % (instance.name,
12561 self.be_proposed[constants.BE_MINMEM],
12562 self.be_proposed[constants.BE_MAXMEM]),
12563 errors.ECODE_INVAL)
12565 if self.op.runtime_mem > current_memory:
12566 _CheckNodeFreeMemory(self, instance.primary_node,
12567 "ballooning memory for instance %s" %
12569 self.op.runtime_mem - current_memory,
12570 instance.hypervisor)
12572 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12573 raise errors.OpPrereqError("Disk operations not supported for"
12574 " diskless instances",
12575 errors.ECODE_INVAL)
12577 def _PrepareNicCreate(_, params, private):
12578 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12579 return (None, None)
12581 def _PrepareNicMod(_, nic, params, private):
12582 self._PrepareNicModification(params, private, nic.ip,
12583 nic.nicparams, cluster, pnode)
12586 # Verify NIC changes (operating on copy)
12587 nics = instance.nics[:]
12588 ApplyContainerMods("NIC", nics, None, self.nicmod,
12589 _PrepareNicCreate, _PrepareNicMod, None)
12590 if len(nics) > constants.MAX_NICS:
12591 raise errors.OpPrereqError("Instance has too many network interfaces"
12592 " (%d), cannot add more" % constants.MAX_NICS,
12593 errors.ECODE_STATE)
12595 # Verify disk changes (operating on a copy)
12596 disks = instance.disks[:]
12597 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12598 if len(disks) > constants.MAX_DISKS:
12599 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12600 " more" % constants.MAX_DISKS,
12601 errors.ECODE_STATE)
12603 if self.op.offline is not None:
12604 if self.op.offline:
12605 msg = "can't change to offline"
12607 msg = "can't change to online"
12608 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12610 # Pre-compute NIC changes (necessary to use result in hooks)
12611 self._nic_chgdesc = []
12613 # Operate on copies as this is still in prereq
12614 nics = [nic.Copy() for nic in instance.nics]
12615 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12616 self._CreateNewNic, self._ApplyNicMods, None)
12617 self._new_nics = nics
12619 self._new_nics = None
12621 def _ConvertPlainToDrbd(self, feedback_fn):
12622 """Converts an instance from plain to drbd.
12625 feedback_fn("Converting template to drbd")
12626 instance = self.instance
12627 pnode = instance.primary_node
12628 snode = self.op.remote_node
12630 assert instance.disk_template == constants.DT_PLAIN
12632 # create a fake disk info for _GenerateDiskTemplate
12633 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12634 constants.IDISK_VG: d.logical_id[0]}
12635 for d in instance.disks]
12636 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12637 instance.name, pnode, [snode],
12638 disk_info, None, None, 0, feedback_fn,
12640 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12642 info = _GetInstanceInfoText(instance)
12643 feedback_fn("Creating additional volumes...")
12644 # first, create the missing data and meta devices
12645 for disk in anno_disks:
12646 # unfortunately this is... not too nice
12647 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12649 for child in disk.children:
12650 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12651 # at this stage, all new LVs have been created, we can rename the old LVs
12653 feedback_fn("Renaming original volumes...")
12654 rename_list = [(o, n.children[0].logical_id)
12655 for (o, n) in zip(instance.disks, new_disks)]
12656 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12657 result.Raise("Failed to rename original LVs")
12659 feedback_fn("Initializing DRBD devices...")
12660 # all child devices are in place, we can now create the DRBD devices
12661 for disk in anno_disks:
12662 for node in [pnode, snode]:
12663 f_create = node == pnode
12664 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12666 # at this point, the instance has been modified
12667 instance.disk_template = constants.DT_DRBD8
12668 instance.disks = new_disks
12669 self.cfg.Update(instance, feedback_fn)
12671 # Release node locks while waiting for sync
12672 _ReleaseLocks(self, locking.LEVEL_NODE)
12674 # disks are created, waiting for sync
12675 disk_abort = not _WaitForSync(self, instance,
12676 oneshot=not self.op.wait_for_sync)
12678 raise errors.OpExecError("There are some degraded disks for"
12679 " this instance, please cleanup manually")
12681 # Node resource locks will be released by caller
12683 def _ConvertDrbdToPlain(self, feedback_fn):
12684 """Converts an instance from drbd to plain.
12687 instance = self.instance
12689 assert len(instance.secondary_nodes) == 1
12690 assert instance.disk_template == constants.DT_DRBD8
12692 pnode = instance.primary_node
12693 snode = instance.secondary_nodes[0]
12694 feedback_fn("Converting template to plain")
12696 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12697 new_disks = [d.children[0] for d in instance.disks]
12699 # copy over size and mode
12700 for parent, child in zip(old_disks, new_disks):
12701 child.size = parent.size
12702 child.mode = parent.mode
12704 # this is a DRBD disk, return its port to the pool
12705 # NOTE: this must be done right before the call to cfg.Update!
12706 for disk in old_disks:
12707 tcp_port = disk.logical_id[2]
12708 self.cfg.AddTcpUdpPort(tcp_port)
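# logical_id[2] is the DRBD TCP port (third element of the DRBD logical_id
# tuple); returning it to the pool here lets future DRBD disks reuse it
# once the configuration is updated below.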
12710 # update instance structure
12711 instance.disks = new_disks
12712 instance.disk_template = constants.DT_PLAIN
12713 self.cfg.Update(instance, feedback_fn)
12715 # Release locks in case removing disks takes a while
12716 _ReleaseLocks(self, locking.LEVEL_NODE)
12718 feedback_fn("Removing volumes on the secondary node...")
12719 for disk in old_disks:
12720 self.cfg.SetDiskID(disk, snode)
12721 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12722 if msg:
12723 self.LogWarning("Could not remove block device %s on node %s,"
12724 " continuing anyway: %s", disk.iv_name, snode, msg)
12726 feedback_fn("Removing unneeded volumes on the primary node...")
12727 for idx, disk in enumerate(old_disks):
12728 meta = disk.children[1]
12729 self.cfg.SetDiskID(meta, pnode)
12730 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12731 if msg:
12732 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12733 " continuing anyway: %s", idx, pnode, msg)
12735 def _CreateNewDisk(self, idx, params, _):
12736 """Creates a new disk.
12739 instance = self.instance
12742 if instance.disk_template in constants.DTS_FILEBASED:
12743 (file_driver, file_path) = instance.disks[0].logical_id
12744 file_path = os.path.dirname(file_path)
12745 else:
12746 file_driver = file_path = None
12748 disk = \
12749 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12750 instance.primary_node, instance.secondary_nodes,
12751 [params], file_path, file_driver, idx,
12752 self.Log, self.diskparams)[0]
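# A single-element parameter list was passed in, so exactly one disk object is returned.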
12754 info = _GetInstanceInfoText(instance)
12756 logging.info("Creating volume %s for instance %s",
12757 disk.iv_name, instance.name)
12758 # Note: this needs to be kept in sync with _CreateDisks
12760 for node in instance.all_nodes:
12761 f_create = (node == instance.primary_node)
12762 try:
12763 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12764 except errors.OpExecError, err:
12765 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12766 disk.iv_name, disk, node, err)
12768 return (disk, [
12769 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12770 ])
12772 @staticmethod
12773 def _ModifyDisk(idx, disk, params, _):
12774 """Modifies a disk.
12776 """
12777 disk.mode = params[constants.IDISK_MODE]
12779 return [
12780 ("disk.mode/%d" % idx, disk.mode),
12781 ]
12783 def _RemoveDisk(self, idx, root, _):
12787 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12788 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12789 self.cfg.SetDiskID(disk, node)
12790 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12791 if msg:
12792 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12793 " continuing anyway", idx, node, msg)
12795 # if this is a DRBD disk, return its port to the pool
12796 if root.dev_type in constants.LDS_DRBD:
12797 self.cfg.AddTcpUdpPort(root.logical_id[2])
12799 @staticmethod
12800 def _CreateNewNic(idx, params, private):
12801 """Creates data structure for a new network interface.
12804 mac = params[constants.INIC_MAC]
12805 ip = params.get(constants.INIC_IP, None)
12806 nicparams = private.params
12808 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12809 ("nic.%d" % idx,
12810 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12811 (mac, ip, private.filled[constants.NIC_MODE],
12812 private.filled[constants.NIC_LINK])),
12813 ])
12815 @staticmethod
12816 def _ApplyNicMods(idx, nic, params, private):
12817 """Modifies a network interface.
12819 """
12820 changes = []
12822 for key in [constants.INIC_MAC, constants.INIC_IP]:
12823 if key in params:
12824 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12825 setattr(nic, key, params[key])
12827 if private.params:
12828 nic.nicparams = private.params
12830 for (key, val) in params.items():
12831 changes.append(("nic.%s/%d" % (key, idx), val))
12833 return changes
12835 def Exec(self, feedback_fn):
12836 """Modifies an instance.
12838 All parameters take effect only at the next restart of the instance.
12841 # Process here the warnings from CheckPrereq, as we don't have a
12842 # feedback_fn there.
12843 # TODO: Replace with self.LogWarning
12844 for warn in self.warn:
12845 feedback_fn("WARNING: %s" % warn)
12847 assert ((self.op.disk_template is None) ^
12848 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12849 "Not owning any node resource locks"
12851 result = []
12852 instance = self.instance
12855 if self.op.runtime_mem:
12856 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12857 instance,
12858 self.op.runtime_mem)
12859 rpcres.Raise("Cannot modify instance runtime memory")
12860 result.append(("runtime_memory", self.op.runtime_mem))
12862 # Apply disk changes
12863 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12864 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
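# Renumber iv_names (disk/0, disk/1, ...) so the indices stay consecutive after additions and removals.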
12865 _UpdateIvNames(0, instance.disks)
12867 if self.op.disk_template:
12868 if __debug__:
12869 check_nodes = set(instance.all_nodes)
12870 if self.op.remote_node:
12871 check_nodes.add(self.op.remote_node)
12872 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12873 owned = self.owned_locks(level)
12874 assert not (check_nodes - owned), \
12875 ("Not owning the correct locks, owning %r, expected at least %r" %
12876 (owned, check_nodes))
12878 r_shut = _ShutdownInstanceDisks(self, instance)
12879 if not r_shut:
12880 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12881 " proceed with disk template conversion")
12882 mode = (instance.disk_template, self.op.disk_template)
12883 try:
12884 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12885 except:
12886 self.cfg.ReleaseDRBDMinors(instance.name)
12887 raise
12888 result.append(("disk_template", self.op.disk_template))
12890 assert instance.disk_template == self.op.disk_template, \
12891 ("Expected disk template '%s', found '%s'" %
12892 (self.op.disk_template, instance.disk_template))
12894 # Release node and resource locks if there are any (they might already have
12895 # been released during disk conversion)
12896 _ReleaseLocks(self, locking.LEVEL_NODE)
12897 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12899 # Apply NIC changes
12900 if self._new_nics is not None:
12901 instance.nics = self._new_nics
12902 result.extend(self._nic_chgdesc)
12905 if self.op.hvparams:
12906 instance.hvparams = self.hv_inst
12907 for key, val in self.op.hvparams.iteritems():
12908 result.append(("hv/%s" % key, val))
12911 if self.op.beparams:
12912 instance.beparams = self.be_inst
12913 for key, val in self.op.beparams.iteritems():
12914 result.append(("be/%s" % key, val))
12917 if self.op.os_name:
12918 instance.os = self.op.os_name
12921 if self.op.osparams:
12922 instance.osparams = self.os_inst
12923 for key, val in self.op.osparams.iteritems():
12924 result.append(("os/%s" % key, val))
12926 if self.op.offline is None:
12927 # Ignore
12928 pass
12929 elif self.op.offline:
12930 # Mark instance as offline
12931 self.cfg.MarkInstanceOffline(instance.name)
12932 result.append(("admin_state", constants.ADMINST_OFFLINE))
12933 else:
12934 # Mark instance as online, but stopped
12935 self.cfg.MarkInstanceDown(instance.name)
12936 result.append(("admin_state", constants.ADMINST_DOWN))
12938 self.cfg.Update(instance, feedback_fn)
12940 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12941 self.owned_locks(locking.LEVEL_NODE)), \
12942 "All node locks should have been released by now"
12944 return result
12946 _DISK_CONVERSIONS = {
12947 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12948 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12949 }
12952 class LUInstanceChangeGroup(LogicalUnit):
12953 HPATH = "instance-change-group"
12954 HTYPE = constants.HTYPE_INSTANCE
12957 def ExpandNames(self):
12958 self.share_locks = _ShareAll()
12959 self.needed_locks = {
12960 locking.LEVEL_NODEGROUP: [],
12961 locking.LEVEL_NODE: [],
12964 self._ExpandAndLockInstance()
12966 if self.op.target_groups:
12967 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12968 self.op.target_groups)
12969 else:
12970 self.req_target_uuids = None
12972 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12974 def DeclareLocks(self, level):
12975 if level == locking.LEVEL_NODEGROUP:
12976 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12978 if self.req_target_uuids:
12979 lock_groups = set(self.req_target_uuids)
12981 # Lock all groups used by instance optimistically; this requires going
12982 # via the node before it's locked, requiring verification later on
12983 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12984 lock_groups.update(instance_groups)
12985 else:
12986 # No target groups, need to lock all of them
12987 lock_groups = locking.ALL_SET
12989 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12991 elif level == locking.LEVEL_NODE:
12992 if self.req_target_uuids:
12993 # Lock all nodes used by instances
12994 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12995 self._LockInstancesNodes()
12997 # Lock all nodes in all potential target groups
12998 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12999 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13000 member_nodes = [node_name
13001 for group in lock_groups
13002 for node_name in self.cfg.GetNodeGroup(group).members]
13003 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13004 else:
13005 # Lock all nodes as all groups are potential targets
13006 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13008 def CheckPrereq(self):
13009 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13010 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13011 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13013 assert (self.req_target_uuids is None or
13014 owned_groups.issuperset(self.req_target_uuids))
13015 assert owned_instances == set([self.op.instance_name])
13017 # Get instance information
13018 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13020 # Check if node groups for locked instance are still correct
13021 assert owned_nodes.issuperset(self.instance.all_nodes), \
13022 ("Instance %s's nodes changed while we kept the lock" %
13023 self.op.instance_name)
13025 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13026 owned_groups)
13028 if self.req_target_uuids:
13029 # User requested specific target groups
13030 self.target_uuids = frozenset(self.req_target_uuids)
13031 else:
13032 # All groups except those used by the instance are potential targets
13033 self.target_uuids = owned_groups - inst_groups
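# Groups the instance currently uses can never be valid targets; the check below rejects such requests.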
13035 conflicting_groups = self.target_uuids & inst_groups
13036 if conflicting_groups:
13037 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13038 " used by the instance '%s'" %
13039 (utils.CommaJoin(conflicting_groups),
13040 self.op.instance_name),
13041 errors.ECODE_INVAL)
13043 if not self.target_uuids:
13044 raise errors.OpPrereqError("There are no possible target groups",
13045 errors.ECODE_INVAL)
13047 def BuildHooksEnv(self):
13048 """Build hooks env.
13051 assert self.target_uuids
13053 env = {
13054 "TARGET_GROUPS": " ".join(self.target_uuids),
13055 }
13057 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13059 return env
13061 def BuildHooksNodes(self):
13062 """Build hooks nodes.
13065 mn = self.cfg.GetMasterNode()
13066 return ([mn], [mn])
13068 def Exec(self, feedback_fn):
13069 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13071 assert instances == [self.op.instance_name], "Instance not locked"
13073 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13074 instances=instances, target_groups=list(self.target_uuids))
13076 ial.Run(self.op.iallocator)
13078 if not ial.success:
13079 raise errors.OpPrereqError("Can't compute solution for changing group of"
13080 " instance '%s' using iallocator '%s': %s" %
13081 (self.op.instance_name, self.op.iallocator,
13082 ial.info),
13083 errors.ECODE_NORES)
13085 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13087 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13088 " instance '%s'", len(jobs), self.op.instance_name)
13090 return ResultWithJobs(jobs)
13093 class LUBackupQuery(NoHooksLU):
13094 """Query the exports list
13099 def CheckArguments(self):
13100 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13101 ["node", "export"], self.op.use_locking)
13103 def ExpandNames(self):
13104 self.expq.ExpandNames(self)
13106 def DeclareLocks(self, level):
13107 self.expq.DeclareLocks(self, level)
13109 def Exec(self, feedback_fn):
13110 result = {}
13112 for (node, expname) in self.expq.OldStyleQuery(self):
13113 if expname is None:
13114 result[node] = False
13115 else:
13116 result.setdefault(node, []).append(expname)
13118 return result
13121 class _ExportQuery(_QueryBase):
13122 FIELDS = query.EXPORT_FIELDS
13124 #: The node name is not a unique key for this query
13125 SORT_FIELD = "node"
13127 def ExpandNames(self, lu):
13128 lu.needed_locks = {}
13130 # The following variables interact with _QueryBase._GetNames
13131 if self.names:
13132 self.wanted = _GetWantedNodes(lu, self.names)
13133 else:
13134 self.wanted = locking.ALL_SET
13136 self.do_locking = self.use_locking
13138 if self.do_locking:
13139 lu.share_locks = _ShareAll()
13140 lu.needed_locks = {
13141 locking.LEVEL_NODE: self.wanted,
13142 }
13144 def DeclareLocks(self, lu, level):
13145 pass
13147 def _GetQueryData(self, lu):
13148 """Computes the list of nodes and their attributes.
13151 # Locking is not used
13153 assert not (compat.any(lu.glm.is_owned(level)
13154 for level in locking.LEVELS
13155 if level != locking.LEVEL_CLUSTER) or
13156 self.do_locking or self.use_locking)
13158 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13160 result = []
13162 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13163 if nres.fail_msg:
13164 result.append((node, None))
13165 else:
13166 result.extend((node, expname) for expname in nres.payload)
13168 return result
13171 class LUBackupPrepare(NoHooksLU):
13172 """Prepares an instance for an export and returns useful information.
13177 def ExpandNames(self):
13178 self._ExpandAndLockInstance()
13180 def CheckPrereq(self):
13181 """Check prerequisites.
13184 instance_name = self.op.instance_name
13186 self.instance = self.cfg.GetInstanceInfo(instance_name)
13187 assert self.instance is not None, \
13188 "Cannot retrieve locked instance %s" % self.op.instance_name
13189 _CheckNodeOnline(self, self.instance.primary_node)
13191 self._cds = _GetClusterDomainSecret()
13193 def Exec(self, feedback_fn):
13194 """Prepares an instance for an export.
13197 instance = self.instance
13199 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13200 salt = utils.GenerateSecret(8)
13202 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13203 result = self.rpc.call_x509_cert_create(instance.primary_node,
13204 constants.RIE_CERT_VALIDITY)
13205 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13207 (name, cert_pem) = result.payload
13209 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13210 cert_pem)
13212 return {
13213 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13214 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13215 salt),
13216 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13217 }
13219 return None
13222 class LUBackupExport(LogicalUnit):
13223 """Export an instance to an image in the cluster.
13226 HPATH = "instance-export"
13227 HTYPE = constants.HTYPE_INSTANCE
13230 def CheckArguments(self):
13231 """Check the arguments.
13234 self.x509_key_name = self.op.x509_key_name
13235 self.dest_x509_ca_pem = self.op.destination_x509_ca
13237 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13238 if not self.x509_key_name:
13239 raise errors.OpPrereqError("Missing X509 key name for encryption",
13240 errors.ECODE_INVAL)
13242 if not self.dest_x509_ca_pem:
13243 raise errors.OpPrereqError("Missing destination X509 CA",
13244 errors.ECODE_INVAL)
13246 def ExpandNames(self):
13247 self._ExpandAndLockInstance()
13249 # Lock all nodes for local exports
13250 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13251 # FIXME: lock only instance primary and destination node
13253 # Sad but true, for now we have to lock all nodes, as we don't know where
13254 # the previous export might be, and in this LU we search for it and
13255 # remove it from its current node. In the future we could fix this by:
13256 # - making a tasklet to search (share-lock all), then create the
13257 # new one, then one to remove, after
13258 # - removing the removal operation altogether
13259 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13261 def DeclareLocks(self, level):
13262 """Last minute lock declaration."""
13263 # All nodes are locked anyway, so nothing to do here.
13265 def BuildHooksEnv(self):
13266 """Build hooks env.
13268 This will run on the master, primary node and target node.
13271 env = {
13272 "EXPORT_MODE": self.op.mode,
13273 "EXPORT_NODE": self.op.target_node,
13274 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13275 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13276 # TODO: Generic function for boolean env variables
13277 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13278 }
13280 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13282 return env
13284 def BuildHooksNodes(self):
13285 """Build hooks nodes.
13288 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13290 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13291 nl.append(self.op.target_node)
13293 return (nl, nl)
13295 def CheckPrereq(self):
13296 """Check prerequisites.
13298 This checks that the instance and node names are valid.
13301 instance_name = self.op.instance_name
13303 self.instance = self.cfg.GetInstanceInfo(instance_name)
13304 assert self.instance is not None, \
13305 "Cannot retrieve locked instance %s" % self.op.instance_name
13306 _CheckNodeOnline(self, self.instance.primary_node)
13308 if (self.op.remove_instance and
13309 self.instance.admin_state == constants.ADMINST_UP and
13310 not self.op.shutdown):
13311 raise errors.OpPrereqError("Can not remove instance without shutting it"
13312 " down before")
13314 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13315 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13316 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13317 assert self.dst_node is not None
13319 _CheckNodeOnline(self, self.dst_node.name)
13320 _CheckNodeNotDrained(self, self.dst_node.name)
13323 self.dest_disk_info = None
13324 self.dest_x509_ca = None
13326 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13327 self.dst_node = None
13329 if len(self.op.target_node) != len(self.instance.disks):
13330 raise errors.OpPrereqError(("Received destination information for %s"
13331 " disks, but instance %s has %s disks") %
13332 (len(self.op.target_node), instance_name,
13333 len(self.instance.disks)),
13334 errors.ECODE_INVAL)
13336 cds = _GetClusterDomainSecret()
13338 # Check X509 key name
13339 try:
13340 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13341 except (TypeError, ValueError), err:
13342 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13344 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13345 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13346 errors.ECODE_INVAL)
13348 # Load and verify CA
13349 try:
13350 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13351 except OpenSSL.crypto.Error, err:
13352 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13353 (err, ), errors.ECODE_INVAL)
13355 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13356 if errcode is not None:
13357 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13358 (msg, ), errors.ECODE_INVAL)
13360 self.dest_x509_ca = cert
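# For remote exports, op.target_node does not hold a node name but one signed (host, port, magic) entry per instance disk.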
13362 # Verify target information
13363 disk_info = []
13364 for idx, disk_data in enumerate(self.op.target_node):
13365 try:
13366 (host, port, magic) = \
13367 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13368 except errors.GenericError, err:
13369 raise errors.OpPrereqError("Target info for disk %s: %s" %
13370 (idx, err), errors.ECODE_INVAL)
13372 disk_info.append((host, port, magic))
13374 assert len(disk_info) == len(self.op.target_node)
13375 self.dest_disk_info = disk_info
13377 else:
13378 raise errors.ProgrammerError("Unhandled export mode %r" %
13379 self.op.mode)
13381 # instance disk type verification
13382 # TODO: Implement export support for file-based disks
13383 for disk in self.instance.disks:
13384 if disk.dev_type == constants.LD_FILE:
13385 raise errors.OpPrereqError("Export not supported for instances with"
13386 " file-based disks", errors.ECODE_INVAL)
13388 def _CleanupExports(self, feedback_fn):
13389 """Removes exports of current instance from all other nodes.
13391 If an instance in a cluster with nodes A..D was exported to node C, its
13392 exports will be removed from the nodes A, B and D.
13395 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13397 nodelist = self.cfg.GetNodeList()
13398 nodelist.remove(self.dst_node.name)
13400 # on one-node clusters nodelist will be empty after the removal
13401 # if we proceed the backup would be removed because OpBackupQuery
13402 # substitutes an empty list with the full cluster node list.
13403 iname = self.instance.name
13404 if nodelist:
13405 feedback_fn("Removing old exports for instance %s" % iname)
13406 exportlist = self.rpc.call_export_list(nodelist)
13407 for node in exportlist:
13408 if exportlist[node].fail_msg:
13409 continue
13410 if iname in exportlist[node].payload:
13411 msg = self.rpc.call_export_remove(node, iname).fail_msg
13412 if msg:
13413 self.LogWarning("Could not remove older export for instance %s"
13414 " on node %s: %s", iname, node, msg)
13416 def Exec(self, feedback_fn):
13417 """Export an instance to an image in the cluster.
13420 assert self.op.mode in constants.EXPORT_MODES
13422 instance = self.instance
13423 src_node = instance.primary_node
13425 if self.op.shutdown:
13426 # shutdown the instance, but not the disks
13427 feedback_fn("Shutting down instance %s" % instance.name)
13428 result = self.rpc.call_instance_shutdown(src_node, instance,
13429 self.op.shutdown_timeout)
13430 # TODO: Maybe ignore failures if ignore_remove_failures is set
13431 result.Raise("Could not shutdown instance %s on"
13432 " node %s" % (instance.name, src_node))
13434 # set the disks ID correctly since call_instance_start needs the
13435 # correct drbd minor to create the symlinks
13436 for disk in instance.disks:
13437 self.cfg.SetDiskID(disk, src_node)
13439 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13441 if activate_disks:
13442 # Activate the instance disks if we're exporting a stopped instance
13443 feedback_fn("Activating disks for %s" % instance.name)
13444 _StartInstanceDisks(self, instance, None)
13446 try:
13447 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13448 instance)
13450 helper.CreateSnapshots()
13451 try:
13452 if (self.op.shutdown and
13453 instance.admin_state == constants.ADMINST_UP and
13454 not self.op.remove_instance):
13455 assert not activate_disks
13456 feedback_fn("Starting instance %s" % instance.name)
13457 result = self.rpc.call_instance_start(src_node,
13458 (instance, None, None), False)
13459 msg = result.fail_msg
13460 if msg:
13461 feedback_fn("Failed to start instance: %s" % msg)
13462 _ShutdownInstanceDisks(self, instance)
13463 raise errors.OpExecError("Could not start instance: %s" % msg)
13465 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13466 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13467 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13468 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13469 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13471 (key_name, _, _) = self.x509_key_name
13473 dest_ca_pem = \
13474 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13475 self.dest_x509_ca)
13477 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13478 key_name, dest_ca_pem,
13479 timeouts)
13481 finally:
13482 helper.Cleanup()
13483 # Check for backwards compatibility
13484 assert len(dresults) == len(instance.disks)
13485 assert compat.all(isinstance(i, bool) for i in dresults), \
13486 "Not all results are boolean: %r" % dresults
13488 finally:
13489 if activate_disks:
13490 feedback_fn("Deactivating disks for %s" % instance.name)
13491 _ShutdownInstanceDisks(self, instance)
13493 if not (compat.all(dresults) and fin_resu):
13494 failures = []
13495 if not fin_resu:
13496 failures.append("export finalization")
13497 if not compat.all(dresults):
13498 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13499 if not dsk)
13500 failures.append("disk export: disk(s) %s" % fdsk)
13502 raise errors.OpExecError("Export failed, errors in %s" %
13503 utils.CommaJoin(failures))
13505 # At this point, the export was successful, we can cleanup/finish
13507 # Remove instance if requested
13508 if self.op.remove_instance:
13509 feedback_fn("Removing instance %s" % instance.name)
13510 _RemoveInstance(self, feedback_fn, instance,
13511 self.op.ignore_remove_failures)
13513 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13514 self._CleanupExports(feedback_fn)
13516 return fin_resu, dresults
13519 class LUBackupRemove(NoHooksLU):
13520 """Remove exports related to the named instance.
13525 def ExpandNames(self):
13526 self.needed_locks = {}
13527 # We need all nodes to be locked in order for RemoveExport to work, but we
13528 # don't need to lock the instance itself, as nothing will happen to it (and
13529 # we can remove exports also for a removed instance)
13530 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13532 def Exec(self, feedback_fn):
13533 """Remove any export.
13536 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13537 # If the instance was not found we'll try with the name that was passed in.
13538 # This will only work if it was an FQDN, though.
13539 fqdn_warn = False
13540 if not instance_name:
13541 fqdn_warn = True
13542 instance_name = self.op.instance_name
13544 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13545 exportlist = self.rpc.call_export_list(locked_nodes)
13546 found = False
13547 for node in exportlist:
13548 msg = exportlist[node].fail_msg
13549 if msg:
13550 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13551 continue
13552 if instance_name in exportlist[node].payload:
13553 found = True
13554 result = self.rpc.call_export_remove(node, instance_name)
13555 msg = result.fail_msg
13556 if msg:
13557 logging.error("Could not remove export for instance %s"
13558 " on node %s: %s", instance_name, node, msg)
13560 if fqdn_warn and not found:
13561 feedback_fn("Export not found. If trying to remove an export belonging"
13562 " to a deleted instance please use its Fully Qualified"
13563 " Domain Name.")
13566 class LUGroupAdd(LogicalUnit):
13567 """Logical unit for creating node groups.
13570 HPATH = "group-add"
13571 HTYPE = constants.HTYPE_GROUP
13574 def ExpandNames(self):
13575 # We need the new group's UUID here so that we can create and acquire the
13576 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13577 # that it should not check whether the UUID exists in the configuration.
13578 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13579 self.needed_locks = {}
13580 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13582 def CheckPrereq(self):
13583 """Check prerequisites.
13585 This checks that the given group name is not an existing node group
13586 already.
13588 """
13589 try:
13590 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13591 except errors.OpPrereqError:
13592 pass
13593 else:
13594 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13595 " node group (UUID: %s)" %
13596 (self.op.group_name, existing_uuid),
13597 errors.ECODE_EXISTS)
13599 if self.op.ndparams:
13600 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13602 if self.op.hv_state:
13603 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13604 else:
13605 self.new_hv_state = None
13607 if self.op.disk_state:
13608 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13609 else:
13610 self.new_disk_state = None
13612 if self.op.diskparams:
13613 for templ in constants.DISK_TEMPLATES:
13614 if templ in self.op.diskparams:
13615 utils.ForceDictType(self.op.diskparams[templ],
13616 constants.DISK_DT_TYPES)
13617 self.new_diskparams = self.op.diskparams
13618 try:
13619 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13620 except errors.OpPrereqError, err:
13621 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13622 errors.ECODE_INVAL)
13623 else:
13624 self.new_diskparams = {}
13626 if self.op.ipolicy:
13627 cluster = self.cfg.GetClusterInfo()
13628 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13629 try:
13630 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13631 except errors.ConfigurationError, err:
13632 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13633 errors.ECODE_INVAL)
13635 def BuildHooksEnv(self):
13636 """Build hooks env.
13638 """
13639 return {
13640 "GROUP_NAME": self.op.group_name,
13641 }
13643 def BuildHooksNodes(self):
13644 """Build hooks nodes.
13647 mn = self.cfg.GetMasterNode()
13648 return ([mn], [mn])
13650 def Exec(self, feedback_fn):
13651 """Add the node group to the cluster.
13654 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13655 uuid=self.group_uuid,
13656 alloc_policy=self.op.alloc_policy,
13657 ndparams=self.op.ndparams,
13658 diskparams=self.new_diskparams,
13659 ipolicy=self.op.ipolicy,
13660 hv_state_static=self.new_hv_state,
13661 disk_state_static=self.new_disk_state)
13663 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13664 del self.remove_locks[locking.LEVEL_NODEGROUP]
13667 class LUGroupAssignNodes(NoHooksLU):
13668 """Logical unit for assigning nodes to groups.
13673 def ExpandNames(self):
13674 # These raise errors.OpPrereqError on their own:
13675 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13676 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13678 # We want to lock all the affected nodes and groups. We have readily
13679 # available the list of nodes, and the *destination* group. To gather the
13680 # list of "source" groups, we need to fetch node information later on.
13681 self.needed_locks = {
13682 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13683 locking.LEVEL_NODE: self.op.nodes,
13686 def DeclareLocks(self, level):
13687 if level == locking.LEVEL_NODEGROUP:
13688 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13690 # Try to get all affected nodes' groups without having the group or node
13691 # lock yet. Needs verification later in the code flow.
13692 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13694 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13696 def CheckPrereq(self):
13697 """Check prerequisites.
13700 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13701 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13702 frozenset(self.op.nodes))
13704 expected_locks = (set([self.group_uuid]) |
13705 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13706 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13707 if actual_locks != expected_locks:
13708 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13709 " current groups are '%s', used to be '%s'" %
13710 (utils.CommaJoin(expected_locks),
13711 utils.CommaJoin(actual_locks)))
13713 self.node_data = self.cfg.GetAllNodesInfo()
13714 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13715 instance_data = self.cfg.GetAllInstancesInfo()
13717 if self.group is None:
13718 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13719 (self.op.group_name, self.group_uuid))
13721 (new_splits, previous_splits) = \
13722 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13723 for node in self.op.nodes],
13724 self.node_data, instance_data)
13726 if new_splits:
13727 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13729 if not self.op.force:
13730 raise errors.OpExecError("The following instances get split by this"
13731 " change and --force was not given: %s" %
13732 fmt_new_splits)
13733 else:
13734 self.LogWarning("This operation will split the following instances: %s",
13735 fmt_new_splits)
13737 if previous_splits:
13738 self.LogWarning("In addition, these already-split instances continue"
13739 " to be split across groups: %s",
13740 utils.CommaJoin(utils.NiceSort(previous_splits)))
13742 def Exec(self, feedback_fn):
13743 """Assign nodes to a new group.
13746 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13748 self.cfg.AssignGroupNodes(mods)
13751 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13752 """Check for split instances after a node assignment.
13754 This method considers a series of node assignments as an atomic operation,
13755 and returns information about split instances after applying the set of
13758 In particular, it returns information about newly split instances, and
13759 instances that were already split, and remain so after the change.
13761 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13764 @type changes: list of (node_name, new_group_uuid) pairs.
13765 @param changes: list of node assignments to consider.
13766 @param node_data: a dict with data for all nodes
13767 @param instance_data: a dict with all instances to consider
13768 @rtype: a two-tuple
13769 @return: a list of instances that were previously okay and result split as a
13770 consequence of this change, and a list of instances that were previously
13771 split and this change does not fix.
13774 changed_nodes = dict((node, group) for node, group in changes
13775 if node_data[node].group != group)
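# Only nodes whose group assignment actually changes are relevant for the split computation below.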
13777 all_split_instances = set()
13778 previously_split_instances = set()
13780 def InstanceNodes(instance):
13781 return [instance.primary_node] + list(instance.secondary_nodes)
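# An instance counts as split when its primary and secondary nodes span more than one node group.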
13783 for inst in instance_data.values():
13784 if inst.disk_template not in constants.DTS_INT_MIRROR:
13785 continue
13787 instance_nodes = InstanceNodes(inst)
13789 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13790 previously_split_instances.add(inst.name)
13792 if len(set(changed_nodes.get(node, node_data[node].group)
13793 for node in instance_nodes)) > 1:
13794 all_split_instances.add(inst.name)
13796 return (list(all_split_instances - previously_split_instances),
13797 list(previously_split_instances & all_split_instances))
13800 class _GroupQuery(_QueryBase):
13801 FIELDS = query.GROUP_FIELDS
13803 def ExpandNames(self, lu):
13804 lu.needed_locks = {}
13806 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13807 self._cluster = lu.cfg.GetClusterInfo()
13808 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13810 if not self.names:
13811 self.wanted = [name_to_uuid[name]
13812 for name in utils.NiceSort(name_to_uuid.keys())]
13813 else:
13814 # Accept names to be either names or UUIDs.
13815 missing = []
13816 self.wanted = []
13817 all_uuid = frozenset(self._all_groups.keys())
13819 for name in self.names:
13820 if name in all_uuid:
13821 self.wanted.append(name)
13822 elif name in name_to_uuid:
13823 self.wanted.append(name_to_uuid[name])
13825 missing.append(name)
13827 if missing:
13828 raise errors.OpPrereqError("Some groups do not exist: %s" %
13829 utils.CommaJoin(missing),
13830 errors.ECODE_NOENT)
13832 def DeclareLocks(self, lu, level):
13835 def _GetQueryData(self, lu):
13836 """Computes the list of node groups and their attributes.
13839 do_nodes = query.GQ_NODE in self.requested_data
13840 do_instances = query.GQ_INST in self.requested_data
13842 group_to_nodes = None
13843 group_to_instances = None
13845 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13846 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13847 # latter GetAllInstancesInfo() is not enough, for we have to go through
13848 # instance->node. Hence, we will need to process nodes even if we only need
13849 # instance information.
13850 if do_nodes or do_instances:
13851 all_nodes = lu.cfg.GetAllNodesInfo()
13852 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13854 node_to_group = {}
13855 for node in all_nodes.values():
13856 if node.group in group_to_nodes:
13857 group_to_nodes[node.group].append(node.name)
13858 node_to_group[node.name] = node.group
13860 if do_instances:
13861 all_instances = lu.cfg.GetAllInstancesInfo()
13862 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13864 for instance in all_instances.values():
13865 node = instance.primary_node
13866 if node in node_to_group:
13867 group_to_instances[node_to_group[node]].append(instance.name)
13869 if not do_nodes:
13870 # Do not pass on node information if it was not requested.
13871 group_to_nodes = None
13873 return query.GroupQueryData(self._cluster,
13874 [self._all_groups[uuid]
13875 for uuid in self.wanted],
13876 group_to_nodes, group_to_instances,
13877 query.GQ_DISKPARAMS in self.requested_data)
13880 class LUGroupQuery(NoHooksLU):
13881 """Logical unit for querying node groups.
13886 def CheckArguments(self):
13887 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13888 self.op.output_fields, False)
13890 def ExpandNames(self):
13891 self.gq.ExpandNames(self)
13893 def DeclareLocks(self, level):
13894 self.gq.DeclareLocks(self, level)
13896 def Exec(self, feedback_fn):
13897 return self.gq.OldStyleQuery(self)
13900 class LUGroupSetParams(LogicalUnit):
13901 """Modifies the parameters of a node group.
13904 HPATH = "group-modify"
13905 HTYPE = constants.HTYPE_GROUP
13908 def CheckArguments(self):
13909 all_changes = [
13910 self.op.ndparams,
13911 self.op.diskparams,
13912 self.op.alloc_policy,
13913 self.op.hv_state,
13914 self.op.disk_state,
13915 self.op.ipolicy,
13916 ]
13918 if all_changes.count(None) == len(all_changes):
13919 raise errors.OpPrereqError("Please pass at least one modification",
13920 errors.ECODE_INVAL)
13922 def ExpandNames(self):
13923 # This raises errors.OpPrereqError on its own:
13924 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13926 self.needed_locks = {
13927 locking.LEVEL_INSTANCE: [],
13928 locking.LEVEL_NODEGROUP: [self.group_uuid],
13931 self.share_locks[locking.LEVEL_INSTANCE] = 1
13933 def DeclareLocks(self, level):
13934 if level == locking.LEVEL_INSTANCE:
13935 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13937 # Lock instances optimistically, needs verification once group lock has
13939 self.needed_locks[locking.LEVEL_INSTANCE] = \
13940 self.cfg.GetNodeGroupInstances(self.group_uuid)
13942 @staticmethod
13943 def _UpdateAndVerifyDiskParams(old, new):
13944 """Updates and verifies disk parameters.
13947 new_params = _GetUpdatedParams(old, new)
13948 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13949 return new_params
13951 def CheckPrereq(self):
13952 """Check prerequisites.
13955 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13957 # Check if locked instances are still correct
13958 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13960 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13961 cluster = self.cfg.GetClusterInfo()
13963 if self.group is None:
13964 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13965 (self.op.group_name, self.group_uuid))
13967 if self.op.ndparams:
13968 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13969 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13970 self.new_ndparams = new_ndparams
13972 if self.op.diskparams:
13973 diskparams = self.group.diskparams
13974 uavdp = self._UpdateAndVerifyDiskParams
13975 # For each disktemplate subdict update and verify the values
13976 new_diskparams = dict((dt,
13977 uavdp(diskparams.get(dt, {}),
13978 self.op.diskparams[dt]))
13979 for dt in constants.DISK_TEMPLATES
13980 if dt in self.op.diskparams)
13981 # As we've all subdicts of diskparams ready, lets merge the actual
13982 # dict with all updated subdicts
13983 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13985 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13986 except errors.OpPrereqError, err:
13987 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13988 errors.ECODE_INVAL)
13990 if self.op.hv_state:
13991 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13992 self.group.hv_state_static)
13994 if self.op.disk_state:
13995 self.new_disk_state = \
13996 _MergeAndVerifyDiskState(self.op.disk_state,
13997 self.group.disk_state_static)
13999 if self.op.ipolicy:
14000 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14001 self.op.ipolicy,
14002 group_policy=True)
14004 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14005 inst_filter = lambda inst: inst.name in owned_instances
14006 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14007 violations = \
14008 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14009 self.group),
14010 new_ipolicy, instances)
14012 if violations:
14013 self.LogWarning("After the ipolicy change the following instances"
14014 " violate them: %s",
14015 utils.CommaJoin(violations))
14017 def BuildHooksEnv(self):
14018 """Build hooks env.
14020 """
14021 return {
14022 "GROUP_NAME": self.op.group_name,
14023 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14024 }
14026 def BuildHooksNodes(self):
14027 """Build hooks nodes.
14030 mn = self.cfg.GetMasterNode()
14031 return ([mn], [mn])
14033 def Exec(self, feedback_fn):
14034 """Modifies the node group.
14036 """
14037 result = []
14039 if self.op.ndparams:
14040 self.group.ndparams = self.new_ndparams
14041 result.append(("ndparams", str(self.group.ndparams)))
14043 if self.op.diskparams:
14044 self.group.diskparams = self.new_diskparams
14045 result.append(("diskparams", str(self.group.diskparams)))
14047 if self.op.alloc_policy:
14048 self.group.alloc_policy = self.op.alloc_policy
14050 if self.op.hv_state:
14051 self.group.hv_state_static = self.new_hv_state
14053 if self.op.disk_state:
14054 self.group.disk_state_static = self.new_disk_state
14056 if self.op.ipolicy:
14057 self.group.ipolicy = self.new_ipolicy
14059 self.cfg.Update(self.group, feedback_fn)
14061 return result
14063 class LUGroupRemove(LogicalUnit):
14064 HPATH = "group-remove"
14065 HTYPE = constants.HTYPE_GROUP
14068 def ExpandNames(self):
14069 # This will raises errors.OpPrereqError on its own:
14070 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14071 self.needed_locks = {
14072 locking.LEVEL_NODEGROUP: [self.group_uuid],
14075 def CheckPrereq(self):
14076 """Check prerequisites.
14078 This checks that the given group name exists as a node group, that is
14079 empty (i.e., contains no nodes), and that is not the last group of the
14083 # Verify that the group is empty.
14084 group_nodes = [node.name
14085 for node in self.cfg.GetAllNodesInfo().values()
14086 if node.group == self.group_uuid]
14088 if group_nodes:
14089 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14090 " nodes: %s" %
14091 (self.op.group_name,
14092 utils.CommaJoin(utils.NiceSort(group_nodes))),
14093 errors.ECODE_STATE)
14095 # Verify the cluster would not be left group-less.
14096 if len(self.cfg.GetNodeGroupList()) == 1:
14097 raise errors.OpPrereqError("Group '%s' is the only group,"
14098 " cannot be removed" %
14099 self.op.group_name,
14100 errors.ECODE_STATE)
14102 def BuildHooksEnv(self):
14103 """Build hooks env.
14105 """
14106 return {
14107 "GROUP_NAME": self.op.group_name,
14108 }
14110 def BuildHooksNodes(self):
14111 """Build hooks nodes.
14114 mn = self.cfg.GetMasterNode()
14115 return ([mn], [mn])
14117 def Exec(self, feedback_fn):
14118 """Remove the node group.
14120 """
14121 try:
14122 self.cfg.RemoveNodeGroup(self.group_uuid)
14123 except errors.ConfigurationError:
14124 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14125 (self.op.group_name, self.group_uuid))
14127 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14130 class LUGroupRename(LogicalUnit):
14131 HPATH = "group-rename"
14132 HTYPE = constants.HTYPE_GROUP
14135 def ExpandNames(self):
14136 # This raises errors.OpPrereqError on its own:
14137 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14139 self.needed_locks = {
14140 locking.LEVEL_NODEGROUP: [self.group_uuid],
14143 def CheckPrereq(self):
14144 """Check prerequisites.
14146 Ensures requested new name is not yet used.
14148 """
14149 try:
14150 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14151 except errors.OpPrereqError:
14152 pass
14153 else:
14154 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14155 " node group (UUID: %s)" %
14156 (self.op.new_name, new_name_uuid),
14157 errors.ECODE_EXISTS)
14159 def BuildHooksEnv(self):
14160 """Build hooks env.
14162 """
14163 return {
14164 "OLD_NAME": self.op.group_name,
14165 "NEW_NAME": self.op.new_name,
14166 }
14168 def BuildHooksNodes(self):
14169 """Build hooks nodes.
14172 mn = self.cfg.GetMasterNode()
14174 all_nodes = self.cfg.GetAllNodesInfo()
14175 all_nodes.pop(mn, None)
14177 run_nodes = [mn]
14178 run_nodes.extend(node.name for node in all_nodes.values()
14179 if node.group == self.group_uuid)
14181 return (run_nodes, run_nodes)
14183 def Exec(self, feedback_fn):
14184 """Rename the node group.
14187 group = self.cfg.GetNodeGroup(self.group_uuid)
14189 if group is None:
14190 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14191 (self.op.group_name, self.group_uuid))
14193 group.name = self.op.new_name
14194 self.cfg.Update(group, feedback_fn)
14196 return self.op.new_name
14199 class LUGroupEvacuate(LogicalUnit):
14200 HPATH = "group-evacuate"
14201 HTYPE = constants.HTYPE_GROUP
14204 def ExpandNames(self):
14205 # This raises errors.OpPrereqError on its own:
14206 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14208 if self.op.target_groups:
14209 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14210 self.op.target_groups)
14212 self.req_target_uuids = []
14214 if self.group_uuid in self.req_target_uuids:
14215 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14216 " as a target group (targets are %s)" %
14217 (self.group_uuid,
14218 utils.CommaJoin(self.req_target_uuids)),
14219 errors.ECODE_INVAL)
14221 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14223 self.share_locks = _ShareAll()
14224 self.needed_locks = {
14225 locking.LEVEL_INSTANCE: [],
14226 locking.LEVEL_NODEGROUP: [],
14227 locking.LEVEL_NODE: [],
14230 def DeclareLocks(self, level):
14231 if level == locking.LEVEL_INSTANCE:
14232 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14234 # Lock instances optimistically, needs verification once node and group
14235 # locks have been acquired
14236 self.needed_locks[locking.LEVEL_INSTANCE] = \
14237 self.cfg.GetNodeGroupInstances(self.group_uuid)
14239 elif level == locking.LEVEL_NODEGROUP:
14240 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14242 if self.req_target_uuids:
14243 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14245 # Lock all groups used by instances optimistically; this requires going
14246 # via the node before it's locked, requiring verification later on
14247 lock_groups.update(group_uuid
14248 for instance_name in
14249 self.owned_locks(locking.LEVEL_INSTANCE)
14250 for group_uuid in
14251 self.cfg.GetInstanceNodeGroups(instance_name))
14252 else:
14253 # No target groups, need to lock all of them
14254 lock_groups = locking.ALL_SET
14256 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14258 elif level == locking.LEVEL_NODE:
14259 # This will only lock the nodes in the group to be evacuated which
14260 # contain actual instances
14261 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14262 self._LockInstancesNodes()
14263 else:
14264 # Lock all nodes in group to be evacuated and target groups
14265 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14266 assert self.group_uuid in owned_groups
14267 member_nodes = [node_name
14268 for group in owned_groups
14269 for node_name in self.cfg.GetNodeGroup(group).members]
14270 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14272 def CheckPrereq(self):
14273 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14274 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14275 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14277 assert owned_groups.issuperset(self.req_target_uuids)
14278 assert self.group_uuid in owned_groups
14280 # Check if locked instances are still correct
14281 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14283 # Get instance information
14284 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14286 # Check if node groups for locked instances are still correct
14287 _CheckInstancesNodeGroups(self.cfg, self.instances,
14288 owned_groups, owned_nodes, self.group_uuid)
14290 if self.req_target_uuids:
14291 # User requested specific target groups
14292 self.target_uuids = self.req_target_uuids
14293 else:
14294 # All groups except the one to be evacuated are potential targets
14295 self.target_uuids = [group_uuid for group_uuid in owned_groups
14296 if group_uuid != self.group_uuid]
14298 if not self.target_uuids:
14299 raise errors.OpPrereqError("There are no possible target groups",
14300 errors.ECODE_INVAL)
14302 def BuildHooksEnv(self):
14303 """Build hooks env.
14305 """
14306 return {
14307 "GROUP_NAME": self.op.group_name,
14308 "TARGET_GROUPS": " ".join(self.target_uuids),
14309 }
14311 def BuildHooksNodes(self):
14312 """Build hooks nodes.
14315 mn = self.cfg.GetMasterNode()
14317 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14319 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14321 return (run_nodes, run_nodes)
14323 def Exec(self, feedback_fn):
14324 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14326 assert self.group_uuid not in self.target_uuids
14328 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14329 instances=instances, target_groups=self.target_uuids)
14331 ial.Run(self.op.iallocator)
14333 if not ial.success:
14334 raise errors.OpPrereqError("Can't compute group evacuation using"
14335 " iallocator '%s': %s" %
14336 (self.op.iallocator, ial.info),
14337 errors.ECODE_NORES)
14339 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14341 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14342 len(jobs), self.op.group_name)
14344 return ResultWithJobs(jobs)
14347 class TagsLU(NoHooksLU): # pylint: disable=W0223
14348 """Generic tags LU.
14350 This is an abstract class which is the parent of all the other tags LUs.
14353 def ExpandNames(self):
14354 self.group_uuid = None
14355 self.needed_locks = {}
14357 if self.op.kind == constants.TAG_NODE:
14358 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14359 lock_level = locking.LEVEL_NODE
14360 lock_name = self.op.name
14361 elif self.op.kind == constants.TAG_INSTANCE:
14362 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14363 lock_level = locking.LEVEL_INSTANCE
14364 lock_name = self.op.name
14365 elif self.op.kind == constants.TAG_NODEGROUP:
14366 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14367 lock_level = locking.LEVEL_NODEGROUP
14368 lock_name = self.group_uuid
14369 else:
14370 lock_level = None
14371 lock_name = None
14373 if lock_level and getattr(self.op, "use_locking", True):
14374 self.needed_locks[lock_level] = lock_name
14376 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14377 # not possible to acquire the BGL based on opcode parameters)
14379 def CheckPrereq(self):
14380 """Check prerequisites.
14383 if self.op.kind == constants.TAG_CLUSTER:
14384 self.target = self.cfg.GetClusterInfo()
14385 elif self.op.kind == constants.TAG_NODE:
14386 self.target = self.cfg.GetNodeInfo(self.op.name)
14387 elif self.op.kind == constants.TAG_INSTANCE:
14388 self.target = self.cfg.GetInstanceInfo(self.op.name)
14389 elif self.op.kind == constants.TAG_NODEGROUP:
14390 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14391 else:
14392 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14393 str(self.op.kind), errors.ECODE_INVAL)
14396 class LUTagsGet(TagsLU):
14397 """Returns the tags of a given object.
14402 def ExpandNames(self):
14403 TagsLU.ExpandNames(self)
14405 # Share locks as this is only a read operation
14406 self.share_locks = _ShareAll()
14408 def Exec(self, feedback_fn):
14409 """Returns the tag list.
14412 return list(self.target.GetTags())
14415 class LUTagsSearch(NoHooksLU):
14416 """Searches the tags for a given pattern.
14421 def ExpandNames(self):
14422 self.needed_locks = {}
14424 def CheckPrereq(self):
14425 """Check prerequisites.
14427 This checks the pattern passed for validity by compiling it.
14430 try:
14431 self.re = re.compile(self.op.pattern)
14432 except re.error, err:
14433 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14434 (self.op.pattern, err), errors.ECODE_INVAL)
14436 def Exec(self, feedback_fn):
14437 """Returns the tag list.
14439 """
14440 cfg = self.cfg
14441 tgts = [("/cluster", cfg.GetClusterInfo())]
14442 ilist = cfg.GetAllInstancesInfo().values()
14443 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14444 nlist = cfg.GetAllNodesInfo().values()
14445 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14446 tgts.extend(("/nodegroup/%s" % n.name, n)
14447 for n in cfg.GetAllNodeGroupsInfo().values())
14448 results = []
14449 for path, target in tgts:
14450 for tag in target.GetTags():
14451 if self.re.search(tag):
14452 results.append((path, tag))
14454 return results
14456 class LUTagsSet(TagsLU):
14457 """Sets a tag on a given object.
14462 def CheckPrereq(self):
14463 """Check prerequisites.
14465 This checks the type and length of the tag name and value.
14468 TagsLU.CheckPrereq(self)
14469 for tag in self.op.tags:
14470 objects.TaggableObject.ValidateTag(tag)
14472 def Exec(self, feedback_fn):
14473 """Sets the tag.
14475 """
14476 try:
14477 for tag in self.op.tags:
14478 self.target.AddTag(tag)
14479 except errors.TagError, err:
14480 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14481 self.cfg.Update(self.target, feedback_fn)
14484 class LUTagsDel(TagsLU):
14485 """Delete a list of tags from a given object.
14490 def CheckPrereq(self):
14491 """Check prerequisites.
14493 This checks that we have the given tag.
14496 TagsLU.CheckPrereq(self)
14497 for tag in self.op.tags:
14498 objects.TaggableObject.ValidateTag(tag)
14499 del_tags = frozenset(self.op.tags)
14500 cur_tags = self.target.GetTags()
14502 diff_tags = del_tags - cur_tags
14503 if diff_tags:
14504 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14505 raise errors.OpPrereqError("Tag(s) %s not found" %
14506 (utils.CommaJoin(diff_names), ),
14507 errors.ECODE_NOENT)
14509 def Exec(self, feedback_fn):
14510 """Remove the tag from the object.
14513 for tag in self.op.tags:
14514 self.target.RemoveTag(tag)
14515 self.cfg.Update(self.target, feedback_fn)
14518 class LUTestDelay(NoHooksLU):
14519 """Sleep for a specified amount of time.
14521 This LU sleeps on the master and/or nodes for a specified amount of
14527 def ExpandNames(self):
14528 """Expand names and set required locks.
14530 This expands the node list, if any.
14533 self.needed_locks = {}
14534 if self.op.on_nodes:
14535 # _GetWantedNodes can be used here, but is not always appropriate to use
14536 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14537 # more information.
14538 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14539 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14541 def _TestDelay(self):
14542 """Do the actual sleep.
14545 if self.op.on_master:
14546 if not utils.TestDelay(self.op.duration):
14547 raise errors.OpExecError("Error during master delay test")
14548 if self.op.on_nodes:
14549 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14550 for node, node_result in result.items():
14551 node_result.Raise("Failure during rpc call to node %s" % node)
14553 def Exec(self, feedback_fn):
14554 """Execute the test delay opcode, with the wanted repetitions.
14557 if self.op.repeat == 0:
14558 self._TestDelay()
14559 else:
14560 top_value = self.op.repeat - 1
14561 for i in range(self.op.repeat):
14562 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14563 self._TestDelay()
14566 class LUTestJqueue(NoHooksLU):
14567 """Utility LU to test some aspects of the job queue.
14572 # Must be lower than default timeout for WaitForJobChange to see whether it
14573 # notices changed jobs
14574 _CLIENT_CONNECT_TIMEOUT = 20.0
14575 _CLIENT_CONFIRM_TIMEOUT = 60.0
14578 def _NotifyUsingSocket(cls, cb, errcls):
14579 """Opens a Unix socket and waits for another program to connect.
14582 @param cb: Callback to send socket name to client
14583 @type errcls: class
14584 @param errcls: Exception class to use for errors
14587 # Using a temporary directory as there's no easy way to create temporary
14588 # sockets without writing a custom loop around tempfile.mktemp and
14590 tmpdir = tempfile.mkdtemp()
14592 tmpsock = utils.PathJoin(tmpdir, "sock")
14594 logging.debug("Creating temporary socket at %s", tmpsock)
14595 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14600 # Send details to client
14603 # Wait for client to connect before continuing
14604 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14606 (conn, _) = sock.accept()
14607 except socket.error, err:
14608 raise errcls("Client didn't connect in time (%s)" % err)
14612 # Remove as soon as client is connected
14613 shutil.rmtree(tmpdir)
14615 # Wait for client to close
14618 # pylint: disable=E1101
14619 # Instance of '_socketobject' has no ... member
14620 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14622 except socket.error, err:
14623 raise errcls("Client failed to confirm notification (%s)" % err)
14627 def _SendNotification(self, test, arg, sockname):
14628 """Sends a notification to the client.
14631 @param test: Test name
14632 @param arg: Test argument (depends on test)
14633 @type sockname: string
14634 @param sockname: Socket path
14637 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14639 def _Notify(self, prereq, test, arg):
14640 """Notifies the client of a test.
14643 @param prereq: Whether this is a prereq-phase test
14645 @param test: Test name
14646 @param arg: Test argument (depends on test)
14650 errcls = errors.OpPrereqError
14652 errcls = errors.OpExecError
14654 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14658 def CheckArguments(self):
14659 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14660 self.expandnames_calls = 0
14662 def ExpandNames(self):
14663 checkargs_calls = getattr(self, "checkargs_calls", 0)
14664 if checkargs_calls < 1:
14665 raise errors.ProgrammerError("CheckArguments was not called")
14667 self.expandnames_calls += 1
14669 if self.op.notify_waitlock:
14670 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14672 self.LogInfo("Expanding names")
14674 # Get lock on master node (just to get a lock, not for a particular reason)
14675 self.needed_locks = {
14676 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14679 def Exec(self, feedback_fn):
14680 if self.expandnames_calls < 1:
14681 raise errors.ProgrammerError("ExpandNames was not called")
14683 if self.op.notify_exec:
14684 self._Notify(False, constants.JQT_EXEC, None)
14686 self.LogInfo("Executing")
14688 if self.op.log_messages:
14689 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14690 for idx, msg in enumerate(self.op.log_messages):
14691 self.LogInfo("Sending log message %s", idx + 1)
14692 feedback_fn(constants.JQT_MSGPREFIX + msg)
14693 # Report how many test messages have been sent
14694 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14697 raise errors.OpExecError("Opcode failure was requested")
14702 class IAllocator(object):
14703 """IAllocator framework.
14705 An IAllocator instance has four sets of attributes:
14706 - cfg that is needed to query the cluster
14707 - input data (all members of the _KEYS class attribute are required)
14708 - four buffer attributes (in_text, out_text, in_data, out_data) that
14709 represent the input (to the external script) in text and in data
14710 structure format, and the output from it, again in both formats
14711 - the result variables from the script (success, info, nodes) for
14715 # pylint: disable=R0902
14716 # lots of instance attributes
14718 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14720 self.rpc = rpc_runner
14721 # init buffer variables
14722 self.in_text = self.out_text = self.in_data = self.out_data = None
14723 # init all input fields so that pylint is happy
14725 self.memory = self.disks = self.disk_template = self.spindle_use = None
14726 self.os = self.tags = self.nics = self.vcpus = None
14727 self.hypervisor = None
14728 self.relocate_from = None
14730 self.instances = None
14731 self.evac_mode = None
14732 self.target_groups = []
14734 self.required_nodes = None
14735 # init result fields
14736 self.success = self.info = self.result = None
14739 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14741 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14742 " IAllocator" % self.mode)
14744 keyset = [n for (n, _) in keydata]
14747 if key not in keyset:
14748 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14749 " IAllocator" % key)
14750 setattr(self, key, kwargs[key])
14753 if key not in kwargs:
14754 raise errors.ProgrammerError("Missing input parameter '%s' to"
14755 " IAllocator" % key)
14756 self._BuildInputData(compat.partial(fn, self), keydata)
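# Illustrative only: a minimal sketch of building and running a relocation
# request (keyword names follow the _MODE_DATA entries defined below; the
# allocator name "hail" is just an example):
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")
#   if not ial.success:
#     raise errors.OpExecError("Allocation failed: %s" % ial.info)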
14758 def _ComputeClusterData(self):
14759 """Compute the generic allocator input data.
14761 This is the data that is independent of the actual operation.
14765 cluster_info = cfg.GetClusterInfo()
14768 "version": constants.IALLOCATOR_VERSION,
14769 "cluster_name": cfg.GetClusterName(),
14770 "cluster_tags": list(cluster_info.GetTags()),
14771 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14772 "ipolicy": cluster_info.ipolicy,
14774 ninfo = cfg.GetAllNodesInfo()
14775 iinfo = cfg.GetAllInstancesInfo().values()
14776 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14779 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14781 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14782 hypervisor_name = self.hypervisor
14783 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14784 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14786 hypervisor_name = cluster_info.primary_hypervisor
14788 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14791 self.rpc.call_all_instances_info(node_list,
14792 cluster_info.enabled_hypervisors)
14794 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14796 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14797 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14798 i_list, config_ndata)
14799 assert len(data["nodes"]) == len(ninfo), \
14800 "Incomplete node data computed"
14802 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14804 self.in_data = data
14807 def _ComputeNodeGroupData(cfg):
14808 """Compute node groups data.
14811 cluster = cfg.GetClusterInfo()
14812 ng = dict((guuid, {
14813 "name": gdata.name,
14814 "alloc_policy": gdata.alloc_policy,
14815 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14817 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14822 def _ComputeBasicNodeData(cfg, node_cfg):
14823 """Compute global node data.
14826 @returns: a dict mapping node name to a dict of static (config-derived) node attributes
14829 # fill in static (config-based) values
14830 node_results = dict((ninfo.name, {
14831 "tags": list(ninfo.GetTags()),
14832 "primary_ip": ninfo.primary_ip,
14833 "secondary_ip": ninfo.secondary_ip,
14834 "offline": ninfo.offline,
14835 "drained": ninfo.drained,
14836 "master_candidate": ninfo.master_candidate,
14837 "group": ninfo.group,
14838 "master_capable": ninfo.master_capable,
14839 "vm_capable": ninfo.vm_capable,
14840 "ndparams": cfg.GetNdParams(ninfo),
14842 for ninfo in node_cfg.values())
14844 return node_results
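# Illustrative shape of a single entry in the returned dict (values are
# made up; keys match the assignments above):
#
#   "node1.example.com": {
#     "tags": [], "primary_ip": "192.0.2.10", "secondary_ip": "192.0.2.110",
#     "offline": False, "drained": False, "master_candidate": True,
#     "group": "<group UUID>", "master_capable": True, "vm_capable": True,
#     "ndparams": {...},
#   }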
14847 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14849 """Compute global node data.
14851 @param node_results: the basic node structures as filled from the config
14854 # TODO(dynmem): compute the right data on MAX and MIN memory
14855 # make a copy of the current dict
14856 node_results = dict(node_results)
14857 for nname, nresult in node_data.items():
14858 assert nname in node_results, "Missing basic data for node %s" % nname
14859 ninfo = node_cfg[nname]
14861 if not (ninfo.offline or ninfo.drained):
14862 nresult.Raise("Can't get data for node %s" % nname)
14863 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14865 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14867 for attr in ["memory_total", "memory_free", "memory_dom0",
14868 "vg_size", "vg_free", "cpu_total"]:
14869 if attr not in remote_info:
14870 raise errors.OpExecError("Node '%s' didn't return attribute"
14871 " '%s'" % (nname, attr))
14872 if not isinstance(remote_info[attr], int):
14873 raise errors.OpExecError("Node '%s' returned invalid value"
14875 (nname, attr, remote_info[attr]))
14876 # compute memory used by primary instances
14877 i_p_mem = i_p_up_mem = 0
14878 for iinfo, beinfo in i_list:
14879 if iinfo.primary_node == nname:
14880 i_p_mem += beinfo[constants.BE_MAXMEM]
14881 if iinfo.name not in node_iinfo[nname].payload:
14884 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14885 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14886 remote_info["memory_free"] -= max(0, i_mem_diff)
14888 if iinfo.admin_state == constants.ADMINST_UP:
14889 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14891 # compute memory used by instances
14893 "total_memory": remote_info["memory_total"],
14894 "reserved_memory": remote_info["memory_dom0"],
14895 "free_memory": remote_info["memory_free"],
14896 "total_disk": remote_info["vg_size"],
14897 "free_disk": remote_info["vg_free"],
14898 "total_cpus": remote_info["cpu_total"],
14899 "i_pri_memory": i_p_mem,
14900 "i_pri_up_memory": i_p_up_mem,
14902 pnr_dyn.update(node_results[nname])
14903 node_results[nname] = pnr_dyn
14905 return node_results
14908 def _ComputeInstanceData(cluster_info, i_list):
14909 """Compute global instance data.
14913 for iinfo, beinfo in i_list:
14915 for nic in iinfo.nics:
14916 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14920 "mode": filled_params[constants.NIC_MODE],
14921 "link": filled_params[constants.NIC_LINK],
14923 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14924 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14925 nic_data.append(nic_dict)
14927 "tags": list(iinfo.GetTags()),
14928 "admin_state": iinfo.admin_state,
14929 "vcpus": beinfo[constants.BE_VCPUS],
14930 "memory": beinfo[constants.BE_MAXMEM],
14931 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14933 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14935 "disks": [{constants.IDISK_SIZE: dsk.size,
14936 constants.IDISK_MODE: dsk.mode}
14937 for dsk in iinfo.disks],
14938 "disk_template": iinfo.disk_template,
14939 "hypervisor": iinfo.hypervisor,
14941 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14943 instance_data[iinfo.name] = pir
14945 return instance_data
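# Illustrative shape of a single entry in the returned dict (values are
# made up; only keys visible above are shown):
#
#   "inst1.example.com": {
#     "tags": [], "admin_state": "up", "vcpus": 2, "memory": 1024,
#     "spindle_use": 1, "nodes": ["node1", "node2"],
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd", "hypervisor": "xen-pvm",
#     "disk_space_total": ...,
#   }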
14947 def _AddNewInstance(self):
14948 """Add new instance data to allocator structure.
14950 This in combination with _ComputeClusterData will create the
14951 correct structure needed as input for the allocator.
14953 The checks for the completeness of the opcode must have already been
14957 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14959 if self.disk_template in constants.DTS_INT_MIRROR:
14960 self.required_nodes = 2
14962 self.required_nodes = 1
14966 "disk_template": self.disk_template,
14969 "vcpus": self.vcpus,
14970 "memory": self.memory,
14971 "spindle_use": self.spindle_use,
14972 "disks": self.disks,
14973 "disk_space_total": disk_space,
14975 "required_nodes": self.required_nodes,
14976 "hypervisor": self.hypervisor,
14981 def _AddRelocateInstance(self):
14982 """Add relocate instance data to allocator structure.
14984 This in combination with _ComputeClusterData will create the
14985 correct structure needed as input for the allocator.
14987 The checks for the completeness of the opcode must have already been
14991 instance = self.cfg.GetInstanceInfo(self.name)
14992 if instance is None:
14993 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14994 " IAllocator" % self.name)
14996 if instance.disk_template not in constants.DTS_MIRRORED:
14997 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14998 errors.ECODE_INVAL)
15000 if instance.disk_template in constants.DTS_INT_MIRROR and \
15001 len(instance.secondary_nodes) != 1:
15002 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15003 errors.ECODE_STATE)
15005 self.required_nodes = 1
15006 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15007 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15011 "disk_space_total": disk_space,
15012 "required_nodes": self.required_nodes,
15013 "relocate_from": self.relocate_from,
15017 def _AddNodeEvacuate(self):
15018 """Get data for node-evacuate requests.
15022 "instances": self.instances,
15023 "evac_mode": self.evac_mode,
15026 def _AddChangeGroup(self):
15027 """Get data for node-evacuate requests.
15031 "instances": self.instances,
15032 "target_groups": self.target_groups,
15035 def _BuildInputData(self, fn, keydata):
15036 """Build input data structures.
15039 self._ComputeClusterData()
15042 request["type"] = self.mode
15043 for keyname, keytype in keydata:
15044 if keyname not in request:
15045 raise errors.ProgrammerError("Request parameter %s is missing" %
15047 val = request[keyname]
15048 if not keytype(val):
15049 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15050 " validation, value %s, expected"
15051 " type %s" % (keyname, val, keytype))
15052 self.in_data["request"] = request
15054 self.in_text = serializer.Dump(self.in_data)
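# Illustrative only: for a relocation request, the serialized input text
# roughly contains (next to the cluster/node/instance data computed above):
#
#   "request": {
#     "type": "relocate",
#     "name": "inst1.example.com",
#     "relocate_from": ["node2.example.com"],
#     "disk_space_total": ...,
#     "required_nodes": 1,
#   }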
15056 _STRING_LIST = ht.TListOf(ht.TString)
15057 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15058 # pylint: disable=E1101
15059 # Class '...' has no 'OP_ID' member
15060 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15061 opcodes.OpInstanceMigrate.OP_ID,
15062 opcodes.OpInstanceReplaceDisks.OP_ID])
15066 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15067 ht.TItems([ht.TNonEmptyString,
15068 ht.TNonEmptyString,
15069 ht.TListOf(ht.TNonEmptyString),
15072 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15073 ht.TItems([ht.TNonEmptyString,
15076 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15077 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
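# Illustrative node-evacuation result matching _NEVAC_RESULT (names made up):
#
#   [[["inst1.example.com", "group2", ["node3", "node4"]]],        # moved
#    [["inst2.example.com", "disk template not mirrored"]],        # failed
#    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]                    # jobs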
15080 constants.IALLOCATOR_MODE_ALLOC:
15083 ("name", ht.TString),
15084 ("memory", ht.TInt),
15085 ("spindle_use", ht.TInt),
15086 ("disks", ht.TListOf(ht.TDict)),
15087 ("disk_template", ht.TString),
15088 ("os", ht.TString),
15089 ("tags", _STRING_LIST),
15090 ("nics", ht.TListOf(ht.TDict)),
15091 ("vcpus", ht.TInt),
15092 ("hypervisor", ht.TString),
15094 constants.IALLOCATOR_MODE_RELOC:
15095 (_AddRelocateInstance,
15096 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15098 constants.IALLOCATOR_MODE_NODE_EVAC:
15099 (_AddNodeEvacuate, [
15100 ("instances", _STRING_LIST),
15101 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15103 constants.IALLOCATOR_MODE_CHG_GROUP:
15104 (_AddChangeGroup, [
15105 ("instances", _STRING_LIST),
15106 ("target_groups", _STRING_LIST),
15110 def Run(self, name, validate=True, call_fn=None):
15111 """Run an instance allocator and return the results.
15114 if call_fn is None:
15115 call_fn = self.rpc.call_iallocator_runner
15117 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15118 result.Raise("Failure while running the iallocator script")
15120 self.out_text = result.payload
15122 self._ValidateResult()
15124 def _ValidateResult(self):
15125 """Process the allocator results.
15127 This will parse and validate the allocator output and, if successful,
15128 save the result in self.out_data and the other result attributes.
15132 rdict = serializer.Load(self.out_text)
15133 except Exception, err:
15134 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15136 if not isinstance(rdict, dict):
15137 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15139 # TODO: remove backwards compatibility in later versions
15140 if "nodes" in rdict and "result" not in rdict:
15141 rdict["result"] = rdict["nodes"]
15144 for key in "success", "info", "result":
15145 if key not in rdict:
15146 raise errors.OpExecError("Can't parse iallocator results:"
15147 " missing key '%s'" % key)
15148 setattr(self, key, rdict[key])
15150 if not self._result_check(self.result):
15151 raise errors.OpExecError("Iallocator returned invalid result,"
15152 " expected %s, got %s" %
15153 (self._result_check, self.result),
15154 errors.ECODE_INVAL)
15156 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15157 assert self.relocate_from is not None
15158 assert self.required_nodes == 1
15160 node2group = dict((name, ndata["group"])
15161 for (name, ndata) in self.in_data["nodes"].items())
15163 fn = compat.partial(self._NodesToGroups, node2group,
15164 self.in_data["nodegroups"])
15166 instance = self.cfg.GetInstanceInfo(self.name)
15167 request_groups = fn(self.relocate_from + [instance.primary_node])
15168 result_groups = fn(rdict["result"] + [instance.primary_node])
15170 if self.success and not set(result_groups).issubset(request_groups):
15171 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15172 " differ from original groups (%s)" %
15173 (utils.CommaJoin(result_groups),
15174 utils.CommaJoin(request_groups)))
15176 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15177 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15179 self.out_data = rdict
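# Illustrative only: a successful reply from the allocator script, as parsed
# above, roughly looks like (values made up):
#
#   {"success": True,
#    "info": "allocation successful",
#    "result": ["node3.example.com", "node4.example.com"]}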
15182 def _NodesToGroups(node2group, groups, nodes):
15183 """Returns a list of unique group names for a list of nodes.
15185 @type node2group: dict
15186 @param node2group: Map from node name to group UUID
15188 @param groups: Group information
15190 @param nodes: Node names
15197 group_uuid = node2group[node]
15199 # Ignore unknown node
15203 group = groups[group_uuid]
15205 # Can't find group, let's use UUID
15206 group_name = group_uuid
15208 group_name = group["name"]
15210 result.add(group_name)
15212 return sorted(result)
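# Illustrative example (made-up data):
#
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
#   _NodesToGroups(node2group, groups, ["node1", "node2", "node9"])
#   --> ["default", "rack2"]   # the unknown "node9" is silently ignored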
15215 class LUTestAllocator(NoHooksLU):
15216 """Run allocator tests.
15218 This LU runs the allocator tests.
15221 def CheckPrereq(self):
15222 """Check prerequisites.
15224 This checks the opcode parameters depending on the direction and mode of the test.
15227 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15228 for attr in ["memory", "disks", "disk_template",
15229 "os", "tags", "nics", "vcpus"]:
15230 if not hasattr(self.op, attr):
15231 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15232 attr, errors.ECODE_INVAL)
15233 iname = self.cfg.ExpandInstanceName(self.op.name)
15234 if iname is not None:
15235 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15236 iname, errors.ECODE_EXISTS)
15237 if not isinstance(self.op.nics, list):
15238 raise errors.OpPrereqError("Invalid parameter 'nics'",
15239 errors.ECODE_INVAL)
15240 if not isinstance(self.op.disks, list):
15241 raise errors.OpPrereqError("Invalid parameter 'disks'",
15242 errors.ECODE_INVAL)
15243 for row in self.op.disks:
15244 if (not isinstance(row, dict) or
15245 constants.IDISK_SIZE not in row or
15246 not isinstance(row[constants.IDISK_SIZE], int) or
15247 constants.IDISK_MODE not in row or
15248 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15249 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15250 " parameter", errors.ECODE_INVAL)
15251 if self.op.hypervisor is None:
15252 self.op.hypervisor = self.cfg.GetHypervisorType()
15253 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15254 fname = _ExpandInstanceName(self.cfg, self.op.name)
15255 self.op.name = fname
15256 self.relocate_from = \
15257 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15258 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15259 constants.IALLOCATOR_MODE_NODE_EVAC):
15260 if not self.op.instances:
15261 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15262 self.op.instances = _GetWantedInstances(self, self.op.instances)
15264 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15265 self.op.mode, errors.ECODE_INVAL)
15267 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15268 if self.op.allocator is None:
15269 raise errors.OpPrereqError("Missing allocator name",
15270 errors.ECODE_INVAL)
15271 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15272 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15273 self.op.direction, errors.ECODE_INVAL)
15275 def Exec(self, feedback_fn):
15276 """Run the allocator test.
15279 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15280 ial = IAllocator(self.cfg, self.rpc,
15283 memory=self.op.memory,
15284 disks=self.op.disks,
15285 disk_template=self.op.disk_template,
15289 vcpus=self.op.vcpus,
15290 hypervisor=self.op.hypervisor,
15292 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15293 ial = IAllocator(self.cfg, self.rpc,
15296 relocate_from=list(self.relocate_from),
15298 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15299 ial = IAllocator(self.cfg, self.rpc,
15301 instances=self.op.instances,
15302 target_groups=self.op.target_groups)
15303 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15304 ial = IAllocator(self.cfg, self.rpc,
15306 instances=self.op.instances,
15307 evac_mode=self.op.evac_mode)
15309 raise errors.ProgrammerError("Uncatched mode %s in"
15310 " LUTestAllocator.Exec", self.op.mode)
15312 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15313 result = ial.in_text
15315 ial.Run(self.op.allocator, validate=False)
15316 result = ial.out_text
15320 #: Query type implementations
15322 constants.QR_CLUSTER: _ClusterQuery,
15323 constants.QR_INSTANCE: _InstanceQuery,
15324 constants.QR_NODE: _NodeQuery,
15325 constants.QR_GROUP: _GroupQuery,
15326 constants.QR_OS: _OsQuery,
15327 constants.QR_EXPORT: _ExportQuery,
15330 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15333 def _GetQueryImplementation(name):
15334 """Returns the implemtnation for a query type.
15336 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15340 return _QUERY_IMPL[name]
15342 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15343 errors.ECODE_INVAL)
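# Illustrative only: callers look up the implementation for an opcode-driven
# query and instantiate it, e.g.:
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery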