4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 # contained in the C{jobs} attribute and include the job IDs in the opcode result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
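# A minimal usage sketch for ResultWithJobs (names are illustrative): an LU
# that wants follow-up jobs submitted on its behalf ends its Exec with, e.g.:
#
#   jobs = [[opcodes.OpClusterVerifyGroup(group_name=name)] for name in groups]
#   return ResultWithJobs(jobs)
#
# LUClusterVerify.Exec below uses this exact pattern.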
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods need no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same time.
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are allowed.
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no such nodes, return an
318 empty list (not C{None}).
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
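  # A minimal sketch of these two hook methods for a hypothetical single-node
  # LU (self.op.node_name is an assumed opcode slot):
  #
  #   def BuildHooksEnv(self):
  #     return {"OP_TARGET": self.op.node_name}
  #
  #   def BuildHooksNodes(self):
  #     mn = self.cfg.GetMasterNode()
  #     return ([mn], [mn, self.op.node_name])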
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged but any LU can define it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the unused-argument and
345 # could-be-a-function warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instance's nodes, or
381 to just lock primaries or secondary nodes, if needed.
383 It should be called from DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
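# A typical caller of _LockInstancesNodes: an instance-level LU that set
# self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE in
# ExpandNames declares its node locks roughly like this (sketch):
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()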
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU,
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
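# Query-style LUs use this helper directly, e.g.:
#   self.share_locks = _ShareAll()
# (see LUClusterVerifyConfig.ExpandNames below).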
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of their children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @type use_default: boolean
755 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @type use_none: boolean
758 @param use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
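# A small worked example of the merge semantics above (values illustrative):
# with use_default=True,
#   _GetUpdatedParams({"a": 1, "b": 2}, {"b": constants.VALUE_DEFAULT, "c": 3})
# returns {"a": 1, "c": 3}: "b" is reset to its default (dropped from the
# dict), "c" is added and "a" is left untouched.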
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of a instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster'" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
835 new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846 """Combines the hv state from an opcode with the one of the object
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868 """Combines the disk state from an opcode with the one of the object
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905 should_release = names.__contains__
907 should_release = lambda name: name not in keep
909 should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
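# Usage sketch: after an LU has narrowed its work down to a single instance,
# it typically drops the node locks it no longer needs, e.g. (illustrative):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=self.cfg.GetInstanceInfo(instance_name).all_nodes)
#
# or releases the whole level with _ReleaseLocks(self, locking.LEVEL_NODE).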
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 # pylint: disable=W0702
962 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node is not supporting the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
1120 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1121 """Computes if value is in the desired range.
1123 @param name: name of the parameter for which we perform the check
1124 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1126 @param ipolicy: dictionary containing min, max and std values
1127 @param value: actual value that we want to use
1128 @return: None or element not meeting the criteria
1132 if value in [None, constants.VALUE_AUTO]:
1134 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1135 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1136 if value > max_v or min_v > value:
1138 fqn = "%s/%s" % (name, qualifier)
1141 return ("%s value %s is not in range [%s, %s]" %
1142 (fqn, value, min_v, max_v))
1146 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1147 nic_count, disk_sizes, spindle_use,
1148 _compute_fn=_ComputeMinMaxSpec):
1149 """Verifies ipolicy against provided specs.
1152 @param ipolicy: The ipolicy
1154 @param mem_size: The memory size
1155 @type cpu_count: int
1156 @param cpu_count: Used cpu cores
1157 @type disk_count: int
1158 @param disk_count: Number of disks used
1159 @type nic_count: int
1160 @param nic_count: Number of nics used
1161 @type disk_sizes: list of ints
1162 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1163 @type spindle_use: int
1164 @param spindle_use: The number of spindles this instance uses
1165 @param _compute_fn: The compute function (unittest only)
1166 @return: A list of violations, or an empty list if no violations are found
1169 assert disk_count == len(disk_sizes)
1172 (constants.ISPEC_MEM_SIZE, "", mem_size),
1173 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1174 (constants.ISPEC_DISK_COUNT, "", disk_count),
1175 (constants.ISPEC_NIC_COUNT, "", nic_count),
1176 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1177 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1178 for idx, d in enumerate(disk_sizes)]
1181 (_compute_fn(name, qualifier, ipolicy, value)
1182 for (name, qualifier, value) in test_settings))
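# Example of the check above (illustrative numbers): for an ipolicy whose
# memory bounds are min=128 and max=32768,
#   _ComputeIPolicySpecViolation(ipolicy, 512, 1, 1, 1, [1024], 1)
# returns [] when all values fall inside the policy's bounds, whereas
# mem_size=65536 would produce a single message saying that the value is
# not in range [128, 32768].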
1185 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1186 _compute_fn=_ComputeIPolicySpecViolation):
1187 """Compute if instance meets the specs of ipolicy.
1190 @param ipolicy: The ipolicy to verify against
1191 @type instance: L{objects.Instance}
1192 @param instance: The instance to verify
1193 @param _compute_fn: The function to verify ipolicy (unittest only)
1194 @see: L{_ComputeIPolicySpecViolation}
1197 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1198 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1199 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1200 disk_count = len(instance.disks)
1201 disk_sizes = [disk.size for disk in instance.disks]
1202 nic_count = len(instance.nics)
1204 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1205 disk_sizes, spindle_use)
1208 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1209 _compute_fn=_ComputeIPolicySpecViolation):
1210 """Compute if instance specs meets the specs of ipolicy.
1213 @param ipolicy: The ipolicy to verify against
1214 @type instance_spec: dict
1215 @param instance_spec: The instance spec to verify
1216 @param _compute_fn: The function to verify ipolicy (unittest only)
1217 @see: L{_ComputeIPolicySpecViolation}
1220 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1221 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1222 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1223 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1224 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1225 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1227 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1228 disk_sizes, spindle_use)
1231 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1233 _compute_fn=_ComputeIPolicyInstanceViolation):
1234 """Compute if instance meets the specs of the new target group.
1236 @param ipolicy: The ipolicy to verify
1237 @param instance: The instance object to verify
1238 @param current_group: The current group of the instance
1239 @param target_group: The new group of the instance
1240 @param _compute_fn: The function to verify ipolicy (unittest only)
1241 @see: L{_ComputeIPolicySpecViolation}
1244 if current_group == target_group:
1247 return _compute_fn(ipolicy, instance)
1250 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1251 _compute_fn=_ComputeIPolicyNodeViolation):
1252 """Checks that the target node is correct in terms of instance policy.
1254 @param ipolicy: The ipolicy to verify
1255 @param instance: The instance object to verify
1256 @param node: The new node to relocate
1257 @param ignore: Ignore violations of the ipolicy
1258 @param _compute_fn: The function to verify ipolicy (unittest only)
1259 @see: L{_ComputeIPolicySpecViolation}
1262 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1263 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1266 msg = ("Instance does not meet target node group's (%s) instance"
1267 " policy: %s") % (node.group, utils.CommaJoin(res))
1271 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1274 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1275 """Computes a set of any instances that would violate the new ipolicy.
1277 @param old_ipolicy: The current (still in-place) ipolicy
1278 @param new_ipolicy: The new (to become) ipolicy
1279 @param instances: List of instances to verify
1280 @return: A list of instances which violate the new ipolicy but
1284 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1285 _ComputeViolatingInstances(old_ipolicy, instances))
1288 def _ExpandItemName(fn, name, kind):
1289 """Expand an item name.
1291 @param fn: the function to use for expansion
1292 @param name: requested item name
1293 @param kind: text description ('Node' or 'Instance')
1294 @return: the resolved (full) name
1295 @raise errors.OpPrereqError: if the item is not found
1298 full_name = fn(name)
1299 if full_name is None:
1300 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1305 def _ExpandNodeName(cfg, name):
1306 """Wrapper over L{_ExpandItemName} for nodes."""
1307 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1310 def _ExpandInstanceName(cfg, name):
1311 """Wrapper over L{_ExpandItemName} for instance."""
1312 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1315 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1316 minmem, maxmem, vcpus, nics, disk_template, disks,
1317 bep, hvp, hypervisor_name, tags):
1318 """Builds instance related env variables for hooks
1320 This builds the hook environment from individual variables.
1323 @param name: the name of the instance
1324 @type primary_node: string
1325 @param primary_node: the name of the instance's primary node
1326 @type secondary_nodes: list
1327 @param secondary_nodes: list of secondary nodes as strings
1328 @type os_type: string
1329 @param os_type: the name of the instance's OS
1330 @type status: string
1331 @param status: the desired status of the instance
1332 @type minmem: string
1333 @param minmem: the minimum memory size of the instance
1334 @type maxmem: string
1335 @param maxmem: the maximum memory size of the instance
1337 @param vcpus: the count of VCPUs the instance has
1339 @param nics: list of tuples (ip, mac, mode, link) representing
1340 the NICs the instance has
1341 @type disk_template: string
1342 @param disk_template: the disk template of the instance
1344 @param disks: the list of (size, mode) pairs
1346 @param bep: the backend parameters for the instance
1348 @param hvp: the hypervisor parameters for the instance
1349 @type hypervisor_name: string
1350 @param hypervisor_name: the hypervisor for the instance
1352 @param tags: list of instance tags as strings
1354 @return: the hook environment for this instance
1359 "INSTANCE_NAME": name,
1360 "INSTANCE_PRIMARY": primary_node,
1361 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1362 "INSTANCE_OS_TYPE": os_type,
1363 "INSTANCE_STATUS": status,
1364 "INSTANCE_MINMEM": minmem,
1365 "INSTANCE_MAXMEM": maxmem,
1366 # TODO(2.7) remove deprecated "memory" value
1367 "INSTANCE_MEMORY": maxmem,
1368 "INSTANCE_VCPUS": vcpus,
1369 "INSTANCE_DISK_TEMPLATE": disk_template,
1370 "INSTANCE_HYPERVISOR": hypervisor_name,
1373 nic_count = len(nics)
1374 for idx, (ip, mac, mode, link) in enumerate(nics):
1377 env["INSTANCE_NIC%d_IP" % idx] = ip
1378 env["INSTANCE_NIC%d_MAC" % idx] = mac
1379 env["INSTANCE_NIC%d_MODE" % idx] = mode
1380 env["INSTANCE_NIC%d_LINK" % idx] = link
1381 if mode == constants.NIC_MODE_BRIDGED:
1382 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1386 env["INSTANCE_NIC_COUNT"] = nic_count
1389 disk_count = len(disks)
1390 for idx, (size, mode) in enumerate(disks):
1391 env["INSTANCE_DISK%d_SIZE" % idx] = size
1392 env["INSTANCE_DISK%d_MODE" % idx] = mode
1396 env["INSTANCE_DISK_COUNT"] = disk_count
1401 env["INSTANCE_TAGS"] = " ".join(tags)
1403 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1404 for key, value in source.items():
1405 env["INSTANCE_%s_%s" % (kind, key)] = value
1410 def _NICListToTuple(lu, nics):
1411 """Build a list of nic information tuples.
1413 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1414 value in LUInstanceQueryData.
1416 @type lu: L{LogicalUnit}
1417 @param lu: the logical unit on whose behalf we execute
1418 @type nics: list of L{objects.NIC}
1419 @param nics: list of nics to convert to hooks tuples
1423 cluster = lu.cfg.GetClusterInfo()
1427 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1428 mode = filled_params[constants.NIC_MODE]
1429 link = filled_params[constants.NIC_LINK]
1430 hooks_nics.append((ip, mac, mode, link))
1434 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1435 """Builds instance related env variables for hooks from an object.
1437 @type lu: L{LogicalUnit}
1438 @param lu: the logical unit on whose behalf we execute
1439 @type instance: L{objects.Instance}
1440 @param instance: the instance for which we should build the
1442 @type override: dict
1443 @param override: dictionary with key/values that will override
1446 @return: the hook environment dictionary
1449 cluster = lu.cfg.GetClusterInfo()
1450 bep = cluster.FillBE(instance)
1451 hvp = cluster.FillHV(instance)
1453 "name": instance.name,
1454 "primary_node": instance.primary_node,
1455 "secondary_nodes": instance.secondary_nodes,
1456 "os_type": instance.os,
1457 "status": instance.admin_state,
1458 "maxmem": bep[constants.BE_MAXMEM],
1459 "minmem": bep[constants.BE_MINMEM],
1460 "vcpus": bep[constants.BE_VCPUS],
1461 "nics": _NICListToTuple(lu, instance.nics),
1462 "disk_template": instance.disk_template,
1463 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1466 "hypervisor_name": instance.hypervisor,
1467 "tags": instance.tags,
1470 args.update(override)
1471 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1474 def _AdjustCandidatePool(lu, exceptions):
1475 """Adjust the candidate pool after node operations.
1478 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1480 lu.LogInfo("Promoted nodes to master candidate role: %s",
1481 utils.CommaJoin(node.name for node in mod_list))
1482 for name in mod_list:
1483 lu.context.ReaddNode(name)
1484 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1486 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490 def _DecideSelfPromotion(lu, exceptions=None):
1491 """Decide whether I should promote myself as a master candidate.
1494 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1495 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496 # the new node will increase mc_max by one, so:
1497 mc_should = min(mc_should + 1, cp_size)
1498 return mc_now < mc_should
1501 def _CalculateGroupIPolicy(cluster, group):
1502 """Calculate instance policy for group.
1505 return cluster.SimpleFillIPolicy(group.ipolicy)
1508 def _ComputeViolatingInstances(ipolicy, instances):
1509 """Computes a set of instances who violates given ipolicy.
1511 @param ipolicy: The ipolicy to verify
1512 @type instances: object.Instance
1513 @param instances: List of instances to verify
1514 @return: A frozenset of instance names violating the ipolicy
1517 return frozenset([inst.name for inst in instances
1518 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1521 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1522 """Check that the brigdes needed by a list of nics exist.
1525 cluster = lu.cfg.GetClusterInfo()
1526 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1527 brlist = [params[constants.NIC_LINK] for params in paramslist
1528 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1530 result = lu.rpc.call_bridges_exist(target_node, brlist)
1531 result.Raise("Error checking bridges on destination node '%s'" %
1532 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1535 def _CheckInstanceBridgesExist(lu, instance, node=None):
1536 """Check that the brigdes needed by an instance exist.
1540 node = instance.primary_node
1541 _CheckNicsBridgesExist(lu, instance.nics, node)
1544 def _CheckOSVariant(os_obj, name):
1545 """Check whether an OS name conforms to the os variants specification.
1547 @type os_obj: L{objects.OS}
1548 @param os_obj: OS object to check
1550 @param name: OS name passed by the user, to check for validity
1553 variant = objects.OS.GetVariant(name)
1554 if not os_obj.supported_variants:
1556 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1557 " passed)" % (os_obj.name, variant),
1561 raise errors.OpPrereqError("OS name must include a variant",
1564 if variant not in os_obj.supported_variants:
1565 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1568 def _GetNodeInstancesInner(cfg, fn):
1569 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1572 def _GetNodeInstances(cfg, node_name):
1573 """Returns a list of all primary and secondary instances on a node.
1577 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1580 def _GetNodePrimaryInstances(cfg, node_name):
1581 """Returns primary instances on a node.
1584 return _GetNodeInstancesInner(cfg,
1585 lambda inst: node_name == inst.primary_node)
1588 def _GetNodeSecondaryInstances(cfg, node_name):
1589 """Returns secondary instances on a node.
1592 return _GetNodeInstancesInner(cfg,
1593 lambda inst: node_name in inst.secondary_nodes)
1596 def _GetStorageTypeArgs(cfg, storage_type):
1597 """Returns the arguments for a storage type.
1600 # Special case for file storage
1601 if storage_type == constants.ST_FILE:
1602 # storage.FileStorage wants a list of storage directories
1603 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1608 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1611 for dev in instance.disks:
1612 cfg.SetDiskID(dev, node_name)
1614 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1616 result.Raise("Failed to get disk status from node %s" % node_name,
1617 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1619 for idx, bdev_status in enumerate(result.payload):
1620 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1626 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1627 """Check the sanity of iallocator and node arguments and use the
1628 cluster-wide iallocator if appropriate.
1630 Check that at most one of (iallocator, node) is specified. If none is
1631 specified, then the LU's opcode's iallocator slot is filled with the
1632 cluster-wide default iallocator.
1634 @type iallocator_slot: string
1635 @param iallocator_slot: the name of the opcode iallocator slot
1636 @type node_slot: string
1637 @param node_slot: the name of the opcode target node slot
1640 node = getattr(lu.op, node_slot, None)
1641 iallocator = getattr(lu.op, iallocator_slot, None)
1643 if node is not None and iallocator is not None:
1644 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1646 elif node is None and iallocator is None:
1647 default_iallocator = lu.cfg.GetDefaultIAllocator()
1648 if default_iallocator:
1649 setattr(lu.op, iallocator_slot, default_iallocator)
1651 raise errors.OpPrereqError("No iallocator or node given and no"
1652 " cluster-wide default iallocator found;"
1653 " please specify either an iallocator or a"
1654 " node, or set a cluster-wide default"
1658 def _GetDefaultIAllocator(cfg, iallocator):
1659 """Decides on which iallocator to use.
1661 @type cfg: L{config.ConfigWriter}
1662 @param cfg: Cluster configuration object
1663 @type iallocator: string or None
1664 @param iallocator: Iallocator specified in opcode
1666 @return: Iallocator name
1670 # Use default iallocator
1671 iallocator = cfg.GetDefaultIAllocator()
1674 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1675 " opcode nor as a cluster-wide default",
1681 class LUClusterPostInit(LogicalUnit):
1682 """Logical unit for running hooks after cluster initialization.
1685 HPATH = "cluster-init"
1686 HTYPE = constants.HTYPE_CLUSTER
1688 def BuildHooksEnv(self):
1693 "OP_TARGET": self.cfg.GetClusterName(),
1696 def BuildHooksNodes(self):
1697 """Build hooks nodes.
1700 return ([], [self.cfg.GetMasterNode()])
1702 def Exec(self, feedback_fn):
1709 class LUClusterDestroy(LogicalUnit):
1710 """Logical unit for destroying the cluster.
1713 HPATH = "cluster-destroy"
1714 HTYPE = constants.HTYPE_CLUSTER
1716 def BuildHooksEnv(self):
1721 "OP_TARGET": self.cfg.GetClusterName(),
1724 def BuildHooksNodes(self):
1725 """Build hooks nodes.
1730 def CheckPrereq(self):
1731 """Check prerequisites.
1733 This checks whether the cluster is empty.
1735 Any errors are signaled by raising errors.OpPrereqError.
1738 master = self.cfg.GetMasterNode()
1740 nodelist = self.cfg.GetNodeList()
1741 if len(nodelist) != 1 or nodelist[0] != master:
1742 raise errors.OpPrereqError("There are still %d node(s) in"
1743 " this cluster." % (len(nodelist) - 1),
1745 instancelist = self.cfg.GetInstanceList()
1747 raise errors.OpPrereqError("There are still %d instance(s) in"
1748 " this cluster." % len(instancelist),
1751 def Exec(self, feedback_fn):
1752 """Destroys the cluster.
1755 master_params = self.cfg.GetMasterNetworkParameters()
1757 # Run post hooks on master node before it's removed
1758 _RunPostHook(self, master_params.name)
1760 ems = self.cfg.GetUseExternalMipScript()
1761 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1764 self.LogWarning("Error disabling the master IP address: %s",
1767 return master_params.name
1770 def _VerifyCertificate(filename):
1771 """Verifies a certificate for L{LUClusterVerifyConfig}.
1773 @type filename: string
1774 @param filename: Path to PEM file
1778 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1779 utils.ReadFile(filename))
1780 except Exception, err: # pylint: disable=W0703
1781 return (LUClusterVerifyConfig.ETYPE_ERROR,
1782 "Failed to load X509 certificate %s: %s" % (filename, err))
1785 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1786 constants.SSL_CERT_EXPIRATION_ERROR)
1789 fnamemsg = "While verifying %s: %s" % (filename, msg)
1794 return (None, fnamemsg)
1795 elif errcode == utils.CERT_WARNING:
1796 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1797 elif errcode == utils.CERT_ERROR:
1798 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1800 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1803 def _GetAllHypervisorParameters(cluster, instances):
1804 """Compute the set of all hypervisor parameters.
1806 @type cluster: L{objects.Cluster}
1807 @param cluster: the cluster object
1808 @param instances: list of L{objects.Instance}
1809 @param instances: additional instances from which to obtain parameters
1810 @rtype: list of (origin, hypervisor, parameters)
1811 @return: a list with all parameters found, indicating the hypervisor they
1812 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1817 for hv_name in cluster.enabled_hypervisors:
1818 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1820 for os_name, os_hvp in cluster.os_hvp.items():
1821 for hv_name, hv_params in os_hvp.items():
1823 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1824 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1826 # TODO: collapse identical parameter values in a single one
1827 for instance in instances:
1828 if instance.hvparams:
1829 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1830 cluster.FillHV(instance)))
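# The resulting list therefore mixes entries of the form (illustrative):
#   ("cluster", "kvm", <cluster-level defaults>)
#   ("os debian-8", "kvm", <defaults overridden per OS>)
#   ("instance inst1.example.com", "kvm", <fully filled instance parameters>)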
1835 class _VerifyErrors(object):
1836 """Mix-in for cluster/group verify LUs.
1838 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1839 self.op and self._feedback_fn to be available.)
1843 ETYPE_FIELD = "code"
1844 ETYPE_ERROR = "ERROR"
1845 ETYPE_WARNING = "WARNING"
1847 def _Error(self, ecode, item, msg, *args, **kwargs):
1848 """Format an error message.
1850 Based on the opcode's error_codes parameter, either format a
1851 parseable error code, or a simpler error string.
1853 This must be called only from Exec and functions called from Exec.
1856 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1857 itype, etxt, _ = ecode
1858 # first complete the msg
1861 # then format the whole message
1862 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1863 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1869 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1870 # and finally report it via the feedback_fn
1871 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1873 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1874 """Log an error message if the passed condition is True.
1878 or self.op.debug_simulate_errors) # pylint: disable=E1101
1880 # If the error code is in the list of ignored errors, demote the error to a warning
1882 (_, etxt, _) = ecode
1883 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1884 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1887 self._Error(ecode, *args, **kwargs)
1889 # do not mark the operation as failed for WARN cases only
1890 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1891 self.bad = self.bad or cond
1894 class LUClusterVerify(NoHooksLU):
1895 """Submits all jobs necessary to verify the cluster.
1900 def ExpandNames(self):
1901 self.needed_locks = {}
1903 def Exec(self, feedback_fn):
1906 if self.op.group_name:
1907 groups = [self.op.group_name]
1908 depends_fn = lambda: None
1910 groups = self.cfg.GetNodeGroupList()
1912 # Verify global configuration
1914 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1917 # Always depend on global verification
1918 depends_fn = lambda: [(-len(jobs), [])]
1920 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1921 ignore_errors=self.op.ignore_errors,
1922 depends=depends_fn())]
1923 for group in groups)
1925 # Fix up all parameters
1926 for op in itertools.chain(*jobs): # pylint: disable=W0142
1927 op.debug_simulate_errors = self.op.debug_simulate_errors
1928 op.verbose = self.op.verbose
1929 op.error_codes = self.op.error_codes
1931 op.skip_checks = self.op.skip_checks
1932 except AttributeError:
1933 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1935 return ResultWithJobs(jobs)
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939 """Verifies the cluster config.
1944 def _VerifyHVP(self, hvp_data):
1945 """Verifies locally the syntax of the hypervisor parameters.
1948 for item, hv_name, hv_params in hvp_data:
1949 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1952 hv_class = hypervisor.GetHypervisor(hv_name)
1953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954 hv_class.CheckParameterSyntax(hv_params)
1955 except errors.GenericError, err:
1956 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958 def ExpandNames(self):
1959 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960 self.share_locks = _ShareAll()
1962 def CheckPrereq(self):
1963 """Check prerequisites.
1966 # Retrieve all information
1967 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968 self.all_node_info = self.cfg.GetAllNodesInfo()
1969 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971 def Exec(self, feedback_fn):
1972     """Verify integrity of cluster, performing various tests on nodes.
1976 self._feedback_fn = feedback_fn
1978 feedback_fn("* Verifying cluster config")
1980 for msg in self.cfg.VerifyConfig():
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983 feedback_fn("* Verifying cluster certificate files")
1985 for cert_filename in constants.ALL_CERT_FILES:
1986 (errcode, msg) = _VerifyCertificate(cert_filename)
1987 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989 feedback_fn("* Verifying hypervisor parameters")
1991 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992 self.all_inst_info.values()))
1994 feedback_fn("* Verifying all nodes belong to an existing group")
1996 # We do this verification here because, should this bogus circumstance
1997 # occur, it would never be caught by VerifyGroup, which only acts on
1998 # nodes/instances reachable from existing node groups.
2000 dangling_nodes = set(node.name for node in self.all_node_info.values()
2001 if node.group not in self.all_group_info)
2003 dangling_instances = {}
2004 no_node_instances = []
2006 for inst in self.all_inst_info.values():
2007 if inst.primary_node in dangling_nodes:
2008 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009 elif inst.primary_node not in self.all_node_info:
2010 no_node_instances.append(inst.name)
2015 utils.CommaJoin(dangling_instances.get(node.name,
2017 for node in dangling_nodes]
2019 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2021 "the following nodes (and their instances) belong to a non"
2022 " existing group: %s", utils.CommaJoin(pretty_dangling))
2024 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2026 "the following instances have a non-existing primary-node:"
2027 " %s", utils.CommaJoin(no_node_instances))
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033 """Verifies the status of a node group.
2036 HPATH = "cluster-verify"
2037 HTYPE = constants.HTYPE_CLUSTER
2040 _HOOKS_INDENT_RE = re.compile("^", re.M)
2042 class NodeImage(object):
2043 """A class representing the logical and physical status of a node.
2046 @ivar name: the node name to which this object refers
2047 @ivar volumes: a structure as returned from
2048 L{ganeti.backend.GetVolumeList} (runtime)
2049 @ivar instances: a list of running instances (runtime)
2050 @ivar pinst: list of configured primary instances (config)
2051 @ivar sinst: list of configured secondary instances (config)
2052 @ivar sbp: dictionary of {primary-node: list of instances} for all
2053 instances for which this node is secondary (config)
2054 @ivar mfree: free memory, as reported by hypervisor (runtime)
2055 @ivar dfree: free disk, as reported by the node (runtime)
2056 @ivar offline: the offline status (config)
2057 @type rpc_fail: boolean
2058     @ivar rpc_fail: whether the RPC verify call was successful (overall,
2059 not whether the individual keys were correct) (runtime)
2060 @type lvm_fail: boolean
2061 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062 @type hyp_fail: boolean
2063 @ivar hyp_fail: whether the RPC call didn't return the instance list
2064 @type ghost: boolean
2065 @ivar ghost: whether this is a known node or not (config)
2066 @type os_fail: boolean
2067 @ivar os_fail: whether the RPC call didn't return valid OS data
2069 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070 @type vm_capable: boolean
2071 @ivar vm_capable: whether the node can host instances
2074 def __init__(self, offline=False, name=None, vm_capable=True):
2083 self.offline = offline
2084 self.vm_capable = vm_capable
2085 self.rpc_fail = False
2086 self.lvm_fail = False
2087 self.hyp_fail = False
2089 self.os_fail = False
2092 def ExpandNames(self):
2093 # This raises errors.OpPrereqError on its own:
2094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2096 # Get instances in node group; this is unsafe and needs verification later
2098 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2100 self.needed_locks = {
2101 locking.LEVEL_INSTANCE: inst_names,
2102 locking.LEVEL_NODEGROUP: [self.group_uuid],
2103 locking.LEVEL_NODE: [],
2106 self.share_locks = _ShareAll()
2108 def DeclareLocks(self, level):
2109 if level == locking.LEVEL_NODE:
2110 # Get members of node group; this is unsafe and needs verification later
2111 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2113 all_inst_info = self.cfg.GetAllInstancesInfo()
2115 # In Exec(), we warn about mirrored instances that have primary and
2116 # secondary living in separate node groups. To fully verify that
2117 # volumes for these instances are healthy, we will need to do an
2118       # extra call to their secondaries. We ensure here those nodes will be locked.
2120 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121 # Important: access only the instances whose lock is owned
2122 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123 nodes.update(all_inst_info[inst].secondary_nodes)
2125 self.needed_locks[locking.LEVEL_NODE] = nodes
2127 def CheckPrereq(self):
2128 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2129 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2131 group_nodes = set(self.group_info.members)
2133 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2136 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2138 unlocked_instances = \
2139 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2142 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2143 utils.CommaJoin(unlocked_nodes),
2146 if unlocked_instances:
2147 raise errors.OpPrereqError("Missing lock for instances: %s" %
2148 utils.CommaJoin(unlocked_instances),
2151 self.all_node_info = self.cfg.GetAllNodesInfo()
2152 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2154 self.my_node_names = utils.NiceSort(group_nodes)
2155 self.my_inst_names = utils.NiceSort(group_instances)
2157 self.my_node_info = dict((name, self.all_node_info[name])
2158 for name in self.my_node_names)
2160 self.my_inst_info = dict((name, self.all_inst_info[name])
2161 for name in self.my_inst_names)
2163 # We detect here the nodes that will need the extra RPC calls for verifying
2164 # split LV volumes; they should be locked.
2165 extra_lv_nodes = set()
2167 for inst in self.my_inst_info.values():
2168 if inst.disk_template in constants.DTS_INT_MIRROR:
2169 for nname in inst.all_nodes:
2170 if self.all_node_info[nname].group != self.group_uuid:
2171 extra_lv_nodes.add(nname)
2173 unlocked_lv_nodes = \
2174 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2176 if unlocked_lv_nodes:
2177 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2178 utils.CommaJoin(unlocked_lv_nodes),
2180 self.extra_lv_nodes = list(extra_lv_nodes)
2182 def _VerifyNode(self, ninfo, nresult):
2183 """Perform some basic validation on data returned from a node.
2185 - check the result data structure is well formed and has all the
2187 - check ganeti version
2189 @type ninfo: L{objects.Node}
2190 @param ninfo: the node to check
2191 @param nresult: the results from the node
2193 @return: whether overall this call was successful (and we can expect
2194         reasonable values in the response)
2198 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2200 # main result, nresult should be a non-empty dict
2201 test = not nresult or not isinstance(nresult, dict)
2202 _ErrorIf(test, constants.CV_ENODERPC, node,
2203 "unable to verify node: no data returned")
2207 # compares ganeti version
2208 local_version = constants.PROTOCOL_VERSION
2209 remote_version = nresult.get("version", None)
2210 test = not (remote_version and
2211 isinstance(remote_version, (list, tuple)) and
2212 len(remote_version) == 2)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "connection to node returned invalid data")
2218 test = local_version != remote_version[0]
2219 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2220 "incompatible protocol versions: master %s,"
2221 " node %s", local_version, remote_version[0])
2225 # node seems compatible, we can actually try to look into its results
2227 # full package version
2228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2229 constants.CV_ENODEVERSION, node,
2230 "software version mismatch: master %s, node %s",
2231 constants.RELEASE_VERSION, remote_version[1],
2232 code=self.ETYPE_WARNING)
2234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2235 if ninfo.vm_capable and isinstance(hyp_result, dict):
2236 for hv_name, hv_result in hyp_result.iteritems():
2237 test = hv_result is not None
2238 _ErrorIf(test, constants.CV_ENODEHV, node,
2239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2241 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2242 if ninfo.vm_capable and isinstance(hvp_result, list):
2243 for item, hv_name, hv_result in hvp_result:
2244 _ErrorIf(True, constants.CV_ENODEHV, node,
2245 "hypervisor %s parameter verify failure (source %s): %s",
2246 hv_name, item, hv_result)
2248 test = nresult.get(constants.NV_NODESETUP,
2249 ["Missing NODESETUP results"])
2250 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2255 def _VerifyNodeTime(self, ninfo, nresult,
2256 nvinfo_starttime, nvinfo_endtime):
2257 """Check the node time.
2259 @type ninfo: L{objects.Node}
2260 @param ninfo: the node to check
2261 @param nresult: the remote results for the node
2262 @param nvinfo_starttime: the start time of the RPC call
2263 @param nvinfo_endtime: the end time of the RPC call
2267 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2269 ntime = nresult.get(constants.NV_TIME, None)
2271 ntime_merged = utils.MergeTime(ntime)
2272 except (ValueError, TypeError):
2273 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2276 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2278 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2279 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2283 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2284 "Node time diverges by at least %s from master node time",
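# Added worked example (illustrative, assuming constants.NODE_MAX_CLOCK_SKEW
# is the usual 150 seconds): a node whose clock is roughly 200s behind the
# master falls outside the [start - 150s, end + 150s] window computed above
# and is reported as diverging by at least about "200.0s".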
2287 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2288 """Check the node LVM results.
2290 @type ninfo: L{objects.Node}
2291 @param ninfo: the node to check
2292 @param nresult: the remote results for the node
2293 @param vg_name: the configured VG name
2300 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2302 # checks vg existence and size > 20G
2303 vglist = nresult.get(constants.NV_VGLIST, None)
2305 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2307 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2308 constants.MIN_VG_SIZE)
2309 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2312 pvlist = nresult.get(constants.NV_PVLIST, None)
2313 test = pvlist is None
2314 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2316 # check that ':' is not present in PV names, since it's a
2317     # special character for lvcreate (denotes the range of PEs to be used on this PV)
2319 for _, pvname, owner_vg in pvlist:
2320 test = ":" in pvname
2321 _ErrorIf(test, constants.CV_ENODELVM, node,
2322 "Invalid character ':' in PV '%s' of VG '%s'",
2325 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2326 """Check the node bridges.
2328 @type ninfo: L{objects.Node}
2329 @param ninfo: the node to check
2330 @param nresult: the remote results for the node
2331 @param bridges: the expected list of bridges
2338 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 missing = nresult.get(constants.NV_BRIDGES, None)
2341 test = not isinstance(missing, list)
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "did not return valid bridge information")
2345 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2348 def _VerifyNodeUserScripts(self, ninfo, nresult):
2349     """Check the presence and executability of user scripts on the node.
2351 @type ninfo: L{objects.Node}
2352 @param ninfo: the node to check
2353 @param nresult: the remote results for the node
2358     test = constants.NV_USERSCRIPTS not in nresult
2359 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2360 "did not return user scripts information")
2362 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2364 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2365 "user scripts not present or not executable: %s" %
2366 utils.CommaJoin(sorted(broken_scripts)))
2368 def _VerifyNodeNetwork(self, ninfo, nresult):
2369 """Check the node network connectivity results.
2371 @type ninfo: L{objects.Node}
2372 @param ninfo: the node to check
2373 @param nresult: the remote results for the node
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 test = constants.NV_NODELIST not in nresult
2380 _ErrorIf(test, constants.CV_ENODESSH, node,
2381 "node hasn't returned node ssh connectivity data")
2383 if nresult[constants.NV_NODELIST]:
2384 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2385 _ErrorIf(True, constants.CV_ENODESSH, node,
2386 "ssh communication with node '%s': %s", a_node, a_msg)
2388 test = constants.NV_NODENETTEST not in nresult
2389 _ErrorIf(test, constants.CV_ENODENET, node,
2390 "node hasn't returned node tcp connectivity data")
2392 if nresult[constants.NV_NODENETTEST]:
2393 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2395 _ErrorIf(True, constants.CV_ENODENET, node,
2396 "tcp communication with node '%s': %s",
2397 anode, nresult[constants.NV_NODENETTEST][anode])
2399 test = constants.NV_MASTERIP not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node master IP reachability data")
2403 if not nresult[constants.NV_MASTERIP]:
2404 if node == self.master_node:
2405 msg = "the master node cannot reach the master IP (not configured?)"
2407 msg = "cannot reach the master IP"
2408 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2410 def _VerifyInstance(self, instance, instanceconfig, node_image,
2412 """Verify an instance.
2414 This function checks to see if the required block devices are
2415 available on the instance's node.
2418 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2419 node_current = instanceconfig.primary_node
2421 node_vol_should = {}
2422 instanceconfig.MapLVsByNode(node_vol_should)
2424 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2425 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2426 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2428 for node in node_vol_should:
2429 n_img = node_image[node]
2430 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2431 # ignore missing volumes on offline or broken nodes
2433 for volume in node_vol_should[node]:
2434 test = volume not in n_img.volumes
2435 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2436 "volume %s missing on node %s", volume, node)
2438 if instanceconfig.admin_state == constants.ADMINST_UP:
2439 pri_img = node_image[node_current]
2440 test = instance not in pri_img.instances and not pri_img.offline
2441 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2442 "instance not running on its primary node %s",
2445 diskdata = [(nname, success, status, idx)
2446 for (nname, disks) in diskstatus.items()
2447 for idx, (success, status) in enumerate(disks)]
2449 for nname, success, bdev_status, idx in diskdata:
2450 # the 'ghost node' construction in Exec() ensures that we have a
2452 snode = node_image[nname]
2453 bad_snode = snode.ghost or snode.offline
2454 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2455 not success and not bad_snode,
2456 constants.CV_EINSTANCEFAULTYDISK, instance,
2457 "couldn't retrieve status for disk/%s on %s: %s",
2458 idx, nname, bdev_status)
2459 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2460 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2461 constants.CV_EINSTANCEFAULTYDISK, instance,
2462 "disk/%s on %s is faulty", idx, nname)
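# Added illustrative note: "diskstatus" is the per-instance slice of the
# structure built by _CollectDiskInfo() below, mapping node name ->
# [(success, status), ...] per disk index; so "disk/0 on node2 is faulty"
# means the first disk's mirror status on node2 came back as
# constants.LDS_FAULTY.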
2464 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2465 """Verify if there are any unknown volumes in the cluster.
2467 The .os, .swap and backup volumes are ignored. All other volumes are
2468 reported as unknown.
2470 @type reserved: L{ganeti.utils.FieldSet}
2471 @param reserved: a FieldSet of reserved volume names
2474 for node, n_img in node_image.items():
2475 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2476 self.all_node_info[node].group != self.group_uuid):
2477 # skip non-healthy nodes
2479 for volume in n_img.volumes:
2480 test = ((node not in node_vol_should or
2481 volume not in node_vol_should[node]) and
2482 not reserved.Matches(volume))
2483 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2484 "volume %s is unknown", volume)
2486 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2487 """Verify N+1 Memory Resilience.
2489 Check that if one single node dies we can still start all the
2490 instances it was primary for.
2493 cluster_info = self.cfg.GetClusterInfo()
2494 for node, n_img in node_image.items():
2495 # This code checks that every node which is now listed as
2496 # secondary has enough memory to host all instances it is
2497       # supposed to, should a single other node in the cluster fail.
2498 # FIXME: not ready for failover to an arbitrary node
2499 # FIXME: does not support file-backed instances
2500 # WARNING: we currently take into account down instances as well
2501 # as up ones, considering that even if they're down someone
2502 # might want to start them even in the event of a node failure.
2503 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2504 # we're skipping nodes marked offline and nodes in other groups from
2505 # the N+1 warning, since most likely we don't have good memory
2506         # information from them; we already list instances living on such
2507 # nodes, and that's enough warning
2509 #TODO(dynmem): also consider ballooning out other instances
2510 for prinode, instances in n_img.sbp.items():
2512 for instance in instances:
2513 bep = cluster_info.FillBE(instance_cfg[instance])
2514 if bep[constants.BE_AUTO_BALANCE]:
2515 needed_mem += bep[constants.BE_MINMEM]
2516 test = n_img.mfree < needed_mem
2517 self._ErrorIf(test, constants.CV_ENODEN1, node,
2518                       "not enough memory to accommodate instance failovers"
2519 " should node %s fail (%dMiB needed, %dMiB available)",
2520 prinode, needed_mem, n_img.mfree)
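# Added worked example (illustrative): if this node is secondary for two
# auto-balanced instances whose primary is node A, with BE_MINMEM of 1024
# and 2048 MiB, then needed_mem is 3072 MiB; reporting mfree below that
# raises the CV_ENODEN1 "not enough memory to accommodate instance
# failovers" message for node A.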
2523 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2524 (files_all, files_opt, files_mc, files_vm)):
2525 """Verifies file checksums collected from all nodes.
2527 @param errorif: Callback for reporting errors
2528 @param nodeinfo: List of L{objects.Node} objects
2529 @param master_node: Name of master node
2530 @param all_nvinfo: RPC results
2533 # Define functions determining which nodes to consider for a file
2536 (files_mc, lambda node: (node.master_candidate or
2537 node.name == master_node)),
2538 (files_vm, lambda node: node.vm_capable),
2541 # Build mapping from filename to list of nodes which should have the file
2543 for (files, fn) in files2nodefn:
2545 filenodes = nodeinfo
2547 filenodes = filter(fn, nodeinfo)
2548 nodefiles.update((filename,
2549 frozenset(map(operator.attrgetter("name"), filenodes)))
2550 for filename in files)
2552 assert set(nodefiles) == (files_all | files_mc | files_vm)
2554 fileinfo = dict((filename, {}) for filename in nodefiles)
2555 ignore_nodes = set()
2557 for node in nodeinfo:
2559 ignore_nodes.add(node.name)
2562 nresult = all_nvinfo[node.name]
2564 if nresult.fail_msg or not nresult.payload:
2567 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2569 test = not (node_files and isinstance(node_files, dict))
2570 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2571 "Node did not return file checksum data")
2573 ignore_nodes.add(node.name)
2576 # Build per-checksum mapping from filename to nodes having it
2577 for (filename, checksum) in node_files.items():
2578 assert filename in nodefiles
2579 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2581 for (filename, checksums) in fileinfo.items():
2582 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2584 # Nodes having the file
2585 with_file = frozenset(node_name
2586 for nodes in fileinfo[filename].values()
2587 for node_name in nodes) - ignore_nodes
2589 expected_nodes = nodefiles[filename] - ignore_nodes
2591 # Nodes missing file
2592 missing_file = expected_nodes - with_file
2594 if filename in files_opt:
2596 errorif(missing_file and missing_file != expected_nodes,
2597 constants.CV_ECLUSTERFILECHECK, None,
2598 "File %s is optional, but it must exist on all or no"
2599 " nodes (not found on %s)",
2600 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2602 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2603 "File %s is missing from node(s) %s", filename,
2604 utils.CommaJoin(utils.NiceSort(missing_file)))
2606 # Warn if a node has a file it shouldn't
2607 unexpected = with_file - expected_nodes
2609 constants.CV_ECLUSTERFILECHECK, None,
2610 "File %s should not exist on node(s) %s",
2611 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2613 # See if there are multiple versions of the file
2614 test = len(checksums) > 1
2616 variants = ["variant %s on %s" %
2617 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2618 for (idx, (checksum, nodes)) in
2619 enumerate(sorted(checksums.items()))]
2623 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s found with %s different checksums (%s)",
2625 filename, len(checksums), "; ".join(variants))
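# Added illustrative note: at this point "fileinfo" maps each checked file
# to {checksum: set(node names reporting that checksum)}, e.g. a config
# file present with two different fingerprints yields two keys and is
# reported above as "found with 2 different checksums".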
2627 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2629     """Verifies the node DRBD status.
2631 @type ninfo: L{objects.Node}
2632 @param ninfo: the node to check
2633 @param nresult: the remote results for the node
2634 @param instanceinfo: the dict of instances
2635 @param drbd_helper: the configured DRBD usermode helper
2636 @param drbd_map: the DRBD map as returned by
2637 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2641 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2645       test = (helper_result is None)
2646 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2647 "no drbd usermode helper returned")
2649 status, payload = helper_result
2651 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2652 "drbd usermode helper check unsuccessful: %s", payload)
2653 test = status and (payload != drbd_helper)
2654 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655 "wrong drbd usermode helper: %s", payload)
2657 # compute the DRBD minors
2659 for minor, instance in drbd_map[node].items():
2660 test = instance not in instanceinfo
2661 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2662 "ghost instance '%s' in temporary DRBD map", instance)
2663 # ghost instance should not be running, but otherwise we
2664 # don't give double warnings (both ghost instance and
2665 # unallocated minor in use)
2667 node_drbd[minor] = (instance, False)
2669 instance = instanceinfo[instance]
2670 node_drbd[minor] = (instance.name,
2671 instance.admin_state == constants.ADMINST_UP)
2673 # and now check them
2674 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2675 test = not isinstance(used_minors, (tuple, list))
2676 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2677 "cannot parse drbd status file: %s", str(used_minors))
2679 # we cannot check drbd status
2682 for minor, (iname, must_exist) in node_drbd.items():
2683 test = minor not in used_minors and must_exist
2684 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2685 "drbd minor %d of instance %s is not active", minor, iname)
2686 for minor in used_minors:
2687 test = minor not in node_drbd
2688 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2689 "unallocated drbd minor %d is in use", minor)
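# Added illustrative note: "node_drbd" maps minor numbers taken from the
# configuration's DRBD map to (instance_name, must_be_active) pairs, e.g.
# {0: ("inst1.example.com", True)}; minors the node reports as used but
# absent from this map are flagged as unallocated, and vice versa.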
2691 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2692 """Builds the node OS structures.
2694 @type ninfo: L{objects.Node}
2695 @param ninfo: the node to check
2696 @param nresult: the remote results for the node
2697 @param nimg: the node image object
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 remote_os = nresult.get(constants.NV_OSLIST, None)
2704 test = (not isinstance(remote_os, list) or
2705 not compat.all(isinstance(v, list) and len(v) == 7
2706 for v in remote_os))
2708 _ErrorIf(test, constants.CV_ENODEOS, node,
2709 "node hasn't returned valid OS data")
2718 for (name, os_path, status, diagnose,
2719 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2721 if name not in os_dict:
2724 # parameters is a list of lists instead of list of tuples due to
2725 # JSON lacking a real tuple type, fix it:
2726 parameters = [tuple(v) for v in parameters]
2727 os_dict[name].append((os_path, status, diagnose,
2728 set(variants), set(parameters), set(api_ver)))
2730 nimg.oslist = os_dict
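# Added illustrative note: nimg.oslist ends up keyed by OS name, e.g.
#   {"debootstrap": [(path, status, diagnose, set(variants),
#                     set(parameters), set(api_versions))]}
# and a list with more than one entry means the OS was found in several
# search paths on the node, which _VerifyNodeOS() below reports as
# shadowing.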
2732 def _VerifyNodeOS(self, ninfo, nimg, base):
2733 """Verifies the node OS list.
2735 @type ninfo: L{objects.Node}
2736 @param ninfo: the node to check
2737 @param nimg: the node image object
2738 @param base: the 'template' node we match against (e.g. from the master)
2742 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2744 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2746 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2747 for os_name, os_data in nimg.oslist.items():
2748 assert os_data, "Empty OS status for OS %s?!" % os_name
2749 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2750 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2751 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2752 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2753 "OS '%s' has multiple entries (first one shadows the rest): %s",
2754 os_name, utils.CommaJoin([v[0] for v in os_data]))
2755 # comparisons with the 'base' image
2756 test = os_name not in base.oslist
2757 _ErrorIf(test, constants.CV_ENODEOS, node,
2758 "Extra OS %s not present on reference node (%s)",
2762 assert base.oslist[os_name], "Base node has empty OS status?"
2763 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2765 # base OS is invalid, skipping
2767 for kind, a, b in [("API version", f_api, b_api),
2768 ("variants list", f_var, b_var),
2769 ("parameters", beautify_params(f_param),
2770 beautify_params(b_param))]:
2771 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2772 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2773 kind, os_name, base.name,
2774 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2776 # check any missing OSes
2777 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2778 _ErrorIf(missing, constants.CV_ENODEOS, node,
2779 "OSes present on reference node %s but missing on this node: %s",
2780 base.name, utils.CommaJoin(missing))
2782 def _VerifyOob(self, ninfo, nresult):
2783 """Verifies out of band functionality of a node.
2785 @type ninfo: L{objects.Node}
2786 @param ninfo: the node to check
2787 @param nresult: the remote results for the node
2791 # We just have to verify the paths on master and/or master candidates
2792 # as the oob helper is invoked on the master
2793 if ((ninfo.master_candidate or ninfo.master_capable) and
2794 constants.NV_OOB_PATHS in nresult):
2795 for path_result in nresult[constants.NV_OOB_PATHS]:
2796 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2798 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2799 """Verifies and updates the node volume data.
2801 This function will update a L{NodeImage}'s internal structures
2802 with data from the remote call.
2804 @type ninfo: L{objects.Node}
2805 @param ninfo: the node to check
2806 @param nresult: the remote results for the node
2807 @param nimg: the node image object
2808 @param vg_name: the configured VG name
2812 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2814 nimg.lvm_fail = True
2815 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2818 elif isinstance(lvdata, basestring):
2819 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2820 utils.SafeEncode(lvdata))
2821 elif not isinstance(lvdata, dict):
2822 _ErrorIf(True, constants.CV_ENODELVM, node,
2823 "rpc call to node failed (lvlist)")
2825 nimg.volumes = lvdata
2826 nimg.lvm_fail = False
2828 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2829 """Verifies and updates the node instance list.
2831 If the listing was successful, then updates this node's instance
2832     list. Otherwise, it marks the RPC call as failed for the instance list key.
2835 @type ninfo: L{objects.Node}
2836 @param ninfo: the node to check
2837 @param nresult: the remote results for the node
2838 @param nimg: the node image object
2841 idata = nresult.get(constants.NV_INSTANCELIST, None)
2842 test = not isinstance(idata, list)
2843 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2844 "rpc call to node failed (instancelist): %s",
2845 utils.SafeEncode(str(idata)))
2847 nimg.hyp_fail = True
2849 nimg.instances = idata
2851 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2852 """Verifies and computes a node information map
2854 @type ninfo: L{objects.Node}
2855 @param ninfo: the node to check
2856 @param nresult: the remote results for the node
2857 @param nimg: the node image object
2858 @param vg_name: the configured VG name
2862 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2864 # try to read free memory (from the hypervisor)
2865 hv_info = nresult.get(constants.NV_HVINFO, None)
2866 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2867 _ErrorIf(test, constants.CV_ENODEHV, node,
2868 "rpc call to node failed (hvinfo)")
2871 nimg.mfree = int(hv_info["memory_free"])
2872 except (ValueError, TypeError):
2873 _ErrorIf(True, constants.CV_ENODERPC, node,
2874 "node returned invalid nodeinfo, check hypervisor")
2876 # FIXME: devise a free space model for file based instances as well
2877 if vg_name is not None:
2878 test = (constants.NV_VGLIST not in nresult or
2879 vg_name not in nresult[constants.NV_VGLIST])
2880 _ErrorIf(test, constants.CV_ENODELVM, node,
2881 "node didn't return data for the volume group '%s'"
2882 " - it is either missing or broken", vg_name)
2885 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2886 except (ValueError, TypeError):
2887 _ErrorIf(True, constants.CV_ENODERPC, node,
2888 "node returned invalid LVM info, check LVM status")
2890 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2891 """Gets per-disk status information for all instances.
2893 @type nodelist: list of strings
2894 @param nodelist: Node names
2895 @type node_image: dict of (name, L{objects.Node})
2896 @param node_image: Node objects
2897 @type instanceinfo: dict of (name, L{objects.Instance})
2898 @param instanceinfo: Instance objects
2899     @rtype: {instance: {node: [(success, payload)]}}
2900 @return: a dictionary of per-instance dictionaries with nodes as
2901 keys and disk information as values; the disk information is a
2902 list of tuples (success, payload)
2905 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2908 node_disks_devonly = {}
2909 diskless_instances = set()
2910 diskless = constants.DT_DISKLESS
2912 for nname in nodelist:
2913 node_instances = list(itertools.chain(node_image[nname].pinst,
2914 node_image[nname].sinst))
2915 diskless_instances.update(inst for inst in node_instances
2916 if instanceinfo[inst].disk_template == diskless)
2917 disks = [(inst, disk)
2918 for inst in node_instances
2919 for disk in instanceinfo[inst].disks]
2922 # No need to collect data
2925 node_disks[nname] = disks
2927 # _AnnotateDiskParams makes already copies of the disks
2929 for (inst, dev) in disks:
2930 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2931 self.cfg.SetDiskID(anno_disk, nname)
2932 devonly.append(anno_disk)
2934 node_disks_devonly[nname] = devonly
2936 assert len(node_disks) == len(node_disks_devonly)
2938 # Collect data from all nodes with disks
2939 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2942 assert len(result) == len(node_disks)
2946 for (nname, nres) in result.items():
2947 disks = node_disks[nname]
2950 # No data from this node
2951 data = len(disks) * [(False, "node offline")]
2954 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2955 "while getting disk information: %s", msg)
2957 # No data from this node
2958 data = len(disks) * [(False, msg)]
2961 for idx, i in enumerate(nres.payload):
2962 if isinstance(i, (tuple, list)) and len(i) == 2:
2965 logging.warning("Invalid result from node %s, entry %d: %s",
2967 data.append((False, "Invalid result from the remote node"))
2969 for ((inst, _), status) in zip(disks, data):
2970 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2972 # Add empty entries for diskless instances.
2973 for inst in diskless_instances:
2974 assert inst not in instdisk
2977 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2978 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2979 compat.all(isinstance(s, (tuple, list)) and
2980 len(s) == 2 for s in statuses)
2981 for inst, nnames in instdisk.items()
2982 for nname, statuses in nnames.items())
2983 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2988 def _SshNodeSelector(group_uuid, all_nodes):
2989 """Create endless iterators for all potential SSH check hosts.
2992 nodes = [node for node in all_nodes
2993 if (node.group != group_uuid and
2995 keyfunc = operator.attrgetter("group")
2997 return map(itertools.cycle,
2998 [sorted(map(operator.attrgetter("name"), names))
2999 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3003 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3004 """Choose which nodes should talk to which other nodes.
3006     We will make nodes contact all nodes in their group, and one node from every other group.
3009 @warning: This algorithm has a known issue if one node group is much
3010 smaller than others (e.g. just one node). In such a case all other
3011 nodes will talk to the single node.
3014 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3015 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3017 return (online_nodes,
3018 dict((name, sorted([i.next() for i in sel]))
3019 for name in online_nodes))
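# Added illustrative example: with groups A, B and C, each online node of
# the group being verified is told to SSH-check the other nodes of its own
# group plus one node drawn (round-robin, via the itertools.cycle iterators
# built in _SshNodeSelector) from B and one from C, which spreads the
# cross-group probes across nodes.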
3021 def BuildHooksEnv(self):
3024     Cluster-Verify hooks are run only in the post phase; when they fail, their
3025     output is logged in the verify output and the verification fails.
3029 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3032 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3033 for node in self.my_node_info.values())
3037 def BuildHooksNodes(self):
3038 """Build hooks nodes.
3041 return ([], self.my_node_names)
3043 def Exec(self, feedback_fn):
3044     """Verify integrity of the node group, performing various tests on nodes.
3047 # This method has too many local variables. pylint: disable=R0914
3048 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3050 if not self.my_node_names:
3052 feedback_fn("* Empty node group, skipping verification")
3056 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3057 verbose = self.op.verbose
3058 self._feedback_fn = feedback_fn
3060 vg_name = self.cfg.GetVGName()
3061 drbd_helper = self.cfg.GetDRBDHelper()
3062 cluster = self.cfg.GetClusterInfo()
3063 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3064 hypervisors = cluster.enabled_hypervisors
3065 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3067 i_non_redundant = [] # Non redundant instances
3068 i_non_a_balanced = [] # Non auto-balanced instances
3069 i_offline = 0 # Count of offline instances
3070 n_offline = 0 # Count of offline nodes
3071 n_drained = 0 # Count of nodes being drained
3072 node_vol_should = {}
3074 # FIXME: verify OS list
3077 filemap = _ComputeAncillaryFiles(cluster, False)
3079 # do local checksums
3080 master_node = self.master_node = self.cfg.GetMasterNode()
3081 master_ip = self.cfg.GetMasterIP()
3083 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3086 if self.cfg.GetUseExternalMipScript():
3087 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3089 node_verify_param = {
3090 constants.NV_FILELIST:
3091 utils.UniqueSequence(filename
3092 for files in filemap
3093 for filename in files),
3094 constants.NV_NODELIST:
3095 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3096 self.all_node_info.values()),
3097 constants.NV_HYPERVISOR: hypervisors,
3098 constants.NV_HVPARAMS:
3099 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3100 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3101 for node in node_data_list
3102 if not node.offline],
3103 constants.NV_INSTANCELIST: hypervisors,
3104 constants.NV_VERSION: None,
3105 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3106 constants.NV_NODESETUP: None,
3107 constants.NV_TIME: None,
3108 constants.NV_MASTERIP: (master_node, master_ip),
3109 constants.NV_OSLIST: None,
3110 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3111 constants.NV_USERSCRIPTS: user_scripts,
3114 if vg_name is not None:
3115 node_verify_param[constants.NV_VGLIST] = None
3116 node_verify_param[constants.NV_LVLIST] = vg_name
3117 node_verify_param[constants.NV_PVLIST] = [vg_name]
3118 node_verify_param[constants.NV_DRBDLIST] = None
3121 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3124 # FIXME: this needs to be changed per node-group, not cluster-wide
3126 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3127 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3128 bridges.add(default_nicpp[constants.NIC_LINK])
3129 for instance in self.my_inst_info.values():
3130 for nic in instance.nics:
3131 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3132 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3133 bridges.add(full_nic[constants.NIC_LINK])
3136 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3138 # Build our expected cluster state
3139 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3141 vm_capable=node.vm_capable))
3142 for node in node_data_list)
3146 for node in self.all_node_info.values():
3147 path = _SupportsOob(self.cfg, node)
3148 if path and path not in oob_paths:
3149 oob_paths.append(path)
3152 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3154 for instance in self.my_inst_names:
3155 inst_config = self.my_inst_info[instance]
3156 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3159 for nname in inst_config.all_nodes:
3160 if nname not in node_image:
3161 gnode = self.NodeImage(name=nname)
3162 gnode.ghost = (nname not in self.all_node_info)
3163 node_image[nname] = gnode
3165 inst_config.MapLVsByNode(node_vol_should)
3167 pnode = inst_config.primary_node
3168 node_image[pnode].pinst.append(instance)
3170 for snode in inst_config.secondary_nodes:
3171 nimg = node_image[snode]
3172 nimg.sinst.append(instance)
3173 if pnode not in nimg.sbp:
3174 nimg.sbp[pnode] = []
3175 nimg.sbp[pnode].append(instance)
3177 # At this point, we have the in-memory data structures complete,
3178 # except for the runtime information, which we'll gather next
3180 # Due to the way our RPC system works, exact response times cannot be
3181 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3182     # time before and after executing the request, we can at least have a time window.
3184 nvinfo_starttime = time.time()
3185 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3187 self.cfg.GetClusterName())
3188 nvinfo_endtime = time.time()
3190 if self.extra_lv_nodes and vg_name is not None:
3192 self.rpc.call_node_verify(self.extra_lv_nodes,
3193 {constants.NV_LVLIST: vg_name},
3194 self.cfg.GetClusterName())
3196 extra_lv_nvinfo = {}
3198 all_drbd_map = self.cfg.ComputeDRBDMap()
3200 feedback_fn("* Gathering disk information (%s nodes)" %
3201 len(self.my_node_names))
3202 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3205 feedback_fn("* Verifying configuration file consistency")
3207 # If not all nodes are being checked, we need to make sure the master node
3208 # and a non-checked vm_capable node are in the list.
3209 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3211 vf_nvinfo = all_nvinfo.copy()
3212 vf_node_info = list(self.my_node_info.values())
3213 additional_nodes = []
3214 if master_node not in self.my_node_info:
3215 additional_nodes.append(master_node)
3216 vf_node_info.append(self.all_node_info[master_node])
3217 # Add the first vm_capable node we find which is not included
3218 for node in absent_nodes:
3219 nodeinfo = self.all_node_info[node]
3220 if nodeinfo.vm_capable and not nodeinfo.offline:
3221 additional_nodes.append(node)
3222 vf_node_info.append(self.all_node_info[node])
3224 key = constants.NV_FILELIST
3225 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3226 {key: node_verify_param[key]},
3227 self.cfg.GetClusterName()))
3229 vf_nvinfo = all_nvinfo
3230 vf_node_info = self.my_node_info.values()
3232 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3234 feedback_fn("* Verifying node status")
3238 for node_i in node_data_list:
3240 nimg = node_image[node]
3244 feedback_fn("* Skipping offline node %s" % (node,))
3248 if node == master_node:
3250 elif node_i.master_candidate:
3251 ntype = "master candidate"
3252 elif node_i.drained:
3258 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3260 msg = all_nvinfo[node].fail_msg
3261 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3264 nimg.rpc_fail = True
3267 nresult = all_nvinfo[node].payload
3269 nimg.call_ok = self._VerifyNode(node_i, nresult)
3270 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3271 self._VerifyNodeNetwork(node_i, nresult)
3272 self._VerifyNodeUserScripts(node_i, nresult)
3273 self._VerifyOob(node_i, nresult)
3276 self._VerifyNodeLVM(node_i, nresult, vg_name)
3277 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3280 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3281 self._UpdateNodeInstances(node_i, nresult, nimg)
3282 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3283 self._UpdateNodeOS(node_i, nresult, nimg)
3285 if not nimg.os_fail:
3286 if refos_img is None:
3288 self._VerifyNodeOS(node_i, nimg, refos_img)
3289 self._VerifyNodeBridges(node_i, nresult, bridges)
3291       # Check whether all running instances are primary for the node. (This
3292 # can no longer be done from _VerifyInstance below, since some of the
3293 # wrong instances could be from other node groups.)
3294 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3296 for inst in non_primary_inst:
3297 test = inst in self.all_inst_info
3298 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3299 "instance should not run on node %s", node_i.name)
3300 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3301 "node is running unknown instance %s", inst)
3303 for node, result in extra_lv_nvinfo.items():
3304 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3305 node_image[node], vg_name)
3307 feedback_fn("* Verifying instance status")
3308 for instance in self.my_inst_names:
3310 feedback_fn("* Verifying instance %s" % instance)
3311 inst_config = self.my_inst_info[instance]
3312 self._VerifyInstance(instance, inst_config, node_image,
3314 inst_nodes_offline = []
3316 pnode = inst_config.primary_node
3317 pnode_img = node_image[pnode]
3318 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3319 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3320 " primary node failed", instance)
3322 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3324 constants.CV_EINSTANCEBADNODE, instance,
3325 "instance is marked as running and lives on offline node %s",
3326 inst_config.primary_node)
3328 # If the instance is non-redundant we cannot survive losing its primary
3329 # node, so we are not N+1 compliant. On the other hand we have no disk
3330       # templates with more than one secondary so that situation is not well handled, either.
3332 # FIXME: does not support file-backed instances
3333 if not inst_config.secondary_nodes:
3334 i_non_redundant.append(instance)
3336 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3337 constants.CV_EINSTANCELAYOUT,
3338 instance, "instance has multiple secondary nodes: %s",
3339 utils.CommaJoin(inst_config.secondary_nodes),
3340 code=self.ETYPE_WARNING)
3342 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3343 pnode = inst_config.primary_node
3344 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3345 instance_groups = {}
3347 for node in instance_nodes:
3348 instance_groups.setdefault(self.all_node_info[node].group,
3352 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3353 # Sort so that we always list the primary node first.
3354 for group, nodes in sorted(instance_groups.items(),
3355 key=lambda (_, nodes): pnode in nodes,
3358 self._ErrorIf(len(instance_groups) > 1,
3359 constants.CV_EINSTANCESPLITGROUPS,
3360 instance, "instance has primary and secondary nodes in"
3361 " different groups: %s", utils.CommaJoin(pretty_list),
3362 code=self.ETYPE_WARNING)
3364 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3365 i_non_a_balanced.append(instance)
3367 for snode in inst_config.secondary_nodes:
3368 s_img = node_image[snode]
3369 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3370 snode, "instance %s, connection to secondary node failed",
3374 inst_nodes_offline.append(snode)
3376 # warn that the instance lives on offline nodes
3377 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3378 "instance has offline secondary node(s) %s",
3379 utils.CommaJoin(inst_nodes_offline))
3380 # ... or ghost/non-vm_capable nodes
3381 for node in inst_config.all_nodes:
3382 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3383 instance, "instance lives on ghost node %s", node)
3384 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3385 instance, "instance lives on non-vm_capable node %s", node)
3387 feedback_fn("* Verifying orphan volumes")
3388 reserved = utils.FieldSet(*cluster.reserved_lvs)
3390 # We will get spurious "unknown volume" warnings if any node of this group
3391 # is secondary for an instance whose primary is in another group. To avoid
3392 # them, we find these instances and add their volumes to node_vol_should.
3393 for inst in self.all_inst_info.values():
3394 for secondary in inst.secondary_nodes:
3395 if (secondary in self.my_node_info
3396 and inst.name not in self.my_inst_info):
3397 inst.MapLVsByNode(node_vol_should)
3400 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3402 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3403 feedback_fn("* Verifying N+1 Memory redundancy")
3404 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3406 feedback_fn("* Other Notes")
3408 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3409 % len(i_non_redundant))
3411 if i_non_a_balanced:
3412 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3413 % len(i_non_a_balanced))
3416 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3419 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3422 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3426 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3427 """Analyze the post-hooks' result
3429 This method analyses the hook result, handles it, and sends some
3430 nicely-formatted feedback back to the user.
3432 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3433 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3434 @param hooks_results: the results of the multi-node hooks rpc call
3435     @param feedback_fn: function used to send feedback back to the caller
3436 @param lu_result: previous Exec result
3437 @return: the new Exec result, based on the previous result
3441     # We only really run POST-phase hooks, and only for non-empty groups;
3442     # we are only interested in their results
3443 if not self.my_node_names:
3446 elif phase == constants.HOOKS_PHASE_POST:
3447 # Used to change hooks' output to proper indentation
3448 feedback_fn("* Hooks Results")
3449 assert hooks_results, "invalid result from hooks"
3451 for node_name in hooks_results:
3452 res = hooks_results[node_name]
3454 test = msg and not res.offline
3455 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3456 "Communication failure in hooks execution: %s", msg)
3457 if res.offline or msg:
3458           # No need to investigate payload if node is offline or gave an error
3461 for script, hkr, output in res.payload:
3462 test = hkr == constants.HKR_FAIL
3463 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3464 "Script %s failed, output:", script)
3466 output = self._HOOKS_INDENT_RE.sub(" ", output)
3467 feedback_fn("%s" % output)
3473 class LUClusterVerifyDisks(NoHooksLU):
3474   """Verifies the cluster disk status.
3479 def ExpandNames(self):
3480 self.share_locks = _ShareAll()
3481 self.needed_locks = {
3482 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3485 def Exec(self, feedback_fn):
3486 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3488 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3489 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3490 for group in group_names])
3493 class LUGroupVerifyDisks(NoHooksLU):
3494 """Verifies the status of all disks in a node group.
3499 def ExpandNames(self):
3500 # Raises errors.OpPrereqError on its own if group can't be found
3501 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3503 self.share_locks = _ShareAll()
3504 self.needed_locks = {
3505 locking.LEVEL_INSTANCE: [],
3506 locking.LEVEL_NODEGROUP: [],
3507 locking.LEVEL_NODE: [],
3510 def DeclareLocks(self, level):
3511 if level == locking.LEVEL_INSTANCE:
3512 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3514 # Lock instances optimistically, needs verification once node and group
3515 # locks have been acquired
3516 self.needed_locks[locking.LEVEL_INSTANCE] = \
3517 self.cfg.GetNodeGroupInstances(self.group_uuid)
3519 elif level == locking.LEVEL_NODEGROUP:
3520 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3522 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3523 set([self.group_uuid] +
3524 # Lock all groups used by instances optimistically; this requires
3525 # going via the node before it's locked, requiring verification
3528 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3529 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3531 elif level == locking.LEVEL_NODE:
3532       # This will only lock the nodes in the group to be verified which contain actual instances
3534 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3535 self._LockInstancesNodes()
3537 # Lock all nodes in group to be verified
3538 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3539 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3540 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3542 def CheckPrereq(self):
3543 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3544 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3545 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3547 assert self.group_uuid in owned_groups
3549 # Check if locked instances are still correct
3550 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3552 # Get instance information
3553 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3555 # Check if node groups for locked instances are still correct
3556 _CheckInstancesNodeGroups(self.cfg, self.instances,
3557 owned_groups, owned_nodes, self.group_uuid)
3559 def Exec(self, feedback_fn):
3560 """Verify integrity of cluster disks.
3562 @rtype: tuple of three items
3563 @return: a tuple of (dict of node-to-node_error, list of instances
3564         which need activate-disks, dict of instance: (node, volume) for missing volumes)
3569 res_instances = set()
3572 nv_dict = _MapInstanceDisksToNodes([inst
3573 for inst in self.instances.values()
3574 if inst.admin_state == constants.ADMINST_UP])
3577 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3578 set(self.cfg.GetVmCapableNodeList()))
3580 node_lvs = self.rpc.call_lv_list(nodes, [])
3582 for (node, node_res) in node_lvs.items():
3583 if node_res.offline:
3586 msg = node_res.fail_msg
3588 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3589 res_nodes[node] = msg
3592 for lv_name, (_, _, lv_online) in node_res.payload.items():
3593 inst = nv_dict.pop((node, lv_name), None)
3594 if not (lv_online or inst is None):
3595 res_instances.add(inst)
3597     # any leftover items in nv_dict are missing LVs, let's arrange the data better
3599 for key, inst in nv_dict.iteritems():
3600 res_missing.setdefault(inst, []).append(list(key))
3602 return (res_nodes, list(res_instances), res_missing)
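# Added illustrative note: the result roughly looks like
#   ({"node3": "Error while ..."}, ["inst2"],
#    {"inst5": [["node1", "xenvg/disk0_data"]]})
# i.e. per-node RPC failures, instances whose disks need to be
# (re)activated, and per-instance (node, volume) pairs for LVs missing on
# disk; the node, instance and volume names above are made up.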
3605 class LUClusterRepairDiskSizes(NoHooksLU):
3606   """Verifies the cluster disk sizes.
3611 def ExpandNames(self):
3612 if self.op.instances:
3613 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3614 self.needed_locks = {
3615 locking.LEVEL_NODE_RES: [],
3616 locking.LEVEL_INSTANCE: self.wanted_names,
3618 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3620 self.wanted_names = None
3621 self.needed_locks = {
3622 locking.LEVEL_NODE_RES: locking.ALL_SET,
3623 locking.LEVEL_INSTANCE: locking.ALL_SET,
3625 self.share_locks = {
3626 locking.LEVEL_NODE_RES: 1,
3627 locking.LEVEL_INSTANCE: 0,
3630 def DeclareLocks(self, level):
3631 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3632 self._LockInstancesNodes(primary_only=True, level=level)
3634 def CheckPrereq(self):
3635 """Check prerequisites.
3637 This only checks the optional instance list against the existing names.
3640 if self.wanted_names is None:
3641 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3643 self.wanted_instances = \
3644 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3646 def _EnsureChildSizes(self, disk):
3647 """Ensure children of the disk have the needed disk size.
3649 This is valid mainly for DRBD8 and fixes an issue where the
3650     children have a smaller disk size.
3652 @param disk: an L{ganeti.objects.Disk} object
3655 if disk.dev_type == constants.LD_DRBD8:
3656 assert disk.children, "Empty children for DRBD8?"
3657 fchild = disk.children[0]
3658 mismatch = fchild.size < disk.size
3660 self.LogInfo("Child disk has size %d, parent %d, fixing",
3661 fchild.size, disk.size)
3662 fchild.size = disk.size
3664 # and we recurse on this child only, not on the metadev
3665 return self._EnsureChildSizes(fchild) or mismatch
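# Added illustrative note: for a DRBD8 disk whose data child is smaller than
# the parent (say child 10236 MiB vs parent 10240 MiB) the child size is
# bumped to match and True is returned, so Exec() below knows the instance's
# configuration has to be written back.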
3669 def Exec(self, feedback_fn):
3670 """Verify the size of cluster disks.
3673 # TODO: check child disks too
3674 # TODO: check differences in size between primary/secondary nodes
3676 for instance in self.wanted_instances:
3677 pnode = instance.primary_node
3678 if pnode not in per_node_disks:
3679 per_node_disks[pnode] = []
3680 for idx, disk in enumerate(instance.disks):
3681 per_node_disks[pnode].append((instance, idx, disk))
3683 assert not (frozenset(per_node_disks.keys()) -
3684 self.owned_locks(locking.LEVEL_NODE_RES)), \
3685 "Not owning correct locks"
3686 assert not self.owned_locks(locking.LEVEL_NODE)
3689 for node, dskl in per_node_disks.items():
3690 newl = [v[2].Copy() for v in dskl]
3692 self.cfg.SetDiskID(dsk, node)
3693 result = self.rpc.call_blockdev_getsize(node, newl)
3695 self.LogWarning("Failure in blockdev_getsize call to node"
3696 " %s, ignoring", node)
3698 if len(result.payload) != len(dskl):
3699         logging.warning("Invalid result from node %s: len(dskl)=%d,"
3700 " result.payload=%s", node, len(dskl), result.payload)
3701 self.LogWarning("Invalid result from node %s, ignoring node results",
3704 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3706 self.LogWarning("Disk %d of instance %s did not return size"
3707 " information, ignoring", idx, instance.name)
3709 if not isinstance(size, (int, long)):
3710 self.LogWarning("Disk %d of instance %s did not return valid"
3711 " size information, ignoring", idx, instance.name)
3714 if size != disk.size:
3715 self.LogInfo("Disk %d of instance %s has mismatched size,"
3716 " correcting: recorded %d, actual %d", idx,
3717 instance.name, disk.size, size)
3719 self.cfg.Update(instance, feedback_fn)
3720 changed.append((instance.name, idx, size))
3721 if self._EnsureChildSizes(disk):
3722 self.cfg.Update(instance, feedback_fn)
3723 changed.append((instance.name, idx, disk.size))
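# Added note (not part of the original source): each entry appended to
# "changed" above is an (instance_name, disk_index, new_size) tuple, e.g.
#   ("instance1.example.com", 0, 10240)
# presumably one per disk whose recorded size had to be corrected; the
# instance name here is purely illustrative.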
3727 class LUClusterRename(LogicalUnit):
3728 """Rename the cluster.
3731 HPATH = "cluster-rename"
3732 HTYPE = constants.HTYPE_CLUSTER
3734 def BuildHooksEnv(self):
3739 "OP_TARGET": self.cfg.GetClusterName(),
3740 "NEW_NAME": self.op.name,
3743 def BuildHooksNodes(self):
3744 """Build hooks nodes.
3747 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3749 def CheckPrereq(self):
3750 """Verify that the passed name is a valid one.
3753 hostname = netutils.GetHostname(name=self.op.name,
3754 family=self.cfg.GetPrimaryIPFamily())
3756 new_name = hostname.name
3757 self.ip = new_ip = hostname.ip
3758 old_name = self.cfg.GetClusterName()
3759 old_ip = self.cfg.GetMasterIP()
3760 if new_name == old_name and new_ip == old_ip:
3761 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3762 " cluster has changed",
3764 if new_ip != old_ip:
3765 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3766 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3767                                      " already reachable on the network" %
3768 new_ip, errors.ECODE_NOTUNIQUE)
3770 self.op.name = new_name
3772 def Exec(self, feedback_fn):
3773 """Rename the cluster.
3776 clustername = self.op.name
3779     # shut down the master IP
3780 master_params = self.cfg.GetMasterNetworkParameters()
3781 ems = self.cfg.GetUseExternalMipScript()
3782 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3784 result.Raise("Could not disable the master role")
3787 cluster = self.cfg.GetClusterInfo()
3788 cluster.cluster_name = clustername
3789 cluster.master_ip = new_ip
3790 self.cfg.Update(cluster, feedback_fn)
3792 # update the known hosts file
3793 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3794 node_list = self.cfg.GetOnlineNodeList()
3796 node_list.remove(master_params.name)
3799 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3801 master_params.ip = new_ip
3802 result = self.rpc.call_node_activate_master_ip(master_params.name,
3804 msg = result.fail_msg
3806 self.LogWarning("Could not re-enable the master role on"
3807 " the master, please restart manually: %s", msg)
3812 def _ValidateNetmask(cfg, netmask):
3813 """Checks if a netmask is valid.
3815 @type cfg: L{config.ConfigWriter}
3816 @param cfg: The cluster configuration
3818 @param netmask: the netmask to be verified
3819 @raise errors.OpPrereqError: if the validation fails
3822 ip_family = cfg.GetPrimaryIPFamily()
3824 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3825 except errors.ProgrammerError:
3826 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3828 if not ipcls.ValidateNetmask(netmask):
3829 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
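# Usage sketch (added for illustration; the concrete values are assumptions,
# not taken from the original source).  The netmask is a CIDR prefix length
# validated against the cluster's primary IP family, so for an IPv4 cluster
# one would expect roughly:
#
#   _ValidateNetmask(cfg, 24)   # accepted
#   _ValidateNetmask(cfg, 64)   # raises OpPrereqError (too long for IPv4)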
3833 class LUClusterSetParams(LogicalUnit):
3834 """Change the parameters of the cluster.
3837 HPATH = "cluster-modify"
3838 HTYPE = constants.HTYPE_CLUSTER
3841 def CheckArguments(self):
3845 if self.op.uid_pool:
3846 uidpool.CheckUidPool(self.op.uid_pool)
3848 if self.op.add_uids:
3849 uidpool.CheckUidPool(self.op.add_uids)
3851 if self.op.remove_uids:
3852 uidpool.CheckUidPool(self.op.remove_uids)
3854 if self.op.master_netmask is not None:
3855 _ValidateNetmask(self.cfg, self.op.master_netmask)
3857 if self.op.diskparams:
3858 for dt_params in self.op.diskparams.values():
3859 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3861 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3862 except errors.OpPrereqError, err:
3863         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3866 def ExpandNames(self):
3867 # FIXME: in the future maybe other cluster params won't require checking on
3868 # all nodes to be modified.
3869 self.needed_locks = {
3870 locking.LEVEL_NODE: locking.ALL_SET,
3871 locking.LEVEL_INSTANCE: locking.ALL_SET,
3872 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3874 self.share_locks = {
3875 locking.LEVEL_NODE: 1,
3876 locking.LEVEL_INSTANCE: 1,
3877 locking.LEVEL_NODEGROUP: 1,
3880 def BuildHooksEnv(self):
3885 "OP_TARGET": self.cfg.GetClusterName(),
3886 "NEW_VG_NAME": self.op.vg_name,
3889 def BuildHooksNodes(self):
3890 """Build hooks nodes.
3893 mn = self.cfg.GetMasterNode()
3896 def CheckPrereq(self):
3897 """Check prerequisites.
3899     This checks that the given parameters don't conflict and
3900     that the given volume group is valid.
3903 if self.op.vg_name is not None and not self.op.vg_name:
3904 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3905 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3906 " instances exist", errors.ECODE_INVAL)
3908 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3909 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3910 raise errors.OpPrereqError("Cannot disable drbd helper while"
3911 " drbd-based instances exist",
3914 node_list = self.owned_locks(locking.LEVEL_NODE)
3916 # if vg_name not None, checks given volume group on all nodes
3918 vglist = self.rpc.call_vg_list(node_list)
3919 for node in node_list:
3920 msg = vglist[node].fail_msg
3922 # ignoring down node
3923 self.LogWarning("Error while gathering data on node %s"
3924 " (ignoring node): %s", node, msg)
3926 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3928 constants.MIN_VG_SIZE)
3930 raise errors.OpPrereqError("Error on node '%s': %s" %
3931 (node, vgstatus), errors.ECODE_ENVIRON)
3933 if self.op.drbd_helper:
3934 # checks given drbd helper on all nodes
3935 helpers = self.rpc.call_drbd_helper(node_list)
3936 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3938 self.LogInfo("Not checking drbd helper on offline node %s", node)
3940 msg = helpers[node].fail_msg
3942 raise errors.OpPrereqError("Error checking drbd helper on node"
3943 " '%s': %s" % (node, msg),
3944 errors.ECODE_ENVIRON)
3945 node_helper = helpers[node].payload
3946 if node_helper != self.op.drbd_helper:
3947 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3948 (node, node_helper), errors.ECODE_ENVIRON)
3950 self.cluster = cluster = self.cfg.GetClusterInfo()
3951 # validate params changes
3952 if self.op.beparams:
3953 objects.UpgradeBeParams(self.op.beparams)
3954 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3955 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3957 if self.op.ndparams:
3958 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3959 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3961 # TODO: we need a more general way to handle resetting
3962 # cluster-level parameters to default values
3963 if self.new_ndparams["oob_program"] == "":
3964 self.new_ndparams["oob_program"] = \
3965 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3967 if self.op.hv_state:
3968 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3969 self.cluster.hv_state_static)
3970 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3971 for hv, values in new_hv_state.items())
3973 if self.op.disk_state:
3974 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3975 self.cluster.disk_state_static)
3976 self.new_disk_state = \
3977 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3978 for name, values in svalues.items()))
3979 for storage, svalues in new_disk_state.items())
3982 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3985 all_instances = self.cfg.GetAllInstancesInfo().values()
3987 for group in self.cfg.GetAllNodeGroupsInfo().values():
3988 instances = frozenset([inst for inst in all_instances
3989 if compat.any(node in group.members
3990 for node in inst.all_nodes)])
3991 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3992 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3994 new_ipolicy, instances)
3996 violations.update(new)
3999 self.LogWarning("After the ipolicy change the following instances"
4000                       " violate it: %s",
4001 utils.CommaJoin(utils.NiceSort(violations)))
4003 if self.op.nicparams:
4004 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4005 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4006 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4009 # check all instances for consistency
4010 for instance in self.cfg.GetAllInstancesInfo().values():
4011 for nic_idx, nic in enumerate(instance.nics):
4012 params_copy = copy.deepcopy(nic.nicparams)
4013 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4015 # check parameter syntax
4017 objects.NIC.CheckParameterSyntax(params_filled)
4018 except errors.ConfigurationError, err:
4019 nic_errors.append("Instance %s, nic/%d: %s" %
4020 (instance.name, nic_idx, err))
4022 # if we're moving instances to routed, check that they have an ip
4023 target_mode = params_filled[constants.NIC_MODE]
4024 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4025 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4026 " address" % (instance.name, nic_idx))
4028 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4029 "\n".join(nic_errors))
4031 # hypervisor list/parameters
4032 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4033 if self.op.hvparams:
4034 for hv_name, hv_dict in self.op.hvparams.items():
4035 if hv_name not in self.new_hvparams:
4036 self.new_hvparams[hv_name] = hv_dict
4038 self.new_hvparams[hv_name].update(hv_dict)
4040 # disk template parameters
4041 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4042 if self.op.diskparams:
4043 for dt_name, dt_params in self.op.diskparams.items():
4044         if dt_name not in self.new_diskparams:
4045 self.new_diskparams[dt_name] = dt_params
4047 self.new_diskparams[dt_name].update(dt_params)
4049 # os hypervisor parameters
4050 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4052 for os_name, hvs in self.op.os_hvp.items():
4053 if os_name not in self.new_os_hvp:
4054 self.new_os_hvp[os_name] = hvs
4056 for hv_name, hv_dict in hvs.items():
4057 if hv_name not in self.new_os_hvp[os_name]:
4058 self.new_os_hvp[os_name][hv_name] = hv_dict
4060 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4063 self.new_osp = objects.FillDict(cluster.osparams, {})
4064 if self.op.osparams:
4065 for os_name, osp in self.op.osparams.items():
4066 if os_name not in self.new_osp:
4067 self.new_osp[os_name] = {}
4069 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4072 if not self.new_osp[os_name]:
4073 # we removed all parameters
4074 del self.new_osp[os_name]
4076 # check the parameter validity (remote check)
4077 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4078 os_name, self.new_osp[os_name])
4080 # changes to the hypervisor list
4081 if self.op.enabled_hypervisors is not None:
4082 self.hv_list = self.op.enabled_hypervisors
4083 for hv in self.hv_list:
4084 # if the hypervisor doesn't already exist in the cluster
4085 # hvparams, we initialize it to empty, and then (in both
4086 # cases) we make sure to fill the defaults, as we might not
4087         # have a complete defaults list if the hypervisor wasn't enabled before
4089 if hv not in new_hvp:
4091 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4092 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4094 self.hv_list = cluster.enabled_hypervisors
4096 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4097 # either the enabled list has changed, or the parameters have, validate
4098 for hv_name, hv_params in self.new_hvparams.items():
4099 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4100 (self.op.enabled_hypervisors and
4101 hv_name in self.op.enabled_hypervisors)):
4102 # either this is a new hypervisor, or its parameters have changed
4103 hv_class = hypervisor.GetHypervisor(hv_name)
4104 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4105 hv_class.CheckParameterSyntax(hv_params)
4106 _CheckHVParams(self, node_list, hv_name, hv_params)
4109 # no need to check any newly-enabled hypervisors, since the
4110 # defaults have already been checked in the above code-block
4111 for os_name, os_hvp in self.new_os_hvp.items():
4112 for hv_name, hv_params in os_hvp.items():
4113 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4114 # we need to fill in the new os_hvp on top of the actual hv_p
4115 cluster_defaults = self.new_hvparams.get(hv_name, {})
4116 new_osp = objects.FillDict(cluster_defaults, hv_params)
4117 hv_class = hypervisor.GetHypervisor(hv_name)
4118 hv_class.CheckParameterSyntax(new_osp)
4119 _CheckHVParams(self, node_list, hv_name, new_osp)
4121 if self.op.default_iallocator:
4122 alloc_script = utils.FindFile(self.op.default_iallocator,
4123 constants.IALLOCATOR_SEARCH_PATH,
4125 if alloc_script is None:
4126 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4127 " specified" % self.op.default_iallocator,
4130 def Exec(self, feedback_fn):
4131 """Change the parameters of the cluster.
4134 if self.op.vg_name is not None:
4135 new_volume = self.op.vg_name
4138 if new_volume != self.cfg.GetVGName():
4139 self.cfg.SetVGName(new_volume)
4141 feedback_fn("Cluster LVM configuration already in desired"
4142 " state, not changing")
4143 if self.op.drbd_helper is not None:
4144 new_helper = self.op.drbd_helper
4147 if new_helper != self.cfg.GetDRBDHelper():
4148 self.cfg.SetDRBDHelper(new_helper)
4150 feedback_fn("Cluster DRBD helper already in desired state,"
4152 if self.op.hvparams:
4153 self.cluster.hvparams = self.new_hvparams
4155 self.cluster.os_hvp = self.new_os_hvp
4156 if self.op.enabled_hypervisors is not None:
4157 self.cluster.hvparams = self.new_hvparams
4158 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4159 if self.op.beparams:
4160 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4161 if self.op.nicparams:
4162 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4164 self.cluster.ipolicy = self.new_ipolicy
4165 if self.op.osparams:
4166 self.cluster.osparams = self.new_osp
4167 if self.op.ndparams:
4168 self.cluster.ndparams = self.new_ndparams
4169 if self.op.diskparams:
4170 self.cluster.diskparams = self.new_diskparams
4171 if self.op.hv_state:
4172 self.cluster.hv_state_static = self.new_hv_state
4173 if self.op.disk_state:
4174 self.cluster.disk_state_static = self.new_disk_state
4176 if self.op.candidate_pool_size is not None:
4177 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4178 # we need to update the pool size here, otherwise the save will fail
4179 _AdjustCandidatePool(self, [])
4181 if self.op.maintain_node_health is not None:
4182 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4183 feedback_fn("Note: CONFD was disabled at build time, node health"
4184 " maintenance is not useful (still enabling it)")
4185 self.cluster.maintain_node_health = self.op.maintain_node_health
4187 if self.op.prealloc_wipe_disks is not None:
4188 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4190 if self.op.add_uids is not None:
4191 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4193 if self.op.remove_uids is not None:
4194 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4196 if self.op.uid_pool is not None:
4197 self.cluster.uid_pool = self.op.uid_pool
4199 if self.op.default_iallocator is not None:
4200 self.cluster.default_iallocator = self.op.default_iallocator
4202 if self.op.reserved_lvs is not None:
4203 self.cluster.reserved_lvs = self.op.reserved_lvs
4205 if self.op.use_external_mip_script is not None:
4206 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4208 def helper_os(aname, mods, desc):
4210 lst = getattr(self.cluster, aname)
4211 for key, val in mods:
4212 if key == constants.DDM_ADD:
4214 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4217 elif key == constants.DDM_REMOVE:
4221 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4223 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4225 if self.op.hidden_os:
4226 helper_os("hidden_os", self.op.hidden_os, "hidden")
4228 if self.op.blacklisted_os:
4229 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4231 if self.op.master_netdev:
4232 master_params = self.cfg.GetMasterNetworkParameters()
4233 ems = self.cfg.GetUseExternalMipScript()
4234 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4235 self.cluster.master_netdev)
4236 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4238 result.Raise("Could not disable the master ip")
4239 feedback_fn("Changing master_netdev from %s to %s" %
4240 (master_params.netdev, self.op.master_netdev))
4241 self.cluster.master_netdev = self.op.master_netdev
4243 if self.op.master_netmask:
4244 master_params = self.cfg.GetMasterNetworkParameters()
4245 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4246 result = self.rpc.call_node_change_master_netmask(master_params.name,
4247 master_params.netmask,
4248 self.op.master_netmask,
4250 master_params.netdev)
4252 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4255 self.cluster.master_netmask = self.op.master_netmask
4257 self.cfg.Update(self.cluster, feedback_fn)
4259 if self.op.master_netdev:
4260 master_params = self.cfg.GetMasterNetworkParameters()
4261 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4262 self.op.master_netdev)
4263 ems = self.cfg.GetUseExternalMipScript()
4264 result = self.rpc.call_node_activate_master_ip(master_params.name,
4267 self.LogWarning("Could not re-enable the master ip on"
4268 " the master, please restart manually: %s",
4272 def _UploadHelper(lu, nodes, fname):
4273 """Helper for uploading a file and showing warnings.
4276 if os.path.exists(fname):
4277 result = lu.rpc.call_upload_file(nodes, fname)
4278 for to_node, to_result in result.items():
4279 msg = to_result.fail_msg
4281 msg = ("Copy of file %s to node %s failed: %s" %
4282 (fname, to_node, msg))
4283 lu.proc.LogWarning(msg)
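# Added note (not part of the original source): if fname does not exist on
# the master, _UploadHelper uploads nothing at all; per-node copy failures
# are only logged as warnings, so callers cannot assume the file reached
# every node in the list.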
4286 def _ComputeAncillaryFiles(cluster, redist):
4287 """Compute files external to Ganeti which need to be consistent.
4289 @type redist: boolean
4290 @param redist: Whether to include files which need to be redistributed
4293 # Compute files for all nodes
4295 constants.SSH_KNOWN_HOSTS_FILE,
4296 constants.CONFD_HMAC_KEY,
4297 constants.CLUSTER_DOMAIN_SECRET_FILE,
4298 constants.SPICE_CERT_FILE,
4299 constants.SPICE_CACERT_FILE,
4300 constants.RAPI_USERS_FILE,
4304 files_all.update(constants.ALL_CERT_FILES)
4305 files_all.update(ssconf.SimpleStore().GetFileList())
4307 # we need to ship at least the RAPI certificate
4308 files_all.add(constants.RAPI_CERT_FILE)
4310 if cluster.modify_etc_hosts:
4311 files_all.add(constants.ETC_HOSTS)
4313 if cluster.use_external_mip_script:
4314 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4316 # Files which are optional, these must:
4317 # - be present in one other category as well
4318 # - either exist or not exist on all nodes of that category (mc, vm all)
4320 constants.RAPI_USERS_FILE,
4323 # Files which should only be on master candidates
4327 files_mc.add(constants.CLUSTER_CONF_FILE)
4329 # Files which should only be on VM-capable nodes
4330 files_vm = set(filename
4331 for hv_name in cluster.enabled_hypervisors
4332 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4334 files_opt |= set(filename
4335 for hv_name in cluster.enabled_hypervisors
4336 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4338 # Filenames in each category must be unique
4339 all_files_set = files_all | files_mc | files_vm
4340 assert (len(all_files_set) ==
4341 sum(map(len, [files_all, files_mc, files_vm]))), \
4342 "Found file listed in more than one file list"
4344 # Optional files must be present in one other category
4345 assert all_files_set.issuperset(files_opt), \
4346 "Optional file not in a different required list"
4348 return (files_all, files_opt, files_mc, files_vm)
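# Illustrative sketch of the return value (added for clarity; the exact file
# sets depend on build-time constants and the cluster configuration):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, redist=True)
#   # files_all: files every node should have (known_hosts, HMAC key,
#   #            SPICE/RAPI certificates, ...)
#   # files_opt: files that may legitimately be missing (e.g. RAPI users file)
#   # files_mc:  files restricted to master candidates
#   # files_vm:  hypervisor ancillary files for VM-capable nodes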
4351 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4352 """Distribute additional files which are part of the cluster configuration.
4354 ConfigWriter takes care of distributing the config and ssconf files, but
4355 there are more files which should be distributed to all nodes. This function
4356 makes sure those are copied.
4358 @param lu: calling logical unit
4359 @param additional_nodes: list of nodes not in the config to distribute to
4360 @type additional_vm: boolean
4361 @param additional_vm: whether the additional nodes are vm-capable or not
4364 # Gather target nodes
4365 cluster = lu.cfg.GetClusterInfo()
4366 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4368 online_nodes = lu.cfg.GetOnlineNodeList()
4369 online_set = frozenset(online_nodes)
4370 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4372 if additional_nodes is not None:
4373 online_nodes.extend(additional_nodes)
4375 vm_nodes.extend(additional_nodes)
4377 # Never distribute to master node
4378 for nodelist in [online_nodes, vm_nodes]:
4379 if master_info.name in nodelist:
4380 nodelist.remove(master_info.name)
4383 (files_all, _, files_mc, files_vm) = \
4384 _ComputeAncillaryFiles(cluster, True)
4386 # Never re-distribute configuration file from here
4387 assert not (constants.CLUSTER_CONF_FILE in files_all or
4388 constants.CLUSTER_CONF_FILE in files_vm)
4389 assert not files_mc, "Master candidates not handled in this function"
4392 (online_nodes, files_all),
4393 (vm_nodes, files_vm),
4397 for (node_list, files) in filemap:
4399 _UploadHelper(lu, node_list, fname)
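# Added note (not part of the original source): the filemap built above pairs
# (online_nodes, files_all) and (vm_nodes, files_vm); the master node is
# removed from both node lists beforehand, and the main configuration file is
# asserted never to be distributed from here, since ConfigWriter already
# takes care of it.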
4402 class LUClusterRedistConf(NoHooksLU):
4403 """Force the redistribution of cluster configuration.
4405 This is a very simple LU.
4410 def ExpandNames(self):
4411 self.needed_locks = {
4412 locking.LEVEL_NODE: locking.ALL_SET,
4414 self.share_locks[locking.LEVEL_NODE] = 1
4416 def Exec(self, feedback_fn):
4417 """Redistribute the configuration.
4420 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4421 _RedistributeAncillaryFiles(self)
4424 class LUClusterActivateMasterIp(NoHooksLU):
4425 """Activate the master IP on the master node.
4428 def Exec(self, feedback_fn):
4429 """Activate the master IP.
4432 master_params = self.cfg.GetMasterNetworkParameters()
4433 ems = self.cfg.GetUseExternalMipScript()
4434 result = self.rpc.call_node_activate_master_ip(master_params.name,
4436 result.Raise("Could not activate the master IP")
4439 class LUClusterDeactivateMasterIp(NoHooksLU):
4440 """Deactivate the master IP on the master node.
4443 def Exec(self, feedback_fn):
4444 """Deactivate the master IP.
4447 master_params = self.cfg.GetMasterNetworkParameters()
4448 ems = self.cfg.GetUseExternalMipScript()
4449 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4451 result.Raise("Could not deactivate the master IP")
4454 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4455   """Sleep and poll for an instance's disks to sync.
4458 if not instance.disks or disks is not None and not disks:
4461 disks = _ExpandCheckDisks(instance, disks)
4464 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4466 node = instance.primary_node
4469 lu.cfg.SetDiskID(dev, node)
4471 # TODO: Convert to utils.Retry
4474 degr_retries = 10 # in seconds, as we sleep 1 second each time
4478 cumul_degraded = False
4479 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4480 msg = rstats.fail_msg
4482 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4485 raise errors.RemoteError("Can't contact node %s for mirror data,"
4486 " aborting." % node)
4489 rstats = rstats.payload
4491 for i, mstat in enumerate(rstats):
4493 lu.LogWarning("Can't compute data for node %s/%s",
4494 node, disks[i].iv_name)
4497 cumul_degraded = (cumul_degraded or
4498 (mstat.is_degraded and mstat.sync_percent is None))
4499 if mstat.sync_percent is not None:
4501 if mstat.estimated_time is not None:
4502 rem_time = ("%s remaining (estimated)" %
4503 utils.FormatSeconds(mstat.estimated_time))
4504 max_time = mstat.estimated_time
4506 rem_time = "no time estimate"
4507 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4508 (disks[i].iv_name, mstat.sync_percent, rem_time))
4510 # if we're done but degraded, let's do a few small retries, to
4511 # make sure we see a stable and not transient situation; therefore
4512 # we force restart of the loop
4513 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4514 logging.info("Degraded disks found, %d retries left", degr_retries)
4522 time.sleep(min(60, max_time))
4525 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4526 return not cumul_degraded
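# Added note (not part of the original source): _WaitForSync polls
# call_blockdev_getmirrorstatus on the instance's primary node, sleeps up to
# min(60, max_time) seconds between rounds, and when the disks report done
# but still degraded it retries a bounded number of times (degr_retries)
# before giving up; the result is True only if no disk was left degraded.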
4529 def _BlockdevFind(lu, node, dev, instance):
4530 """Wrapper around call_blockdev_find to annotate diskparams.
4532 @param lu: A reference to the lu object
4533   @param node: The node to call out to
4534 @param dev: The device to find
4535 @param instance: The instance object the device belongs to
4536   @return: The result of the rpc call
4539 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4540 return lu.rpc.call_blockdev_find(node, disk)
4543 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4544 """Wrapper around L{_CheckDiskConsistencyInner}.
4547 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4548 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4552 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4554 """Check that mirrors are not degraded.
4556 @attention: The device has to be annotated already.
4558 The ldisk parameter, if True, will change the test from the
4559 is_degraded attribute (which represents overall non-ok status for
4560 the device(s)) to the ldisk (representing the local storage status).
4563 lu.cfg.SetDiskID(dev, node)
4567 if on_primary or dev.AssembleOnSecondary():
4568 rstats = lu.rpc.call_blockdev_find(node, dev)
4569 msg = rstats.fail_msg
4571 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4573 elif not rstats.payload:
4574 lu.LogWarning("Can't find disk on node %s", node)
4578 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4580 result = result and not rstats.payload.is_degraded
4583 for child in dev.children:
4584 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4590 class LUOobCommand(NoHooksLU):
4591 """Logical unit for OOB handling.
4595 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4597 def ExpandNames(self):
4598 """Gather locks we need.
4601 if self.op.node_names:
4602 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4603 lock_names = self.op.node_names
4605 lock_names = locking.ALL_SET
4607 self.needed_locks = {
4608 locking.LEVEL_NODE: lock_names,
4611 def CheckPrereq(self):
4612 """Check prerequisites.
4615 - the node exists in the configuration
4618 Any errors are signaled by raising errors.OpPrereqError.
4622 self.master_node = self.cfg.GetMasterNode()
4624 assert self.op.power_delay >= 0.0
4626 if self.op.node_names:
4627 if (self.op.command in self._SKIP_MASTER and
4628 self.master_node in self.op.node_names):
4629 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4630 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4632 if master_oob_handler:
4633 additional_text = ("run '%s %s %s' if you want to operate on the"
4634 " master regardless") % (master_oob_handler,
4638 additional_text = "it does not support out-of-band operations"
4640 raise errors.OpPrereqError(("Operating on the master node %s is not"
4641 " allowed for %s; %s") %
4642 (self.master_node, self.op.command,
4643 additional_text), errors.ECODE_INVAL)
4645 self.op.node_names = self.cfg.GetNodeList()
4646 if self.op.command in self._SKIP_MASTER:
4647 self.op.node_names.remove(self.master_node)
4649 if self.op.command in self._SKIP_MASTER:
4650 assert self.master_node not in self.op.node_names
4652 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4654 raise errors.OpPrereqError("Node %s not found" % node_name,
4657 self.nodes.append(node)
4659 if (not self.op.ignore_status and
4660 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4661 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4662 " not marked offline") % node_name,
4665 def Exec(self, feedback_fn):
4666 """Execute OOB and return result if we expect any.
4669 master_node = self.master_node
4672 for idx, node in enumerate(utils.NiceSort(self.nodes,
4673 key=lambda node: node.name)):
4674 node_entry = [(constants.RS_NORMAL, node.name)]
4675 ret.append(node_entry)
4677 oob_program = _SupportsOob(self.cfg, node)
4680 node_entry.append((constants.RS_UNAVAIL, None))
4683 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4684 self.op.command, oob_program, node.name)
4685 result = self.rpc.call_run_oob(master_node, oob_program,
4686 self.op.command, node.name,
4690 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4691 node.name, result.fail_msg)
4692 node_entry.append((constants.RS_NODATA, None))
4695 self._CheckPayload(result)
4696 except errors.OpExecError, err:
4697 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4699 node_entry.append((constants.RS_NODATA, None))
4701 if self.op.command == constants.OOB_HEALTH:
4702 # For health we should log important events
4703 for item, status in result.payload:
4704 if status in [constants.OOB_STATUS_WARNING,
4705 constants.OOB_STATUS_CRITICAL]:
4706 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4707 item, node.name, status)
4709 if self.op.command == constants.OOB_POWER_ON:
4711 elif self.op.command == constants.OOB_POWER_OFF:
4712 node.powered = False
4713 elif self.op.command == constants.OOB_POWER_STATUS:
4714 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4715 if powered != node.powered:
4716 logging.warning(("Recorded power state (%s) of node '%s' does not"
4717 " match actual power state (%s)"), node.powered,
4720 # For configuration changing commands we should update the node
4721 if self.op.command in (constants.OOB_POWER_ON,
4722 constants.OOB_POWER_OFF):
4723 self.cfg.Update(node, feedback_fn)
4725 node_entry.append((constants.RS_NORMAL, result.payload))
4727 if (self.op.command == constants.OOB_POWER_ON and
4728 idx < len(self.nodes) - 1):
4729 time.sleep(self.op.power_delay)
4733 def _CheckPayload(self, result):
4734 """Checks if the payload is valid.
4736 @param result: RPC result
4737 @raises errors.OpExecError: If payload is not valid
4741 if self.op.command == constants.OOB_HEALTH:
4742 if not isinstance(result.payload, list):
4743 errs.append("command 'health' is expected to return a list but got %s" %
4744 type(result.payload))
4746 for item, status in result.payload:
4747 if status not in constants.OOB_STATUSES:
4748 errs.append("health item '%s' has invalid status '%s'" %
4751 if self.op.command == constants.OOB_POWER_STATUS:
4752 if not isinstance(result.payload, dict):
4753 errs.append("power-status is expected to return a dict but got %s" %
4754 type(result.payload))
4756 if self.op.command in [
4757 constants.OOB_POWER_ON,
4758 constants.OOB_POWER_OFF,
4759 constants.OOB_POWER_CYCLE,
4761 if result.payload is not None:
4762 errs.append("%s is expected to not return payload but got '%s'" %
4763 (self.op.command, result.payload))
4766 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4767 utils.CommaJoin(errs))
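  # Payload shapes enforced above (descriptive comment, not part of the
  # original source):
  #   OOB_HEALTH       -> list of (item, status) pairs, status in OOB_STATUSES
  #   OOB_POWER_STATUS -> dict containing OOB_POWER_STATUS_POWERED
  #   OOB_POWER_ON / OOB_POWER_OFF / OOB_POWER_CYCLE -> no payload (None)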
4770 class _OsQuery(_QueryBase):
4771 FIELDS = query.OS_FIELDS
4773 def ExpandNames(self, lu):
4774 # Lock all nodes in shared mode
4775 # Temporary removal of locks, should be reverted later
4776 # TODO: reintroduce locks when they are lighter-weight
4777 lu.needed_locks = {}
4778 #self.share_locks[locking.LEVEL_NODE] = 1
4779 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4781 # The following variables interact with _QueryBase._GetNames
4783 self.wanted = self.names
4785 self.wanted = locking.ALL_SET
4787 self.do_locking = self.use_locking
4789 def DeclareLocks(self, lu, level):
4793 def _DiagnoseByOS(rlist):
4794     """Remaps a per-node return list into a per-os per-node dictionary
4796 @param rlist: a map with node names as keys and OS objects as values
4799 @return: a dictionary with osnames as keys and as value another
4800 map, with nodes as keys and tuples of (path, status, diagnose,
4801 variants, parameters, api_versions) as values, eg::
4803 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4804 (/srv/..., False, "invalid api")],
4805 "node2": [(/srv/..., True, "", [], [])]}
4810 # we build here the list of nodes that didn't fail the RPC (at RPC
4811 # level), so that nodes with a non-responding node daemon don't
4812 # make all OSes invalid
4813 good_nodes = [node_name for node_name in rlist
4814 if not rlist[node_name].fail_msg]
4815 for node_name, nr in rlist.items():
4816 if nr.fail_msg or not nr.payload:
4818 for (name, path, status, diagnose, variants,
4819 params, api_versions) in nr.payload:
4820 if name not in all_os:
4821 # build a list of nodes for this os containing empty lists
4822 # for each node in node_list
4824 for nname in good_nodes:
4825 all_os[name][nname] = []
4826 # convert params from [name, help] to (name, help)
4827 params = [tuple(v) for v in params]
4828 all_os[name][node_name].append((path, status, diagnose,
4829 variants, params, api_versions))
4832 def _GetQueryData(self, lu):
4833 """Computes the list of nodes and their attributes.
4836 # Locking is not used
4837 assert not (compat.any(lu.glm.is_owned(level)
4838 for level in locking.LEVELS
4839 if level != locking.LEVEL_CLUSTER) or
4840 self.do_locking or self.use_locking)
4842 valid_nodes = [node.name
4843 for node in lu.cfg.GetAllNodesInfo().values()
4844 if not node.offline and node.vm_capable]
4845 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4846 cluster = lu.cfg.GetClusterInfo()
4850 for (os_name, os_data) in pol.items():
4851 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4852 hidden=(os_name in cluster.hidden_os),
4853 blacklisted=(os_name in cluster.blacklisted_os))
4857 api_versions = set()
4859 for idx, osl in enumerate(os_data.values()):
4860 info.valid = bool(info.valid and osl and osl[0][1])
4864 (node_variants, node_params, node_api) = osl[0][3:6]
4867 variants.update(node_variants)
4868 parameters.update(node_params)
4869 api_versions.update(node_api)
4871 # Filter out inconsistent values
4872 variants.intersection_update(node_variants)
4873 parameters.intersection_update(node_params)
4874 api_versions.intersection_update(node_api)
4876 info.variants = list(variants)
4877 info.parameters = list(parameters)
4878 info.api_versions = list(api_versions)
4880 data[os_name] = info
4882 # Prepare data in requested order
4883 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4887 class LUOsDiagnose(NoHooksLU):
4888 """Logical unit for OS diagnose/query.
4894 def _BuildFilter(fields, names):
4895 """Builds a filter for querying OSes.
4898 name_filter = qlang.MakeSimpleFilter("name", names)
4900 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4901 # respective field is not requested
4902 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4903 for fname in ["hidden", "blacklisted"]
4904 if fname not in fields]
4905 if "valid" not in fields:
4906 status_filter.append([qlang.OP_TRUE, "valid"])
4909 status_filter.insert(0, qlang.OP_AND)
4911 status_filter = None
4913 if name_filter and status_filter:
4914 return [qlang.OP_AND, name_filter, status_filter]
4918 return status_filter
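  # Illustrative example (added for clarity; the exact shape of the name
  # filter produced by qlang.MakeSimpleFilter is an assumption here).  With
  # fields=["name", "variants"] and names=["debian-etch"], the combined
  # filter is roughly:
  #
  #   [qlang.OP_AND,
  #    <name filter for "debian-etch">,
  #    [qlang.OP_AND,
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #     [qlang.OP_TRUE, "valid"]]]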
4920 def CheckArguments(self):
4921 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4922 self.op.output_fields, False)
4924 def ExpandNames(self):
4925 self.oq.ExpandNames(self)
4927 def Exec(self, feedback_fn):
4928 return self.oq.OldStyleQuery(self)
4931 class LUNodeRemove(LogicalUnit):
4932 """Logical unit for removing a node.
4935 HPATH = "node-remove"
4936 HTYPE = constants.HTYPE_NODE
4938 def BuildHooksEnv(self):
4943 "OP_TARGET": self.op.node_name,
4944 "NODE_NAME": self.op.node_name,
4947 def BuildHooksNodes(self):
4948 """Build hooks nodes.
4950 This doesn't run on the target node in the pre phase as a failed
4951 node would then be impossible to remove.
4954 all_nodes = self.cfg.GetNodeList()
4956 all_nodes.remove(self.op.node_name)
4959 return (all_nodes, all_nodes)
4961 def CheckPrereq(self):
4962 """Check prerequisites.
4965 - the node exists in the configuration
4966 - it does not have primary or secondary instances
4967 - it's not the master
4969 Any errors are signaled by raising errors.OpPrereqError.
4972 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4973 node = self.cfg.GetNodeInfo(self.op.node_name)
4974 assert node is not None
4976 masternode = self.cfg.GetMasterNode()
4977 if node.name == masternode:
4978 raise errors.OpPrereqError("Node is the master node, failover to another"
4979 " node is required", errors.ECODE_INVAL)
4981 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4982 if node.name in instance.all_nodes:
4983 raise errors.OpPrereqError("Instance %s is still running on the node,"
4984 " please remove first" % instance_name,
4986 self.op.node_name = node.name
4989 def Exec(self, feedback_fn):
4990 """Removes the node from the cluster.
4994 logging.info("Stopping the node daemon and removing configs from node %s",
4997 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4999 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5002 # Promote nodes to master candidate as needed
5003 _AdjustCandidatePool(self, exceptions=[node.name])
5004 self.context.RemoveNode(node.name)
5006 # Run post hooks on the node before it's removed
5007 _RunPostHook(self, node.name)
5009 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5010 msg = result.fail_msg
5012 self.LogWarning("Errors encountered on the remote node while leaving"
5013 " the cluster: %s", msg)
5015 # Remove node from our /etc/hosts
5016 if self.cfg.GetClusterInfo().modify_etc_hosts:
5017 master_node = self.cfg.GetMasterNode()
5018 result = self.rpc.call_etc_hosts_modify(master_node,
5019 constants.ETC_HOSTS_REMOVE,
5021 result.Raise("Can't update hosts file with new host data")
5022 _RedistributeAncillaryFiles(self)
5025 class _NodeQuery(_QueryBase):
5026 FIELDS = query.NODE_FIELDS
5028 def ExpandNames(self, lu):
5029 lu.needed_locks = {}
5030 lu.share_locks = _ShareAll()
5033 self.wanted = _GetWantedNodes(lu, self.names)
5035 self.wanted = locking.ALL_SET
5037 self.do_locking = (self.use_locking and
5038 query.NQ_LIVE in self.requested_data)
5041 # If any non-static field is requested we need to lock the nodes
5042 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5044 def DeclareLocks(self, lu, level):
5047 def _GetQueryData(self, lu):
5048 """Computes the list of nodes and their attributes.
5051 all_info = lu.cfg.GetAllNodesInfo()
5053 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5055 # Gather data as requested
5056 if query.NQ_LIVE in self.requested_data:
5057 # filter out non-vm_capable nodes
5058 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5060 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5061 [lu.cfg.GetHypervisorType()])
5062 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5063 for (name, nresult) in node_data.items()
5064 if not nresult.fail_msg and nresult.payload)
5068 if query.NQ_INST in self.requested_data:
5069 node_to_primary = dict([(name, set()) for name in nodenames])
5070 node_to_secondary = dict([(name, set()) for name in nodenames])
5072 inst_data = lu.cfg.GetAllInstancesInfo()
5074 for inst in inst_data.values():
5075 if inst.primary_node in node_to_primary:
5076 node_to_primary[inst.primary_node].add(inst.name)
5077 for secnode in inst.secondary_nodes:
5078 if secnode in node_to_secondary:
5079 node_to_secondary[secnode].add(inst.name)
5081 node_to_primary = None
5082 node_to_secondary = None
5084 if query.NQ_OOB in self.requested_data:
5085 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5086 for name, node in all_info.iteritems())
5090 if query.NQ_GROUP in self.requested_data:
5091 groups = lu.cfg.GetAllNodeGroupsInfo()
5095 return query.NodeQueryData([all_info[name] for name in nodenames],
5096 live_data, lu.cfg.GetMasterNode(),
5097 node_to_primary, node_to_secondary, groups,
5098 oob_support, lu.cfg.GetClusterInfo())
5101 class LUNodeQuery(NoHooksLU):
5102 """Logical unit for querying nodes.
5105 # pylint: disable=W0142
5108 def CheckArguments(self):
5109 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5110 self.op.output_fields, self.op.use_locking)
5112 def ExpandNames(self):
5113 self.nq.ExpandNames(self)
5115 def DeclareLocks(self, level):
5116 self.nq.DeclareLocks(self, level)
5118 def Exec(self, feedback_fn):
5119 return self.nq.OldStyleQuery(self)
5122 class LUNodeQueryvols(NoHooksLU):
5123 """Logical unit for getting volumes on node(s).
5127 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5128 _FIELDS_STATIC = utils.FieldSet("node")
5130 def CheckArguments(self):
5131 _CheckOutputFields(static=self._FIELDS_STATIC,
5132 dynamic=self._FIELDS_DYNAMIC,
5133 selected=self.op.output_fields)
5135 def ExpandNames(self):
5136 self.share_locks = _ShareAll()
5137 self.needed_locks = {}
5139 if not self.op.nodes:
5140 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5142 self.needed_locks[locking.LEVEL_NODE] = \
5143 _GetWantedNodes(self, self.op.nodes)
5145 def Exec(self, feedback_fn):
5146 """Computes the list of nodes and their attributes.
5149 nodenames = self.owned_locks(locking.LEVEL_NODE)
5150 volumes = self.rpc.call_node_volumes(nodenames)
5152 ilist = self.cfg.GetAllInstancesInfo()
5153 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5156 for node in nodenames:
5157 nresult = volumes[node]
5160 msg = nresult.fail_msg
5162 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5165 node_vols = sorted(nresult.payload,
5166 key=operator.itemgetter("dev"))
5168 for vol in node_vols:
5170 for field in self.op.output_fields:
5173 elif field == "phys":
5177 elif field == "name":
5179 elif field == "size":
5180 val = int(float(vol["size"]))
5181 elif field == "instance":
5182 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5184 raise errors.ParameterError(field)
5185 node_output.append(str(val))
5187 output.append(node_output)
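# Added note (not part of the original source): each row appended to "output"
# above describes one volume, with its columns in exactly the order given by
# self.op.output_fields (e.g. ["node", "phys", "vg", "name", "size",
# "instance"]) and every value converted to a string.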
5192 class LUNodeQueryStorage(NoHooksLU):
5193 """Logical unit for getting information on storage units on node(s).
5196 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5199 def CheckArguments(self):
5200 _CheckOutputFields(static=self._FIELDS_STATIC,
5201 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5202 selected=self.op.output_fields)
5204 def ExpandNames(self):
5205 self.share_locks = _ShareAll()
5206 self.needed_locks = {}
5209 self.needed_locks[locking.LEVEL_NODE] = \
5210 _GetWantedNodes(self, self.op.nodes)
5212 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5214 def Exec(self, feedback_fn):
5215 """Computes the list of nodes and their attributes.
5218 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5220 # Always get name to sort by
5221 if constants.SF_NAME in self.op.output_fields:
5222 fields = self.op.output_fields[:]
5224 fields = [constants.SF_NAME] + self.op.output_fields
5226 # Never ask for node or type as it's only known to the LU
5227 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5228 while extra in fields:
5229 fields.remove(extra)
5231 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5232 name_idx = field_idx[constants.SF_NAME]
5234 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5235 data = self.rpc.call_storage_list(self.nodes,
5236 self.op.storage_type, st_args,
5237 self.op.name, fields)
5241 for node in utils.NiceSort(self.nodes):
5242 nresult = data[node]
5246 msg = nresult.fail_msg
5248 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5251 rows = dict([(row[name_idx], row) for row in nresult.payload])
5253 for name in utils.NiceSort(rows.keys()):
5258 for field in self.op.output_fields:
5259 if field == constants.SF_NODE:
5261 elif field == constants.SF_TYPE:
5262 val = self.op.storage_type
5263 elif field in field_idx:
5264 val = row[field_idx[field]]
5266 raise errors.ParameterError(field)
5275 class _InstanceQuery(_QueryBase):
5276 FIELDS = query.INSTANCE_FIELDS
5278 def ExpandNames(self, lu):
5279 lu.needed_locks = {}
5280 lu.share_locks = _ShareAll()
5283 self.wanted = _GetWantedInstances(lu, self.names)
5285 self.wanted = locking.ALL_SET
5287 self.do_locking = (self.use_locking and
5288 query.IQ_LIVE in self.requested_data)
5290 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5291 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5292 lu.needed_locks[locking.LEVEL_NODE] = []
5293 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5295 self.do_grouplocks = (self.do_locking and
5296 query.IQ_NODES in self.requested_data)
5298 def DeclareLocks(self, lu, level):
5300 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5301 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5303 # Lock all groups used by instances optimistically; this requires going
5304 # via the node before it's locked, requiring verification later on
5305 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5307 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5308 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5309 elif level == locking.LEVEL_NODE:
5310 lu._LockInstancesNodes() # pylint: disable=W0212
5313 def _CheckGroupLocks(lu):
5314 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5315 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5317 # Check if node groups for locked instances are still correct
5318 for instance_name in owned_instances:
5319 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5321 def _GetQueryData(self, lu):
5322 """Computes the list of instances and their attributes.
5325 if self.do_grouplocks:
5326 self._CheckGroupLocks(lu)
5328 cluster = lu.cfg.GetClusterInfo()
5329 all_info = lu.cfg.GetAllInstancesInfo()
5331 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5333 instance_list = [all_info[name] for name in instance_names]
5334 nodes = frozenset(itertools.chain(*(inst.all_nodes
5335 for inst in instance_list)))
5336 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5339 wrongnode_inst = set()
5341 # Gather data as requested
5342 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5344 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5346 result = node_data[name]
5348 # offline nodes will be in both lists
5349 assert result.fail_msg
5350 offline_nodes.append(name)
5352 bad_nodes.append(name)
5353 elif result.payload:
5354 for inst in result.payload:
5355 if inst in all_info:
5356 if all_info[inst].primary_node == name:
5357 live_data.update(result.payload)
5359 wrongnode_inst.add(inst)
5361 # orphan instance; we don't list it here as we don't
5362 # handle this case yet in the output of instance listing
5363 logging.warning("Orphan instance '%s' found on node %s",
5365 # else no instance is alive
5369 if query.IQ_DISKUSAGE in self.requested_data:
5370 disk_usage = dict((inst.name,
5371 _ComputeDiskSize(inst.disk_template,
5372 [{constants.IDISK_SIZE: disk.size}
5373 for disk in inst.disks]))
5374 for inst in instance_list)
5378 if query.IQ_CONSOLE in self.requested_data:
5380 for inst in instance_list:
5381 if inst.name in live_data:
5382 # Instance is running
5383 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5385 consinfo[inst.name] = None
5386 assert set(consinfo.keys()) == set(instance_names)
5390 if query.IQ_NODES in self.requested_data:
5391 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5393 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5394 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5395 for uuid in set(map(operator.attrgetter("group"),
5401 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5402 disk_usage, offline_nodes, bad_nodes,
5403 live_data, wrongnode_inst, consinfo,
5407 class LUQuery(NoHooksLU):
5408 """Query for resources/items of a certain kind.
5411 # pylint: disable=W0142
5414 def CheckArguments(self):
5415 qcls = _GetQueryImplementation(self.op.what)
5417 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5419 def ExpandNames(self):
5420 self.impl.ExpandNames(self)
5422 def DeclareLocks(self, level):
5423 self.impl.DeclareLocks(self, level)
5425 def Exec(self, feedback_fn):
5426 return self.impl.NewStyleQuery(self)
5429 class LUQueryFields(NoHooksLU):
5430 """Query for resources/items of a certain kind.
5433 # pylint: disable=W0142
5436 def CheckArguments(self):
5437 self.qcls = _GetQueryImplementation(self.op.what)
5439 def ExpandNames(self):
5440 self.needed_locks = {}
5442 def Exec(self, feedback_fn):
5443 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5446 class LUNodeModifyStorage(NoHooksLU):
5447 """Logical unit for modifying a storage volume on a node.
5452 def CheckArguments(self):
5453 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5455 storage_type = self.op.storage_type
5458 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5460 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5461 " modified" % storage_type,
5464 diff = set(self.op.changes.keys()) - modifiable
5466 raise errors.OpPrereqError("The following fields can not be modified for"
5467 " storage units of type '%s': %r" %
5468 (storage_type, list(diff)),
5471 def ExpandNames(self):
5472 self.needed_locks = {
5473 locking.LEVEL_NODE: self.op.node_name,
5476 def Exec(self, feedback_fn):
5477     """Modifies a storage unit on the given node.
5480 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5481 result = self.rpc.call_storage_modify(self.op.node_name,
5482 self.op.storage_type, st_args,
5483 self.op.name, self.op.changes)
5484 result.Raise("Failed to modify storage unit '%s' on %s" %
5485 (self.op.name, self.op.node_name))
5488 class LUNodeAdd(LogicalUnit):
5489   """Logical unit for adding a node to the cluster.
5493 HTYPE = constants.HTYPE_NODE
5494 _NFLAGS = ["master_capable", "vm_capable"]
5496 def CheckArguments(self):
5497 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5498 # validate/normalize the node name
5499 self.hostname = netutils.GetHostname(name=self.op.node_name,
5500 family=self.primary_ip_family)
5501 self.op.node_name = self.hostname.name
5503 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5504 raise errors.OpPrereqError("Cannot readd the master node",
5507 if self.op.readd and self.op.group:
5508 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5509 " being readded", errors.ECODE_INVAL)
5511 def BuildHooksEnv(self):
5514 This will run on all nodes before, and on all nodes + the new node after.
5518 "OP_TARGET": self.op.node_name,
5519 "NODE_NAME": self.op.node_name,
5520 "NODE_PIP": self.op.primary_ip,
5521 "NODE_SIP": self.op.secondary_ip,
5522 "MASTER_CAPABLE": str(self.op.master_capable),
5523 "VM_CAPABLE": str(self.op.vm_capable),
5526 def BuildHooksNodes(self):
5527 """Build hooks nodes.
5530 # Exclude added node
5531 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5532 post_nodes = pre_nodes + [self.op.node_name, ]
5534 return (pre_nodes, post_nodes)
5536 def CheckPrereq(self):
5537 """Check prerequisites.
5540 - the new node is not already in the config
5542     - its parameters (single/dual homed) match the cluster
5544 Any errors are signaled by raising errors.OpPrereqError.
5548 hostname = self.hostname
5549 node = hostname.name
5550 primary_ip = self.op.primary_ip = hostname.ip
5551 if self.op.secondary_ip is None:
5552 if self.primary_ip_family == netutils.IP6Address.family:
5553         raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5554 " IPv4 address must be given as secondary",
5556 self.op.secondary_ip = primary_ip
5558 secondary_ip = self.op.secondary_ip
5559 if not netutils.IP4Address.IsValid(secondary_ip):
5560 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5561 " address" % secondary_ip, errors.ECODE_INVAL)
5563 node_list = cfg.GetNodeList()
5564 if not self.op.readd and node in node_list:
5565 raise errors.OpPrereqError("Node %s is already in the configuration" %
5566 node, errors.ECODE_EXISTS)
5567 elif self.op.readd and node not in node_list:
5568 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5571 self.changed_primary_ip = False
5573 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5574 if self.op.readd and node == existing_node_name:
5575 if existing_node.secondary_ip != secondary_ip:
5576 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5577 " address configuration as before",
5579 if existing_node.primary_ip != primary_ip:
5580 self.changed_primary_ip = True
5584 if (existing_node.primary_ip == primary_ip or
5585 existing_node.secondary_ip == primary_ip or
5586 existing_node.primary_ip == secondary_ip or
5587 existing_node.secondary_ip == secondary_ip):
5588 raise errors.OpPrereqError("New node ip address(es) conflict with"
5589 " existing node %s" % existing_node.name,
5590 errors.ECODE_NOTUNIQUE)
5592 # After this 'if' block, None is no longer a valid value for the
5593 # _capable op attributes
5595 old_node = self.cfg.GetNodeInfo(node)
5596 assert old_node is not None, "Can't retrieve locked node %s" % node
5597 for attr in self._NFLAGS:
5598 if getattr(self.op, attr) is None:
5599 setattr(self.op, attr, getattr(old_node, attr))
5601 for attr in self._NFLAGS:
5602 if getattr(self.op, attr) is None:
5603 setattr(self.op, attr, True)
5605 if self.op.readd and not self.op.vm_capable:
5606 pri, sec = cfg.GetNodeInstances(node)
5608 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5609 " flag set to false, but it already holds"
5610 " instances" % node,
5613 # check that the type of the node (single versus dual homed) is the
5614 # same as for the master
5615 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5616 master_singlehomed = myself.secondary_ip == myself.primary_ip
5617 newbie_singlehomed = secondary_ip == primary_ip
5618 if master_singlehomed != newbie_singlehomed:
5619 if master_singlehomed:
5620 raise errors.OpPrereqError("The master has no secondary ip but the"
5621 " new node has one",
5624 raise errors.OpPrereqError("The master has a secondary ip but the"
5625 " new node doesn't have one",
5628 # checks reachability
5629 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5630 raise errors.OpPrereqError("Node not reachable by ping",
5631 errors.ECODE_ENVIRON)
5633 if not newbie_singlehomed:
5634 # check reachability from my secondary ip to newbie's secondary ip
5635 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5636 source=myself.secondary_ip):
5637 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5638 " based ping to node daemon port",
5639 errors.ECODE_ENVIRON)
5646 if self.op.master_capable:
5647 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5649 self.master_candidate = False
5652 self.new_node = old_node
5654 node_group = cfg.LookupNodeGroup(self.op.group)
5655 self.new_node = objects.Node(name=node,
5656 primary_ip=primary_ip,
5657 secondary_ip=secondary_ip,
5658 master_candidate=self.master_candidate,
5659 offline=False, drained=False,
5662 if self.op.ndparams:
5663 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5665 if self.op.hv_state:
5666 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5668 if self.op.disk_state:
5669 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5671 # TODO: If we need to have multiple DnsOnlyRunner instances, we probably
5672 # should make it a property on the base class.
5673 result = rpc.DnsOnlyRunner().call_version([node])[node]
5674 result.Raise("Can't get version information from node %s" % node)
5675 if constants.PROTOCOL_VERSION == result.payload:
5676 logging.info("Communication to node %s fine, sw version %s match",
5677 node, result.payload)
5679 raise errors.OpPrereqError("Version mismatch master version %s,"
5680 " node version %s" %
5681 (constants.PROTOCOL_VERSION, result.payload),
5682 errors.ECODE_ENVIRON)
5684 def Exec(self, feedback_fn):
5685 """Adds the new node to the cluster.
5688 new_node = self.new_node
5689 node = new_node.name
5691 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5694 # We are adding a new node, so we assume it is powered
5695 new_node.powered = True
5697 # for re-adds, reset the offline/drained/master-candidate flags;
5698 # we need to reset here, otherwise offline would prevent RPC calls
5699 # later in the procedure; this also means that if the re-add
5700 # fails, we are left with a non-offlined, broken node
5702 new_node.drained = new_node.offline = False # pylint: disable=W0201
5703 self.LogInfo("Readding a node, the offline/drained flags were reset")
5704 # if we demote the node, we do cleanup later in the procedure
5705 new_node.master_candidate = self.master_candidate
5706 if self.changed_primary_ip:
5707 new_node.primary_ip = self.op.primary_ip
5709 # copy the master/vm_capable flags
5710 for attr in self._NFLAGS:
5711 setattr(new_node, attr, getattr(self.op, attr))
5713 # notify the user about any possible mc promotion
5714 if new_node.master_candidate:
5715 self.LogInfo("Node will be a master candidate")
5717 if self.op.ndparams:
5718 new_node.ndparams = self.op.ndparams
5720 new_node.ndparams = {}
5722 if self.op.hv_state:
5723 new_node.hv_state_static = self.new_hv_state
5725 if self.op.disk_state:
5726 new_node.disk_state_static = self.new_disk_state
5728 # Add node to our /etc/hosts, and add key to known_hosts
5729 if self.cfg.GetClusterInfo().modify_etc_hosts:
5730 master_node = self.cfg.GetMasterNode()
5731 result = self.rpc.call_etc_hosts_modify(master_node,
5732 constants.ETC_HOSTS_ADD,
5735 result.Raise("Can't update hosts file with new host data")
5737 if new_node.secondary_ip != new_node.primary_ip:
5738 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5741 node_verify_list = [self.cfg.GetMasterNode()]
5742 node_verify_param = {
5743 constants.NV_NODELIST: ([node], {}),
5744 # TODO: do a node-net-test as well?
5747 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5748 self.cfg.GetClusterName())
5749 for verifier in node_verify_list:
5750 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5751 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5753 for failed in nl_payload:
5754 feedback_fn("ssh/hostname verification failed"
5755 " (checking from %s): %s" %
5756 (verifier, nl_payload[failed]))
5757 raise errors.OpExecError("ssh/hostname verification failed")
5760 _RedistributeAncillaryFiles(self)
5761 self.context.ReaddNode(new_node)
5762 # make sure we redistribute the config
5763 self.cfg.Update(new_node, feedback_fn)
5764 # and make sure the new node will not have old files around
5765 if not new_node.master_candidate:
5766 result = self.rpc.call_node_demote_from_mc(new_node.name)
5767 msg = result.fail_msg
5769 self.LogWarning("Node failed to demote itself from master"
5770 " candidate status: %s" % msg)
5772 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5773 additional_vm=self.op.vm_capable)
5774 self.context.AddNode(new_node, self.proc.GetECId())
5777 class LUNodeSetParams(LogicalUnit):
5778 """Modifies the parameters of a node.
5780 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5781 to the node role (as _ROLE_*)
5782 @cvar _R2F: a dictionary from node role to tuples of flags
5783 @cvar _FLAGS: a list of attribute names corresponding to the flags
5786 HPATH = "node-modify"
5787 HTYPE = constants.HTYPE_NODE
5789 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5791 (True, False, False): _ROLE_CANDIDATE,
5792 (False, True, False): _ROLE_DRAINED,
5793 (False, False, True): _ROLE_OFFLINE,
5794 (False, False, False): _ROLE_REGULAR,
5796 _R2F = dict((v, k) for k, v in _F2R.items())
5797 _FLAGS = ["master_candidate", "drained", "offline"]
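# Illustration of the mapping above (a sketch, not used by the code): the
# flag order in the tuples follows _FLAGS, i.e.
# (master_candidate, drained, offline), so
#   _F2R[(True, False, False)] == _ROLE_CANDIDATE
#   _R2F[_ROLE_OFFLINE] == (False, False, True)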
5799 def CheckArguments(self):
5800 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5801 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5802 self.op.master_capable, self.op.vm_capable,
5803 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5805 if all_mods.count(None) == len(all_mods):
5806 raise errors.OpPrereqError("Please pass at least one modification",
5808 if all_mods.count(True) > 1:
5809 raise errors.OpPrereqError("Can't set the node into more than one"
5810 " state at the same time",
5813 # Boolean value that tells us whether we might be demoting from MC
5814 self.might_demote = (self.op.master_candidate == False or
5815 self.op.offline == True or
5816 self.op.drained == True or
5817 self.op.master_capable == False)
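# Example (informal): requesting offline=True, drained=True, or unsetting
# master_candidate/master_capable can all cost the cluster a master
# candidate; combined with auto_promote this is why lock_all (below) may
# need to grab every node lock so that another node can be promoted.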
5819 if self.op.secondary_ip:
5820 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5821 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5822 " address" % self.op.secondary_ip,
5825 self.lock_all = self.op.auto_promote and self.might_demote
5826 self.lock_instances = self.op.secondary_ip is not None
5828 def _InstanceFilter(self, instance):
5829 """Filter for getting affected instances.
5832 return (instance.disk_template in constants.DTS_INT_MIRROR and
5833 self.op.node_name in instance.all_nodes)
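# Example (sketch): a DRBD-based instance whose primary or secondary node is
# self.op.node_name matches this filter; plain or file-based instances never
# do, since their disk templates are not internally mirrored.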
5835 def ExpandNames(self):
5837 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5839 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5841 # Since modifying a node can have severe effects on currently running
5842 # operations, the resource lock is at least acquired in shared mode
5843 self.needed_locks[locking.LEVEL_NODE_RES] = \
5844 self.needed_locks[locking.LEVEL_NODE]
5846 # Get node resource and instance locks in shared mode; they are not used
5847 # for anything but read-only access
5848 self.share_locks[locking.LEVEL_NODE_RES] = 1
5849 self.share_locks[locking.LEVEL_INSTANCE] = 1
5851 if self.lock_instances:
5852 self.needed_locks[locking.LEVEL_INSTANCE] = \
5853 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5855 def BuildHooksEnv(self):
5858 This runs on the master node.
5862 "OP_TARGET": self.op.node_name,
5863 "MASTER_CANDIDATE": str(self.op.master_candidate),
5864 "OFFLINE": str(self.op.offline),
5865 "DRAINED": str(self.op.drained),
5866 "MASTER_CAPABLE": str(self.op.master_capable),
5867 "VM_CAPABLE": str(self.op.vm_capable),
5870 def BuildHooksNodes(self):
5871 """Build hooks nodes.
5874 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5877 def CheckPrereq(self):
5878 """Check prerequisites.
5880 This only checks the instance list against the existing names.
5883 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5885 if self.lock_instances:
5886 affected_instances = \
5887 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5889 # Verify instance locks
5890 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5891 wanted_instances = frozenset(affected_instances.keys())
5892 if wanted_instances - owned_instances:
5893 raise errors.OpPrereqError("Instances affected by changing node %s's"
5894 " secondary IP address have changed since"
5895 " locks were acquired, wanted '%s', have"
5896 " '%s'; retry the operation" %
5898 utils.CommaJoin(wanted_instances),
5899 utils.CommaJoin(owned_instances)),
5902 affected_instances = None
5904 if (self.op.master_candidate is not None or
5905 self.op.drained is not None or
5906 self.op.offline is not None):
5907 # we can't change the master's node flags
5908 if self.op.node_name == self.cfg.GetMasterNode():
5909 raise errors.OpPrereqError("The master role can be changed"
5910 " only via master-failover",
5913 if self.op.master_candidate and not node.master_capable:
5914 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5915 " it a master candidate" % node.name,
5918 if self.op.vm_capable == False:
5919 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5921 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5922 " the vm_capable flag" % node.name,
5925 if node.master_candidate and self.might_demote and not self.lock_all:
5926 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5927 # check if, after removing the current node, we're missing master candidates
5929 (mc_remaining, mc_should, _) = \
5930 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5931 if mc_remaining < mc_should:
5932 raise errors.OpPrereqError("Not enough master candidates, please"
5933 " pass auto promote option to allow"
5934 " promotion (--auto-promote or RAPI"
5935 " auto_promote=True)", errors.ECODE_STATE)
5937 self.old_flags = old_flags = (node.master_candidate,
5938 node.drained, node.offline)
5939 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5940 self.old_role = old_role = self._F2R[old_flags]
5942 # Check for ineffective changes
5943 for attr in self._FLAGS:
5944 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5945 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5946 setattr(self.op, attr, None)
5948 # Past this point, any flag change to False means a transition
5949 # away from the respective state, as only real changes are kept
5951 # TODO: We might query the real power state if it supports OOB
5952 if _SupportsOob(self.cfg, node):
5953 if self.op.offline is False and not (node.powered or
5954 self.op.powered == True):
5955 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5956 " offline status can be reset") %
5958 elif self.op.powered is not None:
5959 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5960 " as it does not support out-of-band"
5961 " handling") % self.op.node_name)
5963 # If we're being de-offlined or un-drained, we'll promote ourselves to MC if needed
5964 if (self.op.drained == False or self.op.offline == False or
5965 (self.op.master_capable and not node.master_capable)):
5966 if _DecideSelfPromotion(self):
5967 self.op.master_candidate = True
5968 self.LogInfo("Auto-promoting node to master candidate")
5970 # If we're no longer master capable, we'll demote ourselves from MC
5971 if self.op.master_capable == False and node.master_candidate:
5972 self.LogInfo("Demoting from master candidate")
5973 self.op.master_candidate = False
5976 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5977 if self.op.master_candidate:
5978 new_role = self._ROLE_CANDIDATE
5979 elif self.op.drained:
5980 new_role = self._ROLE_DRAINED
5981 elif self.op.offline:
5982 new_role = self._ROLE_OFFLINE
5983 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5984 # False is still in new flags, which means we're un-setting one of the offline/drained/master-candidate flags
5986 new_role = self._ROLE_REGULAR
5987 else: # no new flags, nothing, keep old role
5990 self.new_role = new_role
5992 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5993 # Trying to transition out of offline status
5994 result = self.rpc.call_version([node.name])[node.name]
5996 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5997 " to report its version: %s" %
5998 (node.name, result.fail_msg),
6001 self.LogWarning("Transitioning node from offline to online state"
6002 " without using re-add. Please make sure the node"
6005 if self.op.secondary_ip:
6006 # Ok even without locking, because this can't be changed by any LU
6007 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6008 master_singlehomed = master.secondary_ip == master.primary_ip
6009 if master_singlehomed and self.op.secondary_ip:
6010 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6011 " homed cluster", errors.ECODE_INVAL)
6013 assert not (frozenset(affected_instances) -
6014 self.owned_locks(locking.LEVEL_INSTANCE))
6017 if affected_instances:
6018 raise errors.OpPrereqError("Cannot change secondary IP address:"
6019 " offline node has instances (%s)"
6020 " configured to use it" %
6021 utils.CommaJoin(affected_instances.keys()))
6023 # On online nodes, check that no instances are running, and that
6024 # the node has the new ip and we can reach it.
6025 for instance in affected_instances.values():
6026 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6027 msg="cannot change secondary ip")
6029 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6030 if master.name != node.name:
6031 # check reachability from master secondary ip to new secondary ip
6032 if not netutils.TcpPing(self.op.secondary_ip,
6033 constants.DEFAULT_NODED_PORT,
6034 source=master.secondary_ip):
6035 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6036 " based ping to node daemon port",
6037 errors.ECODE_ENVIRON)
6039 if self.op.ndparams:
6040 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6041 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6042 self.new_ndparams = new_ndparams
6044 if self.op.hv_state:
6045 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6046 self.node.hv_state_static)
6048 if self.op.disk_state:
6049 self.new_disk_state = \
6050 _MergeAndVerifyDiskState(self.op.disk_state,
6051 self.node.disk_state_static)
6053 def Exec(self, feedback_fn):
6058 old_role = self.old_role
6059 new_role = self.new_role
6063 if self.op.ndparams:
6064 node.ndparams = self.new_ndparams
6066 if self.op.powered is not None:
6067 node.powered = self.op.powered
6069 if self.op.hv_state:
6070 node.hv_state_static = self.new_hv_state
6072 if self.op.disk_state:
6073 node.disk_state_static = self.new_disk_state
6075 for attr in ["master_capable", "vm_capable"]:
6076 val = getattr(self.op, attr)
6078 setattr(node, attr, val)
6079 result.append((attr, str(val)))
6081 if new_role != old_role:
6082 # Tell the node to demote itself, if no longer MC and not offline
6083 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6084 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6086 self.LogWarning("Node failed to demote itself: %s", msg)
6088 new_flags = self._R2F[new_role]
6089 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6091 result.append((desc, str(nf)))
6092 (node.master_candidate, node.drained, node.offline) = new_flags
6094 # we locked all nodes, we adjust the candidate pool before updating this node
6096 _AdjustCandidatePool(self, [node.name])
6098 if self.op.secondary_ip:
6099 node.secondary_ip = self.op.secondary_ip
6100 result.append(("secondary_ip", self.op.secondary_ip))
6102 # this will trigger configuration file update, if needed
6103 self.cfg.Update(node, feedback_fn)
6105 # this will trigger job queue propagation or cleanup if the mc flag changed
6107 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6108 self.context.ReaddNode(node)
6113 class LUNodePowercycle(NoHooksLU):
6114 """Powercycles a node.
6119 def CheckArguments(self):
6120 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6121 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6122 raise errors.OpPrereqError("The node is the master and the force"
6123 " parameter was not set",
6126 def ExpandNames(self):
6127 """Locking for PowercycleNode.
6129 This is a last-resort option and shouldn't block on other
6130 jobs. Therefore, we grab no locks.
6133 self.needed_locks = {}
6135 def Exec(self, feedback_fn):
6139 result = self.rpc.call_node_powercycle(self.op.node_name,
6140 self.cfg.GetHypervisorType())
6141 result.Raise("Failed to schedule the reboot")
6142 return result.payload
6145 class LUClusterQuery(NoHooksLU):
6146 """Query cluster configuration.
6151 def ExpandNames(self):
6152 self.needed_locks = {}
6154 def Exec(self, feedback_fn):
6155 """Return cluster config.
6158 cluster = self.cfg.GetClusterInfo()
6161 # Filter just for enabled hypervisors
6162 for os_name, hv_dict in cluster.os_hvp.items():
6163 os_hvp[os_name] = {}
6164 for hv_name, hv_params in hv_dict.items():
6165 if hv_name in cluster.enabled_hypervisors:
6166 os_hvp[os_name][hv_name] = hv_params
6168 # Convert ip_family to ip_version
6169 primary_ip_version = constants.IP4_VERSION
6170 if cluster.primary_ip_family == netutils.IP6Address.family:
6171 primary_ip_version = constants.IP6_VERSION
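# Example: a cluster whose primary_ip_family is the IPv6 address family
# reports primary_ip_version == constants.IP6_VERSION (i.e. 6); IPv4
# clusters report constants.IP4_VERSION (i.e. 4).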
6174 "software_version": constants.RELEASE_VERSION,
6175 "protocol_version": constants.PROTOCOL_VERSION,
6176 "config_version": constants.CONFIG_VERSION,
6177 "os_api_version": max(constants.OS_API_VERSIONS),
6178 "export_version": constants.EXPORT_VERSION,
6179 "architecture": runtime.GetArchInfo(),
6180 "name": cluster.cluster_name,
6181 "master": cluster.master_node,
6182 "default_hypervisor": cluster.primary_hypervisor,
6183 "enabled_hypervisors": cluster.enabled_hypervisors,
6184 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6185 for hypervisor_name in cluster.enabled_hypervisors]),
6187 "beparams": cluster.beparams,
6188 "osparams": cluster.osparams,
6189 "ipolicy": cluster.ipolicy,
6190 "nicparams": cluster.nicparams,
6191 "ndparams": cluster.ndparams,
6192 "diskparams": cluster.diskparams,
6193 "candidate_pool_size": cluster.candidate_pool_size,
6194 "master_netdev": cluster.master_netdev,
6195 "master_netmask": cluster.master_netmask,
6196 "use_external_mip_script": cluster.use_external_mip_script,
6197 "volume_group_name": cluster.volume_group_name,
6198 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6199 "file_storage_dir": cluster.file_storage_dir,
6200 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6201 "maintain_node_health": cluster.maintain_node_health,
6202 "ctime": cluster.ctime,
6203 "mtime": cluster.mtime,
6204 "uuid": cluster.uuid,
6205 "tags": list(cluster.GetTags()),
6206 "uid_pool": cluster.uid_pool,
6207 "default_iallocator": cluster.default_iallocator,
6208 "reserved_lvs": cluster.reserved_lvs,
6209 "primary_ip_version": primary_ip_version,
6210 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6211 "hidden_os": cluster.hidden_os,
6212 "blacklisted_os": cluster.blacklisted_os,
6218 class LUClusterConfigQuery(NoHooksLU):
6219 """Return configuration values.
6224 def CheckArguments(self):
6225 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6227 def ExpandNames(self):
6228 self.cq.ExpandNames(self)
6230 def DeclareLocks(self, level):
6231 self.cq.DeclareLocks(self, level)
6233 def Exec(self, feedback_fn):
6234 result = self.cq.OldStyleQuery(self)
6236 assert len(result) == 1
6241 class _ClusterQuery(_QueryBase):
6242 FIELDS = query.CLUSTER_FIELDS
6244 #: Do not sort (there is only one item)
6247 def ExpandNames(self, lu):
6248 lu.needed_locks = {}
6250 # The following variables interact with _QueryBase._GetNames
6251 self.wanted = locking.ALL_SET
6252 self.do_locking = self.use_locking
6255 raise errors.OpPrereqError("Can not use locking for cluster queries",
6258 def DeclareLocks(self, lu, level):
6261 def _GetQueryData(self, lu):
6262 """Computes the list of nodes and their attributes.
6265 # Locking is not used
6266 assert not (compat.any(lu.glm.is_owned(level)
6267 for level in locking.LEVELS
6268 if level != locking.LEVEL_CLUSTER) or
6269 self.do_locking or self.use_locking)
6271 if query.CQ_CONFIG in self.requested_data:
6272 cluster = lu.cfg.GetClusterInfo()
6274 cluster = NotImplemented
6276 if query.CQ_QUEUE_DRAINED in self.requested_data:
6277 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6279 drain_flag = NotImplemented
6281 if query.CQ_WATCHER_PAUSE in self.requested_data:
6282 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6284 watcher_pause = NotImplemented
6286 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6289 class LUInstanceActivateDisks(NoHooksLU):
6290 """Bring up an instance's disks.
6295 def ExpandNames(self):
6296 self._ExpandAndLockInstance()
6297 self.needed_locks[locking.LEVEL_NODE] = []
6298 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6300 def DeclareLocks(self, level):
6301 if level == locking.LEVEL_NODE:
6302 self._LockInstancesNodes()
6304 def CheckPrereq(self):
6305 """Check prerequisites.
6307 This checks that the instance is in the cluster.
6310 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6311 assert self.instance is not None, \
6312 "Cannot retrieve locked instance %s" % self.op.instance_name
6313 _CheckNodeOnline(self, self.instance.primary_node)
6315 def Exec(self, feedback_fn):
6316 """Activate the disks.
6319 disks_ok, disks_info = \
6320 _AssembleInstanceDisks(self, self.instance,
6321 ignore_size=self.op.ignore_size)
6323 raise errors.OpExecError("Cannot activate block devices")
6328 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6330 """Prepare the block devices for an instance.
6332 This sets up the block devices on all nodes.
6334 @type lu: L{LogicalUnit}
6335 @param lu: the logical unit on whose behalf we execute
6336 @type instance: L{objects.Instance}
6337 @param instance: the instance whose disks we assemble
6338 @type disks: list of L{objects.Disk} or None
6339 @param disks: which disks to assemble (or all, if None)
6340 @type ignore_secondaries: boolean
6341 @param ignore_secondaries: if true, errors on secondary nodes
6342 won't result in an error return from the function
6343 @type ignore_size: boolean
6344 @param ignore_size: if true, the current known size of the disk
6345 will not be used during the disk activation, useful for cases
6346 when the size is wrong
6347 @return: a pair (disks_ok, device_info), where device_info is a list of
6348 (host, instance_visible_name, node_visible_name) tuples
6349 with the mapping from node devices to instance devices
6354 iname = instance.name
6355 disks = _ExpandCheckDisks(instance, disks)
6357 # With the two-pass mechanism we try to reduce the window of
6358 # opportunity for the race condition of switching DRBD to primary
6359 # before handshaking occurred, but we do not eliminate it
6361 # The proper fix would be to wait (with some limits) until the
6362 # connection has been made and drbd transitions from WFConnection
6363 # into any other network-connected state (Connected, SyncTarget, etc.)
6366 # 1st pass, assemble on all nodes in secondary mode
6367 for idx, inst_disk in enumerate(disks):
6368 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6370 node_disk = node_disk.Copy()
6371 node_disk.UnsetSize()
6372 lu.cfg.SetDiskID(node_disk, node)
6373 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6375 msg = result.fail_msg
6377 is_offline_secondary = (node in instance.secondary_nodes and
6379 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6380 " (is_primary=False, pass=1): %s",
6381 inst_disk.iv_name, node, msg)
6382 if not (ignore_secondaries or is_offline_secondary):
6385 # FIXME: race condition on drbd migration to primary
6387 # 2nd pass, do only the primary node
6388 for idx, inst_disk in enumerate(disks):
6391 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6392 if node != instance.primary_node:
6395 node_disk = node_disk.Copy()
6396 node_disk.UnsetSize()
6397 lu.cfg.SetDiskID(node_disk, node)
6398 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6400 msg = result.fail_msg
6402 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6403 " (is_primary=True, pass=2): %s",
6404 inst_disk.iv_name, node, msg)
6407 dev_path = result.payload
6409 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6411 # leave the disks configured for the primary node
6412 # this is a workaround that would be better fixed by
6413 # improving the logical/physical id handling
6415 lu.cfg.SetDiskID(disk, instance.primary_node)
6417 return disks_ok, device_info
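# Usage sketch, in the spirit of LUInstanceActivateDisks.Exec above:
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance,
#                                                 ignore_size=self.op.ignore_size)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")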
6420 def _StartInstanceDisks(lu, instance, force):
6421 """Start the disks of an instance.
6424 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6425 ignore_secondaries=force)
6427 _ShutdownInstanceDisks(lu, instance)
6428 if force is not None and not force:
6429 lu.proc.LogWarning("", hint="If the message above refers to a"
6431 " you can retry the operation using '--force'.")
6432 raise errors.OpExecError("Disk consistency error")
6435 class LUInstanceDeactivateDisks(NoHooksLU):
6436 """Shutdown an instance's disks.
6441 def ExpandNames(self):
6442 self._ExpandAndLockInstance()
6443 self.needed_locks[locking.LEVEL_NODE] = []
6444 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6446 def DeclareLocks(self, level):
6447 if level == locking.LEVEL_NODE:
6448 self._LockInstancesNodes()
6450 def CheckPrereq(self):
6451 """Check prerequisites.
6453 This checks that the instance is in the cluster.
6456 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6457 assert self.instance is not None, \
6458 "Cannot retrieve locked instance %s" % self.op.instance_name
6460 def Exec(self, feedback_fn):
6461 """Deactivate the disks
6464 instance = self.instance
6466 _ShutdownInstanceDisks(self, instance)
6468 _SafeShutdownInstanceDisks(self, instance)
6471 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6472 """Shutdown block devices of an instance.
6474 This function checks if an instance is running, before calling
6475 _ShutdownInstanceDisks.
6478 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6479 _ShutdownInstanceDisks(lu, instance, disks=disks)
6482 def _ExpandCheckDisks(instance, disks):
6483 """Return the instance disks selected by the disks list
6485 @type disks: list of L{objects.Disk} or None
6486 @param disks: selected disks
6487 @rtype: list of L{objects.Disk}
6488 @return: selected instance disks to act on
6492 return instance.disks
6494 if not set(disks).issubset(instance.disks):
6495 raise errors.ProgrammerError("Can only act on disks belonging to the"
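# Contract sketch (inferred from the checks above): disks=None selects all
# of instance.disks; a list that is a subset of instance.disks is returned
# unchanged; anything else is a programming error.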
6500 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6501 """Shutdown block devices of an instance.
6503 This does the shutdown on all nodes of the instance.
6505 If ignore_primary is false, errors on the primary node cause the function to report failure
6510 disks = _ExpandCheckDisks(instance, disks)
6513 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6514 lu.cfg.SetDiskID(top_disk, node)
6515 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6516 msg = result.fail_msg
6518 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6519 disk.iv_name, node, msg)
6520 if ((node == instance.primary_node and not ignore_primary) or
6521 (node != instance.primary_node and not result.offline)):
6526 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6527 """Checks if a node has enough free memory.
6529 This function checks if a given node has the needed amount of free
6530 memory. In case the node has less memory or we cannot get the
6531 information from the node, this function raises an OpPrereqError
6534 @type lu: C{LogicalUnit}
6535 @param lu: a logical unit from which we get configuration data
6537 @param node: the node to check
6538 @type reason: C{str}
6539 @param reason: string to use in the error message
6540 @type requested: C{int}
6541 @param requested: the amount of memory in MiB to check for
6542 @type hypervisor_name: C{str}
6543 @param hypervisor_name: the hypervisor to ask for memory stats
6545 @return: node current free memory
6546 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6547 we cannot check the node
6550 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6551 nodeinfo[node].Raise("Can't get data from node %s" % node,
6552 prereq=True, ecode=errors.ECODE_ENVIRON)
6553 (_, _, (hv_info, )) = nodeinfo[node].payload
6555 free_mem = hv_info.get("memory_free", None)
6556 if not isinstance(free_mem, int):
6557 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6558 " was '%s'" % (node, free_mem),
6559 errors.ECODE_ENVIRON)
6560 if requested > free_mem:
6561 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6562 " needed %s MiB, available %s MiB" %
6563 (node, reason, requested, free_mem),
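# Usage sketch, mirroring the call in LUInstanceStartup.CheckPrereq below
# (bep being the instance's filled beparams):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)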
6568 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6569 """Checks if nodes have enough free disk space in the all VGs.
6571 This function check if all given nodes have the needed amount of
6572 free disk. In case any node has less disk or we cannot get the
6573 information from the node, this function raise an OpPrereqError
6576 @type lu: C{LogicalUnit}
6577 @param lu: a logical unit from which we get configuration data
6578 @type nodenames: C{list}
6579 @param nodenames: the list of node names to check
6580 @type req_sizes: C{dict}
6581 @param req_sizes: the hash of vg and corresponding amount of disk in
6583 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6584 or we cannot check the node
6587 for vg, req_size in req_sizes.items():
6588 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
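# Example (sketch, hypothetical VG name): req_sizes = {"xenvg": 2048} checks
# that every node in nodenames has at least 2048 MiB free in the volume
# group "xenvg".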
6591 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6592 """Checks if nodes have enough free disk space in the specified VG.
6594 This function checks if all given nodes have the needed amount of
6595 free disk. In case any node has less disk or we cannot get the
6596 information from the node, this function raises an OpPrereqError
6599 @type lu: C{LogicalUnit}
6600 @param lu: a logical unit from which we get configuration data
6601 @type nodenames: C{list}
6602 @param nodenames: the list of node names to check
6604 @param vg: the volume group to check
6605 @type requested: C{int}
6606 @param requested: the amount of disk in MiB to check for
6607 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6608 or we cannot check the node
6611 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6612 for node in nodenames:
6613 info = nodeinfo[node]
6614 info.Raise("Cannot get current information from node %s" % node,
6615 prereq=True, ecode=errors.ECODE_ENVIRON)
6616 (_, (vg_info, ), _) = info.payload
6617 vg_free = vg_info.get("vg_free", None)
6618 if not isinstance(vg_free, int):
6619 raise errors.OpPrereqError("Can't compute free disk space on node"
6620 " %s for vg %s, result was '%s'" %
6621 (node, vg, vg_free), errors.ECODE_ENVIRON)
6622 if requested > vg_free:
6623 raise errors.OpPrereqError("Not enough disk space on target node %s"
6624 " vg %s: required %d MiB, available %d MiB" %
6625 (node, vg, requested, vg_free),
6629 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6630 """Checks if nodes have enough physical CPUs
6632 This function checks if all given nodes have the needed number of
6633 physical CPUs. In case any node has less CPUs or we cannot get the
6634 information from the node, this function raises an OpPrereqError
6637 @type lu: C{LogicalUnit}
6638 @param lu: a logical unit from which we get configuration data
6639 @type nodenames: C{list}
6640 @param nodenames: the list of node names to check
6641 @type requested: C{int}
6642 @param requested: the minimum acceptable number of physical CPUs
6643 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6644 or we cannot check the node
6647 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6648 for node in nodenames:
6649 info = nodeinfo[node]
6650 info.Raise("Cannot get current information from node %s" % node,
6651 prereq=True, ecode=errors.ECODE_ENVIRON)
6652 (_, _, (hv_info, )) = info.payload
6653 num_cpus = hv_info.get("cpu_total", None)
6654 if not isinstance(num_cpus, int):
6655 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6656 " on node %s, result was '%s'" %
6657 (node, num_cpus), errors.ECODE_ENVIRON)
6658 if requested > num_cpus:
6659 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6660 "required" % (node, num_cpus, requested),
6664 class LUInstanceStartup(LogicalUnit):
6665 """Starts an instance.
6668 HPATH = "instance-start"
6669 HTYPE = constants.HTYPE_INSTANCE
6672 def CheckArguments(self):
6674 if self.op.beparams:
6675 # fill the beparams dict
6676 objects.UpgradeBeParams(self.op.beparams)
6677 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6679 def ExpandNames(self):
6680 self._ExpandAndLockInstance()
6681 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6683 def DeclareLocks(self, level):
6684 if level == locking.LEVEL_NODE_RES:
6685 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6687 def BuildHooksEnv(self):
6690 This runs on master, primary and secondary nodes of the instance.
6694 "FORCE": self.op.force,
6697 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6701 def BuildHooksNodes(self):
6702 """Build hooks nodes.
6705 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6708 def CheckPrereq(self):
6709 """Check prerequisites.
6711 This checks that the instance is in the cluster.
6714 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6715 assert self.instance is not None, \
6716 "Cannot retrieve locked instance %s" % self.op.instance_name
6719 if self.op.hvparams:
6720 # check hypervisor parameter syntax (locally)
6721 cluster = self.cfg.GetClusterInfo()
6722 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6723 filled_hvp = cluster.FillHV(instance)
6724 filled_hvp.update(self.op.hvparams)
6725 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6726 hv_type.CheckParameterSyntax(filled_hvp)
6727 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
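# In other words (a summary, not new behaviour): the cluster- and
# instance-level hvparams are filled in first, the per-startup overrides
# from the opcode are layered on top, and the merged result is then
# syntax-checked locally and verified on all of the instance's nodes.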
6729 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6731 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6733 if self.primary_offline and self.op.ignore_offline_nodes:
6734 self.proc.LogWarning("Ignoring offline primary node")
6736 if self.op.hvparams or self.op.beparams:
6737 self.proc.LogWarning("Overridden parameters are ignored")
6739 _CheckNodeOnline(self, instance.primary_node)
6741 bep = self.cfg.GetClusterInfo().FillBE(instance)
6742 bep.update(self.op.beparams)
6744 # check that the instance's bridges exist
6745 _CheckInstanceBridgesExist(self, instance)
6747 remote_info = self.rpc.call_instance_info(instance.primary_node,
6749 instance.hypervisor)
6750 remote_info.Raise("Error checking node %s" % instance.primary_node,
6751 prereq=True, ecode=errors.ECODE_ENVIRON)
6752 if not remote_info.payload: # not running already
6753 _CheckNodeFreeMemory(self, instance.primary_node,
6754 "starting instance %s" % instance.name,
6755 bep[constants.BE_MINMEM], instance.hypervisor)
6757 def Exec(self, feedback_fn):
6758 """Start the instance.
6761 instance = self.instance
6762 force = self.op.force
6764 if not self.op.no_remember:
6765 self.cfg.MarkInstanceUp(instance.name)
6767 if self.primary_offline:
6768 assert self.op.ignore_offline_nodes
6769 self.proc.LogInfo("Primary node offline, marked instance as started")
6771 node_current = instance.primary_node
6773 _StartInstanceDisks(self, instance, force)
6776 self.rpc.call_instance_start(node_current,
6777 (instance, self.op.hvparams,
6779 self.op.startup_paused)
6780 msg = result.fail_msg
6782 _ShutdownInstanceDisks(self, instance)
6783 raise errors.OpExecError("Could not start instance: %s" % msg)
6786 class LUInstanceReboot(LogicalUnit):
6787 """Reboot an instance.
6790 HPATH = "instance-reboot"
6791 HTYPE = constants.HTYPE_INSTANCE
6794 def ExpandNames(self):
6795 self._ExpandAndLockInstance()
6797 def BuildHooksEnv(self):
6800 This runs on master, primary and secondary nodes of the instance.
6804 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6805 "REBOOT_TYPE": self.op.reboot_type,
6806 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6809 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6813 def BuildHooksNodes(self):
6814 """Build hooks nodes.
6817 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6820 def CheckPrereq(self):
6821 """Check prerequisites.
6823 This checks that the instance is in the cluster.
6826 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6827 assert self.instance is not None, \
6828 "Cannot retrieve locked instance %s" % self.op.instance_name
6829 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6830 _CheckNodeOnline(self, instance.primary_node)
6832 # check that the instance's bridges exist
6833 _CheckInstanceBridgesExist(self, instance)
6835 def Exec(self, feedback_fn):
6836 """Reboot the instance.
6839 instance = self.instance
6840 ignore_secondaries = self.op.ignore_secondaries
6841 reboot_type = self.op.reboot_type
6843 remote_info = self.rpc.call_instance_info(instance.primary_node,
6845 instance.hypervisor)
6846 remote_info.Raise("Error checking node %s" % instance.primary_node)
6847 instance_running = bool(remote_info.payload)
6849 node_current = instance.primary_node
6851 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6852 constants.INSTANCE_REBOOT_HARD]:
6853 for disk in instance.disks:
6854 self.cfg.SetDiskID(disk, node_current)
6855 result = self.rpc.call_instance_reboot(node_current, instance,
6857 self.op.shutdown_timeout)
6858 result.Raise("Could not reboot instance")
6860 if instance_running:
6861 result = self.rpc.call_instance_shutdown(node_current, instance,
6862 self.op.shutdown_timeout)
6863 result.Raise("Could not shutdown instance for full reboot")
6864 _ShutdownInstanceDisks(self, instance)
6866 self.LogInfo("Instance %s was already stopped, starting now",
6868 _StartInstanceDisks(self, instance, ignore_secondaries)
6869 result = self.rpc.call_instance_start(node_current,
6870 (instance, None, None), False)
6871 msg = result.fail_msg
6873 _ShutdownInstanceDisks(self, instance)
6874 raise errors.OpExecError("Could not start instance for"
6875 " full reboot: %s" % msg)
6877 self.cfg.MarkInstanceUp(instance.name)
6880 class LUInstanceShutdown(LogicalUnit):
6881 """Shutdown an instance.
6884 HPATH = "instance-stop"
6885 HTYPE = constants.HTYPE_INSTANCE
6888 def ExpandNames(self):
6889 self._ExpandAndLockInstance()
6891 def BuildHooksEnv(self):
6894 This runs on master, primary and secondary nodes of the instance.
6897 env = _BuildInstanceHookEnvByObject(self, self.instance)
6898 env["TIMEOUT"] = self.op.timeout
6901 def BuildHooksNodes(self):
6902 """Build hooks nodes.
6905 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6908 def CheckPrereq(self):
6909 """Check prerequisites.
6911 This checks that the instance is in the cluster.
6914 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6915 assert self.instance is not None, \
6916 "Cannot retrieve locked instance %s" % self.op.instance_name
6918 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6920 self.primary_offline = \
6921 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6923 if self.primary_offline and self.op.ignore_offline_nodes:
6924 self.proc.LogWarning("Ignoring offline primary node")
6926 _CheckNodeOnline(self, self.instance.primary_node)
6928 def Exec(self, feedback_fn):
6929 """Shutdown the instance.
6932 instance = self.instance
6933 node_current = instance.primary_node
6934 timeout = self.op.timeout
6936 if not self.op.no_remember:
6937 self.cfg.MarkInstanceDown(instance.name)
6939 if self.primary_offline:
6940 assert self.op.ignore_offline_nodes
6941 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6943 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6944 msg = result.fail_msg
6946 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6948 _ShutdownInstanceDisks(self, instance)
6951 class LUInstanceReinstall(LogicalUnit):
6952 """Reinstall an instance.
6955 HPATH = "instance-reinstall"
6956 HTYPE = constants.HTYPE_INSTANCE
6959 def ExpandNames(self):
6960 self._ExpandAndLockInstance()
6962 def BuildHooksEnv(self):
6965 This runs on master, primary and secondary nodes of the instance.
6968 return _BuildInstanceHookEnvByObject(self, self.instance)
6970 def BuildHooksNodes(self):
6971 """Build hooks nodes.
6974 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6977 def CheckPrereq(self):
6978 """Check prerequisites.
6980 This checks that the instance is in the cluster and is not running.
6983 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6984 assert instance is not None, \
6985 "Cannot retrieve locked instance %s" % self.op.instance_name
6986 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6987 " offline, cannot reinstall")
6988 for node in instance.secondary_nodes:
6989 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6990 " cannot reinstall")
6992 if instance.disk_template == constants.DT_DISKLESS:
6993 raise errors.OpPrereqError("Instance '%s' has no disks" %
6994 self.op.instance_name,
6996 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6998 if self.op.os_type is not None:
7000 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7001 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7002 instance_os = self.op.os_type
7004 instance_os = instance.os
7006 nodelist = list(instance.all_nodes)
7008 if self.op.osparams:
7009 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7010 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7011 self.os_inst = i_osdict # the new dict (without defaults)
7015 self.instance = instance
7017 def Exec(self, feedback_fn):
7018 """Reinstall the instance.
7021 inst = self.instance
7023 if self.op.os_type is not None:
7024 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7025 inst.os = self.op.os_type
7026 # Write to configuration
7027 self.cfg.Update(inst, feedback_fn)
7029 _StartInstanceDisks(self, inst, None)
7031 feedback_fn("Running the instance OS create scripts...")
7032 # FIXME: pass debug option from opcode to backend
7033 result = self.rpc.call_instance_os_add(inst.primary_node,
7034 (inst, self.os_inst), True,
7035 self.op.debug_level)
7036 result.Raise("Could not install OS for instance %s on node %s" %
7037 (inst.name, inst.primary_node))
7039 _ShutdownInstanceDisks(self, inst)
7042 class LUInstanceRecreateDisks(LogicalUnit):
7043 """Recreate an instance's missing disks.
7046 HPATH = "instance-recreate-disks"
7047 HTYPE = constants.HTYPE_INSTANCE
7050 _MODIFYABLE = frozenset([
7051 constants.IDISK_SIZE,
7052 constants.IDISK_MODE,
7055 # New or changed disk parameters may have different semantics
7056 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7057 constants.IDISK_ADOPT,
7059 # TODO: Implement support for changing the VG while recreating
7061 constants.IDISK_METAVG,
7064 def CheckArguments(self):
7065 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7066 # Normalize and convert deprecated list of disk indices
7067 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
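# Example: the deprecated form disks=[3, 1] is normalized to
# [(1, {}), (3, {})], i.e. (index, params) pairs with empty overrides.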
7069 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7071 raise errors.OpPrereqError("Some disks have been specified more than"
7072 " once: %s" % utils.CommaJoin(duplicates),
7075 for (idx, params) in self.op.disks:
7076 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7077 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7079 raise errors.OpPrereqError("Parameters for disk %s try to change"
7080 " unmodifyable parameter(s): %s" %
7081 (idx, utils.CommaJoin(unsupported)),
7084 def ExpandNames(self):
7085 self._ExpandAndLockInstance()
7086 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7088 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7089 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7091 self.needed_locks[locking.LEVEL_NODE] = []
7092 self.needed_locks[locking.LEVEL_NODE_RES] = []
7094 def DeclareLocks(self, level):
7095 if level == locking.LEVEL_NODE:
7096 # if we replace the nodes, we only need to lock the old primary,
7097 # otherwise we need to lock all nodes for disk re-creation
7098 primary_only = bool(self.op.nodes)
7099 self._LockInstancesNodes(primary_only=primary_only)
7100 elif level == locking.LEVEL_NODE_RES:
7102 self.needed_locks[locking.LEVEL_NODE_RES] = \
7103 self.needed_locks[locking.LEVEL_NODE][:]
7105 def BuildHooksEnv(self):
7108 This runs on master, primary and secondary nodes of the instance.
7111 return _BuildInstanceHookEnvByObject(self, self.instance)
7113 def BuildHooksNodes(self):
7114 """Build hooks nodes.
7117 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7120 def CheckPrereq(self):
7121 """Check prerequisites.
7123 This checks that the instance is in the cluster and is not running.
7126 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7127 assert instance is not None, \
7128 "Cannot retrieve locked instance %s" % self.op.instance_name
7130 if len(self.op.nodes) != len(instance.all_nodes):
7131 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7132 " %d replacement nodes were specified" %
7133 (instance.name, len(instance.all_nodes),
7134 len(self.op.nodes)),
7136 assert instance.disk_template != constants.DT_DRBD8 or \
7137 len(self.op.nodes) == 2
7138 assert instance.disk_template != constants.DT_PLAIN or \
7139 len(self.op.nodes) == 1
7140 primary_node = self.op.nodes[0]
7142 primary_node = instance.primary_node
7143 _CheckNodeOnline(self, primary_node)
7145 if instance.disk_template == constants.DT_DISKLESS:
7146 raise errors.OpPrereqError("Instance '%s' has no disks" %
7147 self.op.instance_name, errors.ECODE_INVAL)
7149 # if we replace nodes *and* the old primary is offline, we don't
7151 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7152 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7153 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7154 if not (self.op.nodes and old_pnode.offline):
7155 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7156 msg="cannot recreate disks")
7159 self.disks = dict(self.op.disks)
7161 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7163 maxidx = max(self.disks.keys())
7164 if maxidx >= len(instance.disks):
7165 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7168 if (self.op.nodes and
7169 sorted(self.disks.keys()) != range(len(instance.disks))):
7170 raise errors.OpPrereqError("Can't recreate disks partially and"
7171 " change the nodes at the same time",
7174 self.instance = instance
7176 def Exec(self, feedback_fn):
7177 """Recreate the disks.
7180 instance = self.instance
7182 assert (self.owned_locks(locking.LEVEL_NODE) ==
7183 self.owned_locks(locking.LEVEL_NODE_RES))
7186 mods = [] # keeps track of needed changes
7188 for idx, disk in enumerate(instance.disks):
7190 changes = self.disks[idx]
7192 # Disk should not be recreated
7196 # update secondaries for disks, if needed
7197 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7198 # need to update the nodes and minors
7199 assert len(self.op.nodes) == 2
7200 assert len(disk.logical_id) == 6 # otherwise disk internals
7202 (_, _, old_port, _, _, old_secret) = disk.logical_id
7203 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7204 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7205 new_minors[0], new_minors[1], old_secret)
7206 assert len(disk.logical_id) == len(new_id)
7210 mods.append((idx, new_id, changes))
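# For reference (inferred from the unpacking above): a DRBD8 logical_id is
# the 6-tuple (node_a, node_b, port, minor_a, minor_b, secret); only the
# node names and the freshly allocated minors change here, the port and
# shared secret are preserved.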
7212 # now that we have passed all asserts above, we can apply the mods
7213 # in a single run (to avoid partial changes)
7214 for idx, new_id, changes in mods:
7215 disk = instance.disks[idx]
7216 if new_id is not None:
7217 assert disk.dev_type == constants.LD_DRBD8
7218 disk.logical_id = new_id
7220 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7221 mode=changes.get(constants.IDISK_MODE, None))
7223 # change primary node, if needed
7225 instance.primary_node = self.op.nodes[0]
7226 self.LogWarning("Changing the instance's nodes, you will have to"
7227 " remove any disks left on the older nodes manually")
7230 self.cfg.Update(instance, feedback_fn)
7232 _CreateDisks(self, instance, to_skip=to_skip)
7235 class LUInstanceRename(LogicalUnit):
7236 """Rename an instance.
7239 HPATH = "instance-rename"
7240 HTYPE = constants.HTYPE_INSTANCE
7242 def CheckArguments(self):
7246 if self.op.ip_check and not self.op.name_check:
7247 # TODO: make the ip check more flexible and not depend on the name check
7248 raise errors.OpPrereqError("IP address check requires a name check",
7251 def BuildHooksEnv(self):
7254 This runs on master, primary and secondary nodes of the instance.
7257 env = _BuildInstanceHookEnvByObject(self, self.instance)
7258 env["INSTANCE_NEW_NAME"] = self.op.new_name
7261 def BuildHooksNodes(self):
7262 """Build hooks nodes.
7265 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7268 def CheckPrereq(self):
7269 """Check prerequisites.
7271 This checks that the instance is in the cluster and is not running.
7274 self.op.instance_name = _ExpandInstanceName(self.cfg,
7275 self.op.instance_name)
7276 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7277 assert instance is not None
7278 _CheckNodeOnline(self, instance.primary_node)
7279 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7280 msg="cannot rename")
7281 self.instance = instance
7283 new_name = self.op.new_name
7284 if self.op.name_check:
7285 hostname = netutils.GetHostname(name=new_name)
7286 if hostname.name != new_name:
7287 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7289 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7290 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7291 " same as given hostname '%s'") %
7292 (hostname.name, self.op.new_name),
7294 new_name = self.op.new_name = hostname.name
7295 if (self.op.ip_check and
7296 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7297 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7298 (hostname.ip, new_name),
7299 errors.ECODE_NOTUNIQUE)
7301 instance_list = self.cfg.GetInstanceList()
7302 if new_name in instance_list and new_name != instance.name:
7303 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7304 new_name, errors.ECODE_EXISTS)
7306 def Exec(self, feedback_fn):
7307 """Rename the instance.
7310 inst = self.instance
7311 old_name = inst.name
7313 rename_file_storage = False
7314 if (inst.disk_template in constants.DTS_FILEBASED and
7315 self.op.new_name != inst.name):
7316 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7317 rename_file_storage = True
7319 self.cfg.RenameInstance(inst.name, self.op.new_name)
7320 # Change the instance lock. This is definitely safe while we hold the BGL.
7321 # Otherwise the new lock would have to be added in acquired mode.
7323 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7324 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7326 # re-read the instance from the configuration after rename
7327 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7329 if rename_file_storage:
7330 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7331 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7332 old_file_storage_dir,
7333 new_file_storage_dir)
7334 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7335 " (but the instance has been renamed in Ganeti)" %
7336 (inst.primary_node, old_file_storage_dir,
7337 new_file_storage_dir))
7339 _StartInstanceDisks(self, inst, None)
7341 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7342 old_name, self.op.debug_level)
7343 msg = result.fail_msg
7345 msg = ("Could not run OS rename script for instance %s on node %s"
7346 " (but the instance has been renamed in Ganeti): %s" %
7347 (inst.name, inst.primary_node, msg))
7348 self.proc.LogWarning(msg)
7350 _ShutdownInstanceDisks(self, inst)
7355 class LUInstanceRemove(LogicalUnit):
7356 """Remove an instance.
7359 HPATH = "instance-remove"
7360 HTYPE = constants.HTYPE_INSTANCE
7363 def ExpandNames(self):
7364 self._ExpandAndLockInstance()
7365 self.needed_locks[locking.LEVEL_NODE] = []
7366 self.needed_locks[locking.LEVEL_NODE_RES] = []
7367 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7369 def DeclareLocks(self, level):
7370 if level == locking.LEVEL_NODE:
7371 self._LockInstancesNodes()
7372 elif level == locking.LEVEL_NODE_RES:
7374 self.needed_locks[locking.LEVEL_NODE_RES] = \
7375 self.needed_locks[locking.LEVEL_NODE][:]
7377 def BuildHooksEnv(self):
7380 This runs on master, primary and secondary nodes of the instance.
7383 env = _BuildInstanceHookEnvByObject(self, self.instance)
7384 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7387 def BuildHooksNodes(self):
7388 """Build hooks nodes.
7391 nl = [self.cfg.GetMasterNode()]
7392 nl_post = list(self.instance.all_nodes) + nl
7393 return (nl, nl_post)
7395 def CheckPrereq(self):
7396 """Check prerequisites.
7398 This checks that the instance is in the cluster.
7401 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7402 assert self.instance is not None, \
7403 "Cannot retrieve locked instance %s" % self.op.instance_name
7405 def Exec(self, feedback_fn):
7406 """Remove the instance.
7409 instance = self.instance
7410 logging.info("Shutting down instance %s on node %s",
7411 instance.name, instance.primary_node)
7413 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7414 self.op.shutdown_timeout)
7415 msg = result.fail_msg
7417 if self.op.ignore_failures:
7418 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7420 raise errors.OpExecError("Could not shutdown instance %s on"
7422 (instance.name, instance.primary_node, msg))
7424 assert (self.owned_locks(locking.LEVEL_NODE) ==
7425 self.owned_locks(locking.LEVEL_NODE_RES))
7426 assert not (set(instance.all_nodes) -
7427 self.owned_locks(locking.LEVEL_NODE)), \
7428 "Not owning correct locks"
7430 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7433 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7434 """Utility function to remove an instance.
7437 logging.info("Removing block devices for instance %s", instance.name)
7439 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7440 if not ignore_failures:
7441 raise errors.OpExecError("Can't remove instance's disks")
7442 feedback_fn("Warning: can't remove instance's disks")
7444 logging.info("Removing instance %s out of cluster config", instance.name)
7446 lu.cfg.RemoveInstance(instance.name)
7448 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7449 "Instance lock removal conflict"
7451 # Remove lock for the instance
7452 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7455 class LUInstanceQuery(NoHooksLU):
7456 """Logical unit for querying instances.
7459 # pylint: disable=W0142
7462 def CheckArguments(self):
7463 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7464 self.op.output_fields, self.op.use_locking)
7466 def ExpandNames(self):
7467 self.iq.ExpandNames(self)
7469 def DeclareLocks(self, level):
7470 self.iq.DeclareLocks(self, level)
7472 def Exec(self, feedback_fn):
7473 return self.iq.OldStyleQuery(self)
7476 class LUInstanceFailover(LogicalUnit):
7477 """Failover an instance.
7480 HPATH = "instance-failover"
7481 HTYPE = constants.HTYPE_INSTANCE
7484 def CheckArguments(self):
7485 """Check the arguments.
7488 self.iallocator = getattr(self.op, "iallocator", None)
7489 self.target_node = getattr(self.op, "target_node", None)
7491 def ExpandNames(self):
7492 self._ExpandAndLockInstance()
7494 if self.op.target_node is not None:
7495 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7497 self.needed_locks[locking.LEVEL_NODE] = []
7498 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7500 self.needed_locks[locking.LEVEL_NODE_RES] = []
7501 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7503 ignore_consistency = self.op.ignore_consistency
7504 shutdown_timeout = self.op.shutdown_timeout
7505 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7508 ignore_consistency=ignore_consistency,
7509 shutdown_timeout=shutdown_timeout,
7510 ignore_ipolicy=self.op.ignore_ipolicy)
7511 self.tasklets = [self._migrater]
7513 def DeclareLocks(self, level):
7514 if level == locking.LEVEL_NODE:
7515 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7516 if instance.disk_template in constants.DTS_EXT_MIRROR:
7517 if self.op.target_node is None:
7518 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7520 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7521 self.op.target_node]
7522 del self.recalculate_locks[locking.LEVEL_NODE]
7524 self._LockInstancesNodes()
7525 elif level == locking.LEVEL_NODE_RES:
7527 self.needed_locks[locking.LEVEL_NODE_RES] = \
7528 self.needed_locks[locking.LEVEL_NODE][:]
7530 def BuildHooksEnv(self):
7533 This runs on master, primary and secondary nodes of the instance.
7536 instance = self._migrater.instance
7537 source_node = instance.primary_node
7538 target_node = self.op.target_node
7540 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7541 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7542 "OLD_PRIMARY": source_node,
7543 "NEW_PRIMARY": target_node,
7546 if instance.disk_template in constants.DTS_INT_MIRROR:
7547 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7548 env["NEW_SECONDARY"] = source_node
7550 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7552 env.update(_BuildInstanceHookEnvByObject(self, instance))
7556 def BuildHooksNodes(self):
7557 """Build hooks nodes.
7560 instance = self._migrater.instance
7561 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7562 return (nl, nl + [instance.primary_node])
7565 class LUInstanceMigrate(LogicalUnit):
7566 """Migrate an instance.
7568 This is migration without shutting down the instance, as opposed to failover,
7569 which requires the instance to be shut down.
7572 HPATH = "instance-migrate"
7573 HTYPE = constants.HTYPE_INSTANCE
7576 def ExpandNames(self):
7577 self._ExpandAndLockInstance()
7579 if self.op.target_node is not None:
7580 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7582 self.needed_locks[locking.LEVEL_NODE] = []
7583 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7585 self.needed_locks[locking.LEVEL_NODE_RES] = []
7586 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7589 TLMigrateInstance(self, self.op.instance_name,
7590 cleanup=self.op.cleanup,
7592 fallback=self.op.allow_failover,
7593 allow_runtime_changes=self.op.allow_runtime_changes,
7594 ignore_ipolicy=self.op.ignore_ipolicy)
7595 self.tasklets = [self._migrater]
7597 def DeclareLocks(self, level):
7598 if level == locking.LEVEL_NODE:
7599 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7600 if instance.disk_template in constants.DTS_EXT_MIRROR:
7601 if self.op.target_node is None:
7602 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7604 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7605 self.op.target_node]
7606 del self.recalculate_locks[locking.LEVEL_NODE]
7608 self._LockInstancesNodes()
7609 elif level == locking.LEVEL_NODE_RES:
7611 self.needed_locks[locking.LEVEL_NODE_RES] = \
7612 self.needed_locks[locking.LEVEL_NODE][:]
7614 def BuildHooksEnv(self):
7617 This runs on master, primary and secondary nodes of the instance.
7620 instance = self._migrater.instance
7621 source_node = instance.primary_node
7622 target_node = self.op.target_node
7623 env = _BuildInstanceHookEnvByObject(self, instance)
7625 "MIGRATE_LIVE": self._migrater.live,
7626 "MIGRATE_CLEANUP": self.op.cleanup,
7627 "OLD_PRIMARY": source_node,
7628 "NEW_PRIMARY": target_node,
7629 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7632 if instance.disk_template in constants.DTS_INT_MIRROR:
7633 env["OLD_SECONDARY"] = target_node
7634 env["NEW_SECONDARY"] = source_node
7636 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7640 def BuildHooksNodes(self):
7641 """Build hooks nodes.
7644 instance = self._migrater.instance
7645 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7646 return (nl, nl + [instance.primary_node])
7649 class LUInstanceMove(LogicalUnit):
7650 """Move an instance by data-copying.
7653 HPATH = "instance-move"
7654 HTYPE = constants.HTYPE_INSTANCE
7657 def ExpandNames(self):
7658 self._ExpandAndLockInstance()
7659 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7660 self.op.target_node = target_node
7661 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7662 self.needed_locks[locking.LEVEL_NODE_RES] = []
7663 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7665 def DeclareLocks(self, level):
7666 if level == locking.LEVEL_NODE:
7667 self._LockInstancesNodes(primary_only=True)
7668 elif level == locking.LEVEL_NODE_RES:
7670 self.needed_locks[locking.LEVEL_NODE_RES] = \
7671 self.needed_locks[locking.LEVEL_NODE][:]
7673 def BuildHooksEnv(self):
7676 This runs on master, primary and secondary nodes of the instance.
7680 "TARGET_NODE": self.op.target_node,
7681 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7683 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7686 def BuildHooksNodes(self):
7687 """Build hooks nodes.
7691 self.cfg.GetMasterNode(),
7692 self.instance.primary_node,
7693 self.op.target_node,
7697 def CheckPrereq(self):
7698 """Check prerequisites.
7700 This checks that the instance is in the cluster.
7703 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7704 assert self.instance is not None, \
7705 "Cannot retrieve locked instance %s" % self.op.instance_name
7707 node = self.cfg.GetNodeInfo(self.op.target_node)
7708 assert node is not None, \
7709 "Cannot retrieve locked node %s" % self.op.target_node
7711 self.target_node = target_node = node.name
7713 if target_node == instance.primary_node:
7714 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7715 (instance.name, target_node),
7718 bep = self.cfg.GetClusterInfo().FillBE(instance)
7720 for idx, dsk in enumerate(instance.disks):
7721 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7722 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7723 " cannot copy" % idx, errors.ECODE_STATE)
7725 _CheckNodeOnline(self, target_node)
7726 _CheckNodeNotDrained(self, target_node)
7727 _CheckNodeVmCapable(self, target_node)
7728 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7729 self.cfg.GetNodeGroup(node.group))
7730 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7731 ignore=self.op.ignore_ipolicy)
7733 if instance.admin_state == constants.ADMINST_UP:
7734 # check memory requirements on the target node
7735 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7736 instance.name, bep[constants.BE_MAXMEM],
7737 instance.hypervisor)
7739 self.LogInfo("Not checking memory on the secondary node as"
7740 " instance will not be started")
7742 # check bridge existence
7743 _CheckInstanceBridgesExist(self, instance, node=target_node)
7745 def Exec(self, feedback_fn):
7746 """Move an instance.
7748 The move is done by shutting it down on its present node, copying
7749 the data over (slow) and starting it on the new node.
7752 instance = self.instance
7754 source_node = instance.primary_node
7755 target_node = self.target_node
7757 self.LogInfo("Shutting down instance %s on source node %s",
7758 instance.name, source_node)
7760 assert (self.owned_locks(locking.LEVEL_NODE) ==
7761 self.owned_locks(locking.LEVEL_NODE_RES))
7763 result = self.rpc.call_instance_shutdown(source_node, instance,
7764 self.op.shutdown_timeout)
7765 msg = result.fail_msg
7767 if self.op.ignore_consistency:
7768 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7769 " Proceeding anyway. Please make sure node"
7770 " %s is down. Error details: %s",
7771 instance.name, source_node, source_node, msg)
7773 raise errors.OpExecError("Could not shutdown instance %s on"
7775 (instance.name, source_node, msg))
7777 # create the target disks
7779 _CreateDisks(self, instance, target_node=target_node)
7780 except errors.OpExecError:
7781 self.LogWarning("Device creation failed, reverting...")
7783 _RemoveDisks(self, instance, target_node=target_node)
7785 self.cfg.ReleaseDRBDMinors(instance.name)
7788 cluster_name = self.cfg.GetClusterInfo().cluster_name
7791 # activate, get path, copy the data over
7792 for idx, disk in enumerate(instance.disks):
7793 self.LogInfo("Copying data for disk %d", idx)
7794 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7795 instance.name, True, idx)
7797 self.LogWarning("Can't assemble newly created disk %d: %s",
7798 idx, result.fail_msg)
7799 errs.append(result.fail_msg)
7801 dev_path = result.payload
7802 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7803 target_node, dev_path,
7806 self.LogWarning("Can't copy data over for disk %d: %s",
7807 idx, result.fail_msg)
7808 errs.append(result.fail_msg)
7812 self.LogWarning("Some disks failed to copy, aborting")
7814 _RemoveDisks(self, instance, target_node=target_node)
7816 self.cfg.ReleaseDRBDMinors(instance.name)
7817 raise errors.OpExecError("Errors during disk copy: %s" %
7820 instance.primary_node = target_node
7821 self.cfg.Update(instance, feedback_fn)
7823 self.LogInfo("Removing the disks on the original node")
7824 _RemoveDisks(self, instance, target_node=source_node)
7826 # Only start the instance if it's marked as up
7827 if instance.admin_state == constants.ADMINST_UP:
7828 self.LogInfo("Starting instance %s on node %s",
7829 instance.name, target_node)
7831 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7832 ignore_secondaries=True)
7834 _ShutdownInstanceDisks(self, instance)
7835 raise errors.OpExecError("Can't activate the instance's disks")
7837 result = self.rpc.call_instance_start(target_node,
7838 (instance, None, None), False)
7839 msg = result.fail_msg
7841 _ShutdownInstanceDisks(self, instance)
7842 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7843 (instance.name, target_node, msg))
7846 class LUNodeMigrate(LogicalUnit):
7847 """Migrate all instances from a node.
7850 HPATH = "node-migrate"
7851 HTYPE = constants.HTYPE_NODE
7854 def CheckArguments(self):
7857 def ExpandNames(self):
7858 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7860 self.share_locks = _ShareAll()
7861 self.needed_locks = {
7862 locking.LEVEL_NODE: [self.op.node_name],
7865 def BuildHooksEnv(self):
7868 This runs on the master, the primary and all the secondaries.
7872 "NODE_NAME": self.op.node_name,
7873 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7876 def BuildHooksNodes(self):
7877 """Build hooks nodes.
7880 nl = [self.cfg.GetMasterNode()]
7883 def CheckPrereq(self):
7886 def Exec(self, feedback_fn):
7887 # Prepare jobs for migration instances
7888 allow_runtime_changes = self.op.allow_runtime_changes
7890 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7893 iallocator=self.op.iallocator,
7894 target_node=self.op.target_node,
7895 allow_runtime_changes=allow_runtime_changes,
7896 ignore_ipolicy=self.op.ignore_ipolicy)]
7897 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7900 # TODO: Run iallocator in this opcode and pass correct placement options to
7901 # OpInstanceMigrate. Since other jobs can modify the cluster between
7902 # running the iallocator and the actual migration, a good consistency model
7903 # will have to be found.
7905 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7906 frozenset([self.op.node_name]))
7908 return ResultWithJobs(jobs)
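# Illustrative sketch (not part of the original code, instance names are
# hypothetical): for a node carrying two primary instances "inst1" and
# "inst2", the list built above has the shape
#
#   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#           [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
#
# i.e. one single-opcode job per primary instance on the evacuated node,
# which is then handed back wrapped in ResultWithJobs.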
7911 class TLMigrateInstance(Tasklet):
7912 """Tasklet class for instance migration.
7915 @ivar live: whether the migration will be done live or non-live;
7916 this variable is initialized only after CheckPrereq has run
7917 @type cleanup: boolean
7918 @ivar cleanup: Whether we clean up from a failed migration
7919 @type iallocator: string
7920 @ivar iallocator: The iallocator used to determine target_node
7921 @type target_node: string
7922 @ivar target_node: If given, the target_node to reallocate the instance to
7923 @type failover: boolean
7924 @ivar failover: Whether operation results in failover or migration
7925 @type fallback: boolean
7926 @ivar fallback: Whether fallback to failover is allowed if migration not
7928 @type ignore_consistency: boolean
7929 @ivar ignore_consistency: Whether we should ignore consistency between source
7931 @type shutdown_timeout: int
7932 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7933 @type ignore_ipolicy: bool
7934 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7939 _MIGRATION_POLL_INTERVAL = 1 # seconds
7940 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7942 def __init__(self, lu, instance_name, cleanup=False,
7943 failover=False, fallback=False,
7944 ignore_consistency=False,
7945 allow_runtime_changes=True,
7946 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7947 ignore_ipolicy=False):
7948 """Initializes this class.
7951 Tasklet.__init__(self, lu)
7954 self.instance_name = instance_name
7955 self.cleanup = cleanup
7956 self.live = False # will be overridden later
7957 self.failover = failover
7958 self.fallback = fallback
7959 self.ignore_consistency = ignore_consistency
7960 self.shutdown_timeout = shutdown_timeout
7961 self.ignore_ipolicy = ignore_ipolicy
7962 self.allow_runtime_changes = allow_runtime_changes
7964 def CheckPrereq(self):
7965 """Check prerequisites.
7967 This checks that the instance is in the cluster.
7970 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7971 instance = self.cfg.GetInstanceInfo(instance_name)
7972 assert instance is not None
7973 self.instance = instance
7974 cluster = self.cfg.GetClusterInfo()
7976 if (not self.cleanup and
7977 not instance.admin_state == constants.ADMINST_UP and
7978 not self.failover and self.fallback):
7979 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7980 " switching to failover")
7981 self.failover = True
7983 if instance.disk_template not in constants.DTS_MIRRORED:
7988 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7989 " %s" % (instance.disk_template, text),
7992 if instance.disk_template in constants.DTS_EXT_MIRROR:
7993 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7995 if self.lu.op.iallocator:
7996 self._RunAllocator()
7998 # We set self.target_node as it is required by
8000 self.target_node = self.lu.op.target_node
8002 # Check that the target node is correct in terms of instance policy
8003 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8004 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8005 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8006 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8007 ignore=self.ignore_ipolicy)
8009 # self.target_node is already populated, either directly or by the
8011 target_node = self.target_node
8012 if self.target_node == instance.primary_node:
8013 raise errors.OpPrereqError("Cannot migrate instance %s"
8014 " to its primary (%s)" %
8015 (instance.name, instance.primary_node))
8017 if len(self.lu.tasklets) == 1:
8018 # It is safe to release locks only when we're the only tasklet
8020 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8021 keep=[instance.primary_node, self.target_node])
8024 secondary_nodes = instance.secondary_nodes
8025 if not secondary_nodes:
8026 raise errors.ConfigurationError("No secondary node but using"
8027 " %s disk template" %
8028 instance.disk_template)
8029 target_node = secondary_nodes[0]
8030 if self.lu.op.iallocator or (self.lu.op.target_node and
8031 self.lu.op.target_node != target_node):
8033 text = "failed over"
8036 raise errors.OpPrereqError("Instances with disk template %s cannot"
8037 " be %s to arbitrary nodes"
8038 " (neither an iallocator nor a target"
8039 " node can be passed)" %
8040 (instance.disk_template, text),
8042 nodeinfo = self.cfg.GetNodeInfo(target_node)
8043 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8044 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8045 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8046 ignore=self.ignore_ipolicy)
8048 i_be = cluster.FillBE(instance)
8050 # check memory requirements on the secondary node
8051 if (not self.cleanup and
8052 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8053 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8054 "migrating instance %s" %
8056 i_be[constants.BE_MINMEM],
8057 instance.hypervisor)
8059 self.lu.LogInfo("Not checking memory on the secondary node as"
8060 " instance will not be started")
8062 # check if failover must be forced instead of migration
8063 if (not self.cleanup and not self.failover and
8064 i_be[constants.BE_ALWAYS_FAILOVER]):
8066 self.lu.LogInfo("Instance configured to always failover; fallback"
8068 self.failover = True
8070 raise errors.OpPrereqError("This instance has been configured to"
8071 " always failover, please allow failover",
8074 # check bridge existence
8075 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8077 if not self.cleanup:
8078 _CheckNodeNotDrained(self.lu, target_node)
8079 if not self.failover:
8080 result = self.rpc.call_instance_migratable(instance.primary_node,
8082 if result.fail_msg and self.fallback:
8083 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8085 self.failover = True
8087 result.Raise("Can't migrate, please use failover",
8088 prereq=True, ecode=errors.ECODE_STATE)
8090 assert not (self.failover and self.cleanup)
8092 if not self.failover:
8093 if self.lu.op.live is not None and self.lu.op.mode is not None:
8094 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8095 " parameters are accepted",
8097 if self.lu.op.live is not None:
8099 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8101 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8102 # reset the 'live' parameter to None so that repeated
8103 # invocations of CheckPrereq do not raise an exception
8104 self.lu.op.live = None
8105 elif self.lu.op.mode is None:
8106 # read the default value from the hypervisor
8107 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8108 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8110 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8112 # Failover is never live
8115 if not (self.failover or self.cleanup):
8116 remote_info = self.rpc.call_instance_info(instance.primary_node,
8118 instance.hypervisor)
8119 remote_info.Raise("Error checking instance on node %s" %
8120 instance.primary_node)
8121 instance_running = bool(remote_info.payload)
8122 if instance_running:
8123 self.current_mem = int(remote_info.payload["memory"])
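# Illustrative note (hypothetical numbers): self.current_mem is compared
# against self.tgt_free_mem later in _ExecMigration; e.g. with
# current_mem == 4096 and tgt_free_mem == 2048 the instance is ballooned
# down to 2048 MB before migrating, provided allow_runtime_changes is True,
# otherwise the operation aborts.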
8125 def _RunAllocator(self):
8126 """Run the allocator based on input opcode.
8129 # FIXME: add a self.ignore_ipolicy option
8130 ial = IAllocator(self.cfg, self.rpc,
8131 mode=constants.IALLOCATOR_MODE_RELOC,
8132 name=self.instance_name,
8133 relocate_from=[self.instance.primary_node],
8136 ial.Run(self.lu.op.iallocator)
8139 raise errors.OpPrereqError("Can't compute nodes using"
8140 " iallocator '%s': %s" %
8141 (self.lu.op.iallocator, ial.info),
8143 if len(ial.result) != ial.required_nodes:
8144 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8145 " of nodes (%s), required %s" %
8146 (self.lu.op.iallocator, len(ial.result),
8147 ial.required_nodes), errors.ECODE_FAULT)
8148 self.target_node = ial.result[0]
8149 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8150 self.instance_name, self.lu.op.iallocator,
8151 utils.CommaJoin(ial.result))
8153 def _WaitUntilSync(self):
8154 """Poll with custom rpc for disk sync.
8156 This uses our own step-based rpc call.
8159 self.feedback_fn("* wait until resync is done")
8163 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8165 (self.instance.disks,
8168 for node, nres in result.items():
8169 nres.Raise("Cannot resync disks on node %s" % node)
8170 node_done, node_percent = nres.payload
8171 all_done = all_done and node_done
8172 if node_percent is not None:
8173 min_percent = min(min_percent, node_percent)
8175 if min_percent < 100:
8176 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8179 def _EnsureSecondary(self, node):
8180 """Demote a node to secondary.
8183 self.feedback_fn("* switching node %s to secondary mode" % node)
8185 for dev in self.instance.disks:
8186 self.cfg.SetDiskID(dev, node)
8188 result = self.rpc.call_blockdev_close(node, self.instance.name,
8189 self.instance.disks)
8190 result.Raise("Cannot change disk to secondary on node %s" % node)
8192 def _GoStandalone(self):
8193 """Disconnect from the network.
8196 self.feedback_fn("* changing into standalone mode")
8197 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8198 self.instance.disks)
8199 for node, nres in result.items():
8200 nres.Raise("Cannot disconnect disks on node %s" % node)
8202 def _GoReconnect(self, multimaster):
8203 """Reconnect to the network.
8209 msg = "single-master"
8210 self.feedback_fn("* changing disks into %s mode" % msg)
8211 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8212 (self.instance.disks, self.instance),
8213 self.instance.name, multimaster)
8214 for node, nres in result.items():
8215 nres.Raise("Cannot change disks config on node %s" % node)
8217 def _ExecCleanup(self):
8218 """Try to cleanup after a failed migration.
8220 The cleanup is done by:
8221 - check that the instance is running only on one node
8222 (and update the config if needed)
8223 - change disks on its secondary node to secondary
8224 - wait until disks are fully synchronized
8225 - disconnect from the network
8226 - change disks into single-master mode
8227 - wait again until disks are fully synchronized
8230 instance = self.instance
8231 target_node = self.target_node
8232 source_node = self.source_node
8234 # check running on only one node
8235 self.feedback_fn("* checking where the instance actually runs"
8236 " (if this hangs, the hypervisor might be in"
8238 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8239 for node, result in ins_l.items():
8240 result.Raise("Can't contact node %s" % node)
8242 runningon_source = instance.name in ins_l[source_node].payload
8243 runningon_target = instance.name in ins_l[target_node].payload
8245 if runningon_source and runningon_target:
8246 raise errors.OpExecError("Instance seems to be running on two nodes,"
8247 " or the hypervisor is confused; you will have"
8248 " to ensure manually that it runs only on one"
8249 " and restart this operation")
8251 if not (runningon_source or runningon_target):
8252 raise errors.OpExecError("Instance does not seem to be running at all;"
8253 " in this case it's safer to repair by"
8254 " running 'gnt-instance stop' to ensure disk"
8255 " shutdown, and then restarting it")
8257 if runningon_target:
8258 # the migration has actually succeeded, we need to update the config
8259 self.feedback_fn("* instance running on secondary node (%s),"
8260 " updating config" % target_node)
8261 instance.primary_node = target_node
8262 self.cfg.Update(instance, self.feedback_fn)
8263 demoted_node = source_node
8265 self.feedback_fn("* instance confirmed to be running on its"
8266 " primary node (%s)" % source_node)
8267 demoted_node = target_node
8269 if instance.disk_template in constants.DTS_INT_MIRROR:
8270 self._EnsureSecondary(demoted_node)
8272 self._WaitUntilSync()
8273 except errors.OpExecError:
8274 # we ignore errors here, since if the device is standalone, it
8275 # won't be able to sync
8277 self._GoStandalone()
8278 self._GoReconnect(False)
8279 self._WaitUntilSync()
8281 self.feedback_fn("* done")
8283 def _RevertDiskStatus(self):
8284 """Try to revert the disk status after a failed migration.
8287 target_node = self.target_node
8288 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8292 self._EnsureSecondary(target_node)
8293 self._GoStandalone()
8294 self._GoReconnect(False)
8295 self._WaitUntilSync()
8296 except errors.OpExecError, err:
8297 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8298 " please try to recover the instance manually;"
8299 " error '%s'" % str(err))
8301 def _AbortMigration(self):
8302 """Call the hypervisor code to abort a started migration.
8305 instance = self.instance
8306 target_node = self.target_node
8307 source_node = self.source_node
8308 migration_info = self.migration_info
8310 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8314 abort_msg = abort_result.fail_msg
8316 logging.error("Aborting migration failed on target node %s: %s",
8317 target_node, abort_msg)
8318 # Don't raise an exception here, as we still have to try to revert the
8319 # disk status, even if this step failed.
8321 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8322 instance, False, self.live)
8323 abort_msg = abort_result.fail_msg
8325 logging.error("Aborting migration failed on source node %s: %s",
8326 source_node, abort_msg)
8328 def _ExecMigration(self):
8329 """Migrate an instance.
8331 The migration is done by:
8332 - change the disks into dual-master mode
8333 - wait until disks are fully synchronized again
8334 - migrate the instance
8335 - change disks on the new secondary node (the old primary) to secondary
8336 - wait until disks are fully synchronized
8337 - change disks into single-master mode
8340 instance = self.instance
8341 target_node = self.target_node
8342 source_node = self.source_node
8344 # Check for hypervisor version mismatch and warn the user.
8345 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8346 None, [self.instance.hypervisor])
8347 for ninfo in nodeinfo.values():
8348 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8350 (_, _, (src_info, )) = nodeinfo[source_node].payload
8351 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8353 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8354 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8355 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8356 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8357 if src_version != dst_version:
8358 self.feedback_fn("* warning: hypervisor version mismatch between"
8359 " source (%s) and target (%s) node" %
8360 (src_version, dst_version))
8362 self.feedback_fn("* checking disk consistency between source and target")
8363 for (idx, dev) in enumerate(instance.disks):
8364 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8365 raise errors.OpExecError("Disk %s is degraded or not fully"
8366 " synchronized on target node,"
8367 " aborting migration" % idx)
8369 if self.current_mem > self.tgt_free_mem:
8370 if not self.allow_runtime_changes:
8371 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8372 " free memory to fit instance %s on target"
8373 " node %s (have %dMB, need %dMB)" %
8374 (instance.name, target_node,
8375 self.tgt_free_mem, self.current_mem))
8376 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8377 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8380 rpcres.Raise("Cannot modify instance runtime memory")
8382 # First get the migration information from the remote node
8383 result = self.rpc.call_migration_info(source_node, instance)
8384 msg = result.fail_msg
8386 log_err = ("Failed fetching source migration information from %s: %s" %
8388 logging.error(log_err)
8389 raise errors.OpExecError(log_err)
8391 self.migration_info = migration_info = result.payload
8393 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8394 # Then switch the disks to master/master mode
8395 self._EnsureSecondary(target_node)
8396 self._GoStandalone()
8397 self._GoReconnect(True)
8398 self._WaitUntilSync()
8400 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8401 result = self.rpc.call_accept_instance(target_node,
8404 self.nodes_ip[target_node])
8406 msg = result.fail_msg
8408 logging.error("Instance pre-migration failed, trying to revert"
8409 " disk status: %s", msg)
8410 self.feedback_fn("Pre-migration failed, aborting")
8411 self._AbortMigration()
8412 self._RevertDiskStatus()
8413 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8414 (instance.name, msg))
8416 self.feedback_fn("* migrating instance to %s" % target_node)
8417 result = self.rpc.call_instance_migrate(source_node, instance,
8418 self.nodes_ip[target_node],
8420 msg = result.fail_msg
8422 logging.error("Instance migration failed, trying to revert"
8423 " disk status: %s", msg)
8424 self.feedback_fn("Migration failed, aborting")
8425 self._AbortMigration()
8426 self._RevertDiskStatus()
8427 raise errors.OpExecError("Could not migrate instance %s: %s" %
8428 (instance.name, msg))
8430 self.feedback_fn("* starting memory transfer")
8431 last_feedback = time.time()
8433 result = self.rpc.call_instance_get_migration_status(source_node,
8435 msg = result.fail_msg
8436 ms = result.payload # MigrationStatus instance
8437 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8438 logging.error("Instance migration failed, trying to revert"
8439 " disk status: %s", msg)
8440 self.feedback_fn("Migration failed, aborting")
8441 self._AbortMigration()
8442 self._RevertDiskStatus()
8443 raise errors.OpExecError("Could not migrate instance %s: %s" %
8444 (instance.name, msg))
8446 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8447 self.feedback_fn("* memory transfer complete")
8450 if (utils.TimeoutExpired(last_feedback,
8451 self._MIGRATION_FEEDBACK_INTERVAL) and
8452 ms.transferred_ram is not None):
8453 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8454 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8455 last_feedback = time.time()
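# Worked example (hypothetical values): with ms.transferred_ram == 1536 and
# ms.total_ram == 4096, the computation above yields
#   100 * float(1536) / float(4096) == 37.5
# i.e. the feedback line reads "* memory transfer progress: 37.50 %".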
8457 time.sleep(self._MIGRATION_POLL_INTERVAL)
8459 result = self.rpc.call_instance_finalize_migration_src(source_node,
8463 msg = result.fail_msg
8465 logging.error("Instance migration succeeded, but finalization failed"
8466 " on the source node: %s", msg)
8467 raise errors.OpExecError("Could not finalize instance migration: %s" %
8470 instance.primary_node = target_node
8472 # distribute new instance config to the other nodes
8473 self.cfg.Update(instance, self.feedback_fn)
8475 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8479 msg = result.fail_msg
8481 logging.error("Instance migration succeeded, but finalization failed"
8482 " on the target node: %s", msg)
8483 raise errors.OpExecError("Could not finalize instance migration: %s" %
8486 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8487 self._EnsureSecondary(source_node)
8488 self._WaitUntilSync()
8489 self._GoStandalone()
8490 self._GoReconnect(False)
8491 self._WaitUntilSync()
8493 # If the instance's disk template is `rbd' and there was a successful
8494 # migration, unmap the device from the source node.
8495 if self.instance.disk_template == constants.DT_RBD:
8496 disks = _ExpandCheckDisks(instance, instance.disks)
8497 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8499 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8500 msg = result.fail_msg
8502 logging.error("Migration was successful, but couldn't unmap the"
8503 " block device %s on source node %s: %s",
8504 disk.iv_name, source_node, msg)
8505 logging.error("You need to unmap the device %s manually on %s",
8506 disk.iv_name, source_node)
8508 self.feedback_fn("* done")
8510 def _ExecFailover(self):
8511 """Failover an instance.
8513 The failover is done by shutting it down on its present node and
8514 starting it on the secondary.
8517 instance = self.instance
8518 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8520 source_node = instance.primary_node
8521 target_node = self.target_node
8523 if instance.admin_state == constants.ADMINST_UP:
8524 self.feedback_fn("* checking disk consistency between source and target")
8525 for (idx, dev) in enumerate(instance.disks):
8526 # for drbd, these are drbd over lvm
8527 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8529 if primary_node.offline:
8530 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8532 (primary_node.name, idx, target_node))
8533 elif not self.ignore_consistency:
8534 raise errors.OpExecError("Disk %s is degraded on target node,"
8535 " aborting failover" % idx)
8537 self.feedback_fn("* not checking disk consistency as instance is not"
8540 self.feedback_fn("* shutting down instance on source node")
8541 logging.info("Shutting down instance %s on node %s",
8542 instance.name, source_node)
8544 result = self.rpc.call_instance_shutdown(source_node, instance,
8545 self.shutdown_timeout)
8546 msg = result.fail_msg
8548 if self.ignore_consistency or primary_node.offline:
8549 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8550 " proceeding anyway; please make sure node"
8551 " %s is down; error details: %s",
8552 instance.name, source_node, source_node, msg)
8554 raise errors.OpExecError("Could not shutdown instance %s on"
8556 (instance.name, source_node, msg))
8558 self.feedback_fn("* deactivating the instance's disks on source node")
8559 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8560 raise errors.OpExecError("Can't shut down the instance's disks")
8562 instance.primary_node = target_node
8563 # distribute new instance config to the other nodes
8564 self.cfg.Update(instance, self.feedback_fn)
8566 # Only start the instance if it's marked as up
8567 if instance.admin_state == constants.ADMINST_UP:
8568 self.feedback_fn("* activating the instance's disks on target node %s" %
8570 logging.info("Starting instance %s on node %s",
8571 instance.name, target_node)
8573 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8574 ignore_secondaries=True)
8576 _ShutdownInstanceDisks(self.lu, instance)
8577 raise errors.OpExecError("Can't activate the instance's disks")
8579 self.feedback_fn("* starting the instance on the target node %s" %
8581 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8583 msg = result.fail_msg
8585 _ShutdownInstanceDisks(self.lu, instance)
8586 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8587 (instance.name, target_node, msg))
8589 def Exec(self, feedback_fn):
8590 """Perform the migration.
8593 self.feedback_fn = feedback_fn
8594 self.source_node = self.instance.primary_node
8596 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8597 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8598 self.target_node = self.instance.secondary_nodes[0]
8599 # Otherwise self.target_node has been populated either
8600 # directly, or through an iallocator.
8602 self.all_nodes = [self.source_node, self.target_node]
8603 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8604 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8607 feedback_fn("Failover instance %s" % self.instance.name)
8608 self._ExecFailover()
8610 feedback_fn("Migrating instance %s" % self.instance.name)
8613 return self._ExecCleanup()
8615 return self._ExecMigration()
8618 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8620 """Wrapper around L{_CreateBlockDevInner}.
8622 This method annotates the root device first.
8625 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8626 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8630 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8632 """Create a tree of block devices on a given node.
8634 If this device type has to be created on secondaries, create it and
8637 If not, just recurse to children keeping the same 'force' value.
8639 @attention: The device has to be annotated already.
8641 @param lu: the lu on whose behalf we execute
8642 @param node: the node on which to create the device
8643 @type instance: L{objects.Instance}
8644 @param instance: the instance which owns the device
8645 @type device: L{objects.Disk}
8646 @param device: the device to create
8647 @type force_create: boolean
8648 @param force_create: whether to force creation of this device; this
8649 will be changed to True whenever we find a device which has
8650 CreateOnSecondary() attribute
8651 @param info: the extra 'metadata' we should attach to the device
8652 (this will be represented as a LVM tag)
8653 @type force_open: boolean
8654 @param force_open: this parameter will be passed to the
8655 L{backend.BlockdevCreate} function where it specifies
8656 whether we run on primary or not, and it affects both
8657 the child assembly and the device's own Open() execution
8660 if device.CreateOnSecondary():
8664 for child in device.children:
8665 _CreateBlockDevInner(lu, node, instance, child, force_create,
8668 if not force_create:
8671 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8674 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8675 """Create a single block device on a given node.
8677 This will not recurse over children of the device, so they must be
8680 @param lu: the lu on whose behalf we execute
8681 @param node: the node on which to create the device
8682 @type instance: L{objects.Instance}
8683 @param instance: the instance which owns the device
8684 @type device: L{objects.Disk}
8685 @param device: the device to create
8686 @param info: the extra 'metadata' we should attach to the device
8687 (this will be represented as a LVM tag)
8688 @type force_open: boolean
8689 @param force_open: this parameter will be passed to the
8690 L{backend.BlockdevCreate} function where it specifies
8691 whether we run on primary or not, and it affects both
8692 the child assembly and the device's own Open() execution
8695 lu.cfg.SetDiskID(device, node)
8696 result = lu.rpc.call_blockdev_create(node, device, device.size,
8697 instance.name, force_open, info)
8698 result.Raise("Can't create block device %s on"
8699 " node %s for instance %s" % (device, node, instance.name))
8700 if device.physical_id is None:
8701 device.physical_id = result.payload
8704 def _GenerateUniqueNames(lu, exts):
8705 """Generate a suitable LV name.
8707 This will generate a logical volume name for the given instance.
8712 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8713 results.append("%s%s" % (new_id, val))
8717 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8718 iv_name, p_minor, s_minor):
8719 """Generate a drbd8 device complete with its children.
8722 assert len(vgnames) == len(names) == 2
8723 port = lu.cfg.AllocatePort()
8724 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8726 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8727 logical_id=(vgnames[0], names[0]),
8729 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8730 logical_id=(vgnames[1], names[1]),
8732 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8733 logical_id=(primary, secondary, port,
8736 children=[dev_data, dev_meta],
8737 iv_name=iv_name, params={})
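# A rough sketch of the result (example values only): for size=1024 the
# drbd_dev built above is an LD_DRBD8 disk of 1024 MB with two LD_LV children,
# the 1024 MB data volume and the DRBD_META_SIZE meta volume, and a logical_id
# that starts with (primary, secondary, port, ...), presumably also carrying
# the DRBD minors and the shared secret generated above.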
8741 _DISK_TEMPLATE_NAME_PREFIX = {
8742 constants.DT_PLAIN: "",
8743 constants.DT_RBD: ".rbd",
8747 _DISK_TEMPLATE_DEVICE_TYPE = {
8748 constants.DT_PLAIN: constants.LD_LV,
8749 constants.DT_FILE: constants.LD_FILE,
8750 constants.DT_SHARED_FILE: constants.LD_FILE,
8751 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8752 constants.DT_RBD: constants.LD_RBD,
8756 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8757 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8758 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8759 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8760 """Generate the entire disk layout for a given template type.
8763 #TODO: compute space requirements
8765 vgname = lu.cfg.GetVGName()
8766 disk_count = len(disk_info)
8769 if template_name == constants.DT_DISKLESS:
8771 elif template_name == constants.DT_DRBD8:
8772 if len(secondary_nodes) != 1:
8773 raise errors.ProgrammerError("Wrong template configuration")
8774 remote_node = secondary_nodes[0]
8775 minors = lu.cfg.AllocateDRBDMinor(
8776 [primary_node, remote_node] * len(disk_info), instance_name)
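# For example (hypothetical nodes), with two disks the minor request list is
# [pnode, snode, pnode, snode], so minors comes back as a flat list of four
# values and disk idx later uses minors[idx * 2] (primary) and
# minors[idx * 2 + 1] (secondary).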
8778 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8780 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8783 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8784 for i in range(disk_count)]):
8785 names.append(lv_prefix + "_data")
8786 names.append(lv_prefix + "_meta")
8787 for idx, disk in enumerate(disk_info):
8788 disk_index = idx + base_index
8789 data_vg = disk.get(constants.IDISK_VG, vgname)
8790 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8791 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8792 disk[constants.IDISK_SIZE],
8794 names[idx * 2:idx * 2 + 2],
8795 "disk/%d" % disk_index,
8796 minors[idx * 2], minors[idx * 2 + 1])
8797 disk_dev.mode = disk[constants.IDISK_MODE]
8798 disks.append(disk_dev)
8801 raise errors.ProgrammerError("Wrong template configuration")
8803 if template_name == constants.DT_FILE:
8805 elif template_name == constants.DT_SHARED_FILE:
8806 _req_shr_file_storage()
8808 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8809 if name_prefix is None:
8812 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8813 (name_prefix, base_index + i)
8814 for i in range(disk_count)])
8816 if template_name == constants.DT_PLAIN:
8817 def logical_id_fn(idx, _, disk):
8818 vg = disk.get(constants.IDISK_VG, vgname)
8819 return (vg, names[idx])
8820 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8822 lambda _, disk_index, disk: (file_driver,
8823 "%s/disk%d" % (file_storage_dir,
8825 elif template_name == constants.DT_BLOCK:
8827 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8828 disk[constants.IDISK_ADOPT])
8829 elif template_name == constants.DT_RBD:
8830 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8832 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8834 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8836 for idx, disk in enumerate(disk_info):
8837 disk_index = idx + base_index
8838 size = disk[constants.IDISK_SIZE]
8839 feedback_fn("* disk %s, size %s" %
8840 (disk_index, utils.FormatUnit(size, "h")))
8841 disks.append(objects.Disk(dev_type=dev_type, size=size,
8842 logical_id=logical_id_fn(idx, disk_index, disk),
8843 iv_name="disk/%d" % disk_index,
8844 mode=disk[constants.IDISK_MODE],
8850 def _GetInstanceInfoText(instance):
8851 """Compute that text that should be added to the disk's metadata.
8854 return "originstname+%s" % instance.name
8857 def _CalcEta(time_taken, written, total_size):
8858 """Calculates the ETA based on size written and total size.
8860 @param time_taken: The time taken so far
8861 @param written: amount written so far
8862 @param total_size: The total size of data to be written
8863 @return: The remaining time in seconds
8866 avg_time = time_taken / float(written)
8867 return (total_size - written) * avg_time
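# A minimal usage sketch (hypothetical values, not from the original code):
#
#   >>> _CalcEta(30.0, 512, 2048)   # 512 of 2048 units written in 30 seconds
#   90.0
#
# i.e. the remaining 1536 units, at the observed rate of 512/30 units per
# second, need another 90 seconds.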
8870 def _WipeDisks(lu, instance):
8871 """Wipes instance disks.
8873 @type lu: L{LogicalUnit}
8874 @param lu: the logical unit on whose behalf we execute
8875 @type instance: L{objects.Instance}
8876 @param instance: the instance whose disks we should wipe
8877 @return: the success of the wipe
8880 node = instance.primary_node
8882 for device in instance.disks:
8883 lu.cfg.SetDiskID(device, node)
8885 logging.info("Pause sync of instance %s disks", instance.name)
8886 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8887 (instance.disks, instance),
8890 for idx, success in enumerate(result.payload):
8892 logging.warn("pause-sync of instance %s for disk %d failed",
8896 for idx, device in enumerate(instance.disks):
8897 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8898 # but at most MAX_WIPE_CHUNK
8899 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8900 constants.MIN_WIPE_CHUNK_PERCENT)
8901 # we _must_ make this an int, otherwise rounding errors will
8903 wipe_chunk_size = int(wipe_chunk_size)
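# Worked example (assuming, e.g., MAX_WIPE_CHUNK = 1024 MB and
# MIN_WIPE_CHUNK_PERCENT = 10; the values are not shown in this module):
# a 4096 MB disk gives min(1024, 4096 / 100.0 * 10) == 409.6, truncated to
# 409 MB chunks, while any disk of 10240 MB or more is capped at 1024 MB
# chunks.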
8905 lu.LogInfo("* Wiping disk %d", idx)
8906 logging.info("Wiping disk %d for instance %s, node %s using"
8907 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8912 start_time = time.time()
8914 while offset < size:
8915 wipe_size = min(wipe_chunk_size, size - offset)
8916 logging.debug("Wiping disk %d, offset %s, chunk %s",
8917 idx, offset, wipe_size)
8918 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8920 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8921 (idx, offset, wipe_size))
8924 if now - last_output >= 60:
8925 eta = _CalcEta(now - start_time, offset, size)
8926 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8927 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8930 logging.info("Resume sync of instance %s disks", instance.name)
8932 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8933 (instance.disks, instance),
8936 for idx, success in enumerate(result.payload):
8938 lu.LogWarning("Resume sync of disk %d failed, please have a"
8939 " look at the status and troubleshoot the issue", idx)
8940 logging.warn("resume-sync of instance %s for disk %d failed",
8944 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8945 """Create all disks for an instance.
8947 This abstracts away some work from AddInstance.
8949 @type lu: L{LogicalUnit}
8950 @param lu: the logical unit on whose behalf we execute
8951 @type instance: L{objects.Instance}
8952 @param instance: the instance whose disks we should create
8954 @param to_skip: list of indices to skip
8955 @type target_node: string
8956 @param target_node: if passed, overrides the target node for creation
8958 @return: the success of the creation
8961 info = _GetInstanceInfoText(instance)
8962 if target_node is None:
8963 pnode = instance.primary_node
8964 all_nodes = instance.all_nodes
8969 if instance.disk_template in constants.DTS_FILEBASED:
8970 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8971 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8973 result.Raise("Failed to create directory '%s' on"
8974 " node %s" % (file_storage_dir, pnode))
8976 # Note: this needs to be kept in sync with adding of disks in
8977 # LUInstanceSetParams
8978 for idx, device in enumerate(instance.disks):
8979 if to_skip and idx in to_skip:
8981 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8983 for node in all_nodes:
8984 f_create = node == pnode
8985 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8988 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8989 """Remove all disks for an instance.
8991 This abstracts away some work from `AddInstance()` and
8992 `RemoveInstance()`. Note that in case some of the devices couldn't
8993 be removed, the removal will continue with the other ones (compare
8994 with `_CreateDisks()`).
8996 @type lu: L{LogicalUnit}
8997 @param lu: the logical unit on whose behalf we execute
8998 @type instance: L{objects.Instance}
8999 @param instance: the instance whose disks we should remove
9000 @type target_node: string
9001 @param target_node: used to override the node on which to remove the disks
9003 @return: the success of the removal
9006 logging.info("Removing block devices for instance %s", instance.name)
9009 ports_to_release = set()
9010 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9011 for (idx, device) in enumerate(anno_disks):
9013 edata = [(target_node, device)]
9015 edata = device.ComputeNodeTree(instance.primary_node)
9016 for node, disk in edata:
9017 lu.cfg.SetDiskID(disk, node)
9018 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
9020 lu.LogWarning("Could not remove disk %s on node %s,"
9021 " continuing anyway: %s", idx, node, msg)
9024 # if this is a DRBD disk, return its port to the pool
9025 if device.dev_type in constants.LDS_DRBD:
9026 ports_to_release.add(device.logical_id[2])
9028 if all_result or ignore_failures:
9029 for port in ports_to_release:
9030 lu.cfg.AddTcpUdpPort(port)
9032 if instance.disk_template == constants.DT_FILE:
9033 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9037 tgt = instance.primary_node
9038 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9040 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9041 file_storage_dir, instance.primary_node, result.fail_msg)
9047 def _ComputeDiskSizePerVG(disk_template, disks):
9048 """Compute disk size requirements in the volume group
9051 def _compute(disks, payload):
9052 """Universal algorithm.
9057 vgs[disk[constants.IDISK_VG]] = \
9058 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9062 # Required free disk space as a function of disk and swap space
9064 constants.DT_DISKLESS: {},
9065 constants.DT_PLAIN: _compute(disks, 0),
9066 # 128 MB are added for drbd metadata for each disk
9067 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9068 constants.DT_FILE: {},
9069 constants.DT_SHARED_FILE: {},
9072 if disk_template not in req_size_dict:
9073 raise errors.ProgrammerError("Disk template '%s' size requirement"
9074 " is unknown" % disk_template)
9076 return req_size_dict[disk_template]
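# Illustrative example (hypothetical disks, assuming the usual
# constants.IDISK_* key names): for
#   disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}]
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) is expected to return
#   {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}
# i.e. the disk sizes plus one DRBD_META_SIZE per disk, accumulated per
# volume group.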
9079 def _ComputeDiskSize(disk_template, disks):
9080 """Compute disk size requirements in the volume group
9083 # Required free disk space as a function of disk and swap space
9085 constants.DT_DISKLESS: None,
9086 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9087 # 128 MB are added for drbd metadata for each disk
9089 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9090 constants.DT_FILE: None,
9091 constants.DT_SHARED_FILE: 0,
9092 constants.DT_BLOCK: 0,
9093 constants.DT_RBD: 0,
9096 if disk_template not in req_size_dict:
9097 raise errors.ProgrammerError("Disk template '%s' size requirement"
9098 " is unknown" % disk_template)
9100 return req_size_dict[disk_template]
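# Illustrative example (hypothetical sizes): for two disks of 1024 and
# 2048 MB, _ComputeDiskSize returns 3072 for DT_PLAIN and
# 3072 + 2 * DRBD_META_SIZE (3328 with the documented 128 MB of metadata per
# disk) for DT_DRBD8; DT_DISKLESS and DT_FILE return None, while the
# shared-file, block and RBD templates return 0.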
9103 def _FilterVmNodes(lu, nodenames):
9104 """Filters out non-vm_capable nodes from a list.
9106 @type lu: L{LogicalUnit}
9107 @param lu: the logical unit for which we check
9108 @type nodenames: list
9109 @param nodenames: the list of nodes on which we should check
9111 @return: the list of vm-capable nodes
9114 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9115 return [name for name in nodenames if name not in vm_nodes]
9118 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9119 """Hypervisor parameter validation.
9121 This function abstracts the hypervisor parameter validation to be
9122 used in both instance create and instance modify.
9124 @type lu: L{LogicalUnit}
9125 @param lu: the logical unit for which we check
9126 @type nodenames: list
9127 @param nodenames: the list of nodes on which we should check
9128 @type hvname: string
9129 @param hvname: the name of the hypervisor we should use
9130 @type hvparams: dict
9131 @param hvparams: the parameters which we need to check
9132 @raise errors.OpPrereqError: if the parameters are not valid
9135 nodenames = _FilterVmNodes(lu, nodenames)
9137 cluster = lu.cfg.GetClusterInfo()
9138 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9140 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9141 for node in nodenames:
9145 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9148 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9149 """OS parameters validation.
9151 @type lu: L{LogicalUnit}
9152 @param lu: the logical unit for which we check
9153 @type required: boolean
9154 @param required: whether the validation should fail if the OS is not
9156 @type nodenames: list
9157 @param nodenames: the list of nodes on which we should check
9158 @type osname: string
9159 @param osname: the name of the OS we should check
9160 @type osparams: dict
9161 @param osparams: the parameters which we need to check
9162 @raise errors.OpPrereqError: if the parameters are not valid
9165 nodenames = _FilterVmNodes(lu, nodenames)
9166 result = lu.rpc.call_os_validate(nodenames, required, osname,
9167 [constants.OS_VALIDATE_PARAMETERS],
9169 for node, nres in result.items():
9170 # we don't check for offline cases since this should be run only
9171 # against the master node and/or an instance's nodes
9172 nres.Raise("OS Parameters validation failed on node %s" % node)
9173 if not nres.payload:
9174 lu.LogInfo("OS %s not found on node %s, validation skipped",
9178 class LUInstanceCreate(LogicalUnit):
9179 """Create an instance.
9182 HPATH = "instance-add"
9183 HTYPE = constants.HTYPE_INSTANCE
9186 def CheckArguments(self):
9190 # do not require name_check to ease forward/backward compatibility
9192 if self.op.no_install and self.op.start:
9193 self.LogInfo("No-installation mode selected, disabling startup")
9194 self.op.start = False
9195 # validate/normalize the instance name
9196 self.op.instance_name = \
9197 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9199 if self.op.ip_check and not self.op.name_check:
9200 # TODO: make the ip check more flexible and not depend on the name check
9201 raise errors.OpPrereqError("Cannot do IP address check without a name"
9202 " check", errors.ECODE_INVAL)
9204 # check nics' parameter names
9205 for nic in self.op.nics:
9206 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9208 # check disks' parameter names and consistent adopt/no-adopt strategy
9209 has_adopt = has_no_adopt = False
9210 for disk in self.op.disks:
9211 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9212 if constants.IDISK_ADOPT in disk:
9216 if has_adopt and has_no_adopt:
9217 raise errors.OpPrereqError("Either all disks are adopted or none is",
9220 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9221 raise errors.OpPrereqError("Disk adoption is not supported for the"
9222 " '%s' disk template" %
9223 self.op.disk_template,
9225 if self.op.iallocator is not None:
9226 raise errors.OpPrereqError("Disk adoption not allowed with an"
9227 " iallocator script", errors.ECODE_INVAL)
9228 if self.op.mode == constants.INSTANCE_IMPORT:
9229 raise errors.OpPrereqError("Disk adoption not allowed for"
9230 " instance import", errors.ECODE_INVAL)
9232 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9233 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9234 " but no 'adopt' parameter given" %
9235 self.op.disk_template,
9238 self.adopt_disks = has_adopt
9240 # instance name verification
9241 if self.op.name_check:
9242 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9243 self.op.instance_name = self.hostname1.name
9244 # used in CheckPrereq for ip ping check
9245 self.check_ip = self.hostname1.ip
9247 self.check_ip = None
9249 # file storage checks
9250 if (self.op.file_driver and
9251 not self.op.file_driver in constants.FILE_DRIVER):
9252 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9253 self.op.file_driver, errors.ECODE_INVAL)
9255 if self.op.disk_template == constants.DT_FILE:
9256 opcodes.RequireFileStorage()
9257 elif self.op.disk_template == constants.DT_SHARED_FILE:
9258 opcodes.RequireSharedFileStorage()
9260 ### Node/iallocator related checks
9261 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9263 if self.op.pnode is not None:
9264 if self.op.disk_template in constants.DTS_INT_MIRROR:
9265 if self.op.snode is None:
9266 raise errors.OpPrereqError("The networked disk templates need"
9267 " a mirror node", errors.ECODE_INVAL)
9269 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9271 self.op.snode = None
9273 self._cds = _GetClusterDomainSecret()
9275 if self.op.mode == constants.INSTANCE_IMPORT:
9276 # On import force_variant must be True, because if we forced it at
9277 # initial install, our only chance when importing it back is that it
9279 self.op.force_variant = True
9281 if self.op.no_install:
9282 self.LogInfo("No-installation mode has no effect during import")
9284 elif self.op.mode == constants.INSTANCE_CREATE:
9285 if self.op.os_type is None:
9286 raise errors.OpPrereqError("No guest OS specified",
9288 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9289 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9290 " installation" % self.op.os_type,
9292 if self.op.disk_template is None:
9293 raise errors.OpPrereqError("No disk template specified",
9296 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9297 # Check handshake to ensure both clusters have the same domain secret
9298 src_handshake = self.op.source_handshake
9299 if not src_handshake:
9300 raise errors.OpPrereqError("Missing source handshake",
9303 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9306 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9309 # Load and check source CA
9310 self.source_x509_ca_pem = self.op.source_x509_ca
9311 if not self.source_x509_ca_pem:
9312 raise errors.OpPrereqError("Missing source X509 CA",
9316 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9318 except OpenSSL.crypto.Error, err:
9319 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9320 (err, ), errors.ECODE_INVAL)
9322 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9323 if errcode is not None:
9324 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9327 self.source_x509_ca = cert
9329 src_instance_name = self.op.source_instance_name
9330 if not src_instance_name:
9331 raise errors.OpPrereqError("Missing source instance name",
9334 self.source_instance_name = \
9335 netutils.GetHostname(name=src_instance_name).name
9338 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9339 self.op.mode, errors.ECODE_INVAL)
9341 def ExpandNames(self):
9342 """ExpandNames for CreateInstance.
9344 Figure out the right locks for instance creation.
9347 self.needed_locks = {}
9349 instance_name = self.op.instance_name
9350 # this is just a preventive check, but someone might still add this
9351 # instance in the meantime, and creation will fail at lock-add time
9352 if instance_name in self.cfg.GetInstanceList():
9353 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9354 instance_name, errors.ECODE_EXISTS)
9356 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9358 if self.op.iallocator:
9359 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9360 # specifying a group on instance creation and then selecting nodes from
9362 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9363 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9364 else:
9365 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9366 nodelist = [self.op.pnode]
9367 if self.op.snode is not None:
9368 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9369 nodelist.append(self.op.snode)
9370 self.needed_locks[locking.LEVEL_NODE] = nodelist
9371 # Lock resources of instance's primary and secondary nodes (copy to
9372 # prevent accidental modification)
9373 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9375 # in case of import lock the source node too
9376 if self.op.mode == constants.INSTANCE_IMPORT:
9377 src_node = self.op.src_node
9378 src_path = self.op.src_path
9380 if src_path is None:
9381 self.op.src_path = src_path = self.op.instance_name
9383 if src_node is None:
9384 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9385 self.op.src_node = None
9386 if os.path.isabs(src_path):
9387 raise errors.OpPrereqError("Importing an instance from a path"
9388 " requires a source node option",
9390 else:
9391 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9392 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9393 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9394 if not os.path.isabs(src_path):
9395 self.op.src_path = src_path = \
9396 utils.PathJoin(constants.EXPORT_DIR, src_path)
9398 def _RunAllocator(self):
9399 """Run the allocator based on input opcode.
9402 nics = [n.ToDict() for n in self.nics]
9403 ial = IAllocator(self.cfg, self.rpc,
9404 mode=constants.IALLOCATOR_MODE_ALLOC,
9405 name=self.op.instance_name,
9406 disk_template=self.op.disk_template,
9407 tags=self.op.tags,
9408 os=self.op.os_type,
9409 vcpus=self.be_full[constants.BE_VCPUS],
9410 memory=self.be_full[constants.BE_MAXMEM],
9411 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9412 disks=self.disks,
9413 nics=nics,
9414 hypervisor=self.op.hypervisor,
9415 )
9417 ial.Run(self.op.iallocator)
9419 if not ial.success:
9420 raise errors.OpPrereqError("Can't compute nodes using"
9421 " iallocator '%s': %s" %
9422 (self.op.iallocator, ial.info),
9423 errors.ECODE_NORES)
9424 if len(ial.result) != ial.required_nodes:
9425 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9426 " of nodes (%s), required %s" %
9427 (self.op.iallocator, len(ial.result),
9428 ial.required_nodes), errors.ECODE_FAULT)
9429 self.op.pnode = ial.result[0]
9430 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9431 self.op.instance_name, self.op.iallocator,
9432 utils.CommaJoin(ial.result))
9433 if ial.required_nodes == 2:
9434 self.op.snode = ial.result[1]
9436 def BuildHooksEnv(self):
9437 """Build hooks env.
9439 This runs on master, primary and secondary nodes of the instance.
9441 """
9442 env = {
9443 "ADD_MODE": self.op.mode,
9444 }
9445 if self.op.mode == constants.INSTANCE_IMPORT:
9446 env["SRC_NODE"] = self.op.src_node
9447 env["SRC_PATH"] = self.op.src_path
9448 env["SRC_IMAGES"] = self.src_images
9450 env.update(_BuildInstanceHookEnv(
9451 name=self.op.instance_name,
9452 primary_node=self.op.pnode,
9453 secondary_nodes=self.secondaries,
9454 status=self.op.start,
9455 os_type=self.op.os_type,
9456 minmem=self.be_full[constants.BE_MINMEM],
9457 maxmem=self.be_full[constants.BE_MAXMEM],
9458 vcpus=self.be_full[constants.BE_VCPUS],
9459 nics=_NICListToTuple(self, self.nics),
9460 disk_template=self.op.disk_template,
9461 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9462 for d in self.disks],
9463 bep=self.be_full,
9464 hvp=self.hv_full,
9465 hypervisor_name=self.op.hypervisor,
9466 tags=self.op.tags,
9467 ))
9469 return env
9471 def BuildHooksNodes(self):
9472 """Build hooks nodes.
9474 """
9475 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9476 return nl, nl
9478 def _ReadExportInfo(self):
9479 """Reads the export information from disk.
9481 It will override the opcode source node and path with the actual
9482 information, if these two were not specified before.
9484 @return: the export information
9487 assert self.op.mode == constants.INSTANCE_IMPORT
9489 src_node = self.op.src_node
9490 src_path = self.op.src_path
9492 if src_node is None:
9493 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9494 exp_list = self.rpc.call_export_list(locked_nodes)
9495 found = False
9496 for node in exp_list:
9497 if exp_list[node].fail_msg:
9498 continue
9499 if src_path in exp_list[node].payload:
9500 found = True
9501 self.op.src_node = src_node = node
9502 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9503 src_path)
9504 break
9505 if not found:
9506 raise errors.OpPrereqError("No export found for relative path %s" %
9507 src_path, errors.ECODE_INVAL)
9509 _CheckNodeOnline(self, src_node)
9510 result = self.rpc.call_export_info(src_node, src_path)
9511 result.Raise("No export or invalid export found in dir %s" % src_path)
9513 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9514 if not export_info.has_section(constants.INISECT_EXP):
9515 raise errors.ProgrammerError("Corrupted export config",
9516 errors.ECODE_ENVIRON)
9518 ei_version = export_info.get(constants.INISECT_EXP, "version")
9519 if int(ei_version) != constants.EXPORT_VERSION:
9520 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9521 (ei_version, constants.EXPORT_VERSION),
9522 errors.ECODE_ENVIRON)
9524 return export_info
9525 def _ReadExportParams(self, einfo):
9526 """Use export parameters as defaults.
9528 In case the opcode doesn't specify (as in override) some instance
9529 parameters, then try to use them from the export information, if
9533 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9535 if self.op.disk_template is None:
9536 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9537 self.op.disk_template = einfo.get(constants.INISECT_INS,
9538 "disk_template")
9539 if self.op.disk_template not in constants.DISK_TEMPLATES:
9540 raise errors.OpPrereqError("Disk template specified in configuration"
9541 " file is not one of the allowed values:"
9542 " %s" % " ".join(constants.DISK_TEMPLATES))
9544 raise errors.OpPrereqError("No disk template specified and the export"
9545 " is missing the disk_template information",
9548 if not self.op.disks:
9549 disks = []
9550 # TODO: import the disk iv_name too
9551 for idx in range(constants.MAX_DISKS):
9552 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9553 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9554 disks.append({constants.IDISK_SIZE: disk_sz})
9555 self.op.disks = disks
9556 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9557 raise errors.OpPrereqError("No disk info specified and the export"
9558 " is missing the disk information",
9561 if not self.op.nics:
9562 nics = []
9563 for idx in range(constants.MAX_NICS):
9564 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9565 ndict = {}
9566 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9567 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9568 ndict[name] = v
9569 nics.append(ndict)
9570 else:
9571 break
9572 self.op.nics = nics
9574 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9575 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9577 if (self.op.hypervisor is None and
9578 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9579 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9581 if einfo.has_section(constants.INISECT_HYP):
9582 # use the export parameters but do not override the ones
9583 # specified by the user
9584 for name, value in einfo.items(constants.INISECT_HYP):
9585 if name not in self.op.hvparams:
9586 self.op.hvparams[name] = value
9588 if einfo.has_section(constants.INISECT_BEP):
9589 # use the parameters, without overriding
9590 for name, value in einfo.items(constants.INISECT_BEP):
9591 if name not in self.op.beparams:
9592 self.op.beparams[name] = value
9593 # Compatibility for the old "memory" be param
9594 if name == constants.BE_MEMORY:
9595 if constants.BE_MAXMEM not in self.op.beparams:
9596 self.op.beparams[constants.BE_MAXMEM] = value
9597 if constants.BE_MINMEM not in self.op.beparams:
9598 self.op.beparams[constants.BE_MINMEM] = value
9600 # try to read the parameters old style, from the main section
9601 for name in constants.BES_PARAMETERS:
9602 if (name not in self.op.beparams and
9603 einfo.has_option(constants.INISECT_INS, name)):
9604 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9606 if einfo.has_section(constants.INISECT_OSP):
9607 # use the parameters, without overriding
9608 for name, value in einfo.items(constants.INISECT_OSP):
9609 if name not in self.op.osparams:
9610 self.op.osparams[name] = value
9612 def _RevertToDefaults(self, cluster):
9613 """Revert the instance parameters to the default values.
9617 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9618 for name in self.op.hvparams.keys():
9619 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9620 del self.op.hvparams[name]
9622 be_defs = cluster.SimpleFillBE({})
9623 for name in self.op.beparams.keys():
9624 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9625 del self.op.beparams[name]
9627 nic_defs = cluster.SimpleFillNIC({})
9628 for nic in self.op.nics:
9629 for name in constants.NICS_PARAMETERS:
9630 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9631 del nic[name]
9633 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9634 for name in self.op.osparams.keys():
9635 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9636 del self.op.osparams[name]
9638 def _CalculateFileStorageDir(self):
9639 """Calculate final instance file storage dir.
9642 # file storage dir calculation/check
9643 self.instance_file_storage_dir = None
9644 if self.op.disk_template in constants.DTS_FILEBASED:
9645 # build the full file storage dir path
9646 joinargs = []
9648 if self.op.disk_template == constants.DT_SHARED_FILE:
9649 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9650 else:
9651 get_fsd_fn = self.cfg.GetFileStorageDir
9653 cfg_storagedir = get_fsd_fn()
9654 if not cfg_storagedir:
9655 raise errors.OpPrereqError("Cluster file storage dir not defined")
9656 joinargs.append(cfg_storagedir)
9658 if self.op.file_storage_dir is not None:
9659 joinargs.append(self.op.file_storage_dir)
9661 joinargs.append(self.op.instance_name)
9663 # pylint: disable=W0142
9664 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9666 def CheckPrereq(self): # pylint: disable=R0914
9667 """Check prerequisites.
9670 self._CalculateFileStorageDir()
9672 if self.op.mode == constants.INSTANCE_IMPORT:
9673 export_info = self._ReadExportInfo()
9674 self._ReadExportParams(export_info)
9675 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9676 else:
9677 self._old_instance_name = None
9679 if (not self.cfg.GetVGName() and
9680 self.op.disk_template not in constants.DTS_NOT_LVM):
9681 raise errors.OpPrereqError("Cluster does not support lvm-based"
9682 " instances", errors.ECODE_STATE)
9684 if (self.op.hypervisor is None or
9685 self.op.hypervisor == constants.VALUE_AUTO):
9686 self.op.hypervisor = self.cfg.GetHypervisorType()
9688 cluster = self.cfg.GetClusterInfo()
9689 enabled_hvs = cluster.enabled_hypervisors
9690 if self.op.hypervisor not in enabled_hvs:
9691 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9692 " cluster (%s)" % (self.op.hypervisor,
9693 ",".join(enabled_hvs)),
9696 # Check tag validity
9697 for tag in self.op.tags:
9698 objects.TaggableObject.ValidateTag(tag)
9700 # check hypervisor parameter syntax (locally)
9701 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9702 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9703 self.op.hvparams)
9704 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9705 hv_type.CheckParameterSyntax(filled_hvp)
9706 self.hv_full = filled_hvp
9707 # check that we don't specify global parameters on an instance
9708 _CheckGlobalHvParams(self.op.hvparams)
9710 # fill and remember the beparams dict
9711 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9712 for param, value in self.op.beparams.iteritems():
9713 if value == constants.VALUE_AUTO:
9714 self.op.beparams[param] = default_beparams[param]
9715 objects.UpgradeBeParams(self.op.beparams)
9716 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9717 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9719 # build os parameters
9720 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9722 # now that hvp/bep are in final format, let's reset to defaults,
9724 if self.op.identify_defaults:
9725 self._RevertToDefaults(cluster)
9727 # NIC buildup
9728 self.nics = []
9729 for idx, nic in enumerate(self.op.nics):
9730 nic_mode_req = nic.get(constants.INIC_MODE, None)
9731 nic_mode = nic_mode_req
9732 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9733 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9735 # in routed mode, for the first nic, the default ip is 'auto'
9736 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9737 default_ip_mode = constants.VALUE_AUTO
9739 default_ip_mode = constants.VALUE_NONE
9741 # ip validity checks
9742 ip = nic.get(constants.INIC_IP, default_ip_mode)
9743 if ip is None or ip.lower() == constants.VALUE_NONE:
9744 nic_ip = None
9745 elif ip.lower() == constants.VALUE_AUTO:
9746 if not self.op.name_check:
9747 raise errors.OpPrereqError("IP address set to auto but name checks"
9748 " have been skipped",
9750 nic_ip = self.hostname1.ip
9752 if not netutils.IPAddress.IsValid(ip):
9753 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9757 # TODO: check the ip address for uniqueness
9758 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9759 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9762 # MAC address verification
9763 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9764 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9765 mac = utils.NormalizeAndValidateMac(mac)
9767 try:
9768 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9769 except errors.ReservationError:
9770 raise errors.OpPrereqError("MAC address %s already in use"
9771 " in cluster" % mac,
9772 errors.ECODE_NOTUNIQUE)
9774 # Build nic parameters
9775 link = nic.get(constants.INIC_LINK, None)
9776 if link == constants.VALUE_AUTO:
9777 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9778 nicparams = {}
9779 if nic_mode_req:
9780 nicparams[constants.NIC_MODE] = nic_mode
9781 if link:
9782 nicparams[constants.NIC_LINK] = link
9784 check_params = cluster.SimpleFillNIC(nicparams)
9785 objects.NIC.CheckParameterSyntax(check_params)
9786 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9788 # disk checks/pre-build
9789 default_vg = self.cfg.GetVGName()
9790 self.disks = []
9791 for disk in self.op.disks:
9792 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9793 if mode not in constants.DISK_ACCESS_SET:
9794 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9795 mode, errors.ECODE_INVAL)
9796 size = disk.get(constants.IDISK_SIZE, None)
9797 if size is None:
9798 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9799 try:
9800 size = int(size)
9801 except (TypeError, ValueError):
9802 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9803 errors.ECODE_INVAL)
9805 data_vg = disk.get(constants.IDISK_VG, default_vg)
9806 new_disk = {
9807 constants.IDISK_SIZE: size,
9808 constants.IDISK_MODE: mode,
9809 constants.IDISK_VG: data_vg,
9810 }
9811 if constants.IDISK_METAVG in disk:
9812 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9813 if constants.IDISK_ADOPT in disk:
9814 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9815 self.disks.append(new_disk)
9817 if self.op.mode == constants.INSTANCE_IMPORT:
9818 disk_images = []
9819 for idx in range(len(self.disks)):
9820 option = "disk%d_dump" % idx
9821 if export_info.has_option(constants.INISECT_INS, option):
9822 # FIXME: are the old os-es, disk sizes, etc. useful?
9823 export_name = export_info.get(constants.INISECT_INS, option)
9824 image = utils.PathJoin(self.op.src_path, export_name)
9825 disk_images.append(image)
9826 else:
9827 disk_images.append(False)
9829 self.src_images = disk_images
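9830 # when the instance keeps its exported name, reuse the NIC MACs recorded in the export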
9831 if self.op.instance_name == self._old_instance_name:
9832 for idx, nic in enumerate(self.nics):
9833 if nic.mac == constants.VALUE_AUTO:
9834 nic_mac_ini = "nic%d_mac" % idx
9835 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9837 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9839 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9840 if self.op.ip_check:
9841 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9842 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9843 (self.check_ip, self.op.instance_name),
9844 errors.ECODE_NOTUNIQUE)
9846 #### mac address generation
9847 # By generating here the mac address both the allocator and the hooks get
9848 # the real final mac address rather than the 'auto' or 'generate' value.
9849 # There is a race condition between the generation and the instance object
9850 # creation, which means that we know the mac is valid now, but we're not
9851 # sure it will be when we actually add the instance. If things go bad
9852 # adding the instance will abort because of a duplicate mac, and the
9853 # creation job will fail.
9854 for nic in self.nics:
9855 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9856 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
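9858 # now that NICs, disks and backend parameters are final, optionally run the iallocator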
9860 if self.op.iallocator is not None:
9861 self._RunAllocator()
9863 # Release all unneeded node locks
9864 _ReleaseLocks(self, locking.LEVEL_NODE,
9865 keep=filter(None, [self.op.pnode, self.op.snode,
9866 self.op.src_node]))
9867 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9868 keep=filter(None, [self.op.pnode, self.op.snode,
9869 self.op.src_node]))
9871 #### node related checks
9873 # check primary node
9874 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9875 assert self.pnode is not None, \
9876 "Cannot retrieve locked node %s" % self.op.pnode
9878 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9879 pnode.name, errors.ECODE_STATE)
9881 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9882 pnode.name, errors.ECODE_STATE)
9883 if not pnode.vm_capable:
9884 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9885 " '%s'" % pnode.name, errors.ECODE_STATE)
9887 self.secondaries = []
9889 # mirror node verification
9890 if self.op.disk_template in constants.DTS_INT_MIRROR:
9891 if self.op.snode == pnode.name:
9892 raise errors.OpPrereqError("The secondary node cannot be the"
9893 " primary node", errors.ECODE_INVAL)
9894 _CheckNodeOnline(self, self.op.snode)
9895 _CheckNodeNotDrained(self, self.op.snode)
9896 _CheckNodeVmCapable(self, self.op.snode)
9897 self.secondaries.append(self.op.snode)
9899 snode = self.cfg.GetNodeInfo(self.op.snode)
9900 if pnode.group != snode.group:
9901 self.LogWarning("The primary and secondary nodes are in two"
9902 " different node groups; the disk parameters"
9903 " from the first disk's node group will be"
9906 nodenames = [pnode.name] + self.secondaries
9908 # Verify instance specs
9909 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9910 ispec = {
9911 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9912 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9913 constants.ISPEC_DISK_COUNT: len(self.disks),
9914 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9915 constants.ISPEC_NIC_COUNT: len(self.nics),
9916 constants.ISPEC_SPINDLE_USE: spindle_use,
9917 }
9919 group_info = self.cfg.GetNodeGroup(pnode.group)
9920 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9921 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9922 if not self.op.ignore_ipolicy and res:
9923 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9924 " policy: %s") % (pnode.group,
9925 utils.CommaJoin(res)),
9926 errors.ECODE_INVAL)
9928 if not self.adopt_disks:
9929 if self.op.disk_template == constants.DT_RBD:
9930 # _CheckRADOSFreeSpace() is just a placeholder.
9931 # Any function that checks prerequisites can be placed here.
9932 # Check if there is enough space on the RADOS cluster.
9933 _CheckRADOSFreeSpace()
9935 # Check lv size requirements, if not adopting
9936 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9937 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9939 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9940 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9941 disk[constants.IDISK_ADOPT])
9942 for disk in self.disks])
9943 if len(all_lvs) != len(self.disks):
9944 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9946 for lv_name in all_lvs:
9948 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9949 # to ReserveLV uses the same syntax
9950 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9951 except errors.ReservationError:
9952 raise errors.OpPrereqError("LV named %s used by another instance" %
9953 lv_name, errors.ECODE_NOTUNIQUE)
9955 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9956 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9958 node_lvs = self.rpc.call_lv_list([pnode.name],
9959 vg_names.payload.keys())[pnode.name]
9960 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9961 node_lvs = node_lvs.payload
9963 delta = all_lvs.difference(node_lvs.keys())
9964 if delta:
9965 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9966 utils.CommaJoin(delta),
9967 errors.ECODE_INVAL)
9968 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9969 if online_lvs:
9970 raise errors.OpPrereqError("Online logical volumes found, cannot"
9971 " adopt: %s" % utils.CommaJoin(online_lvs),
9972 errors.ECODE_STATE)
9973 # update the size of disk based on what is found
9974 for dsk in self.disks:
9975 dsk[constants.IDISK_SIZE] = \
9976 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9977 dsk[constants.IDISK_ADOPT])][0]))
9979 elif self.op.disk_template == constants.DT_BLOCK:
9980 # Normalize and de-duplicate device paths
9981 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9982 for disk in self.disks])
9983 if len(all_disks) != len(self.disks):
9984 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9986 baddisks = [d for d in all_disks
9987 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9989 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9990 " cannot be adopted" %
9991 (", ".join(baddisks),
9992 constants.ADOPTABLE_BLOCKDEV_ROOT),
9993 errors.ECODE_INVAL)
9995 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9996 list(all_disks))[pnode.name]
9997 node_disks.Raise("Cannot get block device information from node %s" %
9999 node_disks = node_disks.payload
10000 delta = all_disks.difference(node_disks.keys())
10002 raise errors.OpPrereqError("Missing block device(s): %s" %
10003 utils.CommaJoin(delta),
10004 errors.ECODE_INVAL)
10005 for dsk in self.disks:
10006 dsk[constants.IDISK_SIZE] = \
10007 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10009 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10011 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10012 # check OS parameters (remotely)
10013 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10015 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10017 # memory check on primary node
10018 #TODO(dynmem): use MINMEM for checking
10020 _CheckNodeFreeMemory(self, self.pnode.name,
10021 "creating instance %s" % self.op.instance_name,
10022 self.be_full[constants.BE_MAXMEM],
10023 self.op.hypervisor)
10025 self.dry_run_result = list(nodenames)
10027 def Exec(self, feedback_fn):
10028 """Create and add the instance to the cluster.
10031 instance = self.op.instance_name
10032 pnode_name = self.pnode.name
10034 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10035 self.owned_locks(locking.LEVEL_NODE)), \
10036 "Node locks differ from node resource locks"
10038 ht_kind = self.op.hypervisor
10039 if ht_kind in constants.HTS_REQ_PORT:
10040 network_port = self.cfg.AllocatePort()
10041 else:
10042 network_port = None
10044 # This is ugly but we got a chicken-egg problem here
10045 # We can only take the group disk parameters, as the instance
10046 # has no disks yet (we are generating them right here).
10047 node = self.cfg.GetNodeInfo(pnode_name)
10048 nodegroup = self.cfg.GetNodeGroup(node.group)
10049 disks = _GenerateDiskTemplate(self,
10050 self.op.disk_template,
10051 instance, pnode_name,
10052 self.secondaries,
10053 self.disks,
10054 self.instance_file_storage_dir,
10055 self.op.file_driver,
10056 0,
10057 feedback_fn,
10058 self.cfg.GetGroupDiskParams(nodegroup))
10060 iobj = objects.Instance(name=instance, os=self.op.os_type,
10061 primary_node=pnode_name,
10062 nics=self.nics, disks=disks,
10063 disk_template=self.op.disk_template,
10064 admin_state=constants.ADMINST_DOWN,
10065 network_port=network_port,
10066 beparams=self.op.beparams,
10067 hvparams=self.op.hvparams,
10068 hypervisor=self.op.hypervisor,
10069 osparams=self.op.osparams,
10070 )
10072 if self.op.tags:
10073 for tag in self.op.tags:
10074 iobj.AddTag(tag)
10076 if self.adopt_disks:
10077 if self.op.disk_template == constants.DT_PLAIN:
10078 # rename LVs to the newly-generated names; we need to construct
10079 # 'fake' LV disks with the old data, plus the new unique_id
10080 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10081 rename_to = []
10082 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10083 rename_to.append(t_dsk.logical_id)
10084 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10085 self.cfg.SetDiskID(t_dsk, pnode_name)
10086 result = self.rpc.call_blockdev_rename(pnode_name,
10087 zip(tmp_disks, rename_to))
10088 result.Raise("Failed to rename adoped LVs")
10090 feedback_fn("* creating instance disks...")
10092 _CreateDisks(self, iobj)
10093 except errors.OpExecError:
10094 self.LogWarning("Device creation failed, reverting...")
10096 _RemoveDisks(self, iobj)
10098 self.cfg.ReleaseDRBDMinors(instance)
10101 feedback_fn("adding instance %s to cluster config" % instance)
10103 self.cfg.AddInstance(iobj, self.proc.GetECId())
10105 # Declare that we don't want to remove the instance lock anymore, as we've
10106 # added the instance to the config
10107 del self.remove_locks[locking.LEVEL_INSTANCE]
10109 if self.op.mode == constants.INSTANCE_IMPORT:
10110 # Release unused nodes
10111 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10112 else:
10113 # Release all nodes
10114 _ReleaseLocks(self, locking.LEVEL_NODE)
10116 disk_abort = False
10117 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10118 feedback_fn("* wiping instance disks...")
10119 try:
10120 _WipeDisks(self, iobj)
10121 except errors.OpExecError, err:
10122 logging.exception("Wiping disks failed")
10123 self.LogWarning("Wiping instance disks failed (%s)", err)
10124 disk_abort = True
10126 if disk_abort:
10127 # Something is already wrong with the disks, don't do anything else
10128 pass
10129 elif self.op.wait_for_sync:
10130 disk_abort = not _WaitForSync(self, iobj)
10131 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10132 # make sure the disks are not degraded (still sync-ing is ok)
10133 feedback_fn("* checking mirrors status")
10134 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10135 else:
10136 disk_abort = False
10138 if disk_abort:
10139 _RemoveDisks(self, iobj)
10140 self.cfg.RemoveInstance(iobj.name)
10141 # Make sure the instance lock gets removed
10142 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10143 raise errors.OpExecError("There are some degraded disks for"
10146 # Release all node resource locks
10147 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10149 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10150 # we need to set the disks ID to the primary node, since the
10151 # preceding code might or might not have done it, depending on
10152 # disk template and other options
10153 for disk in iobj.disks:
10154 self.cfg.SetDiskID(disk, pnode_name)
10155 if self.op.mode == constants.INSTANCE_CREATE:
10156 if not self.op.no_install:
10157 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10158 not self.op.wait_for_sync)
10160 feedback_fn("* pausing disk sync to install instance OS")
10161 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10164 for idx, success in enumerate(result.payload):
10166 logging.warn("pause-sync of instance %s for disk %d failed",
10169 feedback_fn("* running the instance OS create scripts...")
10170 # FIXME: pass debug option from opcode to backend
10171 os_add_result = \
10172 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10173 self.op.debug_level)
10174 if pause_sync:
10175 feedback_fn("* resuming disk sync")
10176 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10177 (iobj.disks, iobj),
10178 False)
10179 for idx, success in enumerate(result.payload):
10180 if not success:
10181 logging.warn("resume-sync of instance %s for disk %d failed",
10182 instance, idx)
10184 os_add_result.Raise("Could not add os for instance %s"
10185 " on node %s" % (instance, pnode_name))
10188 elif self.op.mode == constants.INSTANCE_IMPORT:
10189 feedback_fn("* running the instance OS import scripts...")
10191 transfers = []
10193 for idx, image in enumerate(self.src_images):
10194 if not image:
10195 continue
10197 # FIXME: pass debug option from opcode to backend
10198 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10199 constants.IEIO_FILE, (image, ),
10200 constants.IEIO_SCRIPT,
10201 (iobj.disks[idx], idx),
10202 None)
10203 transfers.append(dt)
10205 import_result = \
10206 masterd.instance.TransferInstanceData(self, feedback_fn,
10207 self.op.src_node, pnode_name,
10208 self.pnode.secondary_ip,
10209 iobj, transfers)
10210 if not compat.all(import_result):
10211 self.LogWarning("Some disks for instance %s on node %s were not"
10212 " imported successfully" % (instance, pnode_name))
10214 rename_from = self._old_instance_name
10216 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10217 feedback_fn("* preparing remote import...")
10218 # The source cluster will stop the instance before attempting to make
10219 # a connection. In some cases stopping an instance can take a long
10220 # time, hence the shutdown timeout is added to the connection
10222 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10223 self.op.source_shutdown_timeout)
10224 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10226 assert iobj.primary_node == self.pnode.name
10227 disk_results = \
10228 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10229 self.source_x509_ca,
10230 self._cds, timeouts)
10231 if not compat.all(disk_results):
10232 # TODO: Should the instance still be started, even if some disks
10233 # failed to import (valid for local imports, too)?
10234 self.LogWarning("Some disks for instance %s on node %s were not"
10235 " imported successfully" % (instance, pnode_name))
10237 rename_from = self.source_instance_name
10239 else:
10240 # also checked in the prereq part
10241 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10242 % self.op.mode)
10244 # Run rename script on newly imported instance
10245 assert iobj.name == instance
10246 feedback_fn("Running rename script for %s" % instance)
10247 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10248 rename_from,
10249 self.op.debug_level)
10250 if result.fail_msg:
10251 self.LogWarning("Failed to run rename script for %s on node"
10252 " %s: %s" % (instance, pnode_name, result.fail_msg))
10254 assert not self.owned_locks(locking.LEVEL_NODE_RES)
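10255 # finally, start the freshly created/imported instance if requested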
10256 if self.op.start:
10257 iobj.admin_state = constants.ADMINST_UP
10258 self.cfg.Update(iobj, feedback_fn)
10259 logging.info("Starting instance %s on node %s", instance, pnode_name)
10260 feedback_fn("* starting instance...")
10261 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10262 False)
10263 result.Raise("Could not start instance")
10265 return list(iobj.all_nodes)
10268 def _CheckRADOSFreeSpace():
10269 """Compute disk size requirements inside the RADOS cluster.
10272 # For the RADOS cluster we assume there is always enough space.
10273 pass
10276 class LUInstanceConsole(NoHooksLU):
10277 """Connect to an instance's console.
10279 This is somewhat special in that it returns the command line that
10280 you need to run on the master node in order to connect to the
10281 console.
10286 def ExpandNames(self):
10287 self.share_locks = _ShareAll()
10288 self._ExpandAndLockInstance()
10290 def CheckPrereq(self):
10291 """Check prerequisites.
10293 This checks that the instance is in the cluster.
10296 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10297 assert self.instance is not None, \
10298 "Cannot retrieve locked instance %s" % self.op.instance_name
10299 _CheckNodeOnline(self, self.instance.primary_node)
10301 def Exec(self, feedback_fn):
10302 """Connect to the console of an instance
10305 instance = self.instance
10306 node = instance.primary_node
10308 node_insts = self.rpc.call_instance_list([node],
10309 [instance.hypervisor])[node]
10310 node_insts.Raise("Can't get node information from %s" % node)
10312 if instance.name not in node_insts.payload:
10313 if instance.admin_state == constants.ADMINST_UP:
10314 state = constants.INSTST_ERRORDOWN
10315 elif instance.admin_state == constants.ADMINST_DOWN:
10316 state = constants.INSTST_ADMINDOWN
10318 state = constants.INSTST_ADMINOFFLINE
10319 raise errors.OpExecError("Instance %s is not running (state %s)" %
10320 (instance.name, state))
10322 logging.debug("Connecting to console of %s on %s", instance.name, node)
10324 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10327 def _GetInstanceConsole(cluster, instance):
10328 """Returns console information for an instance.
10330 @type cluster: L{objects.Cluster}
10331 @type instance: L{objects.Instance}
10335 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10336 # beparams and hvparams are passed separately, to avoid editing the
10337 # instance and then saving the defaults in the instance itself.
10338 hvparams = cluster.FillHV(instance)
10339 beparams = cluster.FillBE(instance)
10340 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10342 assert console.instance == instance.name
10343 assert console.Validate()
10345 return console.ToDict()
10348 class LUInstanceReplaceDisks(LogicalUnit):
10349 """Replace the disks of an instance.
10352 HPATH = "mirrors-replace"
10353 HTYPE = constants.HTYPE_INSTANCE
10356 def CheckArguments(self):
10357 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10358 self.op.iallocator)
10360 def ExpandNames(self):
10361 self._ExpandAndLockInstance()
10363 assert locking.LEVEL_NODE not in self.needed_locks
10364 assert locking.LEVEL_NODE_RES not in self.needed_locks
10365 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10367 assert self.op.iallocator is None or self.op.remote_node is None, \
10368 "Conflicting options"
10370 if self.op.remote_node is not None:
10371 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10373 # Warning: do not remove the locking of the new secondary here
10374 # unless DRBD8.AddChildren is changed to work in parallel;
10375 # currently it doesn't since parallel invocations of
10376 # FindUnusedMinor will conflict
10377 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10378 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10379 else:
10380 self.needed_locks[locking.LEVEL_NODE] = []
10381 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10383 if self.op.iallocator is not None:
10384 # iallocator will select a new node in the same group
10385 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10387 self.needed_locks[locking.LEVEL_NODE_RES] = []
10389 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10390 self.op.iallocator, self.op.remote_node,
10391 self.op.disks, False, self.op.early_release,
10392 self.op.ignore_ipolicy)
10394 self.tasklets = [self.replacer]
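10395 # the actual work is delegated to the TLReplaceDisks tasklet defined below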
10396 def DeclareLocks(self, level):
10397 if level == locking.LEVEL_NODEGROUP:
10398 assert self.op.remote_node is None
10399 assert self.op.iallocator is not None
10400 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10402 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10403 # Lock all groups used by instance optimistically; this requires going
10404 # via the node before it's locked, requiring verification later on
10405 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10406 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10408 elif level == locking.LEVEL_NODE:
10409 if self.op.iallocator is not None:
10410 assert self.op.remote_node is None
10411 assert not self.needed_locks[locking.LEVEL_NODE]
10413 # Lock member nodes of all locked groups
10414 self.needed_locks[locking.LEVEL_NODE] = [node_name
10415 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10416 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10418 self._LockInstancesNodes()
10419 elif level == locking.LEVEL_NODE_RES:
10421 self.needed_locks[locking.LEVEL_NODE_RES] = \
10422 self.needed_locks[locking.LEVEL_NODE]
10424 def BuildHooksEnv(self):
10425 """Build hooks env.
10427 This runs on the master, the primary and all the secondaries.
10430 instance = self.replacer.instance
10431 env = {
10432 "MODE": self.op.mode,
10433 "NEW_SECONDARY": self.op.remote_node,
10434 "OLD_SECONDARY": instance.secondary_nodes[0],
10435 }
10436 env.update(_BuildInstanceHookEnvByObject(self, instance))
10438 return env
10439 def BuildHooksNodes(self):
10440 """Build hooks nodes.
10443 instance = self.replacer.instance
10444 nl = [
10445 self.cfg.GetMasterNode(),
10446 instance.primary_node,
10447 ]
10448 if self.op.remote_node is not None:
10449 nl.append(self.op.remote_node)
10451 return nl, nl
10452 def CheckPrereq(self):
10453 """Check prerequisites.
10456 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10457 self.op.iallocator is None)
10459 # Verify if node group locks are still correct
10460 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10461 if owned_groups:
10462 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10464 return LogicalUnit.CheckPrereq(self)
10467 class TLReplaceDisks(Tasklet):
10468 """Replaces disks for an instance.
10470 Note: Locking is not within the scope of this class.
10473 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10474 disks, delay_iallocator, early_release, ignore_ipolicy):
10475 """Initializes this class.
10478 Tasklet.__init__(self, lu)
10481 self.instance_name = instance_name
10482 self.mode = mode
10483 self.iallocator_name = iallocator_name
10484 self.remote_node = remote_node
10485 self.disks = disks
10486 self.delay_iallocator = delay_iallocator
10487 self.early_release = early_release
10488 self.ignore_ipolicy = ignore_ipolicy
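10490 # Runtime data, filled in by CheckPrereq/_CheckPrereq2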
10491 self.instance = None
10492 self.new_node = None
10493 self.target_node = None
10494 self.other_node = None
10495 self.remote_node_info = None
10496 self.node_secondary_ip = None
10499 def CheckArguments(mode, remote_node, iallocator):
10500 """Helper function for users of this class.
10503 # check for valid parameter combination
10504 if mode == constants.REPLACE_DISK_CHG:
10505 if remote_node is None and iallocator is None:
10506 raise errors.OpPrereqError("When changing the secondary either an"
10507 " iallocator script must be used or the"
10508 " new node given", errors.ECODE_INVAL)
10510 if remote_node is not None and iallocator is not None:
10511 raise errors.OpPrereqError("Give either the iallocator or the new"
10512 " secondary, not both", errors.ECODE_INVAL)
10514 elif remote_node is not None or iallocator is not None:
10515 # Not replacing the secondary
10516 raise errors.OpPrereqError("The iallocator and new node options can"
10517 " only be used when changing the"
10518 " secondary node", errors.ECODE_INVAL)
10521 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10522 """Compute a new secondary node using an IAllocator.
10525 ial = IAllocator(lu.cfg, lu.rpc,
10526 mode=constants.IALLOCATOR_MODE_RELOC,
10527 name=instance_name,
10528 relocate_from=list(relocate_from))
10530 ial.Run(iallocator_name)
10532 if not ial.success:
10533 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10534 " %s" % (iallocator_name, ial.info),
10535 errors.ECODE_NORES)
10537 if len(ial.result) != ial.required_nodes:
10538 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10539 " of nodes (%s), required %s" %
10541 len(ial.result), ial.required_nodes),
10542 errors.ECODE_FAULT)
10544 remote_node_name = ial.result[0]
10546 lu.LogInfo("Selected new secondary for instance '%s': %s",
10547 instance_name, remote_node_name)
10549 return remote_node_name
10551 def _FindFaultyDisks(self, node_name):
10552 """Wrapper for L{_FindFaultyInstanceDisks}.
10555 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10556 node_name, True)
10558 def _CheckDisksActivated(self, instance):
10559 """Checks if the instance disks are activated.
10561 @param instance: The instance to check disks
10562 @return: True if they are activated, False otherwise
10565 nodes = instance.all_nodes
10567 for idx, dev in enumerate(instance.disks):
10568 for node in nodes:
10569 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10570 self.cfg.SetDiskID(dev, node)
10572 result = _BlockdevFind(self, node, dev, instance)
10574 if result.offline:
10575 continue
10576 elif result.fail_msg or not result.payload:
10577 return False
10579 return True
10581 def CheckPrereq(self):
10582 """Check prerequisites.
10584 This checks that the instance is in the cluster.
10587 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10588 assert instance is not None, \
10589 "Cannot retrieve locked instance %s" % self.instance_name
10591 if instance.disk_template != constants.DT_DRBD8:
10592 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10593 " instances", errors.ECODE_INVAL)
10595 if len(instance.secondary_nodes) != 1:
10596 raise errors.OpPrereqError("The instance has a strange layout,"
10597 " expected one secondary but found %d" %
10598 len(instance.secondary_nodes),
10599 errors.ECODE_FAULT)
10601 if not self.delay_iallocator:
10602 self._CheckPrereq2()
10604 def _CheckPrereq2(self):
10605 """Check prerequisites, second part.
10607 This function should always be part of CheckPrereq. It was separated and is
10608 now called from Exec because during node evacuation iallocator was only
10609 called with an unmodified cluster model, not taking planned changes into
10613 instance = self.instance
10614 secondary_node = instance.secondary_nodes[0]
10616 if self.iallocator_name is None:
10617 remote_node = self.remote_node
10619 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10620 instance.name, instance.secondary_nodes)
10622 if remote_node is None:
10623 self.remote_node_info = None
10625 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10626 "Remote node '%s' is not locked" % remote_node
10628 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10629 assert self.remote_node_info is not None, \
10630 "Cannot retrieve locked node %s" % remote_node
10632 if remote_node == self.instance.primary_node:
10633 raise errors.OpPrereqError("The specified node is the primary node of"
10634 " the instance", errors.ECODE_INVAL)
10636 if remote_node == secondary_node:
10637 raise errors.OpPrereqError("The specified node is already the"
10638 " secondary node of the instance",
10639 errors.ECODE_INVAL)
10641 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10642 constants.REPLACE_DISK_CHG):
10643 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10644 errors.ECODE_INVAL)
10646 if self.mode == constants.REPLACE_DISK_AUTO:
10647 if not self._CheckDisksActivated(instance):
10648 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10649 " first" % self.instance_name,
10650 errors.ECODE_STATE)
10651 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10652 faulty_secondary = self._FindFaultyDisks(secondary_node)
10654 if faulty_primary and faulty_secondary:
10655 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10656 " one node and can not be repaired"
10657 " automatically" % self.instance_name,
10658 errors.ECODE_STATE)
10660 if faulty_primary:
10661 self.disks = faulty_primary
10662 self.target_node = instance.primary_node
10663 self.other_node = secondary_node
10664 check_nodes = [self.target_node, self.other_node]
10665 elif faulty_secondary:
10666 self.disks = faulty_secondary
10667 self.target_node = secondary_node
10668 self.other_node = instance.primary_node
10669 check_nodes = [self.target_node, self.other_node]
10670 else:
10671 self.disks = []
10672 check_nodes = []
10674 else:
10675 # Non-automatic modes
10676 if self.mode == constants.REPLACE_DISK_PRI:
10677 self.target_node = instance.primary_node
10678 self.other_node = secondary_node
10679 check_nodes = [self.target_node, self.other_node]
10681 elif self.mode == constants.REPLACE_DISK_SEC:
10682 self.target_node = secondary_node
10683 self.other_node = instance.primary_node
10684 check_nodes = [self.target_node, self.other_node]
10686 elif self.mode == constants.REPLACE_DISK_CHG:
10687 self.new_node = remote_node
10688 self.other_node = instance.primary_node
10689 self.target_node = secondary_node
10690 check_nodes = [self.new_node, self.other_node]
10692 _CheckNodeNotDrained(self.lu, remote_node)
10693 _CheckNodeVmCapable(self.lu, remote_node)
10695 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10696 assert old_node_info is not None
10697 if old_node_info.offline and not self.early_release:
10698 # doesn't make sense to delay the release
10699 self.early_release = True
10700 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10701 " early-release mode", secondary_node)
10704 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10707 # If not specified all disks should be replaced
10709 self.disks = range(len(self.instance.disks))
10711 # TODO: This is ugly, but right now we can't distinguish between internal
10712 # submitted opcode and external one. We should fix that.
10713 if self.remote_node_info:
10714 # We change the node, let's verify it still meets instance policy
10715 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10716 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10717 new_group_info)
10718 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10719 ignore=self.ignore_ipolicy)
10721 for node in check_nodes:
10722 _CheckNodeOnline(self.lu, node)
10724 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10725 self.other_node,
10726 self.target_node]
10727 if node_name is not None)
10729 # Release unneeded node and node resource locks
10730 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10731 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10733 # Release any owned node group
10734 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10735 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10737 # Check whether disks are valid
10738 for disk_idx in self.disks:
10739 instance.FindDisk(disk_idx)
10741 # Get secondary node IP addresses
10742 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10743 in self.cfg.GetMultiNodeInfo(touched_nodes))
10745 def Exec(self, feedback_fn):
10746 """Execute disk replacement.
10748 This dispatches the disk replacement to the appropriate handler.
10751 if self.delay_iallocator:
10752 self._CheckPrereq2()
10755 # Verify owned locks before starting operation
10756 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10757 assert set(owned_nodes) == set(self.node_secondary_ip), \
10758 ("Incorrect node locks, owning %s, expected %s" %
10759 (owned_nodes, self.node_secondary_ip.keys()))
10760 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10761 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10763 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10764 assert list(owned_instances) == [self.instance_name], \
10765 "Instance '%s' not locked" % self.instance_name
10767 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10768 "Should not own any node group lock at this point"
10771 feedback_fn("No disks need replacement")
10774 feedback_fn("Replacing disk(s) %s for %s" %
10775 (utils.CommaJoin(self.disks), self.instance.name))
10777 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10779 # Activate the instance disks if we're replacing them on a down instance
10780 if activate_disks:
10781 _StartInstanceDisks(self.lu, self.instance, True)
10783 try:
10784 # Should we replace the secondary node?
10785 if self.new_node is not None:
10786 fn = self._ExecDrbd8Secondary
10787 else:
10788 fn = self._ExecDrbd8DiskOnly
10790 result = fn(feedback_fn)
10791 finally:
10792 # Deactivate the instance disks if we're replacing them on a
10793 # down instance
10794 if activate_disks:
10795 _SafeShutdownInstanceDisks(self.lu, self.instance)
10797 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10800 # Verify owned locks
10801 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10802 nodes = frozenset(self.node_secondary_ip)
10803 assert ((self.early_release and not owned_nodes) or
10804 (not self.early_release and not (set(owned_nodes) - nodes))), \
10805 ("Not owning the correct locks, early_release=%s, owned=%r,"
10806 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10810 def _CheckVolumeGroup(self, nodes):
10811 self.lu.LogInfo("Checking volume groups")
10813 vgname = self.cfg.GetVGName()
10815 # Make sure volume group exists on all involved nodes
10816 results = self.rpc.call_vg_list(nodes)
10817 if not results:
10818 raise errors.OpExecError("Can't list volume groups on the nodes")
10820 for node in nodes:
10821 res = results[node]
10822 res.Raise("Error checking node %s" % node)
10823 if vgname not in res.payload:
10824 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10827 def _CheckDisksExistence(self, nodes):
10828 # Check disk existence
10829 for idx, dev in enumerate(self.instance.disks):
10830 if idx not in self.disks:
10831 continue
10833 for node in nodes:
10834 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10835 self.cfg.SetDiskID(dev, node)
10837 result = _BlockdevFind(self, node, dev, self.instance)
10839 msg = result.fail_msg
10840 if msg or not result.payload:
10841 if not msg:
10842 msg = "disk not found"
10843 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10844 (idx, node, msg))
10846 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10847 for idx, dev in enumerate(self.instance.disks):
10848 if idx not in self.disks:
10849 continue
10851 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10852 (idx, node_name))
10854 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10855 on_primary, ldisk=ldisk):
10856 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10857 " replace disks for instance %s" %
10858 (node_name, self.instance.name))
10860 def _CreateNewStorage(self, node_name):
10861 """Create new storage on the primary or secondary node.
10863 This is only used for same-node replaces, not for changing the
10864 secondary node, hence we don't want to modify the existing disk.
10866 """
10867 iv_names = {}
10869 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10870 for idx, dev in enumerate(disks):
10871 if idx not in self.disks:
10872 continue
10874 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10876 self.cfg.SetDiskID(dev, node_name)
10878 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10879 names = _GenerateUniqueNames(self.lu, lv_names)
10881 (data_disk, meta_disk) = dev.children
10882 vg_data = data_disk.logical_id[0]
10883 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10884 logical_id=(vg_data, names[0]),
10885 params=data_disk.params)
10886 vg_meta = meta_disk.logical_id[0]
10887 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10888 logical_id=(vg_meta, names[1]),
10889 params=meta_disk.params)
10891 new_lvs = [lv_data, lv_meta]
10892 old_lvs = [child.Copy() for child in dev.children]
10893 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
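10894 # iv_names maps each disk's iv_name to (drbd device, old LVs, new LVs) for the later steps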
10895 # we pass force_create=True to force the LVM creation
10896 for new_lv in new_lvs:
10897 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10898 _GetInstanceInfoText(self.instance), False)
10900 return iv_names
10902 def _CheckDevices(self, node_name, iv_names):
10903 for name, (dev, _, _) in iv_names.iteritems():
10904 self.cfg.SetDiskID(dev, node_name)
10906 result = _BlockdevFind(self, node_name, dev, self.instance)
10908 msg = result.fail_msg
10909 if msg or not result.payload:
10910 if not msg:
10911 msg = "disk not found"
10912 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10913 (name, msg))
10915 if result.payload.is_degraded:
10916 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10918 def _RemoveOldStorage(self, node_name, iv_names):
10919 for name, (_, old_lvs, _) in iv_names.iteritems():
10920 self.lu.LogInfo("Remove logical volumes for %s" % name)
10923 self.cfg.SetDiskID(lv, node_name)
10925 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10927 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10928 hint="remove unused LVs manually")
10930 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10931 """Replace a disk on the primary or secondary for DRBD 8.
10933 The algorithm for replace is quite complicated:
10935 1. for each disk to be replaced:
10937 1. create new LVs on the target node with unique names
10938 1. detach old LVs from the drbd device
10939 1. rename old LVs to name_replaced.<time_t>
10940 1. rename new LVs to old LVs
10941 1. attach the new LVs (with the old names now) to the drbd device
10943 1. wait for sync across all devices
10945 1. for each modified disk:
10947 1. remove old LVs (which have the name name_replaced.<time_t>)
10949 Failures are not very well handled.
10951 """
10952 steps_total = 6
10954 # Step: check device activation
10955 self.lu.LogStep(1, steps_total, "Check device existence")
10956 self._CheckDisksExistence([self.other_node, self.target_node])
10957 self._CheckVolumeGroup([self.target_node, self.other_node])
10959 # Step: check other node consistency
10960 self.lu.LogStep(2, steps_total, "Check peer consistency")
10961 self._CheckDisksConsistency(self.other_node,
10962 self.other_node == self.instance.primary_node,
10963 False)
10965 # Step: create new storage
10966 self.lu.LogStep(3, steps_total, "Allocate new storage")
10967 iv_names = self._CreateNewStorage(self.target_node)
10969 # Step: for each lv, detach+rename*2+attach
10970 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10971 for dev, old_lvs, new_lvs in iv_names.itervalues():
10972 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10974 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10975 old_lvs)
10976 result.Raise("Can't detach drbd from local storage on node"
10977 " %s for device %s" % (self.target_node, dev.iv_name))
10979 #cfg.Update(instance)
10981 # ok, we created the new LVs, so now we know we have the needed
10982 # storage; as such, we proceed on the target node to rename
10983 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10984 # using the assumption that logical_id == physical_id (which in
10985 # turn is the unique_id on that node)
10987 # FIXME(iustin): use a better name for the replaced LVs
10988 temp_suffix = int(time.time())
10989 ren_fn = lambda d, suff: (d.physical_id[0],
10990 d.physical_id[1] + "_replaced-%s" % suff)
10992 # Build the rename list based on what LVs exist on the node
10993 rename_old_to_new = []
10994 for to_ren in old_lvs:
10995 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10996 if not result.fail_msg and result.payload:
10998 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11000 self.lu.LogInfo("Renaming the old LVs on the target node")
11001 result = self.rpc.call_blockdev_rename(self.target_node,
11002 rename_old_to_new)
11003 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11005 # Now we rename the new LVs to the old LVs
11006 self.lu.LogInfo("Renaming the new LVs on the target node")
11007 rename_new_to_old = [(new, old.physical_id)
11008 for old, new in zip(old_lvs, new_lvs)]
11009 result = self.rpc.call_blockdev_rename(self.target_node,
11010 rename_new_to_old)
11011 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11013 # Intermediate steps of in memory modifications
11014 for old, new in zip(old_lvs, new_lvs):
11015 new.logical_id = old.logical_id
11016 self.cfg.SetDiskID(new, self.target_node)
11018 # We need to modify old_lvs so that removal later removes the
11019 # right LVs, not the newly added ones; note that old_lvs is a
11021 for disk in old_lvs:
11022 disk.logical_id = ren_fn(disk, temp_suffix)
11023 self.cfg.SetDiskID(disk, self.target_node)
11025 # Now that the new lvs have the old name, we can add them to the device
11026 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11027 result = self.rpc.call_blockdev_addchildren(self.target_node,
11028 (dev, self.instance), new_lvs)
11029 msg = result.fail_msg
11030 if msg:
11031 for new_lv in new_lvs:
11032 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11033 new_lv).fail_msg
11034 if msg2:
11035 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11036 hint=("cleanup manually the unused logical"
11037 " volumes"))
11038 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11040 cstep = itertools.count(5)
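# Steps 5 and 6 are numbered through this counter because their order
# differs: with early_release the old storage is removed (and resource locks
# dropped) before waiting for sync, otherwise the sync happens first and the
# old storage is removed at the very end.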
11042 if self.early_release:
11043 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11044 self._RemoveOldStorage(self.target_node, iv_names)
11045 # TODO: Check if releasing locks early still makes sense
11046 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11047 else:
11048 # Release all resource locks except those used by the instance
11049 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11050 keep=self.node_secondary_ip.keys())
11052 # Release all node locks while waiting for sync
11053 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11055 # TODO: Can the instance lock be downgraded here? Take the optional disk
11056 # shutdown in the caller into consideration.
11059 # This can fail as the old devices are degraded and _WaitForSync
11060 # does a combined result over all disks, so we don't check its return value
11061 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11062 _WaitForSync(self.lu, self.instance)
11064 # Check all devices manually
11065 self._CheckDevices(self.instance.primary_node, iv_names)
11067 # Step: remove old storage
11068 if not self.early_release:
11069 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11070 self._RemoveOldStorage(self.target_node, iv_names)
11072 def _ExecDrbd8Secondary(self, feedback_fn):
11073 """Replace the secondary node for DRBD 8.
11075 The algorithm for replace is quite complicated:
11076 - for all disks of the instance:
11077 - create new LVs on the new node with same names
11078 - shutdown the drbd device on the old secondary
11079 - disconnect the drbd network on the primary
11080 - create the drbd device on the new secondary
11081 - network attach the drbd on the primary, using an artifice:
11082 the drbd code for Attach() will connect to the network if it
11083 finds a device which is connected to the good local disks but
11084 not network enabled
11085 - wait for sync across all devices
11086 - remove all disks from the old secondary
11088 Failures are not very well handled.
11090 """
11092 steps_total = 6
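# In this method self.target_node is the old (to be replaced) secondary and
# self.new_node is its replacement; the primary node keeps its storage and
# is only reconnected to the new DRBD peer.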
11093 pnode = self.instance.primary_node
11095 # Step: check device activation
11096 self.lu.LogStep(1, steps_total, "Check device existence")
11097 self._CheckDisksExistence([self.instance.primary_node])
11098 self._CheckVolumeGroup([self.instance.primary_node])
11100 # Step: check other node consistency
11101 self.lu.LogStep(2, steps_total, "Check peer consistency")
11102 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11104 # Step: create new storage
11105 self.lu.LogStep(3, steps_total, "Allocate new storage")
11106 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11107 for idx, dev in enumerate(disks):
11108 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11109 (self.new_node, idx))
11110 # we pass force_create=True to force LVM creation
11111 for new_lv in dev.children:
11112 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11113 True, _GetInstanceInfoText(self.instance), False)
11115 # Step 4: dbrd minors and drbd setups changes
11116 # after this, we must manually remove the drbd minors on both the
11117 # error and the success paths
11118 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11119 minors = self.cfg.AllocateDRBDMinor([self.new_node
11120 for dev in self.instance.disks],
11121 self.instance.name)
11122 logging.debug("Allocated minors %r", minors)
11124 iv_names = {}
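# iv_names maps the disk index to (device, original children, new network
# logical_id); it is used below to update the instance configuration and to
# verify the devices after the sync.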
11125 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11126 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11127 (self.new_node, idx))
11128 # create new devices on new_node; note that we create two IDs:
11129 # one without port, so the drbd will be activated without
11130 # networking information on the new node at this stage, and one
11131 # with network, for the latter activation in step 4
11132 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11133 if self.instance.primary_node == o_node1:
11134 p_minor = o_minor1
11135 else:
11136 assert self.instance.primary_node == o_node2, "Three-node instance?"
11137 p_minor = o_minor2
11139 new_alone_id = (self.instance.primary_node, self.new_node, None,
11140 p_minor, new_minor, o_secret)
11141 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11142 p_minor, new_minor, o_secret)
11144 iv_names[idx] = (dev, dev.children, new_net_id)
11145 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11146 new_net_id)
11147 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11148 logical_id=new_alone_id,
11149 children=dev.children,
11150 size=dev.size,
11151 params={})
11152 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11153 self.cfg)
11154 try:
11155 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11156 anno_new_drbd,
11157 _GetInstanceInfoText(self.instance), False)
11158 except errors.GenericError:
11159 self.cfg.ReleaseDRBDMinors(self.instance.name)
11160 raise
11162 # We have new devices, shutdown the drbd on the old secondary
11163 for idx, dev in enumerate(self.instance.disks):
11164 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11165 self.cfg.SetDiskID(dev, self.target_node)
11166 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11167 (dev, self.instance)).fail_msg
11168 if msg:
11169 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11170 " node: %s" % (idx, msg),
11171 hint=("Please cleanup this device manually as"
11172 " soon as possible"))
11174 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11175 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11176 self.instance.disks)[pnode]
11178 msg = result.fail_msg
11179 if msg:
11180 # detaches didn't succeed (unlikely)
11181 self.cfg.ReleaseDRBDMinors(self.instance.name)
11182 raise errors.OpExecError("Can't detach the disks from the network on"
11183 " old node: %s" % (msg,))
11185 # if we managed to detach at least one, we update all the disks of
11186 # the instance to point to the new secondary
11187 self.lu.LogInfo("Updating instance configuration")
11188 for dev, _, new_logical_id in iv_names.itervalues():
11189 dev.logical_id = new_logical_id
11190 self.cfg.SetDiskID(dev, self.instance.primary_node)
11192 self.cfg.Update(self.instance, feedback_fn)
11194 # Release all node locks (the configuration has been updated)
11195 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11197 # and now perform the drbd attach
11198 self.lu.LogInfo("Attaching primary drbds to new secondary"
11199 " (standalone => connected)")
11200 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11201 self.new_node],
11202 self.node_secondary_ip,
11203 (self.instance.disks, self.instance),
11204 self.instance.name,
11205 False)
11206 for to_node, to_result in result.items():
11207 msg = to_result.fail_msg
11208 if msg:
11209 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11210 to_node, msg,
11211 hint=("please do a gnt-instance info to see the"
11212 " status of disks"))
11214 cstep = itertools.count(5)
11216 if self.early_release:
11217 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11218 self._RemoveOldStorage(self.target_node, iv_names)
11219 # TODO: Check if releasing locks early still makes sense
11220 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11221 else:
11222 # Release all resource locks except those used by the instance
11223 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11224 keep=self.node_secondary_ip.keys())
11226 # TODO: Can the instance lock be downgraded here? Take the optional disk
11227 # shutdown in the caller into consideration.
11230 # This can fail as the old devices are degraded and _WaitForSync
11231 # does a combined result over all disks, so we don't check its return value
11232 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11233 _WaitForSync(self.lu, self.instance)
11235 # Check all devices manually
11236 self._CheckDevices(self.instance.primary_node, iv_names)
11238 # Step: remove old storage
11239 if not self.early_release:
11240 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11241 self._RemoveOldStorage(self.target_node, iv_names)
11244 class LURepairNodeStorage(NoHooksLU):
11245 """Repairs the volume group on a node.
11250 def CheckArguments(self):
11251 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11253 storage_type = self.op.storage_type
11255 if (constants.SO_FIX_CONSISTENCY not in
11256 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11257 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11258 " repaired" % storage_type,
11259 errors.ECODE_INVAL)
11261 def ExpandNames(self):
11262 self.needed_locks = {
11263 locking.LEVEL_NODE: [self.op.node_name],
11264 }
11266 def _CheckFaultyDisks(self, instance, node_name):
11267 """Ensure faulty disks abort the opcode or at least warn."""
11269 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11271 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11272 " node '%s'" % (instance.name, node_name),
11273 errors.ECODE_STATE)
11274 except errors.OpPrereqError, err:
11275 if self.op.ignore_consistency:
11276 self.proc.LogWarning(str(err.args[0]))
11280 def CheckPrereq(self):
11281 """Check prerequisites.
11284 # Check whether any instance on this node has faulty disks
11285 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11286 if inst.admin_state != constants.ADMINST_UP:
11288 check_nodes = set(inst.all_nodes)
11289 check_nodes.discard(self.op.node_name)
11290 for inst_node_name in check_nodes:
11291 self._CheckFaultyDisks(inst, inst_node_name)
11293 def Exec(self, feedback_fn):
11294 feedback_fn("Repairing storage unit '%s' on %s ..." %
11295 (self.op.name, self.op.node_name))
11297 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11298 result = self.rpc.call_storage_execute(self.op.node_name,
11299 self.op.storage_type, st_args,
11300 self.op.name,
11301 constants.SO_FIX_CONSISTENCY)
11302 result.Raise("Failed to repair storage unit '%s' on %s" %
11303 (self.op.name, self.op.node_name))
11306 class LUNodeEvacuate(NoHooksLU):
11307 """Evacuates instances off a list of nodes.
11312 _MODE2IALLOCATOR = {
11313 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11314 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11315 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11316 }
11317 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11318 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11319 constants.IALLOCATOR_NEVAC_MODES)
11321 def CheckArguments(self):
11322 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11324 def ExpandNames(self):
11325 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11327 if self.op.remote_node is not None:
11328 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11329 assert self.op.remote_node
11331 if self.op.remote_node == self.op.node_name:
11332 raise errors.OpPrereqError("Can not use evacuated node as a new"
11333 " secondary node", errors.ECODE_INVAL)
11335 if self.op.mode != constants.NODE_EVAC_SEC:
11336 raise errors.OpPrereqError("Without the use of an iallocator only"
11337 " secondary instances can be evacuated",
11338 errors.ECODE_INVAL)
11341 self.share_locks = _ShareAll()
11342 self.needed_locks = {
11343 locking.LEVEL_INSTANCE: [],
11344 locking.LEVEL_NODEGROUP: [],
11345 locking.LEVEL_NODE: [],
11346 }
11348 # Determine nodes (via group) optimistically, needs verification once locks
11349 # have been acquired
11350 self.lock_nodes = self._DetermineNodes()
11352 def _DetermineNodes(self):
11353 """Gets the list of nodes to operate on.
11356 if self.op.remote_node is None:
11357 # Iallocator will choose any node(s) in the same group
11358 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11360 group_nodes = frozenset([self.op.remote_node])
11362 # Determine nodes to be locked
11363 return set([self.op.node_name]) | group_nodes
11365 def _DetermineInstances(self):
11366 """Builds list of instances to operate on.
11369 assert self.op.mode in constants.NODE_EVAC_MODES
11371 if self.op.mode == constants.NODE_EVAC_PRI:
11372 # Primary instances only
11373 inst_fn = _GetNodePrimaryInstances
11374 assert self.op.remote_node is None, \
11375 "Evacuating primary instances requires iallocator"
11376 elif self.op.mode == constants.NODE_EVAC_SEC:
11377 # Secondary instances only
11378 inst_fn = _GetNodeSecondaryInstances
11379 else:
11380 # All instances
11381 assert self.op.mode == constants.NODE_EVAC_ALL
11382 inst_fn = _GetNodeInstances
11383 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11385 raise errors.OpPrereqError("Due to an issue with the iallocator"
11386 " interface it is not possible to evacuate"
11387 " all instances at once; specify explicitly"
11388 " whether to evacuate primary or secondary"
11390 errors.ECODE_INVAL)
11392 return inst_fn(self.cfg, self.op.node_name)
11394 def DeclareLocks(self, level):
11395 if level == locking.LEVEL_INSTANCE:
11396 # Lock instances optimistically, needs verification once node and group
11397 # locks have been acquired
11398 self.needed_locks[locking.LEVEL_INSTANCE] = \
11399 set(i.name for i in self._DetermineInstances())
11401 elif level == locking.LEVEL_NODEGROUP:
11402 # Lock node groups for all potential target nodes optimistically, needs
11403 # verification once nodes have been acquired
11404 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11405 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11407 elif level == locking.LEVEL_NODE:
11408 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11410 def CheckPrereq(self):
11412 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11413 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11414 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11416 need_nodes = self._DetermineNodes()
11418 if not owned_nodes.issuperset(need_nodes):
11419 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11420 " locks were acquired, current nodes are"
11421 " are '%s', used to be '%s'; retry the"
11423 (self.op.node_name,
11424 utils.CommaJoin(need_nodes),
11425 utils.CommaJoin(owned_nodes)),
11426 errors.ECODE_STATE)
11428 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11429 if owned_groups != wanted_groups:
11430 raise errors.OpExecError("Node groups changed since locks were acquired,"
11431 " current groups are '%s', used to be '%s';"
11432 " retry the operation" %
11433 (utils.CommaJoin(wanted_groups),
11434 utils.CommaJoin(owned_groups)))
11436 # Determine affected instances
11437 self.instances = self._DetermineInstances()
11438 self.instance_names = [i.name for i in self.instances]
11440 if set(self.instance_names) != owned_instances:
11441 raise errors.OpExecError("Instances on node '%s' changed since locks"
11442 " were acquired, current instances are '%s',"
11443 " used to be '%s'; retry the operation" %
11444 (self.op.node_name,
11445 utils.CommaJoin(self.instance_names),
11446 utils.CommaJoin(owned_instances)))
11448 if self.instance_names:
11449 self.LogInfo("Evacuating instances from node '%s': %s",
11450 self.op.node_name,
11451 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11452 else:
11453 self.LogInfo("No instances to evacuate from node '%s'",
11454 self.op.node_name)
11456 if self.op.remote_node is not None:
11457 for i in self.instances:
11458 if i.primary_node == self.op.remote_node:
11459 raise errors.OpPrereqError("Node %s is the primary node of"
11460 " instance %s, cannot use it as"
11462 (self.op.remote_node, i.name),
11463 errors.ECODE_INVAL)
11465 def Exec(self, feedback_fn):
11466 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11468 if not self.instance_names:
11469 # No instances to evacuate
11470 jobs = []
11472 elif self.op.iallocator is not None:
11473 # TODO: Implement relocation to other group
11474 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11475 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11476 instances=list(self.instance_names))
11478 ial.Run(self.op.iallocator)
11480 if not ial.success:
11481 raise errors.OpPrereqError("Can't compute node evacuation using"
11482 " iallocator '%s': %s" %
11483 (self.op.iallocator, ial.info),
11484 errors.ECODE_NORES)
11486 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11488 elif self.op.remote_node is not None:
11489 assert self.op.mode == constants.NODE_EVAC_SEC
11490 jobs = [
11491 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11492 remote_node=self.op.remote_node,
11493 disks=[],
11494 mode=constants.REPLACE_DISK_CHG,
11495 early_release=self.op.early_release)]
11496 for instance_name in self.instance_names]
11499 else:
11500 raise errors.ProgrammerError("No iallocator or remote node")
11502 return ResultWithJobs(jobs)
11505 def _SetOpEarlyRelease(early_release, op):
11506 """Sets C{early_release} flag on opcodes if available.
11510 op.early_release = early_release
11511 except AttributeError:
11512 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11517 def _NodeEvacDest(use_nodes, group, nodes):
11518 """Returns group or nodes depending on caller's choice.
11522 return utils.CommaJoin(nodes)
11527 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11528 """Unpacks the result of change-group and node-evacuate iallocator requests.
11530 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11531 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11533 @type lu: L{LogicalUnit}
11534 @param lu: Logical unit instance
11535 @type alloc_result: tuple/list
11536 @param alloc_result: Result from iallocator
11537 @type early_release: bool
11538 @param early_release: Whether to release locks early if possible
11539 @type use_nodes: bool
11540 @param use_nodes: Whether to display node names instead of groups
11542 """
11543 (moved, failed, jobs) = alloc_result
11545 if failed:
11546 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11547 for (name, reason) in failed)
11548 lu.LogWarning("Unable to evacuate instances %s", failreason)
11549 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11551 if moved:
11552 lu.LogInfo("Instances to be moved: %s",
11553 utils.CommaJoin("%s (to %s)" %
11554 (name, _NodeEvacDest(use_nodes, group, nodes))
11555 for (name, group, nodes) in moved))
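# alloc_result is (moved, failed, jobs); "jobs" is a list of job definitions,
# each a list of serialized opcodes, e.g. (illustrative shape only)
#   [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]]
# which the return statement below turns into loaded opcode objects with the
# early_release flag applied.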
11557 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11558 map(opcodes.OpCode.LoadOpCode, ops))
11559 for ops in jobs]
11562 class LUInstanceGrowDisk(LogicalUnit):
11563 """Grow a disk of an instance.
11566 HPATH = "disk-grow"
11567 HTYPE = constants.HTYPE_INSTANCE
11570 def ExpandNames(self):
11571 self._ExpandAndLockInstance()
11572 self.needed_locks[locking.LEVEL_NODE] = []
11573 self.needed_locks[locking.LEVEL_NODE_RES] = []
11574 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11575 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11577 def DeclareLocks(self, level):
11578 if level == locking.LEVEL_NODE:
11579 self._LockInstancesNodes()
11580 elif level == locking.LEVEL_NODE_RES:
11582 self.needed_locks[locking.LEVEL_NODE_RES] = \
11583 self.needed_locks[locking.LEVEL_NODE][:]
11585 def BuildHooksEnv(self):
11586 """Build hooks env.
11588 This runs on the master, the primary and all the secondaries.
11590 """
11591 env = {
11592 "DISK": self.op.disk,
11593 "AMOUNT": self.op.amount,
11594 "ABSOLUTE": self.op.absolute,
11595 }
11596 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11597 return env
11599 def BuildHooksNodes(self):
11600 """Build hooks nodes.
11603 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11606 def CheckPrereq(self):
11607 """Check prerequisites.
11609 This checks that the instance is in the cluster.
11611 """
11612 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11613 assert instance is not None, \
11614 "Cannot retrieve locked instance %s" % self.op.instance_name
11615 nodenames = list(instance.all_nodes)
11616 for node in nodenames:
11617 _CheckNodeOnline(self, node)
11619 self.instance = instance
11621 if instance.disk_template not in constants.DTS_GROWABLE:
11622 raise errors.OpPrereqError("Instance's disk layout does not support"
11623 " growing", errors.ECODE_INVAL)
11625 self.disk = instance.FindDisk(self.op.disk)
11627 if self.op.absolute:
11628 self.target = self.op.amount
11629 self.delta = self.target - self.disk.size
11630 if self.delta < 0:
11631 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11632 "current disk size (%s)" %
11633 (utils.FormatUnit(self.target, "h"),
11634 utils.FormatUnit(self.disk.size, "h")),
11635 errors.ECODE_STATE)
11636 else:
11637 self.delta = self.op.amount
11638 self.target = self.disk.size + self.delta
11639 if self.delta < 0:
11640 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11641 utils.FormatUnit(self.delta, "h"),
11642 errors.ECODE_INVAL)
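# Example (illustrative numbers): growing a 10240 MB disk with amount=2048
# and absolute=False gives delta=2048, target=12288; the same request with
# absolute=True and amount=12288 yields the identical delta/target pair.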
11644 if instance.disk_template not in (constants.DT_FILE,
11645 constants.DT_SHARED_FILE,
11646 constants.DT_RBD):
11647 # TODO: check the free disk space for file, when that feature will be
11648 # supported
11649 _CheckNodesFreeDiskPerVG(self, nodenames,
11650 self.disk.ComputeGrowth(self.delta))
11652 def Exec(self, feedback_fn):
11653 """Execute disk grow.
11656 instance = self.instance
11659 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11660 assert (self.owned_locks(locking.LEVEL_NODE) ==
11661 self.owned_locks(locking.LEVEL_NODE_RES))
11663 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11664 if not disks_ok:
11665 raise errors.OpExecError("Cannot activate block device to grow")
11667 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11668 (self.op.disk, instance.name,
11669 utils.FormatUnit(self.delta, "h"),
11670 utils.FormatUnit(self.target, "h")))
11672 # First run all grow ops in dry-run mode
11673 for node in instance.all_nodes:
11674 self.cfg.SetDiskID(disk, node)
11675 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11676 True)
11677 result.Raise("Grow request failed to node %s" % node)
11679 # We know that (as far as we can test) operations across different
11680 # nodes will succeed, time to run it for real
11681 for node in instance.all_nodes:
11682 self.cfg.SetDiskID(disk, node)
11683 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11684 False)
11685 result.Raise("Grow request failed to node %s" % node)
11687 # TODO: Rewrite code to work properly
11688 # DRBD goes into sync mode for a short amount of time after executing the
11689 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11690 # calling "resize" in sync mode fails. Sleeping for a short amount of
11691 # time is a work-around.
11692 time.sleep(5)
11694 disk.RecordGrow(self.delta)
11695 self.cfg.Update(instance, feedback_fn)
11697 # Changes have been recorded, release node lock
11698 _ReleaseLocks(self, locking.LEVEL_NODE)
11700 # Downgrade lock while waiting for sync
11701 self.glm.downgrade(locking.LEVEL_INSTANCE)
11703 if self.op.wait_for_sync:
11704 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11705 if disk_abort:
11706 self.proc.LogWarning("Disk sync-ing has not returned a good"
11707 " status; please check the instance")
11708 if instance.admin_state != constants.ADMINST_UP:
11709 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11710 elif instance.admin_state != constants.ADMINST_UP:
11711 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11712 " not supposed to be running because no wait for"
11713 " sync mode was requested")
11715 assert self.owned_locks(locking.LEVEL_NODE_RES)
11716 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11719 class LUInstanceQueryData(NoHooksLU):
11720 """Query runtime instance data.
11725 def ExpandNames(self):
11726 self.needed_locks = {}
11728 # Use locking if requested or when non-static information is wanted
11729 if not (self.op.static or self.op.use_locking):
11730 self.LogWarning("Non-static data requested, locks need to be acquired")
11731 self.op.use_locking = True
11733 if self.op.instances or not self.op.use_locking:
11734 # Expand instance names right here
11735 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11736 else:
11737 # Will use acquired locks
11738 self.wanted_names = None
11740 if self.op.use_locking:
11741 self.share_locks = _ShareAll()
11743 if self.wanted_names is None:
11744 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11745 else:
11746 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11748 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11749 self.needed_locks[locking.LEVEL_NODE] = []
11750 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11752 def DeclareLocks(self, level):
11753 if self.op.use_locking:
11754 if level == locking.LEVEL_NODEGROUP:
11755 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11757 # Lock all groups used by instances optimistically; this requires going
11758 # via the node before it's locked, requiring verification later on
11759 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11760 frozenset(group_uuid
11761 for instance_name in owned_instances
11762 for group_uuid in
11763 self.cfg.GetInstanceNodeGroups(instance_name))
11765 elif level == locking.LEVEL_NODE:
11766 self._LockInstancesNodes()
11768 def CheckPrereq(self):
11769 """Check prerequisites.
11771 This only checks the optional instance list against the existing names.
11773 """
11774 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11775 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11776 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11778 if self.wanted_names is None:
11779 assert self.op.use_locking, "Locking was not used"
11780 self.wanted_names = owned_instances
11782 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11784 if self.op.use_locking:
11785 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11786 None)
11787 else:
11788 assert not (owned_instances or owned_groups or owned_nodes)
11790 self.wanted_instances = instances.values()
11792 def _ComputeBlockdevStatus(self, node, instance, dev):
11793 """Returns the status of a block device
11796 if self.op.static or not node:
11799 self.cfg.SetDiskID(dev, node)
11801 result = self.rpc.call_blockdev_find(node, dev)
11805 result.Raise("Can't compute disk status for %s" % instance.name)
11807 status = result.payload
11811 return (status.dev_path, status.major, status.minor,
11812 status.sync_percent, status.estimated_time,
11813 status.is_degraded, status.ldisk_status)
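# The tuple above becomes the "pstatus"/"sstatus" entries of the per-disk
# dictionary built in _ComputeDiskStatusInner below.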
11815 def _ComputeDiskStatus(self, instance, snode, dev):
11816 """Compute block device status.
11819 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11821 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11823 def _ComputeDiskStatusInner(self, instance, snode, dev):
11824 """Compute block device status.
11826 @attention: The device has to be annotated already.
11828 """
11829 if dev.dev_type in constants.LDS_DRBD:
11830 # we change the snode then (otherwise we use the one passed in)
11831 if dev.logical_id[0] == instance.primary_node:
11832 snode = dev.logical_id[1]
11833 else:
11834 snode = dev.logical_id[0]
11836 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11837 instance, dev)
11838 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11840 if dev.children:
11841 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11842 instance, snode),
11843 dev.children)
11844 else:
11845 dev_children = []
11847 return {
11848 "iv_name": dev.iv_name,
11849 "dev_type": dev.dev_type,
11850 "logical_id": dev.logical_id,
11851 "physical_id": dev.physical_id,
11852 "pstatus": dev_pstatus,
11853 "sstatus": dev_sstatus,
11854 "children": dev_children,
11859 def Exec(self, feedback_fn):
11860 """Gather and return data"""
11863 cluster = self.cfg.GetClusterInfo()
11865 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11866 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11868 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11869 for node in nodes.values()))
11871 group2name_fn = lambda uuid: groups[uuid].name
11873 for instance in self.wanted_instances:
11874 pnode = nodes[instance.primary_node]
11876 if self.op.static or pnode.offline:
11877 remote_state = None
11878 if pnode.offline:
11879 self.LogWarning("Primary node %s is marked offline, returning static"
11880 " information only for instance %s" %
11881 (pnode.name, instance.name))
11882 else:
11883 remote_info = self.rpc.call_instance_info(instance.primary_node,
11884 instance.name,
11885 instance.hypervisor)
11886 remote_info.Raise("Error checking node %s" % instance.primary_node)
11887 remote_info = remote_info.payload
11888 if remote_info and "state" in remote_info:
11889 remote_state = "up"
11890 else:
11891 if instance.admin_state == constants.ADMINST_UP:
11892 remote_state = "down"
11893 else:
11894 remote_state = instance.admin_state
11896 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11897 instance.disks)
11899 snodes_group_uuids = [nodes[snode_name].group
11900 for snode_name in instance.secondary_nodes]
11902 result[instance.name] = {
11903 "name": instance.name,
11904 "config_state": instance.admin_state,
11905 "run_state": remote_state,
11906 "pnode": instance.primary_node,
11907 "pnode_group_uuid": pnode.group,
11908 "pnode_group_name": group2name_fn(pnode.group),
11909 "snodes": instance.secondary_nodes,
11910 "snodes_group_uuids": snodes_group_uuids,
11911 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11913 # this happens to be the same format used for hooks
11914 "nics": _NICListToTuple(self, instance.nics),
11915 "disk_template": instance.disk_template,
11917 "hypervisor": instance.hypervisor,
11918 "network_port": instance.network_port,
11919 "hv_instance": instance.hvparams,
11920 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11921 "be_instance": instance.beparams,
11922 "be_actual": cluster.FillBE(instance),
11923 "os_instance": instance.osparams,
11924 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11925 "serial_no": instance.serial_no,
11926 "mtime": instance.mtime,
11927 "ctime": instance.ctime,
11928 "uuid": instance.uuid,
11934 def PrepareContainerMods(mods, private_fn):
11935 """Prepares a list of container modifications by adding a private data field.
11937 @type mods: list of tuples; (operation, index, parameters)
11938 @param mods: List of modifications
11939 @type private_fn: callable or None
11940 @param private_fn: Callable for constructing a private data field for a
11941 modification
11942 @rtype: list
11944 """
11945 if private_fn is None:
11946 fn = lambda: None
11947 else:
11948 fn = private_fn
11950 return [(op, idx, params, fn()) for (op, idx, params) in mods]
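# Illustrative use (made-up values): PrepareContainerMods(
#   [(constants.DDM_ADD, -1, {"size": 1024})], None)
# returns [(constants.DDM_ADD, -1, {"size": 1024}, None)], i.e. each
# modification simply gains a private-data slot.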
11953 #: Type description for changes as returned by L{ApplyContainerMods}'s
11954 #: callbacks
11955 _TApplyContModsCbChanges = \
11956 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11957 ht.TNonEmptyString,
11958 ht.TAny,
11959 ])))
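# A callback's change list is a sequence of (description, value) pairs, for
# example [("disk/0", "add:size=1024,mode=rw")] as produced by _CreateNewDisk
# further down.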
11962 def ApplyContainerMods(kind, container, chgdesc, mods,
11963 create_fn, modify_fn, remove_fn):
11964 """Applies descriptions in C{mods} to C{container}.
11967 @param kind: One-word item description
11968 @type container: list
11969 @param container: Container to modify
11970 @type chgdesc: None or list
11971 @param chgdesc: List of applied changes
11973 @param mods: Modifications as returned by L{PrepareContainerMods}
11974 @type create_fn: callable
11975 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11976 receives absolute item index, parameters and private data object as added
11977 by L{PrepareContainerMods}, returns tuple containing new item and changes
11979 @type modify_fn: callable
11980 @param modify_fn: Callback for modifying an existing item
11981 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11982 and private data object as added by L{PrepareContainerMods}, returns
11984 @type remove_fn: callable
11985 @param remove_fn: Callback on removing item; receives absolute item index,
11986 item and private data object as added by L{PrepareContainerMods}
11988 """
11989 for (op, idx, params, private) in mods:
11990 if idx == -1:
11991 # Append
11992 absidx = len(container) - 1
11993 elif idx < 0:
11994 raise IndexError("Not accepting negative indices other than -1")
11995 elif idx > len(container):
11996 raise IndexError("Got %s index %s, but there are only %s" %
11997 (kind, idx, len(container)))
11998 else:
11999 absidx = idx
12001 changes = None
12003 if op == constants.DDM_ADD:
12004 # Calculate where item will be added
12005 if idx == -1:
12006 addidx = len(container)
12007 else:
12008 addidx = idx
12010 if create_fn is None:
12011 item = params
12012 else:
12013 (item, changes) = create_fn(addidx, params, private)
12015 if idx == -1:
12016 container.append(item)
12017 else:
12019 assert idx <= len(container)
12020 # list.insert does so before the specified index
12021 container.insert(idx, item)
12022 else:
12023 # Retrieve existing item
12024 try:
12025 item = container[absidx]
12026 except IndexError:
12027 raise IndexError("Invalid %s index %s" % (kind, idx))
12029 if op == constants.DDM_REMOVE:
12030 assert not params
12032 if remove_fn is not None:
12033 remove_fn(absidx, item, private)
12035 changes = [("%s/%s" % (kind, absidx), "remove")]
12037 assert container[absidx] == item
12038 del container[absidx]
12039 elif op == constants.DDM_MODIFY:
12040 if modify_fn is not None:
12041 changes = modify_fn(absidx, item, params, private)
12042 else:
12043 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12045 assert _TApplyContModsCbChanges(changes)
12047 if not (chgdesc is None or changes is None):
12048 chgdesc.extend(changes)
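# Illustrative call (made-up container): applying
#   mods = [(constants.DDM_ADD, -1, {"size": 1024}, None)]
# with kind="disk" appends the item built by create_fn to the container and,
# if chgdesc is a list, extends it with the returned changes.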
12051 def _UpdateIvNames(base_index, disks):
12052 """Updates the C{iv_name} attribute of disks.
12054 @type disks: list of L{objects.Disk}
12057 for (idx, disk) in enumerate(disks):
12058 disk.iv_name = "disk/%s" % (base_index + idx, )
12061 class _InstNicModPrivate:
12062 """Data structure for network interface modifications.
12064 Used by L{LUInstanceSetParams}.
12066 """
12067 def __init__(self):
12068 self.params = None
12069 self.filled = None
12072 class LUInstanceSetParams(LogicalUnit):
12073 """Modifies an instances's parameters.
12076 HPATH = "instance-modify"
12077 HTYPE = constants.HTYPE_INSTANCE
12081 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12082 assert ht.TList(mods)
12083 assert not mods or len(mods[0]) in (2, 3)
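# Older clients send NIC/disk modifications as (op, params) 2-tuples; this
# helper normalizes them to the (op, index, params) 3-tuple form checked by
# verify_fn, while 3-tuple input is passed through unchanged.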
12085 if mods and len(mods[0]) == 2:
12086 result = []
12088 addremove = 0
12089 for op, params in mods:
12090 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12091 result.append((op, -1, params))
12092 addremove += 1
12094 if addremove > 1:
12095 raise errors.OpPrereqError("Only one %s add or remove operation is"
12096 " supported at a time" % kind,
12097 errors.ECODE_INVAL)
12098 else:
12099 result.append((constants.DDM_MODIFY, op, params))
12101 assert verify_fn(result)
12102 else:
12103 result = mods
12105 return result
12107 @staticmethod
12108 def _CheckMods(kind, mods, key_types, item_fn):
12109 """Ensures requested disk/NIC modifications are valid.
12112 for (op, _, params) in mods:
12113 assert ht.TDict(params)
12115 utils.ForceDictType(params, key_types)
12117 if op == constants.DDM_REMOVE:
12119 raise errors.OpPrereqError("No settings should be passed when"
12120 " removing a %s" % kind,
12121 errors.ECODE_INVAL)
12122 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12123 item_fn(op, params)
12125 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12127 @staticmethod
12128 def _VerifyDiskModification(op, params):
12129 """Verifies a disk modification.
12132 if op == constants.DDM_ADD:
12133 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12134 if mode not in constants.DISK_ACCESS_SET:
12135 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12136 errors.ECODE_INVAL)
12138 size = params.get(constants.IDISK_SIZE, None)
12139 if size is None:
12140 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12141 constants.IDISK_SIZE, errors.ECODE_INVAL)
12143 try:
12144 size = int(size)
12145 except (TypeError, ValueError), err:
12146 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12147 errors.ECODE_INVAL)
12149 params[constants.IDISK_SIZE] = size
12151 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12152 raise errors.OpPrereqError("Disk size change not possible, use"
12153 " grow-disk", errors.ECODE_INVAL)
12156 def _VerifyNicModification(op, params):
12157 """Verifies a network interface modification.
12160 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12161 ip = params.get(constants.INIC_IP, None)
12162 if ip is None:
12163 pass
12164 elif ip.lower() == constants.VALUE_NONE:
12165 params[constants.INIC_IP] = None
12166 elif not netutils.IPAddress.IsValid(ip):
12167 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12168 errors.ECODE_INVAL)
12170 bridge = params.get("bridge", None)
12171 link = params.get(constants.INIC_LINK, None)
12172 if bridge and link:
12173 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12174 " at the same time", errors.ECODE_INVAL)
12175 elif bridge and bridge.lower() == constants.VALUE_NONE:
12176 params["bridge"] = None
12177 elif link and link.lower() == constants.VALUE_NONE:
12178 params[constants.INIC_LINK] = None
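# Illustrative params dict reaching this point (values made up):
#   {constants.INIC_IP: "192.0.2.10", constants.INIC_LINK: "br0"}
# A missing MAC on an addition is filled in as "auto" below and later
# replaced by a generated address in _PrepareNicModification.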
12180 if op == constants.DDM_ADD:
12181 macaddr = params.get(constants.INIC_MAC, None)
12182 if macaddr is None:
12183 params[constants.INIC_MAC] = constants.VALUE_AUTO
12185 if constants.INIC_MAC in params:
12186 macaddr = params[constants.INIC_MAC]
12187 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12188 macaddr = utils.NormalizeAndValidateMac(macaddr)
12190 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12191 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12192 " modifying an existing NIC",
12193 errors.ECODE_INVAL)
12195 def CheckArguments(self):
12196 if not (self.op.nics or self.op.disks or self.op.disk_template or
12197 self.op.hvparams or self.op.beparams or self.op.os_name or
12198 self.op.offline is not None or self.op.runtime_mem):
12199 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12201 if self.op.hvparams:
12202 _CheckGlobalHvParams(self.op.hvparams)
12204 self.op.disks = \
12205 self._UpgradeDiskNicMods("disk", self.op.disks,
12206 opcodes.OpInstanceSetParams.TestDiskModifications)
12207 self.op.nics = \
12208 self._UpgradeDiskNicMods("NIC", self.op.nics,
12209 opcodes.OpInstanceSetParams.TestNicModifications)
12211 # Check disk modifications
12212 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12213 self._VerifyDiskModification)
12215 if self.op.disks and self.op.disk_template is not None:
12216 raise errors.OpPrereqError("Disk template conversion and other disk"
12217 " changes not supported at the same time",
12218 errors.ECODE_INVAL)
12220 if (self.op.disk_template and
12221 self.op.disk_template in constants.DTS_INT_MIRROR and
12222 self.op.remote_node is None):
12223 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12224 " one requires specifying a secondary node",
12225 errors.ECODE_INVAL)
12227 # Check NIC modifications
12228 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12229 self._VerifyNicModification)
12231 def ExpandNames(self):
12232 self._ExpandAndLockInstance()
12233 # Can't even acquire node locks in shared mode as upcoming changes in
12234 # Ganeti 2.6 will start to modify the node object on disk conversion
12235 self.needed_locks[locking.LEVEL_NODE] = []
12236 self.needed_locks[locking.LEVEL_NODE_RES] = []
12237 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12239 def DeclareLocks(self, level):
12240 # TODO: Acquire group lock in shared mode (disk parameters)
12241 if level == locking.LEVEL_NODE:
12242 self._LockInstancesNodes()
12243 if self.op.disk_template and self.op.remote_node:
12244 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12245 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12246 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12248 self.needed_locks[locking.LEVEL_NODE_RES] = \
12249 self.needed_locks[locking.LEVEL_NODE][:]
12251 def BuildHooksEnv(self):
12252 """Build hooks env.
12254 This runs on the master, primary and secondaries.
12256 """
12257 args = {}
12258 if constants.BE_MINMEM in self.be_new:
12259 args["minmem"] = self.be_new[constants.BE_MINMEM]
12260 if constants.BE_MAXMEM in self.be_new:
12261 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12262 if constants.BE_VCPUS in self.be_new:
12263 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12264 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12265 # information at all.
12267 if self._new_nics is not None:
12268 nics = []
12270 for nic in self._new_nics:
12271 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12272 mode = nicparams[constants.NIC_MODE]
12273 link = nicparams[constants.NIC_LINK]
12274 nics.append((nic.ip, nic.mac, mode, link))
12276 args["nics"] = nics
12278 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12279 if self.op.disk_template:
12280 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12281 if self.op.runtime_mem:
12282 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12286 def BuildHooksNodes(self):
12287 """Build hooks nodes.
12290 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12293 def _PrepareNicModification(self, params, private, old_ip, old_params,
12294 cluster, pnode):
12295 update_params_dict = dict([(key, params[key])
12296 for key in constants.NICS_PARAMETERS
12297 if key in params])
12299 if "bridge" in params:
12300 update_params_dict[constants.NIC_LINK] = params["bridge"]
12302 new_params = _GetUpdatedParams(old_params, update_params_dict)
12303 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12305 new_filled_params = cluster.SimpleFillNIC(new_params)
12306 objects.NIC.CheckParameterSyntax(new_filled_params)
12308 new_mode = new_filled_params[constants.NIC_MODE]
12309 if new_mode == constants.NIC_MODE_BRIDGED:
12310 bridge = new_filled_params[constants.NIC_LINK]
12311 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12312 if msg:
12313 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12314 if self.op.force:
12315 self.warn.append(msg)
12316 else:
12317 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12319 elif new_mode == constants.NIC_MODE_ROUTED:
12320 ip = params.get(constants.INIC_IP, old_ip)
12321 if ip is None:
12322 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12323 " on a routed NIC", errors.ECODE_INVAL)
12325 if constants.INIC_MAC in params:
12326 mac = params[constants.INIC_MAC]
12327 if mac is None:
12328 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12329 errors.ECODE_INVAL)
12330 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12331 # otherwise generate the MAC address
12332 params[constants.INIC_MAC] = \
12333 self.cfg.GenerateMAC(self.proc.GetECId())
12334 else:
12335 # or validate/reserve the current one
12336 try:
12337 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12338 except errors.ReservationError:
12339 raise errors.OpPrereqError("MAC address '%s' already in use"
12340 " in cluster" % mac,
12341 errors.ECODE_NOTUNIQUE)
12343 private.params = new_params
12344 private.filled = new_filled_params
12346 def CheckPrereq(self):
12347 """Check prerequisites.
12349 This only checks the instance list against the existing names.
12351 """
12352 # checking the new params on the primary/secondary nodes
12354 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12355 cluster = self.cluster = self.cfg.GetClusterInfo()
12356 assert self.instance is not None, \
12357 "Cannot retrieve locked instance %s" % self.op.instance_name
12358 pnode = instance.primary_node
12359 nodelist = list(instance.all_nodes)
12360 pnode_info = self.cfg.GetNodeInfo(pnode)
12361 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12363 # Prepare disk/NIC modifications
12364 self.diskmod = PrepareContainerMods(self.op.disks, None)
12365 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12368 if self.op.os_name and not self.op.force:
12369 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12370 self.op.force_variant)
12371 instance_os = self.op.os_name
12372 else:
12373 instance_os = instance.os
12375 assert not (self.op.disk_template and self.op.disks), \
12376 "Can't modify disk template and apply disk changes at the same time"
12378 if self.op.disk_template:
12379 if instance.disk_template == self.op.disk_template:
12380 raise errors.OpPrereqError("Instance already has disk template %s" %
12381 instance.disk_template, errors.ECODE_INVAL)
12383 if (instance.disk_template,
12384 self.op.disk_template) not in self._DISK_CONVERSIONS:
12385 raise errors.OpPrereqError("Unsupported disk template conversion from"
12386 " %s to %s" % (instance.disk_template,
12387 self.op.disk_template),
12388 errors.ECODE_INVAL)
12389 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12390 msg="cannot change disk template")
12391 if self.op.disk_template in constants.DTS_INT_MIRROR:
12392 if self.op.remote_node == pnode:
12393 raise errors.OpPrereqError("Given new secondary node %s is the same"
12394 " as the primary node of the instance" %
12395 self.op.remote_node, errors.ECODE_STATE)
12396 _CheckNodeOnline(self, self.op.remote_node)
12397 _CheckNodeNotDrained(self, self.op.remote_node)
12398 # FIXME: here we assume that the old instance type is DT_PLAIN
12399 assert instance.disk_template == constants.DT_PLAIN
12400 disks = [{constants.IDISK_SIZE: d.size,
12401 constants.IDISK_VG: d.logical_id[0]}
12402 for d in instance.disks]
12403 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12404 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12406 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12407 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12408 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12409 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12410 ignore=self.op.ignore_ipolicy)
12411 if pnode_info.group != snode_info.group:
12412 self.LogWarning("The primary and secondary nodes are in two"
12413 " different node groups; the disk parameters"
12414 " from the first disk's node group will be"
12417 # hvparams processing
12418 if self.op.hvparams:
12419 hv_type = instance.hypervisor
12420 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12421 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12422 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12425 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12426 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12427 self.hv_proposed = self.hv_new = hv_new # the new actual values
12428 self.hv_inst = i_hvdict # the new dict (without defaults)
12429 else:
12430 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12431 instance.hvparams)
12432 self.hv_new = self.hv_inst = {}
12434 # beparams processing
12435 if self.op.beparams:
12436 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12437 use_none=True)
12438 objects.UpgradeBeParams(i_bedict)
12439 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12440 be_new = cluster.SimpleFillBE(i_bedict)
12441 self.be_proposed = self.be_new = be_new # the new actual values
12442 self.be_inst = i_bedict # the new dict (without defaults)
12443 else:
12444 self.be_new = self.be_inst = {}
12445 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12446 be_old = cluster.FillBE(instance)
12448 # CPU param validation -- checking every time a parameter is
12449 # changed to cover all cases where either CPU mask or vcpus have
12451 if (constants.BE_VCPUS in self.be_proposed and
12452 constants.HV_CPU_MASK in self.hv_proposed):
12453 cpu_list = \
12454 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12455 # Verify mask is consistent with number of vCPUs. Can skip this
12456 # test if only 1 entry in the CPU mask, which means same mask
12457 # is applied to all vCPUs.
12458 if (len(cpu_list) > 1 and
12459 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12460 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12461 " CPU mask [%s]" %
12462 (self.be_proposed[constants.BE_VCPUS],
12463 self.hv_proposed[constants.HV_CPU_MASK]),
12464 errors.ECODE_INVAL)
12466 # Only perform this test if a new CPU mask is given
12467 if constants.HV_CPU_MASK in self.hv_new:
12468 # Calculate the largest CPU number requested
12469 max_requested_cpu = max(map(max, cpu_list))
12470 # Check that all of the instance's nodes have enough physical CPUs to
12471 # satisfy the requested CPU mask
12472 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12473 max_requested_cpu + 1, instance.hypervisor)
12475 # osparams processing
12476 if self.op.osparams:
12477 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12478 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12479 self.os_inst = i_osdict # the new dict (without defaults)
12480 else:
12481 self.os_inst = {}
12483 self.warn = []
12485 #TODO(dynmem): do the appropriate check involving MINMEM
12486 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12487 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12488 mem_check_list = [pnode]
12489 if be_new[constants.BE_AUTO_BALANCE]:
12490 # either we changed auto_balance to yes or it was from before
12491 mem_check_list.extend(instance.secondary_nodes)
12492 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12493 instance.hypervisor)
12494 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12495 [instance.hypervisor])
12496 pninfo = nodeinfo[pnode]
12497 msg = pninfo.fail_msg
12498 if msg:
12499 # Assume the primary node is unreachable and go ahead
12500 self.warn.append("Can't get info from primary node %s: %s" %
12501 (pnode, msg))
12502 else:
12503 (_, _, (pnhvinfo, )) = pninfo.payload
12504 if not isinstance(pnhvinfo.get("memory_free", None), int):
12505 self.warn.append("Node data from primary node %s doesn't contain"
12506 " free memory information" % pnode)
12507 elif instance_info.fail_msg:
12508 self.warn.append("Can't get instance runtime information: %s" %
12509 instance_info.fail_msg)
12510 else:
12511 if instance_info.payload:
12512 current_mem = int(instance_info.payload["memory"])
12513 else:
12514 # Assume instance not running
12515 # (there is a slight race condition here, but it's not very
12516 # probable, and we have no other way to check)
12517 # TODO: Describe race condition
12518 current_mem = 0
12519 #TODO(dynmem): do the appropriate check involving MINMEM
12520 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12521 pnhvinfo["memory_free"])
12522 if miss_mem > 0:
12523 raise errors.OpPrereqError("This change will prevent the instance"
12524 " from starting, due to %d MB of memory"
12525 " missing on its primary node" %
12526 miss_mem,
12527 errors.ECODE_NORES)
12529 if be_new[constants.BE_AUTO_BALANCE]:
12530 for node, nres in nodeinfo.items():
12531 if node not in instance.secondary_nodes:
12532 continue
12533 nres.Raise("Can't get info from secondary node %s" % node,
12534 prereq=True, ecode=errors.ECODE_STATE)
12535 (_, _, (nhvinfo, )) = nres.payload
12536 if not isinstance(nhvinfo.get("memory_free", None), int):
12537 raise errors.OpPrereqError("Secondary node %s didn't return free"
12538 " memory information" % node,
12539 errors.ECODE_STATE)
12540 #TODO(dynmem): do the appropriate check involving MINMEM
12541 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12542 raise errors.OpPrereqError("This change will prevent the instance"
12543 " from failover to its secondary node"
12544 " %s, due to not enough memory" % node,
12545 errors.ECODE_STATE)
12547 if self.op.runtime_mem:
12548 remote_info = self.rpc.call_instance_info(instance.primary_node,
12549 instance.name,
12550 instance.hypervisor)
12551 remote_info.Raise("Error checking node %s" % instance.primary_node)
12552 if not remote_info.payload: # not running already
12553 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12554 errors.ECODE_STATE)
12556 current_memory = remote_info.payload["memory"]
12557 if (not self.op.force and
12558 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12559 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12560 raise errors.OpPrereqError("Instance %s must have memory between %d"
12561 " and %d MB of memory unless --force is"
12562 " given" % (instance.name,
12563 self.be_proposed[constants.BE_MINMEM],
12564 self.be_proposed[constants.BE_MAXMEM]),
12565 errors.ECODE_INVAL)
12567 if self.op.runtime_mem > current_memory:
12568 _CheckNodeFreeMemory(self, instance.primary_node,
12569 "ballooning memory for instance %s" %
12571 self.op.memory - current_memory,
12572 instance.hypervisor)
12574 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12575 raise errors.OpPrereqError("Disk operations not supported for"
12576 " diskless instances",
12577 errors.ECODE_INVAL)
12579 def _PrepareNicCreate(_, params, private):
12580 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12581 return (None, None)
12583 def _PrepareNicMod(_, nic, params, private):
12584 self._PrepareNicModification(params, private, nic.ip,
12585 nic.nicparams, cluster, pnode)
12586 return None
12588 # Verify NIC changes (operating on copy)
12589 nics = instance.nics[:]
12590 ApplyContainerMods("NIC", nics, None, self.nicmod,
12591 _PrepareNicCreate, _PrepareNicMod, None)
12592 if len(nics) > constants.MAX_NICS:
12593 raise errors.OpPrereqError("Instance has too many network interfaces"
12594 " (%d), cannot add more" % constants.MAX_NICS,
12595 errors.ECODE_STATE)
12597 # Verify disk changes (operating on a copy)
12598 disks = instance.disks[:]
12599 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12600 if len(disks) > constants.MAX_DISKS:
12601 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12602 " more" % constants.MAX_DISKS,
12603 errors.ECODE_STATE)
12605 if self.op.offline is not None:
12606 if self.op.offline:
12607 msg = "can't change to offline"
12608 else:
12609 msg = "can't change to online"
12610 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12612 # Pre-compute NIC changes (necessary to use result in hooks)
12613 self._nic_chgdesc = []
12614 if self.nicmod:
12615 # Operate on copies as this is still in prereq
12616 nics = [nic.Copy() for nic in instance.nics]
12617 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12618 self._CreateNewNic, self._ApplyNicMods, None)
12619 self._new_nics = nics
12620 else:
12621 self._new_nics = None
12623 def _ConvertPlainToDrbd(self, feedback_fn):
12624 """Converts an instance from plain to drbd.
12627 feedback_fn("Converting template to drbd")
12628 instance = self.instance
12629 pnode = instance.primary_node
12630 snode = self.op.remote_node
12632 assert instance.disk_template == constants.DT_PLAIN
12634 # create a fake disk info for _GenerateDiskTemplate
12635 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12636 constants.IDISK_VG: d.logical_id[0]}
12637 for d in instance.disks]
12638 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12639 instance.name, pnode, [snode],
12640 disk_info, None, None, 0, feedback_fn,
12641 self.diskparams)
12642 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12643 self.diskparams)
12644 info = _GetInstanceInfoText(instance)
12645 feedback_fn("Creating additional volumes...")
12646 # first, create the missing data and meta devices
12647 for disk in anno_disks:
12648 # unfortunately this is... not too nice
12649 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12650 info, True)
12651 for child in disk.children:
12652 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12653 # at this stage, all new LVs have been created, we can rename the
12655 feedback_fn("Renaming original volumes...")
12656 rename_list = [(o, n.children[0].logical_id)
12657 for (o, n) in zip(instance.disks, new_disks)]
12658 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12659 result.Raise("Failed to rename original LVs")
12661 feedback_fn("Initializing DRBD devices...")
12662 # all child devices are in place, we can now create the DRBD devices
12663 for disk in anno_disks:
12664 for node in [pnode, snode]:
12665 f_create = node == pnode
12666 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12668 # at this point, the instance has been modified
12669 instance.disk_template = constants.DT_DRBD8
12670 instance.disks = new_disks
12671 self.cfg.Update(instance, feedback_fn)
12673 # Release node locks while waiting for sync
12674 _ReleaseLocks(self, locking.LEVEL_NODE)
12676 # disks are created, waiting for sync
12677 disk_abort = not _WaitForSync(self, instance,
12678 oneshot=not self.op.wait_for_sync)
12679 if disk_abort:
12680 raise errors.OpExecError("There are some degraded disks for"
12681 " this instance, please cleanup manually")
12683 # Node resource locks will be released by caller
12685 def _ConvertDrbdToPlain(self, feedback_fn):
12686 """Converts an instance from drbd to plain.
12689 instance = self.instance
12691 assert len(instance.secondary_nodes) == 1
12692 assert instance.disk_template == constants.DT_DRBD8
12694 pnode = instance.primary_node
12695 snode = instance.secondary_nodes[0]
12696 feedback_fn("Converting template to plain")
12698 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12699 new_disks = [d.children[0] for d in instance.disks]
12701 # copy over size and mode
12702 for parent, child in zip(old_disks, new_disks):
12703 child.size = parent.size
12704 child.mode = parent.mode
12706 # this is a DRBD disk, return its port to the pool
12707 # NOTE: this must be done right before the call to cfg.Update!
12708 for disk in old_disks:
12709 tcp_port = disk.logical_id[2]
12710 self.cfg.AddTcpUdpPort(tcp_port)
12712 # update instance structure
12713 instance.disks = new_disks
12714 instance.disk_template = constants.DT_PLAIN
12715 self.cfg.Update(instance, feedback_fn)
12717 # Release locks in case removing disks takes a while
12718 _ReleaseLocks(self, locking.LEVEL_NODE)
12720 feedback_fn("Removing volumes on the secondary node...")
12721 for disk in old_disks:
12722 self.cfg.SetDiskID(disk, snode)
12723 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12724 if msg:
12725 self.LogWarning("Could not remove block device %s on node %s,"
12726 " continuing anyway: %s", disk.iv_name, snode, msg)
12728 feedback_fn("Removing unneeded volumes on the primary node...")
12729 for idx, disk in enumerate(old_disks):
12730 meta = disk.children[1]
12731 self.cfg.SetDiskID(meta, pnode)
12732 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12733 if msg:
12734 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12735 " continuing anyway: %s", idx, pnode, msg)
12737 def _CreateNewDisk(self, idx, params, _):
12738 """Creates a new disk.
12741 instance = self.instance
12744 if instance.disk_template in constants.DTS_FILEBASED:
12745 (file_driver, file_path) = instance.disks[0].logical_id
12746 file_path = os.path.dirname(file_path)
12747 else:
12748 file_driver = file_path = None
12750 disk = \
12751 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12752 instance.primary_node, instance.secondary_nodes,
12753 [params], file_path, file_driver, idx,
12754 self.Log, self.diskparams)[0]
12756 info = _GetInstanceInfoText(instance)
12758 logging.info("Creating volume %s for instance %s",
12759 disk.iv_name, instance.name)
12760 # Note: this needs to be kept in sync with _CreateDisks
12762 for node in instance.all_nodes:
12763 f_create = (node == instance.primary_node)
12764 try:
12765 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12766 except errors.OpExecError, err:
12767 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12768 disk.iv_name, disk, node, err)
12771 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12774 @staticmethod
12775 def _ModifyDisk(idx, disk, params, _):
12776 """Modifies a disk.
12778 """
12779 disk.mode = params[constants.IDISK_MODE]
12781 return [
12782 ("disk.mode/%d" % idx, disk.mode),
12783 ]
12785 def _RemoveDisk(self, idx, root, _):
12789 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12790 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12791 self.cfg.SetDiskID(disk, node)
12792 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12793 if msg:
12794 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12795 " continuing anyway", idx, node, msg)
12797 # if this is a DRBD disk, return its port to the pool
12798 if root.dev_type in constants.LDS_DRBD:
12799 self.cfg.AddTcpUdpPort(root.logical_id[2])
12801 @staticmethod
12802 def _CreateNewNic(idx, params, private):
12803 """Creates data structure for a new network interface.
12806 mac = params[constants.INIC_MAC]
12807 ip = params.get(constants.INIC_IP, None)
12808 nicparams = private.params
12810 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12811 ("nic.%d" % idx,
12812 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12813 (mac, ip, private.filled[constants.NIC_MODE],
12814 private.filled[constants.NIC_LINK])),
12815 ])
12817 @staticmethod
12818 def _ApplyNicMods(idx, nic, params, private):
12819 """Modifies a network interface.
12821 """
12822 changes = []
12824 for key in [constants.INIC_MAC, constants.INIC_IP]:
12825 if key in params:
12826 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12827 setattr(nic, key, params[key])
12830 nic.nicparams = private.params
12832 for (key, val) in params.items():
12833 changes.append(("nic.%s/%d" % (key, idx), val))
12835 return changes
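# Each ApplyContainerMods callback above (_CreateNewDisk/_ModifyDisk/
# _RemoveDisk and the NIC equivalents) returns a list of (field, value)
# change descriptions; ApplyContainerMods collects them into the change
# description lists (e.g. self._nic_chgdesc) that end up in the opcode
# result and, for NICs, in the hooks environment.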
12837 def Exec(self, feedback_fn):
12838 """Modifies an instance.
12840 All parameters take effect only at the next restart of the instance.
12843 # Process here the warnings from CheckPrereq, as we don't have a
12844 # feedback_fn there.
12845 # TODO: Replace with self.LogWarning
12846 for warn in self.warn:
12847 feedback_fn("WARNING: %s" % warn)
12849 assert ((self.op.disk_template is None) ^
12850 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12851 "Not owning any node resource locks"
12853 result = []
12854 instance = self.instance
12857 if self.op.runtime_mem:
12858 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12860 self.op.runtime_mem)
12861 rpcres.Raise("Cannot modify instance runtime memory")
12862 result.append(("runtime_memory", self.op.runtime_mem))
12864 # Apply disk changes
12865 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12866 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12867 _UpdateIvNames(0, instance.disks)
12869 if self.op.disk_template:
12871 check_nodes = set(instance.all_nodes)
12872 if self.op.remote_node:
12873 check_nodes.add(self.op.remote_node)
12874 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12875 owned = self.owned_locks(level)
12876 assert not (check_nodes - owned), \
12877 ("Not owning the correct locks, owning %r, expected at least %r" %
12878 (owned, check_nodes))
12880 r_shut = _ShutdownInstanceDisks(self, instance)
12881 if not r_shut:
12882 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12883 " proceed with disk template conversion")
12884 mode = (instance.disk_template, self.op.disk_template)
12885 try:
12886 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12887 finally:
12888 self.cfg.ReleaseDRBDMinors(instance.name)
12890 result.append(("disk_template", self.op.disk_template))
12892 assert instance.disk_template == self.op.disk_template, \
12893 ("Expected disk template '%s', found '%s'" %
12894 (self.op.disk_template, instance.disk_template))
12896 # Release node and resource locks if there are any (they might already have
12897 # been released during disk conversion)
12898 _ReleaseLocks(self, locking.LEVEL_NODE)
12899 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12901 # Apply NIC changes
12902 if self._new_nics is not None:
12903 instance.nics = self._new_nics
12904 result.extend(self._nic_chgdesc)
12907 if self.op.hvparams:
12908 instance.hvparams = self.hv_inst
12909 for key, val in self.op.hvparams.iteritems():
12910 result.append(("hv/%s" % key, val))
12913 if self.op.beparams:
12914 instance.beparams = self.be_inst
12915 for key, val in self.op.beparams.iteritems():
12916 result.append(("be/%s" % key, val))
12919 if self.op.os_name:
12920 instance.os = self.op.os_name
12923 if self.op.osparams:
12924 instance.osparams = self.os_inst
12925 for key, val in self.op.osparams.iteritems():
12926 result.append(("os/%s" % key, val))
12928 if self.op.offline is None:
12930 pass
12931 elif self.op.offline:
12932 # Mark instance as offline
12933 self.cfg.MarkInstanceOffline(instance.name)
12934 result.append(("admin_state", constants.ADMINST_OFFLINE))
12935 else:
12936 # Mark instance as online, but stopped
12937 self.cfg.MarkInstanceDown(instance.name)
12938 result.append(("admin_state", constants.ADMINST_DOWN))
12940 self.cfg.Update(instance, feedback_fn)
12942 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12943 self.owned_locks(locking.LEVEL_NODE)), \
12944 "All node locks should have been released by now"
12948 _DISK_CONVERSIONS = {
12949 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12950 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12951 }
12954 class LUInstanceChangeGroup(LogicalUnit):
12955 HPATH = "instance-change-group"
12956 HTYPE = constants.HTYPE_INSTANCE
12959 def ExpandNames(self):
12960 self.share_locks = _ShareAll()
12961 self.needed_locks = {
12962 locking.LEVEL_NODEGROUP: [],
12963 locking.LEVEL_NODE: [],
12966 self._ExpandAndLockInstance()
12968 if self.op.target_groups:
12969 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12970 self.op.target_groups)
12971 else:
12972 self.req_target_uuids = None
12974 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12976 def DeclareLocks(self, level):
12977 if level == locking.LEVEL_NODEGROUP:
12978 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12980 if self.req_target_uuids:
12981 lock_groups = set(self.req_target_uuids)
12983 # Lock all groups used by instance optimistically; this requires going
12984 # via the node before it's locked, requiring verification later on
12985 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12986 lock_groups.update(instance_groups)
12987 else:
12988 # No target groups, need to lock all of them
12989 lock_groups = locking.ALL_SET
12991 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12993 elif level == locking.LEVEL_NODE:
12994 if self.req_target_uuids:
12995 # Lock all nodes used by instances
12996 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12997 self._LockInstancesNodes()
12999 # Lock all nodes in all potential target groups
13000 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13001 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13002 member_nodes = [node_name
13003 for group in lock_groups
13004 for node_name in self.cfg.GetNodeGroup(group).members]
13005 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13006 else:
13007 # Lock all nodes as all groups are potential targets
13008 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13010 def CheckPrereq(self):
13011 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13012 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13013 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13015 assert (self.req_target_uuids is None or
13016 owned_groups.issuperset(self.req_target_uuids))
13017 assert owned_instances == set([self.op.instance_name])
13019 # Get instance information
13020 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13022 # Check if node groups for locked instance are still correct
13023 assert owned_nodes.issuperset(self.instance.all_nodes), \
13024 ("Instance %s's nodes changed while we kept the lock" %
13025 self.op.instance_name)
13027 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13030 if self.req_target_uuids:
13031 # User requested specific target groups
13032 self.target_uuids = frozenset(self.req_target_uuids)
13033 else:
13034 # All groups except those used by the instance are potential targets
13035 self.target_uuids = owned_groups - inst_groups
13037 conflicting_groups = self.target_uuids & inst_groups
13038 if conflicting_groups:
13039 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13040 " used by the instance '%s'" %
13041 (utils.CommaJoin(conflicting_groups),
13042 self.op.instance_name),
13043 errors.ECODE_INVAL)
13045 if not self.target_uuids:
13046 raise errors.OpPrereqError("There are no possible target groups",
13047 errors.ECODE_INVAL)
13049 def BuildHooksEnv(self):
13050 """Build hooks env.
13053 assert self.target_uuids
13056 "TARGET_GROUPS": " ".join(self.target_uuids),
13059 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13063 def BuildHooksNodes(self):
13064 """Build hooks nodes.
13067 mn = self.cfg.GetMasterNode()
13068 return ([mn], [mn])
13070 def Exec(self, feedback_fn):
13071 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13073 assert instances == [self.op.instance_name], "Instance not locked"
13075 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13076 instances=instances, target_groups=list(self.target_uuids))
13078 ial.Run(self.op.iallocator)
13080 if not ial.success:
13081 raise errors.OpPrereqError("Can't compute solution for changing group of"
13082 " instance '%s' using iallocator '%s': %s" %
13083 (self.op.instance_name, self.op.iallocator,
13084 ial.info),
13085 errors.ECODE_NORES)
13087 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13089 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13090 " instance '%s'", len(jobs), self.op.instance_name)
13092 return ResultWithJobs(jobs)
13095 class LUBackupQuery(NoHooksLU):
13096 """Query the exports list
13101 def CheckArguments(self):
13102 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13103 ["node", "export"], self.op.use_locking)
13105 def ExpandNames(self):
13106 self.expq.ExpandNames(self)
13108 def DeclareLocks(self, level):
13109 self.expq.DeclareLocks(self, level)
13111 def Exec(self, feedback_fn):
13112 result = {}
13114 for (node, expname) in self.expq.OldStyleQuery(self):
13115 if expname is None:
13116 result[node] = False
13117 else:
13118 result.setdefault(node, []).append(expname)
13120 return result
13123 class _ExportQuery(_QueryBase):
13124 FIELDS = query.EXPORT_FIELDS
13126 #: The node name is not a unique key for this query
13127 SORT_FIELD = "node"
13129 def ExpandNames(self, lu):
13130 lu.needed_locks = {}
13132 # The following variables interact with _QueryBase._GetNames
13133 if self.names:
13134 self.wanted = _GetWantedNodes(lu, self.names)
13135 else:
13136 self.wanted = locking.ALL_SET
13138 self.do_locking = self.use_locking
13140 if self.do_locking:
13141 lu.share_locks = _ShareAll()
13142 lu.needed_locks = {
13143 locking.LEVEL_NODE: self.wanted,
13144 }
13146 def DeclareLocks(self, lu, level):
13147 pass
13149 def _GetQueryData(self, lu):
13150 """Computes the list of nodes and their attributes.
13153 # Locking is not used
13155 assert not (compat.any(lu.glm.is_owned(level)
13156 for level in locking.LEVELS
13157 if level != locking.LEVEL_CLUSTER) or
13158 self.do_locking or self.use_locking)
13160 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13162 result = []
13164 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13165 if nres.fail_msg:
13166 result.append((node, None))
13167 else:
13168 result.extend((node, expname) for expname in nres.payload)
13170 return result
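# A node whose export-list RPC failed is reported as (node, None);
# LUBackupQuery.Exec above turns that into a False entry for the node.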
13173 class LUBackupPrepare(NoHooksLU):
13174 """Prepares an instance for an export and returns useful information.
13179 def ExpandNames(self):
13180 self._ExpandAndLockInstance()
13182 def CheckPrereq(self):
13183 """Check prerequisites.
13186 instance_name = self.op.instance_name
13188 self.instance = self.cfg.GetInstanceInfo(instance_name)
13189 assert self.instance is not None, \
13190 "Cannot retrieve locked instance %s" % self.op.instance_name
13191 _CheckNodeOnline(self, self.instance.primary_node)
13193 self._cds = _GetClusterDomainSecret()
13195 def Exec(self, feedback_fn):
13196 """Prepares an instance for an export.
13199 instance = self.instance
13201 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13202 salt = utils.GenerateSecret(8)
13204 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13205 result = self.rpc.call_x509_cert_create(instance.primary_node,
13206 constants.RIE_CERT_VALIDITY)
13207 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13209 (name, cert_pem) = result.payload
13211 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13212 cert_pem)
13214 return {
13215 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13216 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13217 salt),
13218 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13219 }
13221 return None
13224 class LUBackupExport(LogicalUnit):
13225 """Export an instance to an image in the cluster.
13228 HPATH = "instance-export"
13229 HTYPE = constants.HTYPE_INSTANCE
13232 def CheckArguments(self):
13233 """Check the arguments.
13236 self.x509_key_name = self.op.x509_key_name
13237 self.dest_x509_ca_pem = self.op.destination_x509_ca
13239 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13240 if not self.x509_key_name:
13241 raise errors.OpPrereqError("Missing X509 key name for encryption",
13242 errors.ECODE_INVAL)
13244 if not self.dest_x509_ca_pem:
13245 raise errors.OpPrereqError("Missing destination X509 CA",
13246 errors.ECODE_INVAL)
13248 def ExpandNames(self):
13249 self._ExpandAndLockInstance()
13251 # Lock all nodes for local exports
13252 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13253 # FIXME: lock only instance primary and destination node
13255 # Sad but true, for now we have to lock all nodes, as we don't know where
13256 # the previous export might be, and in this LU we search for it and
13257 # remove it from its current node. In the future we could fix this by:
13258 # - making a tasklet to search (share-lock all), then create the
13259 # new one, then one to remove, after
13260 # - removing the removal operation altogether
13261 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13263 def DeclareLocks(self, level):
13264 """Last minute lock declaration."""
13265 # All nodes are locked anyway, so nothing to do here.
13267 def BuildHooksEnv(self):
13268 """Build hooks env.
13270 This will run on the master, primary node and target node.
13274 "EXPORT_MODE": self.op.mode,
13275 "EXPORT_NODE": self.op.target_node,
13276 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13277 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13278 # TODO: Generic function for boolean env variables
13279 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13282 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13286 def BuildHooksNodes(self):
13287 """Build hooks nodes.
13290 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13292 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13293 nl.append(self.op.target_node)
13295 return (nl, nl)
13297 def CheckPrereq(self):
13298 """Check prerequisites.
13300 This checks that the instance and node names are valid.
13303 instance_name = self.op.instance_name
13305 self.instance = self.cfg.GetInstanceInfo(instance_name)
13306 assert self.instance is not None, \
13307 "Cannot retrieve locked instance %s" % self.op.instance_name
13308 _CheckNodeOnline(self, self.instance.primary_node)
13310 if (self.op.remove_instance and
13311 self.instance.admin_state == constants.ADMINST_UP and
13312 not self.op.shutdown):
13313 raise errors.OpPrereqError("Can not remove instance without shutting it"
13316 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13317 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13318 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13319 assert self.dst_node is not None
13321 _CheckNodeOnline(self, self.dst_node.name)
13322 _CheckNodeNotDrained(self, self.dst_node.name)
13325 self.dest_disk_info = None
13326 self.dest_x509_ca = None
13328 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13329 self.dst_node = None
13331 if len(self.op.target_node) != len(self.instance.disks):
13332 raise errors.OpPrereqError(("Received destination information for %s"
13333 " disks, but instance %s has %s disks") %
13334 (len(self.op.target_node), instance_name,
13335 len(self.instance.disks)),
13336 errors.ECODE_INVAL)
13338 cds = _GetClusterDomainSecret()
13340 # Check X509 key name
13341 try:
13342 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13343 except (TypeError, ValueError), err:
13344 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13346 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13347 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13348 errors.ECODE_INVAL)
13350 # Load and verify CA
13351 try:
13352 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13353 except OpenSSL.crypto.Error, err:
13354 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13355 (err, ), errors.ECODE_INVAL)
13357 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13358 if errcode is not None:
13359 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13360 (msg, ), errors.ECODE_INVAL)
13362 self.dest_x509_ca = cert
13364 # Verify target information
13365 disk_info = []
13366 for idx, disk_data in enumerate(self.op.target_node):
13367 try:
13368 (host, port, magic) = \
13369 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13370 except errors.GenericError, err:
13371 raise errors.OpPrereqError("Target info for disk %s: %s" %
13372 (idx, err), errors.ECODE_INVAL)
13374 disk_info.append((host, port, magic))
13376 assert len(disk_info) == len(self.op.target_node)
13377 self.dest_disk_info = disk_info
13379 else:
13380 raise errors.ProgrammerError("Unhandled export mode %r" %
13381 self.op.mode)
13383 # instance disk type verification
13384 # TODO: Implement export support for file-based disks
13385 for disk in self.instance.disks:
13386 if disk.dev_type == constants.LD_FILE:
13387 raise errors.OpPrereqError("Export not supported for instances with"
13388 " file-based disks", errors.ECODE_INVAL)
13390 def _CleanupExports(self, feedback_fn):
13391 """Removes exports of current instance from all other nodes.
13393 If an instance in a cluster with nodes A..D was exported to node C, its
13394 exports will be removed from the nodes A, B and D.
13397 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13399 nodelist = self.cfg.GetNodeList()
13400 nodelist.remove(self.dst_node.name)
13402 # on one-node clusters nodelist will be empty after the removal
13403 # if we proceed the backup would be removed because OpBackupQuery
13404 # substitutes an empty list with the full cluster node list.
13405 iname = self.instance.name
13406 if nodelist:
13407 feedback_fn("Removing old exports for instance %s" % iname)
13408 exportlist = self.rpc.call_export_list(nodelist)
13409 for node in exportlist:
13410 if exportlist[node].fail_msg:
13411 continue
13412 if iname in exportlist[node].payload:
13413 msg = self.rpc.call_export_remove(node, iname).fail_msg
13414 if msg:
13415 self.LogWarning("Could not remove older export for instance %s"
13416 " on node %s: %s", iname, node, msg)
13418 def Exec(self, feedback_fn):
13419 """Export an instance to an image in the cluster.
13422 assert self.op.mode in constants.EXPORT_MODES
13424 instance = self.instance
13425 src_node = instance.primary_node
13427 if self.op.shutdown:
13428 # shutdown the instance, but not the disks
13429 feedback_fn("Shutting down instance %s" % instance.name)
13430 result = self.rpc.call_instance_shutdown(src_node, instance,
13431 self.op.shutdown_timeout)
13432 # TODO: Maybe ignore failures if ignore_remove_failures is set
13433 result.Raise("Could not shutdown instance %s on"
13434 " node %s" % (instance.name, src_node))
13436 # set the disks ID correctly since call_instance_start needs the
13437 # correct drbd minor to create the symlinks
13438 for disk in instance.disks:
13439 self.cfg.SetDiskID(disk, src_node)
13441 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13443 if activate_disks:
13444 # Activate the instance disks if we're exporting a stopped instance
13445 feedback_fn("Activating disks for %s" % instance.name)
13446 _StartInstanceDisks(self, instance, None)
13449 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13452 helper.CreateSnapshots()
13454 if (self.op.shutdown and
13455 instance.admin_state == constants.ADMINST_UP and
13456 not self.op.remove_instance):
13457 assert not activate_disks
13458 feedback_fn("Starting instance %s" % instance.name)
13459 result = self.rpc.call_instance_start(src_node,
13460 (instance, None, None), False)
13461 msg = result.fail_msg
13462 if msg:
13463 feedback_fn("Failed to start instance: %s" % msg)
13464 _ShutdownInstanceDisks(self, instance)
13465 raise errors.OpExecError("Could not start instance: %s" % msg)
13467 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13468 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13469 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13470 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13471 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13473 (key_name, _, _) = self.x509_key_name
13476 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13479 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13480 key_name, dest_ca_pem,
13485 # Check for backwards compatibility
13486 assert len(dresults) == len(instance.disks)
13487 assert compat.all(isinstance(i, bool) for i in dresults), \
13488 "Not all results are boolean: %r" % dresults
13491 if activate_disks:
13492 feedback_fn("Deactivating disks for %s" % instance.name)
13493 _ShutdownInstanceDisks(self, instance)
13495 if not (compat.all(dresults) and fin_resu):
13496 failures = []
13497 if not fin_resu:
13498 failures.append("export finalization")
13499 if not compat.all(dresults):
13500 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13501 if not dsk)
13502 failures.append("disk export: disk(s) %s" % fdsk)
13504 raise errors.OpExecError("Export failed, errors in %s" %
13505 utils.CommaJoin(failures))
13507 # At this point, the export was successful, we can cleanup/finish
13509 # Remove instance if requested
13510 if self.op.remove_instance:
13511 feedback_fn("Removing instance %s" % instance.name)
13512 _RemoveInstance(self, feedback_fn, instance,
13513 self.op.ignore_remove_failures)
13515 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13516 self._CleanupExports(feedback_fn)
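# fin_resu is the overall finalization status; dresults holds one boolean
# per instance disk (see the backwards-compatibility assertions above).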
13518 return fin_resu, dresults
13521 class LUBackupRemove(NoHooksLU):
13522 """Remove exports related to the named instance.
13527 def ExpandNames(self):
13528 self.needed_locks = {}
13529 # We need all nodes to be locked in order for RemoveExport to work, but we
13530 # don't need to lock the instance itself, as nothing will happen to it (and
13531 # we can remove exports also for a removed instance)
13532 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13534 def Exec(self, feedback_fn):
13535 """Remove any export.
13538 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13539 # If the instance was not found we'll try with the name that was passed in.
13540 # This will only work if it was an FQDN, though.
13541 fqdn_warn = False
13542 if not instance_name:
13543 fqdn_warn = True
13544 instance_name = self.op.instance_name
13546 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13547 exportlist = self.rpc.call_export_list(locked_nodes)
13548 found = False
13549 for node in exportlist:
13550 msg = exportlist[node].fail_msg
13551 if msg:
13552 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13553 continue
13554 if instance_name in exportlist[node].payload:
13555 found = True
13556 result = self.rpc.call_export_remove(node, instance_name)
13557 msg = result.fail_msg
13558 if msg:
13559 logging.error("Could not remove export for instance %s"
13560 " on node %s: %s", instance_name, node, msg)
13562 if fqdn_warn and not found:
13563 feedback_fn("Export not found. If trying to remove an export belonging"
13564 " to a deleted instance please use its Fully Qualified"
13568 class LUGroupAdd(LogicalUnit):
13569 """Logical unit for creating node groups.
13572 HPATH = "group-add"
13573 HTYPE = constants.HTYPE_GROUP
13576 def ExpandNames(self):
13577 # We need the new group's UUID here so that we can create and acquire the
13578 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13579 # that it should not check whether the UUID exists in the configuration.
13580 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13581 self.needed_locks = {}
13582 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13584 def CheckPrereq(self):
13585 """Check prerequisites.
13587 This checks that the given group name is not an existing node group
13591 try:
13592 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13593 except errors.OpPrereqError:
13594 pass
13595 else:
13596 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13597 " node group (UUID: %s)" %
13598 (self.op.group_name, existing_uuid),
13599 errors.ECODE_EXISTS)
13601 if self.op.ndparams:
13602 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13604 if self.op.hv_state:
13605 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13606 else:
13607 self.new_hv_state = None
13609 if self.op.disk_state:
13610 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13611 else:
13612 self.new_disk_state = None
13614 if self.op.diskparams:
13615 for templ in constants.DISK_TEMPLATES:
13616 if templ in self.op.diskparams:
13617 utils.ForceDictType(self.op.diskparams[templ],
13618 constants.DISK_DT_TYPES)
13619 self.new_diskparams = self.op.diskparams
13620 try:
13621 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13622 except errors.OpPrereqError, err:
13623 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13624 errors.ECODE_INVAL)
13625 else:
13626 self.new_diskparams = {}
13628 if self.op.ipolicy:
13629 cluster = self.cfg.GetClusterInfo()
13630 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13631 try:
13632 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13633 except errors.ConfigurationError, err:
13634 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13635 errors.ECODE_INVAL)
13637 def BuildHooksEnv(self):
13638 """Build hooks env.
13642 "GROUP_NAME": self.op.group_name,
13645 def BuildHooksNodes(self):
13646 """Build hooks nodes.
13649 mn = self.cfg.GetMasterNode()
13650 return ([mn], [mn])
13652 def Exec(self, feedback_fn):
13653 """Add the node group to the cluster.
13656 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13657 uuid=self.group_uuid,
13658 alloc_policy=self.op.alloc_policy,
13659 ndparams=self.op.ndparams,
13660 diskparams=self.new_diskparams,
13661 ipolicy=self.op.ipolicy,
13662 hv_state_static=self.new_hv_state,
13663 disk_state_static=self.new_disk_state)
13665 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13666 del self.remove_locks[locking.LEVEL_NODEGROUP]
13669 class LUGroupAssignNodes(NoHooksLU):
13670 """Logical unit for assigning nodes to groups.
13675 def ExpandNames(self):
13676 # These raise errors.OpPrereqError on their own:
13677 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13678 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13680 # We want to lock all the affected nodes and groups. We have readily
13681 # available the list of nodes, and the *destination* group. To gather the
13682 # list of "source" groups, we need to fetch node information later on.
13683 self.needed_locks = {
13684 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13685 locking.LEVEL_NODE: self.op.nodes,
13688 def DeclareLocks(self, level):
13689 if level == locking.LEVEL_NODEGROUP:
13690 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13692 # Try to get all affected nodes' groups without having the group or node
13693 # lock yet. Needs verification later in the code flow.
13694 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13696 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13698 def CheckPrereq(self):
13699 """Check prerequisites.
13702 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13703 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13704 frozenset(self.op.nodes))
13706 expected_locks = (set([self.group_uuid]) |
13707 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13708 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13709 if actual_locks != expected_locks:
13710 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13711 " current groups are '%s', used to be '%s'" %
13712 (utils.CommaJoin(expected_locks),
13713 utils.CommaJoin(actual_locks)))
13715 self.node_data = self.cfg.GetAllNodesInfo()
13716 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13717 instance_data = self.cfg.GetAllInstancesInfo()
13719 if self.group is None:
13720 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13721 (self.op.group_name, self.group_uuid))
13723 (new_splits, previous_splits) = \
13724 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13725 for node in self.op.nodes],
13726 self.node_data, instance_data)
13728 if new_splits:
13729 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13731 if not self.op.force:
13732 raise errors.OpExecError("The following instances get split by this"
13733 " change and --force was not given: %s" %
13734 fmt_new_splits)
13735 else:
13736 self.LogWarning("This operation will split the following instances: %s",
13737 fmt_new_splits)
13739 if previous_splits:
13740 self.LogWarning("In addition, these already-split instances continue"
13741 " to be split across groups: %s",
13742 utils.CommaJoin(utils.NiceSort(previous_splits)))
13744 def Exec(self, feedback_fn):
13745 """Assign nodes to a new group.
13748 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13750 self.cfg.AssignGroupNodes(mods)
13752 @staticmethod
13753 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13754 """Check for split instances after a node assignment.
13756 This method considers a series of node assignments as an atomic operation,
13757 and returns information about split instances after applying the set of
13760 In particular, it returns information about newly split instances, and
13761 instances that were already split, and remain so after the change.
13763 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13764 considered.
13766 @type changes: list of (node_name, new_group_uuid) pairs.
13767 @param changes: list of node assignments to consider.
13768 @param node_data: a dict with data for all nodes
13769 @param instance_data: a dict with all instances to consider
13770 @rtype: a two-tuple
13771 @return: a list of instances that were previously okay and end up split as a
13772 consequence of this change, and a list of instances that were previously
13773 split and that this change does not fix.
13775 """
13776 changed_nodes = dict((node, group) for node, group in changes
13777 if node_data[node].group != group)
13779 all_split_instances = set()
13780 previously_split_instances = set()
13782 def InstanceNodes(instance):
13783 return [instance.primary_node] + list(instance.secondary_nodes)
13785 for inst in instance_data.values():
13786 if inst.disk_template not in constants.DTS_INT_MIRROR:
13787 continue
13789 instance_nodes = InstanceNodes(inst)
13791 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13792 previously_split_instances.add(inst.name)
13794 if len(set(changed_nodes.get(node, node_data[node].group)
13795 for node in instance_nodes)) > 1:
13796 all_split_instances.add(inst.name)
13798 return (list(all_split_instances - previously_split_instances),
13799 list(previously_split_instances & all_split_instances))
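# Illustrative example (not from the original module, node and group names
# are made up): a DRBD instance with primary "n1" and secondary "n2", both
# currently in group "G1", becomes newly split by changes=[("n2", "G2")] and
# is returned in the first list; an instance that already spans two groups
# and still does after the change ends up in the second list.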
13802 class _GroupQuery(_QueryBase):
13803 FIELDS = query.GROUP_FIELDS
13805 def ExpandNames(self, lu):
13806 lu.needed_locks = {}
13808 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13809 self._cluster = lu.cfg.GetClusterInfo()
13810 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13811 if not self.names:
13813 self.wanted = [name_to_uuid[name]
13814 for name in utils.NiceSort(name_to_uuid.keys())]
13815 else:
13816 # Accept names to be either names or UUIDs.
13817 missing = []
13818 self.wanted = []
13819 all_uuid = frozenset(self._all_groups.keys())
13821 for name in self.names:
13822 if name in all_uuid:
13823 self.wanted.append(name)
13824 elif name in name_to_uuid:
13825 self.wanted.append(name_to_uuid[name])
13826 else:
13827 missing.append(name)
13829 if missing:
13830 raise errors.OpPrereqError("Some groups do not exist: %s" %
13831 utils.CommaJoin(missing),
13832 errors.ECODE_NOENT)
13834 def DeclareLocks(self, lu, level):
13835 pass
13837 def _GetQueryData(self, lu):
13838 """Computes the list of node groups and their attributes.
13841 do_nodes = query.GQ_NODE in self.requested_data
13842 do_instances = query.GQ_INST in self.requested_data
13844 group_to_nodes = None
13845 group_to_instances = None
13847 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13848 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13849 # latter GetAllInstancesInfo() is not enough, for we have to go through
13850 # instance->node. Hence, we will need to process nodes even if we only need
13851 # instance information.
13852 if do_nodes or do_instances:
13853 all_nodes = lu.cfg.GetAllNodesInfo()
13854 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13855 node_to_group = {}
13857 for node in all_nodes.values():
13858 if node.group in group_to_nodes:
13859 group_to_nodes[node.group].append(node.name)
13860 node_to_group[node.name] = node.group
13862 if do_instances:
13863 all_instances = lu.cfg.GetAllInstancesInfo()
13864 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13866 for instance in all_instances.values():
13867 node = instance.primary_node
13868 if node in node_to_group:
13869 group_to_instances[node_to_group[node]].append(instance.name)
13871 if not do_nodes:
13872 # Do not pass on node information if it was not requested.
13873 group_to_nodes = None
13875 return query.GroupQueryData(self._cluster,
13876 [self._all_groups[uuid]
13877 for uuid in self.wanted],
13878 group_to_nodes, group_to_instances,
13879 query.GQ_DISKPARAMS in self.requested_data)
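# group_to_nodes and group_to_instances stay None unless GQ_NODE or GQ_INST
# respectively were part of the requested data, so the node/instance walk is
# only paid for when those fields were actually asked for.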
13882 class LUGroupQuery(NoHooksLU):
13883 """Logical unit for querying node groups.
13888 def CheckArguments(self):
13889 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13890 self.op.output_fields, False)
13892 def ExpandNames(self):
13893 self.gq.ExpandNames(self)
13895 def DeclareLocks(self, level):
13896 self.gq.DeclareLocks(self, level)
13898 def Exec(self, feedback_fn):
13899 return self.gq.OldStyleQuery(self)
13902 class LUGroupSetParams(LogicalUnit):
13903 """Modifies the parameters of a node group.
13906 HPATH = "group-modify"
13907 HTYPE = constants.HTYPE_GROUP
13910 def CheckArguments(self):
13911 all_changes = [
13912 self.op.ndparams,
13913 self.op.diskparams,
13914 self.op.alloc_policy,
13915 self.op.hv_state,
13916 self.op.disk_state,
13917 self.op.ipolicy,
13918 ]
13920 if all_changes.count(None) == len(all_changes):
13921 raise errors.OpPrereqError("Please pass at least one modification",
13922 errors.ECODE_INVAL)
13924 def ExpandNames(self):
13925 # This raises errors.OpPrereqError on its own:
13926 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13928 self.needed_locks = {
13929 locking.LEVEL_INSTANCE: [],
13930 locking.LEVEL_NODEGROUP: [self.group_uuid],
13931 }
13933 self.share_locks[locking.LEVEL_INSTANCE] = 1
13935 def DeclareLocks(self, level):
13936 if level == locking.LEVEL_INSTANCE:
13937 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13939 # Lock instances optimistically, needs verification once group lock has
13940 # been acquired
13941 self.needed_locks[locking.LEVEL_INSTANCE] = \
13942 self.cfg.GetNodeGroupInstances(self.group_uuid)
13944 @staticmethod
13945 def _UpdateAndVerifyDiskParams(old, new):
13946 """Updates and verifies disk parameters.
13948 """
13949 new_params = _GetUpdatedParams(old, new)
13950 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13951 return new_params
13953 def CheckPrereq(self):
13954 """Check prerequisites.
13957 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13959 # Check if locked instances are still correct
13960 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13962 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13963 cluster = self.cfg.GetClusterInfo()
13965 if self.group is None:
13966 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13967 (self.op.group_name, self.group_uuid))
13969 if self.op.ndparams:
13970 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13971 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13972 self.new_ndparams = new_ndparams
13974 if self.op.diskparams:
13975 diskparams = self.group.diskparams
13976 uavdp = self._UpdateAndVerifyDiskParams
13977 # For each disktemplate subdict update and verify the values
13978 new_diskparams = dict((dt,
13979 uavdp(diskparams.get(dt, {}),
13980 self.op.diskparams[dt]))
13981 for dt in constants.DISK_TEMPLATES
13982 if dt in self.op.diskparams)
13983 # As we now have all subdicts of diskparams ready, let's merge the actual
13984 # dict with all updated subdicts
13985 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13986 try:
13987 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13988 except errors.OpPrereqError, err:
13989 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13990 errors.ECODE_INVAL)
13992 if self.op.hv_state:
13993 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13994 self.group.hv_state_static)
13996 if self.op.disk_state:
13997 self.new_disk_state = \
13998 _MergeAndVerifyDiskState(self.op.disk_state,
13999 self.group.disk_state_static)
14001 if self.op.ipolicy:
14002 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14006 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14007 inst_filter = lambda inst: inst.name in owned_instances
14008 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14009 violations = \
14010 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14011 self.group),
14012 new_ipolicy, instances)
14014 if violations:
14015 self.LogWarning("After the ipolicy change the following instances"
14016 " violate them: %s",
14017 utils.CommaJoin(violations))
14019 def BuildHooksEnv(self):
14020 """Build hooks env.
14024 "GROUP_NAME": self.op.group_name,
14025 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14028 def BuildHooksNodes(self):
14029 """Build hooks nodes.
14032 mn = self.cfg.GetMasterNode()
14033 return ([mn], [mn])
14035 def Exec(self, feedback_fn):
14036 """Modifies the node group.
14038 """
14039 result = []
14041 if self.op.ndparams:
14042 self.group.ndparams = self.new_ndparams
14043 result.append(("ndparams", str(self.group.ndparams)))
14045 if self.op.diskparams:
14046 self.group.diskparams = self.new_diskparams
14047 result.append(("diskparams", str(self.group.diskparams)))
14049 if self.op.alloc_policy:
14050 self.group.alloc_policy = self.op.alloc_policy
14052 if self.op.hv_state:
14053 self.group.hv_state_static = self.new_hv_state
14055 if self.op.disk_state:
14056 self.group.disk_state_static = self.new_disk_state
14058 if self.op.ipolicy:
14059 self.group.ipolicy = self.new_ipolicy
14061 self.cfg.Update(self.group, feedback_fn)
14063 return result
14065 class LUGroupRemove(LogicalUnit):
14066 HPATH = "group-remove"
14067 HTYPE = constants.HTYPE_GROUP
14070 def ExpandNames(self):
14071 # This will raise errors.OpPrereqError on its own:
14072 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14073 self.needed_locks = {
14074 locking.LEVEL_NODEGROUP: [self.group_uuid],
14077 def CheckPrereq(self):
14078 """Check prerequisites.
14080 This checks that the given group name exists as a node group, that it is
14081 empty (i.e., contains no nodes), and that it is not the last group of the
14082 cluster.
14084 """
14085 # Verify that the group is empty.
14086 group_nodes = [node.name
14087 for node in self.cfg.GetAllNodesInfo().values()
14088 if node.group == self.group_uuid]
14090 if group_nodes:
14091 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14092 " nodes: %s" %
14093 (self.op.group_name,
14094 utils.CommaJoin(utils.NiceSort(group_nodes))),
14095 errors.ECODE_STATE)
14097 # Verify the cluster would not be left group-less.
14098 if len(self.cfg.GetNodeGroupList()) == 1:
14099 raise errors.OpPrereqError("Group '%s' is the only group,"
14100 " cannot be removed" %
14101 self.op.group_name,
14102 errors.ECODE_STATE)
14104 def BuildHooksEnv(self):
14105 """Build hooks env.
14109 "GROUP_NAME": self.op.group_name,
14112 def BuildHooksNodes(self):
14113 """Build hooks nodes.
14116 mn = self.cfg.GetMasterNode()
14117 return ([mn], [mn])
14119 def Exec(self, feedback_fn):
14120 """Remove the node group.
14124 self.cfg.RemoveNodeGroup(self.group_uuid)
14125 except errors.ConfigurationError:
14126 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14127 (self.op.group_name, self.group_uuid))
14129 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14132 class LUGroupRename(LogicalUnit):
14133 HPATH = "group-rename"
14134 HTYPE = constants.HTYPE_GROUP
14137 def ExpandNames(self):
14138 # This raises errors.OpPrereqError on its own:
14139 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14141 self.needed_locks = {
14142 locking.LEVEL_NODEGROUP: [self.group_uuid],
14145 def CheckPrereq(self):
14146 """Check prerequisites.
14148 Ensures requested new name is not yet used.
14151 try:
14152 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14153 except errors.OpPrereqError:
14154 pass
14155 else:
14156 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14157 " node group (UUID: %s)" %
14158 (self.op.new_name, new_name_uuid),
14159 errors.ECODE_EXISTS)
14161 def BuildHooksEnv(self):
14162 """Build hooks env.
14166 "OLD_NAME": self.op.group_name,
14167 "NEW_NAME": self.op.new_name,
14170 def BuildHooksNodes(self):
14171 """Build hooks nodes.
14174 mn = self.cfg.GetMasterNode()
14176 all_nodes = self.cfg.GetAllNodesInfo()
14177 all_nodes.pop(mn, None)
14179 run_nodes = [mn]
14180 run_nodes.extend(node.name for node in all_nodes.values()
14181 if node.group == self.group_uuid)
14183 return (run_nodes, run_nodes)
14185 def Exec(self, feedback_fn):
14186 """Rename the node group.
14189 group = self.cfg.GetNodeGroup(self.group_uuid)
14191 if group is None:
14192 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14193 (self.op.group_name, self.group_uuid))
14195 group.name = self.op.new_name
14196 self.cfg.Update(group, feedback_fn)
14198 return self.op.new_name
14201 class LUGroupEvacuate(LogicalUnit):
14202 HPATH = "group-evacuate"
14203 HTYPE = constants.HTYPE_GROUP
14206 def ExpandNames(self):
14207 # This raises errors.OpPrereqError on its own:
14208 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14210 if self.op.target_groups:
14211 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14212 self.op.target_groups)
14214 self.req_target_uuids = []
14216 if self.group_uuid in self.req_target_uuids:
14217 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14218 " as a target group (targets are %s)" %
14219 (self.group_uuid,
14220 utils.CommaJoin(self.req_target_uuids)),
14221 errors.ECODE_INVAL)
14223 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14225 self.share_locks = _ShareAll()
14226 self.needed_locks = {
14227 locking.LEVEL_INSTANCE: [],
14228 locking.LEVEL_NODEGROUP: [],
14229 locking.LEVEL_NODE: [],
14232 def DeclareLocks(self, level):
14233 if level == locking.LEVEL_INSTANCE:
14234 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14236 # Lock instances optimistically, needs verification once node and group
14237 # locks have been acquired
14238 self.needed_locks[locking.LEVEL_INSTANCE] = \
14239 self.cfg.GetNodeGroupInstances(self.group_uuid)
14241 elif level == locking.LEVEL_NODEGROUP:
14242 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14244 if self.req_target_uuids:
14245 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14247 # Lock all groups used by instances optimistically; this requires going
14248 # via the node before it's locked, requiring verification later on
14249 lock_groups.update(group_uuid
14250 for instance_name in
14251 self.owned_locks(locking.LEVEL_INSTANCE)
14252 for group_uuid in
14253 self.cfg.GetInstanceNodeGroups(instance_name))
14254 else:
14255 # No target groups, need to lock all of them
14256 lock_groups = locking.ALL_SET
14258 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14260 elif level == locking.LEVEL_NODE:
14261 # This will only lock the nodes in the group to be evacuated which
14262 # contain actual instances
14263 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14264 self._LockInstancesNodes()
14266 # Lock all nodes in group to be evacuated and target groups
14267 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14268 assert self.group_uuid in owned_groups
14269 member_nodes = [node_name
14270 for group in owned_groups
14271 for node_name in self.cfg.GetNodeGroup(group).members]
14272 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
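# The instance and group locks above are taken optimistically (the sets are
# computed before the locks are actually held); CheckPrereq below re-checks
# them with _CheckNodeGroupInstances and _CheckInstancesNodeGroups.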
14274 def CheckPrereq(self):
14275 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14276 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14277 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14279 assert owned_groups.issuperset(self.req_target_uuids)
14280 assert self.group_uuid in owned_groups
14282 # Check if locked instances are still correct
14283 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14285 # Get instance information
14286 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14288 # Check if node groups for locked instances are still correct
14289 _CheckInstancesNodeGroups(self.cfg, self.instances,
14290 owned_groups, owned_nodes, self.group_uuid)
14292 if self.req_target_uuids:
14293 # User requested specific target groups
14294 self.target_uuids = self.req_target_uuids
14296 # All groups except the one to be evacuated are potential targets
14297 self.target_uuids = [group_uuid for group_uuid in owned_groups
14298 if group_uuid != self.group_uuid]
14300 if not self.target_uuids:
14301 raise errors.OpPrereqError("There are no possible target groups",
14302 errors.ECODE_INVAL)
14304 def BuildHooksEnv(self):
14305 """Build hooks env.
14309 "GROUP_NAME": self.op.group_name,
14310 "TARGET_GROUPS": " ".join(self.target_uuids),
14313 def BuildHooksNodes(self):
14314 """Build hooks nodes.
14317 mn = self.cfg.GetMasterNode()
14319 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14321 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14323 return (run_nodes, run_nodes)
14325 def Exec(self, feedback_fn):
14326 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14328 assert self.group_uuid not in self.target_uuids
14330 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14331 instances=instances, target_groups=self.target_uuids)
14333 ial.Run(self.op.iallocator)
14335 if not ial.success:
14336 raise errors.OpPrereqError("Can't compute group evacuation using"
14337 " iallocator '%s': %s" %
14338 (self.op.iallocator, ial.info),
14339 errors.ECODE_NORES)
14341 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14343 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14344 len(jobs), self.op.group_name)
14346 return ResultWithJobs(jobs)
14349 class TagsLU(NoHooksLU): # pylint: disable=W0223
14350 """Generic tags LU.
14352 This is an abstract class which is the parent of all the other tags LUs.
14355 def ExpandNames(self):
14356 self.group_uuid = None
14357 self.needed_locks = {}
14359 if self.op.kind == constants.TAG_NODE:
14360 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14361 lock_level = locking.LEVEL_NODE
14362 lock_name = self.op.name
14363 elif self.op.kind == constants.TAG_INSTANCE:
14364 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14365 lock_level = locking.LEVEL_INSTANCE
14366 lock_name = self.op.name
14367 elif self.op.kind == constants.TAG_NODEGROUP:
14368 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14369 lock_level = locking.LEVEL_NODEGROUP
14370 lock_name = self.group_uuid
14371 else:
14372 lock_level = None
14373 lock_name = None
14375 if lock_level and getattr(self.op, "use_locking", True):
14376 self.needed_locks[lock_level] = lock_name
14378 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14379 # not possible to acquire the BGL based on opcode parameters)
14381 def CheckPrereq(self):
14382 """Check prerequisites.
14385 if self.op.kind == constants.TAG_CLUSTER:
14386 self.target = self.cfg.GetClusterInfo()
14387 elif self.op.kind == constants.TAG_NODE:
14388 self.target = self.cfg.GetNodeInfo(self.op.name)
14389 elif self.op.kind == constants.TAG_INSTANCE:
14390 self.target = self.cfg.GetInstanceInfo(self.op.name)
14391 elif self.op.kind == constants.TAG_NODEGROUP:
14392 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14393 else:
14394 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14395 str(self.op.kind), errors.ECODE_INVAL)
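# self.target is the configuration object whose tags the concrete tag LUs
# below read or modify; LUTagsSet/LUTagsDel persist their changes through
# self.cfg.Update(self.target, feedback_fn).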
14398 class LUTagsGet(TagsLU):
14399 """Returns the tags of a given object.
14404 def ExpandNames(self):
14405 TagsLU.ExpandNames(self)
14407 # Share locks as this is only a read operation
14408 self.share_locks = _ShareAll()
14410 def Exec(self, feedback_fn):
14411 """Returns the tag list.
14414 return list(self.target.GetTags())
14417 class LUTagsSearch(NoHooksLU):
14418 """Searches the tags for a given pattern.
14423 def ExpandNames(self):
14424 self.needed_locks = {}
14426 def CheckPrereq(self):
14427 """Check prerequisites.
14429 This checks the pattern passed for validity by compiling it.
14431 """
14432 try:
14433 self.re = re.compile(self.op.pattern)
14434 except re.error, err:
14435 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14436 (self.op.pattern, err), errors.ECODE_INVAL)
14438 def Exec(self, feedback_fn):
14439 """Returns the tag list.
14441 """
14442 cfg = self.cfg
14443 tgts = [("/cluster", cfg.GetClusterInfo())]
14444 ilist = cfg.GetAllInstancesInfo().values()
14445 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14446 nlist = cfg.GetAllNodesInfo().values()
14447 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14448 tgts.extend(("/nodegroup/%s" % n.name, n)
14449 for n in cfg.GetAllNodeGroupsInfo().values())
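# Illustrative example (names made up): a tag "staging" set on node "node1"
# that matches the pattern is reported as the pair ("/nodes/node1", "staging").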
14450 results = []
14451 for path, target in tgts:
14452 for tag in target.GetTags():
14453 if self.re.search(tag):
14454 results.append((path, tag))
14456 return results
14458 class LUTagsSet(TagsLU):
14459 """Sets a tag on a given object.
14464 def CheckPrereq(self):
14465 """Check prerequisites.
14467 This checks the type and length of the tag name and value.
14470 TagsLU.CheckPrereq(self)
14471 for tag in self.op.tags:
14472 objects.TaggableObject.ValidateTag(tag)
14474 def Exec(self, feedback_fn):
14478 try:
14479 for tag in self.op.tags:
14480 self.target.AddTag(tag)
14481 except errors.TagError, err:
14482 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14483 self.cfg.Update(self.target, feedback_fn)
14486 class LUTagsDel(TagsLU):
14487 """Delete a list of tags from a given object.
14492 def CheckPrereq(self):
14493 """Check prerequisites.
14495 This checks that we have the given tag.
14498 TagsLU.CheckPrereq(self)
14499 for tag in self.op.tags:
14500 objects.TaggableObject.ValidateTag(tag)
14501 del_tags = frozenset(self.op.tags)
14502 cur_tags = self.target.GetTags()
14504 diff_tags = del_tags - cur_tags
14505 if diff_tags:
14506 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14507 raise errors.OpPrereqError("Tag(s) %s not found" %
14508 (utils.CommaJoin(diff_names), ),
14509 errors.ECODE_NOENT)
14511 def Exec(self, feedback_fn):
14512 """Remove the tag from the object.
14515 for tag in self.op.tags:
14516 self.target.RemoveTag(tag)
14517 self.cfg.Update(self.target, feedback_fn)
14520 class LUTestDelay(NoHooksLU):
14521 """Sleep for a specified amount of time.
14523 This LU sleeps on the master and/or nodes for a specified amount of
14524 time.
14526 """
14529 def ExpandNames(self):
14530 """Expand names and set required locks.
14532 This expands the node list, if any.
14535 self.needed_locks = {}
14536 if self.op.on_nodes:
14537 # _GetWantedNodes can be used here, but is not always appropriate to use
14538 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14539 # more information.
14540 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14541 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14543 def _TestDelay(self):
14544 """Do the actual sleep.
14547 if self.op.on_master:
14548 if not utils.TestDelay(self.op.duration):
14549 raise errors.OpExecError("Error during master delay test")
14550 if self.op.on_nodes:
14551 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14552 for node, node_result in result.items():
14553 node_result.Raise("Failure during rpc call to node %s" % node)
14555 def Exec(self, feedback_fn):
14556 """Execute the test delay opcode, with the wanted repetitions.
14559 if self.op.repeat == 0:
14560 self._TestDelay()
14561 else:
14562 top_value = self.op.repeat - 1
14563 for i in range(self.op.repeat):
14564 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14565 self._TestDelay()
14568 class LUTestJqueue(NoHooksLU):
14569 """Utility LU to test some aspects of the job queue.
14574 # Must be lower than default timeout for WaitForJobChange to see whether it
14575 # notices changed jobs
14576 _CLIENT_CONNECT_TIMEOUT = 20.0
14577 _CLIENT_CONFIRM_TIMEOUT = 60.0
14579 @classmethod
14580 def _NotifyUsingSocket(cls, cb, errcls):
14581 """Opens a Unix socket and waits for another program to connect.
14584 @param cb: Callback to send socket name to client
14585 @type errcls: class
14586 @param errcls: Exception class to use for errors
14589 # Using a temporary directory as there's no easy way to create temporary
14590 # sockets without writing a custom loop around tempfile.mktemp and
14592 tmpdir = tempfile.mkdtemp()
14594 tmpsock = utils.PathJoin(tmpdir, "sock")
14596 logging.debug("Creating temporary socket at %s", tmpsock)
14597 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14602 # Send details to client
14603 cb(tmpsock)
14605 # Wait for client to connect before continuing
14606 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14607 try:
14608 (conn, _) = sock.accept()
14609 except socket.error, err:
14610 raise errcls("Client didn't connect in time (%s)" % err)
14614 # Remove as soon as client is connected
14615 shutil.rmtree(tmpdir)
14617 # Wait for client to close
14620 # pylint: disable=E1101
14621 # Instance of '_socketobject' has no ... member
14622 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14624 except socket.error, err:
14625 raise errcls("Client failed to confirm notification (%s)" % err)
14629 def _SendNotification(self, test, arg, sockname):
14630 """Sends a notification to the client.
14633 @param test: Test name
14634 @param arg: Test argument (depends on test)
14635 @type sockname: string
14636 @param sockname: Socket path
14639 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14641 def _Notify(self, prereq, test, arg):
14642 """Notifies the client of a test.
14645 @param prereq: Whether this is a prereq-phase test
14647 @param test: Test name
14648 @param arg: Test argument (depends on test)
14651 if prereq:
14652 errcls = errors.OpPrereqError
14653 else:
14654 errcls = errors.OpExecError
14656 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14660 def CheckArguments(self):
14661 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14662 self.expandnames_calls = 0
14664 def ExpandNames(self):
14665 checkargs_calls = getattr(self, "checkargs_calls", 0)
14666 if checkargs_calls < 1:
14667 raise errors.ProgrammerError("CheckArguments was not called")
14669 self.expandnames_calls += 1
14671 if self.op.notify_waitlock:
14672 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14674 self.LogInfo("Expanding names")
14676 # Get lock on master node (just to get a lock, not for a particular reason)
14677 self.needed_locks = {
14678 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14679 }
14681 def Exec(self, feedback_fn):
14682 if self.expandnames_calls < 1:
14683 raise errors.ProgrammerError("ExpandNames was not called")
14685 if self.op.notify_exec:
14686 self._Notify(False, constants.JQT_EXEC, None)
14688 self.LogInfo("Executing")
14690 if self.op.log_messages:
14691 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14692 for idx, msg in enumerate(self.op.log_messages):
14693 self.LogInfo("Sending log message %s", idx + 1)
14694 feedback_fn(constants.JQT_MSGPREFIX + msg)
14695 # Report how many test messages have been sent
14696 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14698 if self.op.fail:
14699 raise errors.OpExecError("Opcode failure was requested")
14701 return True
14704 class IAllocator(object):
14705 """IAllocator framework.
14707 An IAllocator instance has four sets of attributes:
14708 - cfg that is needed to query the cluster
14709 - input data (all members of the _KEYS class attribute are required)
14710 - four buffer attributes (in|out_data|text), that represent the
14711 input (to the external script) in text and data structure format,
14712 and the output from it, again in two formats
14713 - the result variables from the script (success, info, nodes) for
14714 easy usage
14717 # pylint: disable=R0902
14718 # lots of instance attributes
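# Illustrative sketch (assumption): in_text serializes the cluster data built
# below plus a per-mode "request" section, while out_text is the script's
# reply, roughly:
#
#   in_data  = {"cluster_name": ..., "nodes": {...}, "instances": {...},
#               "nodegroups": {...},
#               "request": {"type": constants.IALLOCATOR_MODE_ALLOC, ...}}
#   out_data = {"success": True, "info": ..., "result": [...]}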
14720 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14721 self.cfg = cfg
14722 self.rpc = rpc_runner
14723 # init buffer variables
14724 self.in_text = self.out_text = self.in_data = self.out_data = None
14725 # init all input fields so that pylint is happy
14726 self.mode = mode
14727 self.memory = self.disks = self.disk_template = self.spindle_use = None
14728 self.os = self.tags = self.nics = self.vcpus = None
14729 self.hypervisor = None
14730 self.relocate_from = None
14732 self.instances = None
14733 self.evac_mode = None
14734 self.target_groups = []
14736 self.required_nodes = None
14737 # init result fields
14738 self.success = self.info = self.result = None
14740 try:
14741 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14742 except KeyError:
14743 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14744 " IAllocator" % self.mode)
14746 keyset = [n for (n, _) in keydata]
14748 for key in kwargs:
14749 if key not in keyset:
14750 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14751 " IAllocator" % key)
14752 setattr(self, key, kwargs[key])
14754 for key in keyset:
14755 if key not in kwargs:
14756 raise errors.ProgrammerError("Missing input parameter '%s' to"
14757 " IAllocator" % key)
14758 self._BuildInputData(compat.partial(fn, self), keydata)
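# Illustrative usage (assumption): callers pass exactly the keyword arguments
# named in _MODE_DATA for the chosen mode, e.g.
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=["inst1.example.com"], target_groups=["group2"])
#
# Unknown or missing keywords raise ProgrammerError before any cluster data
# is computed.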
14760 def _ComputeClusterData(self):
14761 """Compute the generic allocator input data.
14763 This is the data that is independent of the actual operation.
14766 cfg = self.cfg
14767 cluster_info = cfg.GetClusterInfo()
14769 data = {
14770 "version": constants.IALLOCATOR_VERSION,
14771 "cluster_name": cfg.GetClusterName(),
14772 "cluster_tags": list(cluster_info.GetTags()),
14773 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14774 "ipolicy": cluster_info.ipolicy,
14775 }
14776 ninfo = cfg.GetAllNodesInfo()
14777 iinfo = cfg.GetAllInstancesInfo().values()
14778 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14781 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14783 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14784 hypervisor_name = self.hypervisor
14785 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14786 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14787 else:
14788 hypervisor_name = cluster_info.primary_hypervisor
14790 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14791 [hypervisor_name])
14792 node_iinfo = \
14793 self.rpc.call_all_instances_info(node_list,
14794 cluster_info.enabled_hypervisors)
14796 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14798 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14799 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14800 i_list, config_ndata)
14801 assert len(data["nodes"]) == len(ninfo), \
14802 "Incomplete node data computed"
14804 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14806 self.in_data = data
14808 @staticmethod
14809 def _ComputeNodeGroupData(cfg):
14810 """Compute node groups data.
14813 cluster = cfg.GetClusterInfo()
14814 ng = dict((guuid, {
14815 "name": gdata.name,
14816 "alloc_policy": gdata.alloc_policy,
14817 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14818 })
14819 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14821 return ng
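# Illustrative example (assumption): the resulting mapping is keyed by group
# UUID, e.g.
#
#   {"<group uuid>": {"name": "default", "alloc_policy": "preferred",
#                     "ipolicy": {...}}}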
14823 @staticmethod
14824 def _ComputeBasicNodeData(cfg, node_cfg):
14825 """Compute global node data.
14828 @returns: a dict of name: (node dict, node config)
14831 # fill in static (config-based) values
14832 node_results = dict((ninfo.name, {
14833 "tags": list(ninfo.GetTags()),
14834 "primary_ip": ninfo.primary_ip,
14835 "secondary_ip": ninfo.secondary_ip,
14836 "offline": ninfo.offline,
14837 "drained": ninfo.drained,
14838 "master_candidate": ninfo.master_candidate,
14839 "group": ninfo.group,
14840 "master_capable": ninfo.master_capable,
14841 "vm_capable": ninfo.vm_capable,
14842 "ndparams": cfg.GetNdParams(ninfo),
14843 })
14844 for ninfo in node_cfg.values())
14846 return node_results
14848 @staticmethod
14849 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14850 node_results):
14851 """Compute global node data.
14853 @param node_results: the basic node structures as filled from the config
14856 #TODO(dynmem): compute the right data on MAX and MIN memory
14857 # make a copy of the current dict
14858 node_results = dict(node_results)
14859 for nname, nresult in node_data.items():
14860 assert nname in node_results, "Missing basic data for node %s" % nname
14861 ninfo = node_cfg[nname]
14863 if not (ninfo.offline or ninfo.drained):
14864 nresult.Raise("Can't get data for node %s" % nname)
14865 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14866 nname)
14867 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14869 for attr in ["memory_total", "memory_free", "memory_dom0",
14870 "vg_size", "vg_free", "cpu_total"]:
14871 if attr not in remote_info:
14872 raise errors.OpExecError("Node '%s' didn't return attribute"
14873 " '%s'" % (nname, attr))
14874 if not isinstance(remote_info[attr], int):
14875 raise errors.OpExecError("Node '%s' returned invalid value"
14876 " for '%s': %s" %
14877 (nname, attr, remote_info[attr]))
14878 # compute memory used by primary instances
14879 i_p_mem = i_p_up_mem = 0
14880 for iinfo, beinfo in i_list:
14881 if iinfo.primary_node == nname:
14882 i_p_mem += beinfo[constants.BE_MAXMEM]
14883 if iinfo.name not in node_iinfo[nname].payload:
14884 i_used_mem = 0
14885 else:
14886 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14887 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14888 remote_info["memory_free"] -= max(0, i_mem_diff)
14890 if iinfo.admin_state == constants.ADMINST_UP:
14891 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14893 # compute memory used by instances
14894 pnr_dyn = {
14895 "total_memory": remote_info["memory_total"],
14896 "reserved_memory": remote_info["memory_dom0"],
14897 "free_memory": remote_info["memory_free"],
14898 "total_disk": remote_info["vg_size"],
14899 "free_disk": remote_info["vg_free"],
14900 "total_cpus": remote_info["cpu_total"],
14901 "i_pri_memory": i_p_mem,
14902 "i_pri_up_memory": i_p_up_mem,
14903 }
14904 pnr_dyn.update(node_results[nname])
14905 node_results[nname] = pnr_dyn
14907 return node_results
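# Illustrative example (assumption): for a vm_capable online node the merged
# entry combines the static keys from _ComputeBasicNodeData with the dynamic
# ones computed above, e.g.
#
#   "node1.example.com": {"offline": False, "group": "<group uuid>", ...,
#                         "total_memory": 32768, "free_memory": 20480,
#                         "total_cpus": 8, "i_pri_memory": 8192, ...}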
14909 @staticmethod
14910 def _ComputeInstanceData(cluster_info, i_list):
14911 """Compute global instance data.
14914 instance_data = {}
14915 for iinfo, beinfo in i_list:
14916 nic_data = []
14917 for nic in iinfo.nics:
14918 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14919 nic_dict = {
14920 "mac": nic.mac,
14921 "ip": nic.ip,
14922 "mode": filled_params[constants.NIC_MODE],
14923 "link": filled_params[constants.NIC_LINK],
14924 }
14925 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14926 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14927 nic_data.append(nic_dict)
14928 pir = {
14929 "tags": list(iinfo.GetTags()),
14930 "admin_state": iinfo.admin_state,
14931 "vcpus": beinfo[constants.BE_VCPUS],
14932 "memory": beinfo[constants.BE_MAXMEM],
14933 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14934 "os": iinfo.os,
14935 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14936 "nics": nic_data,
14937 "disks": [{constants.IDISK_SIZE: dsk.size,
14938 constants.IDISK_MODE: dsk.mode}
14939 for dsk in iinfo.disks],
14940 "disk_template": iinfo.disk_template,
14941 "hypervisor": iinfo.hypervisor,
14942 }
14943 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14944 iinfo.disks)
14945 instance_data[iinfo.name] = pir
14947 return instance_data
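# Illustrative example (assumption, values made up): each entry is keyed by
# instance name and mirrors the "pir" structure built above, e.g.
#
#   "inst1.example.com": {"admin_state": "up", "vcpus": 2, "memory": 1024,
#                         "nics": [...], "disks": [{"size": 10240, "mode": "rw"}],
#                         "disk_template": "plain", "hypervisor": "xen-pvm",
#                         "disk_space_total": ..., ...}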
14949 def _AddNewInstance(self):
14950 """Add new instance data to allocator structure.
14952 This in combination with _AllocatorGetClusterData will create the
14953 correct structure needed as input for the allocator.
14955 The checks for the completeness of the opcode must have already been
14956 done.
14959 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14961 if self.disk_template in constants.DTS_INT_MIRROR:
14962 self.required_nodes = 2
14963 else:
14964 self.required_nodes = 1
14966 request = {
14967 "name": self.name,
14968 "disk_template": self.disk_template,
14969 "tags": self.tags,
14970 "os": self.os,
14971 "vcpus": self.vcpus,
14972 "memory": self.memory,
14973 "spindle_use": self.spindle_use,
14974 "disks": self.disks,
14975 "disk_space_total": disk_space,
14976 "nics": self.nics,
14977 "required_nodes": self.required_nodes,
14978 "hypervisor": self.hypervisor,
14979 }
14980 return request
14983 def _AddRelocateInstance(self):
14984 """Add relocate instance data to allocator structure.
14986 This in combination with _IAllocatorGetClusterData will create the
14987 correct structure needed as input for the allocator.
14989 The checks for the completeness of the opcode must have already been
14990 done.
14993 instance = self.cfg.GetInstanceInfo(self.name)
14994 if instance is None:
14995 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14996 " IAllocator" % self.name)
14998 if instance.disk_template not in constants.DTS_MIRRORED:
14999 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15000 errors.ECODE_INVAL)
15002 if instance.disk_template in constants.DTS_INT_MIRROR and \
15003 len(instance.secondary_nodes) != 1:
15004 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
15005 errors.ECODE_STATE)
15007 self.required_nodes = 1
15008 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15009 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15011 request = {
15012 "name": self.name,
15013 "disk_space_total": disk_space,
15014 "required_nodes": self.required_nodes,
15015 "relocate_from": self.relocate_from,
15016 }
15017 return request
15019 def _AddNodeEvacuate(self):
15020 """Get data for node-evacuate requests.
15023 return {
15024 "instances": self.instances,
15025 "evac_mode": self.evac_mode,
15026 }
15028 def _AddChangeGroup(self):
15029 """Get data for change-group requests.
15032 return {
15033 "instances": self.instances,
15034 "target_groups": self.target_groups,
15035 }
15037 def _BuildInputData(self, fn, keydata):
15038 """Build input data structures.
15041 self._ComputeClusterData()
15043 request = fn()
15044 request["type"] = self.mode
15045 for keyname, keytype in keydata:
15046 if keyname not in request:
15047 raise errors.ProgrammerError("Request parameter %s is missing" %
15048 keyname)
15049 val = request[keyname]
15050 if not keytype(val):
15051 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15052 " validation, value %s, expected"
15053 " type %s" % (keyname, val, keytype))
15054 self.in_data["request"] = request
15056 self.in_text = serializer.Dump(self.in_data)
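# Illustrative example (assumption): for a relocation the request merged into
# in_data and checked against keydata above would be roughly
#
#   {"type": constants.IALLOCATOR_MODE_RELOC, "name": "inst1.example.com",
#    "relocate_from": ["node2.example.com"],
#    "disk_space_total": 10240, "required_nodes": 1}
#
# A key that is missing or fails its ht check raises ProgrammerError.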
15058 _STRING_LIST = ht.TListOf(ht.TString)
15059 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15060 # pylint: disable=E1101
15061 # Class '...' has no 'OP_ID' member
15062 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15063 opcodes.OpInstanceMigrate.OP_ID,
15064 opcodes.OpInstanceReplaceDisks.OP_ID])
15065 })))
15067 _NEVAC_MOVED = \
15068 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15069 ht.TItems([ht.TNonEmptyString,
15070 ht.TNonEmptyString,
15071 ht.TListOf(ht.TNonEmptyString),
15072 ])))
15073 _NEVAC_FAILED = \
15074 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15075 ht.TItems([ht.TNonEmptyString,
15076 ht.TMaybeString,
15077 ])))
15078 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15079 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15081 _MODE_DATA = {
15082 constants.IALLOCATOR_MODE_ALLOC:
15084 (_AddNewInstance, [
15085 ("name", ht.TString),
15086 ("memory", ht.TInt),
15087 ("spindle_use", ht.TInt),
15088 ("disks", ht.TListOf(ht.TDict)),
15089 ("disk_template", ht.TString),
15090 ("os", ht.TString),
15091 ("tags", _STRING_LIST),
15092 ("nics", ht.TListOf(ht.TDict)),
15093 ("vcpus", ht.TInt),
15094 ("hypervisor", ht.TString),
15095 ], ht.TList),
15096 constants.IALLOCATOR_MODE_RELOC:
15097 (_AddRelocateInstance,
15098 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15099 ht.TList),
15100 constants.IALLOCATOR_MODE_NODE_EVAC:
15101 (_AddNodeEvacuate, [
15102 ("instances", _STRING_LIST),
15103 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15104 ], _NEVAC_RESULT),
15105 constants.IALLOCATOR_MODE_CHG_GROUP:
15106 (_AddChangeGroup, [
15107 ("instances", _STRING_LIST),
15108 ("target_groups", _STRING_LIST),
15109 ], _NEVAC_RESULT),
15110 }
15112 def Run(self, name, validate=True, call_fn=None):
15113 """Run an instance allocator and return the results.
15116 if call_fn is None:
15117 call_fn = self.rpc.call_iallocator_runner
15119 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15120 result.Raise("Failure while running the iallocator script")
15122 self.out_text = result.payload
15123 if validate:
15124 self._ValidateResult()
15126 def _ValidateResult(self):
15127 """Process the allocator results.
15129 This will process and if successful save the result in
15130 self.out_data and the other parameters.
15133 try:
15134 rdict = serializer.Load(self.out_text)
15135 except Exception, err:
15136 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15138 if not isinstance(rdict, dict):
15139 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15141 # TODO: remove backwards compatibility in later versions
15142 if "nodes" in rdict and "result" not in rdict:
15143 rdict["result"] = rdict["nodes"]
15144 del rdict["nodes"]
15146 for key in "success", "info", "result":
15147 if key not in rdict:
15148 raise errors.OpExecError("Can't parse iallocator results:"
15149 " missing key '%s'" % key)
15150 setattr(self, key, rdict[key])
15152 if not self._result_check(self.result):
15153 raise errors.OpExecError("Iallocator returned invalid result,"
15154 " expected %s, got %s" %
15155 (self._result_check, self.result),
15156 errors.ECODE_INVAL)
15158 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15159 assert self.relocate_from is not None
15160 assert self.required_nodes == 1
15162 node2group = dict((name, ndata["group"])
15163 for (name, ndata) in self.in_data["nodes"].items())
15165 fn = compat.partial(self._NodesToGroups, node2group,
15166 self.in_data["nodegroups"])
15168 instance = self.cfg.GetInstanceInfo(self.name)
15169 request_groups = fn(self.relocate_from + [instance.primary_node])
15170 result_groups = fn(rdict["result"] + [instance.primary_node])
15172 if self.success and not set(result_groups).issubset(request_groups):
15173 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15174 " differ from original groups (%s)" %
15175 (utils.CommaJoin(result_groups),
15176 utils.CommaJoin(request_groups)))
15178 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15179 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15181 self.out_data = rdict
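# Illustrative example (assumption): a well-formed allocation reply parsed
# above would look like
#
#   {"success": True, "info": "allocation successful",
#    "result": ["node1.example.com", "node3.example.com"]}
#
# with "result" superseding the legacy "nodes" key handled by the
# backwards-compatibility branch.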
15183 @staticmethod
15184 def _NodesToGroups(node2group, groups, nodes):
15185 """Returns a list of unique group names for a list of nodes.
15187 @type node2group: dict
15188 @param node2group: Map from node name to group UUID
15189 @type groups: dict
15190 @param groups: Group information
15191 @type nodes: list
15192 @param nodes: Node names
15195 result = set()
15197 for node in nodes:
15198 try:
15199 group_uuid = node2group[node]
15200 except KeyError:
15201 # Ignore unknown node
15202 pass
15203 else:
15204 try:
15205 group = groups[group_uuid]
15206 except KeyError:
15207 # Can't find group, let's use UUID
15208 group_name = group_uuid
15209 else:
15210 group_name = group["name"]
15212 result.add(group_name)
15214 return sorted(result)
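# Illustrative usage (assumption):
#
#   IAllocator._NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
#                             {"uuid-a": {"name": "default"}},
#                             ["node1", "node2", "unknown"])
#
# returns ["default", "uuid-b"]: nodes absent from node2group are ignored and
# a group missing from the groups dict falls back to its UUID.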
15217 class LUTestAllocator(NoHooksLU):
15218 """Run allocator tests.
15220 This LU runs the allocator tests
15223 def CheckPrereq(self):
15224 """Check prerequisites.
15226 This checks the opcode parameters depending on the direction and mode of the test.
15229 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15230 for attr in ["memory", "disks", "disk_template",
15231 "os", "tags", "nics", "vcpus"]:
15232 if not hasattr(self.op, attr):
15233 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15234 attr, errors.ECODE_INVAL)
15235 iname = self.cfg.ExpandInstanceName(self.op.name)
15236 if iname is not None:
15237 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15238 iname, errors.ECODE_EXISTS)
15239 if not isinstance(self.op.nics, list):
15240 raise errors.OpPrereqError("Invalid parameter 'nics'",
15241 errors.ECODE_INVAL)
15242 if not isinstance(self.op.disks, list):
15243 raise errors.OpPrereqError("Invalid parameter 'disks'",
15244 errors.ECODE_INVAL)
15245 for row in self.op.disks:
15246 if (not isinstance(row, dict) or
15247 constants.IDISK_SIZE not in row or
15248 not isinstance(row[constants.IDISK_SIZE], int) or
15249 constants.IDISK_MODE not in row or
15250 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15251 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15252 " parameter", errors.ECODE_INVAL)
15253 if self.op.hypervisor is None:
15254 self.op.hypervisor = self.cfg.GetHypervisorType()
15255 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15256 fname = _ExpandInstanceName(self.cfg, self.op.name)
15257 self.op.name = fname
15258 self.relocate_from = \
15259 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15260 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15261 constants.IALLOCATOR_MODE_NODE_EVAC):
15262 if not self.op.instances:
15263 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15264 self.op.instances = _GetWantedInstances(self, self.op.instances)
15265 else:
15266 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15267 self.op.mode, errors.ECODE_INVAL)
15269 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15270 if self.op.allocator is None:
15271 raise errors.OpPrereqError("Missing allocator name",
15272 errors.ECODE_INVAL)
15273 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15274 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15275 self.op.direction, errors.ECODE_INVAL)
15277 def Exec(self, feedback_fn):
15278 """Run the allocator test.
15281 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15282 ial = IAllocator(self.cfg, self.rpc,
15283 mode=self.op.mode,
15284 name=self.op.name,
15285 memory=self.op.memory,
15286 disks=self.op.disks,
15287 disk_template=self.op.disk_template,
15288 os=self.op.os,
15289 tags=self.op.tags,
15290 nics=self.op.nics,
15291 vcpus=self.op.vcpus,
15292 hypervisor=self.op.hypervisor,
15293 spindle_use=self.op.spindle_use)
15294 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15295 ial = IAllocator(self.cfg, self.rpc,
15296 mode=self.op.mode,
15297 name=self.op.name,
15298 relocate_from=list(self.relocate_from),
15299 )
15300 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15301 ial = IAllocator(self.cfg, self.rpc,
15302 mode=self.op.mode,
15303 instances=self.op.instances,
15304 target_groups=self.op.target_groups)
15305 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15306 ial = IAllocator(self.cfg, self.rpc,
15307 mode=self.op.mode,
15308 instances=self.op.instances,
15309 evac_mode=self.op.evac_mode)
15310 else:
15311 raise errors.ProgrammerError("Uncaught mode %s in"
15312 " LUTestAllocator.Exec", self.op.mode)
15314 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15315 result = ial.in_text
15316 else:
15317 ial.Run(self.op.allocator, validate=False)
15318 result = ial.out_text
15320 return result
15322 #: Query type implementations
15323 _QUERY_IMPL = {
15324 constants.QR_CLUSTER: _ClusterQuery,
15325 constants.QR_INSTANCE: _InstanceQuery,
15326 constants.QR_NODE: _NodeQuery,
15327 constants.QR_GROUP: _GroupQuery,
15328 constants.QR_OS: _OsQuery,
15329 constants.QR_EXPORT: _ExportQuery,
15330 }
15332 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15335 def _GetQueryImplementation(name):
15336 """Returns the implementation for a query type.
15338 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15341 try:
15342 return _QUERY_IMPL[name]
15343 except KeyError:
15344 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15345 errors.ECODE_INVAL)
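# Illustrative usage (assumption): query opcodes resolve their resource through
# this helper, e.g. _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class registered above, while an unknown resource name surfaces
# as OpPrereqError rather than a bare KeyError.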