4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import rpc
62 from ganeti import runtime
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
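# Usage sketch (illustrative only, not part of this module): an LU's Exec can
# hand follow-up work back to the job queue roughly like this, assuming
# OpTestDelay is an available opcode:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1)]]  # one job with one opcode
#     return ResultWithJobs(jobs, result="initial work done")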
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensuring the
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separately is better because:
176 - ExpandNames is left as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that
200 level (note that in this case C{DeclareLocks} won't be called
201 at all for that level)
202 - if you need locks at a level, but you can't calculate it in
203 this function, initialise that level with an empty list and do
204 further processing in L{LogicalUnit.DeclareLocks} (see that
205 function's docstring)
206 - don't put anything for the BGL level
207 - if you want all locks at a level use L{locking.ALL_SET} as a value
209 If you need to share locks (rather than acquire them exclusively) at one
210 level you can modify self.share_locks, setting a true value (usually 1) for
211 that level. By default locks are not shared.
213 This function can also define a list of tasklets, which then will be
214 executed in order instead of the usual LU-level CheckPrereq and Exec
215 functions, if those are not defined by the LU.
219 # Acquire all nodes and one instance
220 self.needed_locks = {
221 locking.LEVEL_NODE: locking.ALL_SET,
222 locking.LEVEL_INSTANCE: ['instance1.example.com'],
224 # Acquire just two nodes
225 self.needed_locks = {
226 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
229 self.needed_locks = {} # No, you can't leave it to the default value None
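# Acquire all node locks in shared mode (sketch: shared acquisition is
# requested via self.share_locks, not via needed_locks itself)
self.share_locks[locking.LEVEL_NODE] = 1
self.needed_locks = {
  locking.LEVEL_NODE: locking.ALL_SET,
}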
232 # The implementation of this method is mandatory only if the new LU is
233 # concurrent, so that old LUs don't need to be changed all at the same
236 self.needed_locks = {} # Exclusive LUs don't need locks.
238 raise NotImplementedError
240 def DeclareLocks(self, level):
241 """Declare LU locking needs for a level
243 While most LUs can just declare their locking needs at ExpandNames time,
244 sometimes there's the need to calculate some locks after having acquired
245 the ones before. This function is called just before acquiring locks at a
246 particular level, but after acquiring the ones at lower levels, and permits
247 such calculations. It can be used to modify self.needed_locks, and by
248 default it does nothing.
250 This function is only called if you have something already set in
251 self.needed_locks for the level.
253 @param level: Locking level which is going to be locked
254 @type level: member of L{ganeti.locking.LEVELS}
258 def CheckPrereq(self):
259 """Check prerequisites for this LU.
261 This method should check that the prerequisites for the execution
262 of this LU are fulfilled. It can do internode communication, but
263 it should be idempotent - no cluster or system changes are
266 The method should raise errors.OpPrereqError in case something is
267 not fulfilled. Its return value is ignored.
269 This method should also update all the parameters of the opcode to
270 their canonical form if it hasn't been done by ExpandNames before.
273 if self.tasklets is not None:
274 for (idx, tl) in enumerate(self.tasklets):
275 logging.debug("Checking prerequisites for tasklet %s/%s",
276 idx + 1, len(self.tasklets))
281 def Exec(self, feedback_fn):
284 This method should implement the actual work. It should raise
285 errors.OpExecError for failures that are somewhat dealt with in
289 if self.tasklets is not None:
290 for (idx, tl) in enumerate(self.tasklets):
291 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
294 raise NotImplementedError
296 def BuildHooksEnv(self):
297 """Build hooks environment for this LU.
300 @return: Dictionary containing the environment that will be used for
301 running the hooks for this LU. The keys of the dict must not be prefixed
302 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
303 will extend the environment with additional variables. If no environment
304 should be defined, an empty dictionary should be returned (not C{None}).
305 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309 raise NotImplementedError
311 def BuildHooksNodes(self):
312 """Build list of nodes to run LU's hooks.
314 @rtype: tuple; (list, list)
315 @return: Tuple containing a list of node names on which the hook
316 should run before the execution and a list of node names on which the
317 hook should run after the execution. If there are no nodes, an empty
318 list (and not None) should be returned.
319 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
323 raise NotImplementedError
325 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
326 """Notify the LU about the results of its hooks.
328 This method is called every time a hooks phase is executed, and notifies
329 the Logical Unit about the hooks' result. The LU can then use it to alter
330 its result based on the hooks. By default the method does nothing and the
331 previous result is passed back unchanged, but any LU can override it if it
332 wants to use the local cluster hook-scripts somehow.
334 @param phase: one of L{constants.HOOKS_PHASE_POST} or
335 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
336 @param hook_results: the results of the multi-node hooks rpc call
337 @param feedback_fn: function used to send feedback back to the caller
338 @param lu_result: the previous Exec result this LU had, or None
340 @return: the new Exec result, based on the previous result
344 # API must be kept, thus we ignore the "unused argument" and "could
345 # be a function" warnings
346 # pylint: disable=W0613,R0201
349 def _ExpandAndLockInstance(self):
350 """Helper function to expand and lock an instance.
352 Many LUs that work on an instance take its name in self.op.instance_name
353 and need to expand it and then declare the expanded name for locking. This
354 function does it, and then updates self.op.instance_name to the expanded
355 name. It also initializes needed_locks as a dict, if this hasn't been done
359 if self.needed_locks is None:
360 self.needed_locks = {}
362 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
363 "_ExpandAndLockInstance called with instance-level locks set"
364 self.op.instance_name = _ExpandInstanceName(self.cfg,
365 self.op.instance_name)
366 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
368 def _LockInstancesNodes(self, primary_only=False,
369 level=locking.LEVEL_NODE):
370 """Helper function to declare instances' nodes for locking.
372 This function should be called after locking one or more instances to lock
373 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
374 with all primary or secondary nodes for instances already locked and
375 present in self.needed_locks[locking.LEVEL_INSTANCE].
377 It should be called from DeclareLocks, and for safety only works if
378 self.recalculate_locks[locking.LEVEL_NODE] is set.
380 In the future it may grow parameters to just lock some instances' nodes, or
381 to lock only primary or secondary nodes, if needed.
383 It should be called in DeclareLocks in a way similar to::
385 if level == locking.LEVEL_NODE:
386 self._LockInstancesNodes()
388 @type primary_only: boolean
389 @param primary_only: only lock primary nodes of locked instances
390 @param level: Which lock level to use for locking nodes
393 assert level in self.recalculate_locks, \
394 "_LockInstancesNodes helper function called with no nodes to recalculate"
396 # TODO: check if we've really been called with the instance locks held
398 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
399 # future we might want to have different behaviors depending on the value
400 # of self.recalculate_locks[locking.LEVEL_NODE]
402 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
403 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
404 wanted_nodes.append(instance.primary_node)
406 wanted_nodes.extend(instance.secondary_nodes)
408 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
409 self.needed_locks[level] = wanted_nodes
410 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
411 self.needed_locks[level].extend(wanted_nodes)
413 raise errors.ProgrammerError("Unknown recalculation mode")
415 del self.recalculate_locks[level]
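# Typical wiring of the two helpers above (a sketch of the common pattern, not
# tied to any specific LU): an instance LU expands and locks the instance in
# ExpandNames, then computes its node locks in DeclareLocks:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()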
418 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
419 """Simple LU which runs no hooks.
421 This LU is intended as a parent for other LogicalUnits which will
422 run no hooks, in order to reduce duplicate code.
428 def BuildHooksEnv(self):
429 """Empty BuildHooksEnv for NoHooksLu.
431 This just raises an error.
434 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
436 def BuildHooksNodes(self):
437 """Empty BuildHooksNodes for NoHooksLU.
440 raise AssertionError("BuildHooksNodes called for NoHooksLU")
444 """Tasklet base class.
446 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
447 they can mix legacy code with tasklets. Locking needs to be done in the LU;
448 tasklets know nothing about locks.
450 Subclasses must follow these rules:
451 - Implement CheckPrereq
455 def __init__(self, lu):
462 def CheckPrereq(self):
463 """Check prerequisites for this tasklets.
465 This method should check whether the prerequisites for the execution of
466 this tasklet are fulfilled. It can do internode communication, but it
467 should be idempotent - no cluster or system changes are allowed.
469 The method should raise errors.OpPrereqError in case something is not
470 fulfilled. Its return value is ignored.
472 This method should also update all parameters to their canonical form if it
473 hasn't been done before.
478 def Exec(self, feedback_fn):
479 """Execute the tasklet.
481 This method should implement the actual work. It should raise
482 errors.OpExecError for failures that are somewhat dealt with in code, or
486 raise NotImplementedError
490 """Base for query utility classes.
493 #: Attribute holding field definitions
499 def __init__(self, qfilter, fields, use_locking):
500 """Initializes this class.
503 self.use_locking = use_locking
505 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
506 namefield=self.SORT_FIELD)
507 self.requested_data = self.query.RequestedData()
508 self.names = self.query.RequestedNames()
510 # Sort only if no names were requested
511 self.sort_by_name = not self.names
513 self.do_locking = None
516 def _GetNames(self, lu, all_names, lock_level):
517 """Helper function to determine names asked for in the query.
521 names = lu.owned_locks(lock_level)
525 if self.wanted == locking.ALL_SET:
526 assert not self.names
527 # caller didn't specify names, so ordering is not important
528 return utils.NiceSort(names)
530 # caller specified names and we must keep the same order
532 assert not self.do_locking or lu.glm.is_owned(lock_level)
534 missing = set(self.wanted).difference(names)
536 raise errors.OpExecError("Some items were removed before retrieving"
537 " their data: %s" % missing)
539 # Return expanded names
542 def ExpandNames(self, lu):
543 """Expand names for this query.
545 See L{LogicalUnit.ExpandNames}.
548 raise NotImplementedError()
550 def DeclareLocks(self, lu, level):
551 """Declare locks for this query.
553 See L{LogicalUnit.DeclareLocks}.
556 raise NotImplementedError()
558 def _GetQueryData(self, lu):
559 """Collects all data for this query.
561 @return: Query data object
564 raise NotImplementedError()
566 def NewStyleQuery(self, lu):
567 """Collect data and execute query.
570 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
571 sort_by_name=self.sort_by_name)
573 def OldStyleQuery(self, lu):
574 """Collect data and execute query.
577 return self.query.OldStyleQuery(self._GetQueryData(lu),
578 sort_by_name=self.sort_by_name)
582 """Returns a dict declaring all lock levels shared.
585 return dict.fromkeys(locking.LEVELS, 1)
588 def _MakeLegacyNodeInfo(data):
589 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
591 Converts the data into a single dictionary. This is fine for most use cases,
592 but some require information from more than one volume group or hypervisor.
595 (bootid, (vg_info, ), (hv_info, )) = data
597 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
602 def _AnnotateDiskParams(instance, devs, cfg):
603 """Little helper wrapper to the rpc annotation method.
605 @param instance: The instance object
606 @type devs: List of L{objects.Disk}
607 @param devs: The root devices (not any of its children!)
608 @param cfg: The config object
609 @return: The annotated disk copies
610 @see L{rpc.AnnotateDiskParams}
613 return rpc.AnnotateDiskParams(instance.disk_template, devs,
614 cfg.GetInstanceDiskParams(instance))
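# Example call (sketch): LUs typically annotate an instance's top-level disks
# before passing them to node RPCs, e.g.:
#
#   anno_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)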
617 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
619 """Checks if node groups for locked instances are still correct.
621 @type cfg: L{config.ConfigWriter}
622 @param cfg: Cluster configuration
623 @type instances: dict; string as key, L{objects.Instance} as value
624 @param instances: Dictionary, instance name as key, instance object as value
625 @type owned_groups: iterable of string
626 @param owned_groups: List of owned groups
627 @type owned_nodes: iterable of string
628 @param owned_nodes: List of owned nodes
629 @type cur_group_uuid: string or None
630 @param cur_group_uuid: Optional group UUID to check against instance's groups
633 for (name, inst) in instances.items():
634 assert owned_nodes.issuperset(inst.all_nodes), \
635 "Instance %s's nodes changed while we kept the lock" % name
637 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
639 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
640 "Instance %s has no node in group %s" % (name, cur_group_uuid)
643 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
644 """Checks if the owned node groups are still correct for an instance.
646 @type cfg: L{config.ConfigWriter}
647 @param cfg: The cluster configuration
648 @type instance_name: string
649 @param instance_name: Instance name
650 @type owned_groups: set or frozenset
651 @param owned_groups: List of currently owned node groups
654 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
656 if not owned_groups.issuperset(inst_groups):
657 raise errors.OpPrereqError("Instance %s's node groups changed since"
658 " locks were acquired, current groups are"
659 " are '%s', owning groups '%s'; retry the"
662 utils.CommaJoin(inst_groups),
663 utils.CommaJoin(owned_groups)),
669 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
670 """Checks if the instances in a node group are still correct.
672 @type cfg: L{config.ConfigWriter}
673 @param cfg: The cluster configuration
674 @type group_uuid: string
675 @param group_uuid: Node group UUID
676 @type owned_instances: set or frozenset
677 @param owned_instances: List of currently owned instances
680 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
681 if owned_instances != wanted_instances:
682 raise errors.OpPrereqError("Instances in node group '%s' changed since"
683 " locks were acquired, wanted '%s', have '%s';"
684 " retry the operation" %
686 utils.CommaJoin(wanted_instances),
687 utils.CommaJoin(owned_instances)),
690 return wanted_instances
693 def _SupportsOob(cfg, node):
694 """Tells if node supports OOB.
696 @type cfg: L{config.ConfigWriter}
697 @param cfg: The cluster configuration
698 @type node: L{objects.Node}
699 @param node: The node
700 @return: The OOB script if supported or an empty string otherwise
703 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
706 def _GetWantedNodes(lu, nodes):
707 """Returns list of checked and expanded node names.
709 @type lu: L{LogicalUnit}
710 @param lu: the logical unit on whose behalf we execute
712 @param nodes: list of node names or None for all nodes
714 @return: the list of nodes, sorted
715 @raise errors.ProgrammerError: if the nodes parameter is wrong type
719 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
721 return utils.NiceSort(lu.cfg.GetNodeList())
724 def _GetWantedInstances(lu, instances):
725 """Returns list of checked and expanded instance names.
727 @type lu: L{LogicalUnit}
728 @param lu: the logical unit on whose behalf we execute
729 @type instances: list
730 @param instances: list of instance names or None for all instances
732 @return: the list of instances, sorted
733 @raise errors.OpPrereqError: if the instances parameter is wrong type
734 @raise errors.OpPrereqError: if any of the passed instances is not found
738 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
740 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
744 def _GetUpdatedParams(old_params, update_dict,
745 use_default=True, use_none=False):
746 """Return the new version of a parameter dictionary.
748 @type old_params: dict
749 @param old_params: old parameters
750 @type update_dict: dict
751 @param update_dict: dict containing new parameter values, or
752 constants.VALUE_DEFAULT to reset the parameter to its default
754 @type use_default: boolean
755 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
756 values as 'to be deleted' values
757 @type use_none: boolean
758 @param use_none: whether to recognise C{None} values as 'to be
761 @return: the new parameter dictionary
764 params_copy = copy.deepcopy(old_params)
765 for key, val in update_dict.iteritems():
766 if ((use_default and val == constants.VALUE_DEFAULT) or
767 (use_none and val is None)):
773 params_copy[key] = val
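# Merge semantics sketch (hypothetical keys and values): with use_default=True,
# VALUE_DEFAULT entries remove the key so the next-level default applies again,
# while other entries overwrite or add:
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"b": constants.VALUE_DEFAULT, "c": 3})
#   => {"a": 1, "c": 3}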
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of a instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster'" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
825 def _UpdateAndVerifySubDict(base, updates, type_check):
826 """Updates and verifies a dict with sub dicts of the same type.
828 @param base: The dict with the old data
829 @param updates: The dict with the new data
830 @param type_check: Dict suitable to ForceDictType to verify correct types
831 @returns: A new dict with updated and verified values
835 new = _GetUpdatedParams(old, value)
836 utils.ForceDictType(new, type_check)
839 ret = copy.deepcopy(base)
840 ret.update(dict((key, fn(base.get(key, {}), value))
841 for key, value in updates.items()))
845 def _MergeAndVerifyHvState(op_input, obj_input):
846 """Combines the hv state from an opcode with the one of the object
848 @param op_input: The input dict from the opcode
849 @param obj_input: The input dict from the objects
850 @return: The verified and updated dict
854 invalid_hvs = set(op_input) - constants.HYPER_TYPES
856 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
857 " %s" % utils.CommaJoin(invalid_hvs),
859 if obj_input is None:
861 type_check = constants.HVSTS_PARAMETER_TYPES
862 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
867 def _MergeAndVerifyDiskState(op_input, obj_input):
868 """Combines the disk state from an opcode with the one of the object
870 @param op_input: The input dict from the opcode
871 @param obj_input: The input dict from the objects
872 @return: The verified and updated dict
875 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
877 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
878 utils.CommaJoin(invalid_dst),
880 type_check = constants.DSS_PARAMETER_TYPES
881 if obj_input is None:
883 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
885 for key, value in op_input.items())
890 def _ReleaseLocks(lu, level, names=None, keep=None):
891 """Releases locks owned by an LU.
893 @type lu: L{LogicalUnit}
894 @param level: Lock level
895 @type names: list or None
896 @param names: Names of locks to release
897 @type keep: list or None
898 @param keep: Names of locks to retain
901 assert not (keep is not None and names is not None), \
902 "Only one of the 'names' and the 'keep' parameters can be given"
904 if names is not None:
905 should_release = names.__contains__
907 should_release = lambda name: name not in keep
909 should_release = None
911 owned = lu.owned_locks(level)
913 # Not owning any lock at this level, do nothing
920 # Determine which locks to release
922 if should_release(name):
927 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
929 # Release just some locks
930 lu.glm.release(level, names=release)
932 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
935 lu.glm.release(level)
937 assert not lu.glm.is_owned(level), "No locks should be owned"
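# Usage sketch: once an LU has narrowed down which node(s) it actually operates
# on, it can drop the node locks it no longer needs, e.g. (self.op.node_name is
# just an illustrative opcode attribute here):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])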
940 def _MapInstanceDisksToNodes(instances):
941 """Creates a map from (node, volume) to instance name.
943 @type instances: list of L{objects.Instance}
944 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
947 return dict(((node, vol), inst.name)
948 for inst in instances
949 for (node, vols) in inst.MapLVsByNode().items()
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
961 # pylint: disable=W0702
962 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
965 def _CheckOutputFields(static, dynamic, selected):
966 """Checks whether all selected fields are valid.
968 @type static: L{utils.FieldSet}
969 @param static: static fields set
970 @type dynamic: L{utils.FieldSet}
971 @param dynamic: dynamic fields set
978 delta = f.NonMatching(selected)
980 raise errors.OpPrereqError("Unknown output fields selected: %s"
981 % ",".join(delta), errors.ECODE_INVAL)
984 def _CheckGlobalHvParams(params):
985 """Validates that given hypervisor params are not global ones.
987 This will ensure that instances don't get customised versions of
991 used_globals = constants.HVC_GLOBALS.intersection(params)
993 msg = ("The following hypervisor parameters are global and cannot"
994 " be customized at instance level, please modify them at"
995 " cluster level: %s" % utils.CommaJoin(used_globals))
996 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
999 def _CheckNodeOnline(lu, node, msg=None):
1000 """Ensure that a given node is online.
1002 @param lu: the LU on behalf of which we make the check
1003 @param node: the node to check
1004 @param msg: if passed, should be a message to replace the default one
1005 @raise errors.OpPrereqError: if the node is offline
1009 msg = "Can't use offline node"
1010 if lu.cfg.GetNodeInfo(node).offline:
1011 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1014 def _CheckNodeNotDrained(lu, node):
1015 """Ensure that a given node is not drained.
1017 @param lu: the LU on behalf of which we make the check
1018 @param node: the node to check
1019 @raise errors.OpPrereqError: if the node is drained
1022 if lu.cfg.GetNodeInfo(node).drained:
1023 raise errors.OpPrereqError("Can't use drained node %s" % node,
1027 def _CheckNodeVmCapable(lu, node):
1028 """Ensure that a given node is vm capable.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is not vm capable
1035 if not lu.cfg.GetNodeInfo(node).vm_capable:
1036 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1040 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1041 """Ensure that a node supports a given OS.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @param os_name: the OS to query about
1046 @param force_variant: whether to ignore variant errors
1047 @raise errors.OpPrereqError: if the node is not supporting the OS
1050 result = lu.rpc.call_os_get(node, os_name)
1051 result.Raise("OS '%s' not in supported OS list for node %s" %
1053 prereq=True, ecode=errors.ECODE_INVAL)
1054 if not force_variant:
1055 _CheckOSVariant(result.payload, os_name)
1058 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1059 """Ensure that a node has the given secondary ip.
1061 @type lu: L{LogicalUnit}
1062 @param lu: the LU on behalf of which we make the check
1064 @param node: the node to check
1065 @type secondary_ip: string
1066 @param secondary_ip: the ip to check
1067 @type prereq: boolean
1068 @param prereq: whether to throw a prerequisite or an execute error
1069 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1070 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1073 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1074 result.Raise("Failure checking secondary ip on node %s" % node,
1075 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1076 if not result.payload:
1077 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1078 " please fix and re-run this command" % secondary_ip)
1080 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1082 raise errors.OpExecError(msg)
1085 def _GetClusterDomainSecret():
1086 """Reads the cluster domain secret.
1089 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1093 def _CheckInstanceState(lu, instance, req_states, msg=None):
1094 """Ensure that an instance is in one of the required states.
1096 @param lu: the LU on behalf of which we make the check
1097 @param instance: the instance to check
1098 @param msg: if passed, should be a message to replace the default one
1099 @raise errors.OpPrereqError: if the instance is not in the required state
1103 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1104 if instance.admin_state not in req_states:
1105 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1106 (instance.name, instance.admin_state, msg),
1109 if constants.ADMINST_UP not in req_states:
1110 pnode = instance.primary_node
1111 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1112 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1113 prereq=True, ecode=errors.ECODE_ENVIRON)
1115 if instance.name in ins_l.payload:
1116 raise errors.OpPrereqError("Instance %s is running, %s" %
1117 (instance.name, msg), errors.ECODE_STATE)
1120 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1121 """Computes if value is in the desired range.
1123 @param name: name of the parameter for which we perform the check
1124 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1126 @param ipolicy: dictionary containing min, max and std values
1127 @param value: actual value that we want to use
1128 @return: None if the value is within the range, or an error message otherwise
1132 if value in [None, constants.VALUE_AUTO]:
1134 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1135 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1136 if value > max_v or min_v > value:
1138 fqn = "%s/%s" % (name, qualifier)
1141 return ("%s value %s is not in range [%s, %s]" %
1142 (fqn, value, min_v, max_v))
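# Behaviour sketch (hypothetical policy values): with a policy whose
# memory-size range is [128, 4096],
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 1024)
# returns None, while a value of 8192 yields a "not in range [128, 4096]"
# message naming the offending parameter.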
1146 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1147 nic_count, disk_sizes, spindle_use,
1148 _compute_fn=_ComputeMinMaxSpec):
1149 """Verifies ipolicy against provided specs.
1152 @param ipolicy: The ipolicy
1154 @param mem_size: The memory size
1155 @type cpu_count: int
1156 @param cpu_count: Used cpu cores
1157 @type disk_count: int
1158 @param disk_count: Number of disks used
1159 @type nic_count: int
1160 @param nic_count: Number of nics used
1161 @type disk_sizes: list of ints
1162 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1163 @type spindle_use: int
1164 @param spindle_use: The number of spindles this instance uses
1165 @param _compute_fn: The compute function (unittest only)
1166 @return: A list of violations, or an empty list if no violations are found
1169 assert disk_count == len(disk_sizes)
1172 (constants.ISPEC_MEM_SIZE, "", mem_size),
1173 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1174 (constants.ISPEC_DISK_COUNT, "", disk_count),
1175 (constants.ISPEC_NIC_COUNT, "", nic_count),
1176 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1177 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1178 for idx, d in enumerate(disk_sizes)]
1181 (_compute_fn(name, qualifier, ipolicy, value)
1182 for (name, qualifier, value) in test_settings))
1185 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1186 _compute_fn=_ComputeIPolicySpecViolation):
1187 """Compute if instance meets the specs of ipolicy.
1190 @param ipolicy: The ipolicy to verify against
1191 @type instance: L{objects.Instance}
1192 @param instance: The instance to verify
1193 @param _compute_fn: The function to verify ipolicy (unittest only)
1194 @see: L{_ComputeIPolicySpecViolation}
1197 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1198 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1199 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1200 disk_count = len(instance.disks)
1201 disk_sizes = [disk.size for disk in instance.disks]
1202 nic_count = len(instance.nics)
1204 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1205 disk_sizes, spindle_use)
1208 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1209 _compute_fn=_ComputeIPolicySpecViolation):
1210 """Compute if instance specs meets the specs of ipolicy.
1213 @param ipolicy: The ipolicy to verify against
1214 @type instance_spec: dict
1215 @param instance_spec: The instance spec to verify
1216 @param _compute_fn: The function to verify ipolicy (unittest only)
1217 @see: L{_ComputeIPolicySpecViolation}
1220 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1221 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1222 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1223 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1224 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1225 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1227 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1228 disk_sizes, spindle_use)
1231 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1233 _compute_fn=_ComputeIPolicyInstanceViolation):
1234 """Compute if instance meets the specs of the new target group.
1236 @param ipolicy: The ipolicy to verify
1237 @param instance: The instance object to verify
1238 @param current_group: The current group of the instance
1239 @param target_group: The new group of the instance
1240 @param _compute_fn: The function to verify ipolicy (unittest only)
1241 @see: L{_ComputeIPolicySpecViolation}
1244 if current_group == target_group:
1247 return _compute_fn(ipolicy, instance)
1250 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1251 _compute_fn=_ComputeIPolicyNodeViolation):
1252 """Checks that the target node is correct in terms of instance policy.
1254 @param ipolicy: The ipolicy to verify
1255 @param instance: The instance object to verify
1256 @param node: The new node to relocate
1257 @param ignore: Ignore violations of the ipolicy
1258 @param _compute_fn: The function to verify ipolicy (unittest only)
1259 @see: L{_ComputeIPolicySpecViolation}
1262 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1263 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1266 msg = ("Instance does not meet target node group's (%s) instance"
1267 " policy: %s") % (node.group, utils.CommaJoin(res))
1271 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1274 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1275 """Computes a set of any instances that would violate the new ipolicy.
1277 @param old_ipolicy: The current (still in-place) ipolicy
1278 @param new_ipolicy: The new (to become) ipolicy
1279 @param instances: List of instances to verify
1280 @return: A list of instances which violate the new ipolicy but
1284 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1285 _ComputeViolatingInstances(old_ipolicy, instances))
1288 def _ExpandItemName(fn, name, kind):
1289 """Expand an item name.
1291 @param fn: the function to use for expansion
1292 @param name: requested item name
1293 @param kind: text description ('Node' or 'Instance')
1294 @return: the resolved (full) name
1295 @raise errors.OpPrereqError: if the item is not found
1298 full_name = fn(name)
1299 if full_name is None:
1300 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1305 def _ExpandNodeName(cfg, name):
1306 """Wrapper over L{_ExpandItemName} for nodes."""
1307 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1310 def _ExpandInstanceName(cfg, name):
1311 """Wrapper over L{_ExpandItemName} for instance."""
1312 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
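# Example (sketch, hypothetical names): both wrappers resolve a short name to
# the fully qualified one stored in the configuration, e.g.
#   _ExpandNodeName(self.cfg, "node1")  =>  "node1.example.com"
# and raise errors.OpPrereqError if the item is not known.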
1315 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1316 minmem, maxmem, vcpus, nics, disk_template, disks,
1317 bep, hvp, hypervisor_name, tags):
1318 """Builds instance related env variables for hooks
1320 This builds the hook environment from individual variables.
1323 @param name: the name of the instance
1324 @type primary_node: string
1325 @param primary_node: the name of the instance's primary node
1326 @type secondary_nodes: list
1327 @param secondary_nodes: list of secondary nodes as strings
1328 @type os_type: string
1329 @param os_type: the name of the instance's OS
1330 @type status: string
1331 @param status: the desired status of the instance
1332 @type minmem: string
1333 @param minmem: the minimum memory size of the instance
1334 @type maxmem: string
1335 @param maxmem: the maximum memory size of the instance
1337 @param vcpus: the count of VCPUs the instance has
1339 @param nics: list of tuples (ip, mac, mode, link) representing
1340 the NICs the instance has
1341 @type disk_template: string
1342 @param disk_template: the disk template of the instance
1344 @param disks: the list of (size, mode) pairs
1346 @param bep: the backend parameters for the instance
1348 @param hvp: the hypervisor parameters for the instance
1349 @type hypervisor_name: string
1350 @param hypervisor_name: the hypervisor for the instance
1352 @param tags: list of instance tags as strings
1354 @return: the hook environment for this instance
1359 "INSTANCE_NAME": name,
1360 "INSTANCE_PRIMARY": primary_node,
1361 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1362 "INSTANCE_OS_TYPE": os_type,
1363 "INSTANCE_STATUS": status,
1364 "INSTANCE_MINMEM": minmem,
1365 "INSTANCE_MAXMEM": maxmem,
1366 # TODO(2.7) remove deprecated "memory" value
1367 "INSTANCE_MEMORY": maxmem,
1368 "INSTANCE_VCPUS": vcpus,
1369 "INSTANCE_DISK_TEMPLATE": disk_template,
1370 "INSTANCE_HYPERVISOR": hypervisor_name,
1373 nic_count = len(nics)
1374 for idx, (ip, mac, mode, link) in enumerate(nics):
1377 env["INSTANCE_NIC%d_IP" % idx] = ip
1378 env["INSTANCE_NIC%d_MAC" % idx] = mac
1379 env["INSTANCE_NIC%d_MODE" % idx] = mode
1380 env["INSTANCE_NIC%d_LINK" % idx] = link
1381 if mode == constants.NIC_MODE_BRIDGED:
1382 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1386 env["INSTANCE_NIC_COUNT"] = nic_count
1389 disk_count = len(disks)
1390 for idx, (size, mode) in enumerate(disks):
1391 env["INSTANCE_DISK%d_SIZE" % idx] = size
1392 env["INSTANCE_DISK%d_MODE" % idx] = mode
1396 env["INSTANCE_DISK_COUNT"] = disk_count
1401 env["INSTANCE_TAGS"] = " ".join(tags)
1403 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1404 for key, value in source.items():
1405 env["INSTANCE_%s_%s" % (kind, key)] = value
1410 def _NICListToTuple(lu, nics):
1411 """Build a list of nic information tuples.
1413 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1414 value in LUInstanceQueryData.
1416 @type lu: L{LogicalUnit}
1417 @param lu: the logical unit on whose behalf we execute
1418 @type nics: list of L{objects.NIC}
1419 @param nics: list of nics to convert to hooks tuples
1423 cluster = lu.cfg.GetClusterInfo()
1427 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1428 mode = filled_params[constants.NIC_MODE]
1429 link = filled_params[constants.NIC_LINK]
1430 hooks_nics.append((ip, mac, mode, link))
1434 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1435 """Builds instance related env variables for hooks from an object.
1437 @type lu: L{LogicalUnit}
1438 @param lu: the logical unit on whose behalf we execute
1439 @type instance: L{objects.Instance}
1440 @param instance: the instance for which we should build the
1442 @type override: dict
1443 @param override: dictionary with key/values that will override
1446 @return: the hook environment dictionary
1449 cluster = lu.cfg.GetClusterInfo()
1450 bep = cluster.FillBE(instance)
1451 hvp = cluster.FillHV(instance)
1453 "name": instance.name,
1454 "primary_node": instance.primary_node,
1455 "secondary_nodes": instance.secondary_nodes,
1456 "os_type": instance.os,
1457 "status": instance.admin_state,
1458 "maxmem": bep[constants.BE_MAXMEM],
1459 "minmem": bep[constants.BE_MINMEM],
1460 "vcpus": bep[constants.BE_VCPUS],
1461 "nics": _NICListToTuple(lu, instance.nics),
1462 "disk_template": instance.disk_template,
1463 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1466 "hypervisor_name": instance.hypervisor,
1467 "tags": instance.tags,
1470 args.update(override)
1471 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
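# Typical use (sketch): an instance LU's BuildHooksEnv usually starts from the
# object-based helper and then adds LU-specific keys, roughly:
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance)
#     env["NEW_SECONDARY"] = self.op.remote_node  # illustrative extra key
#     return env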
1474 def _AdjustCandidatePool(lu, exceptions):
1475 """Adjust the candidate pool after node operations.
1478 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1480 lu.LogInfo("Promoted nodes to master candidate role: %s",
1481 utils.CommaJoin(node.name for node in mod_list))
1482 for name in mod_list:
1483 lu.context.ReaddNode(name)
1484 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1486 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1490 def _DecideSelfPromotion(lu, exceptions=None):
1491 """Decide whether I should promote myself as a master candidate.
1494 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1495 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1496 # the new node will increase mc_max by one, so:
1497 mc_should = min(mc_should + 1, cp_size)
1498 return mc_now < mc_should
1501 def _CalculateGroupIPolicy(cluster, group):
1502 """Calculate instance policy for group.
1505 return cluster.SimpleFillIPolicy(group.ipolicy)
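# Combined usage sketch (group_info and instance being already looked-up
# objects): policy checks elsewhere in this module typically chain the helpers
# like this:
#
#   ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), group_info)
#   res = _ComputeIPolicyInstanceViolation(ipolicy, instance)
#   if res:
#     raise errors.OpPrereqError("Instance violates group policy: %s" %
#                                utils.CommaJoin(res), errors.ECODE_INVAL)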
1508 def _ComputeViolatingInstances(ipolicy, instances):
1509 """Computes a set of instances who violates given ipolicy.
1511 @param ipolicy: The ipolicy to verify
1512 @type instances: iterable of L{objects.Instance}
1513 @param instances: List of instances to verify
1514 @return: A frozenset of instance names violating the ipolicy
1517 return frozenset([inst.name for inst in instances
1518 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1521 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1522 """Check that the brigdes needed by a list of nics exist.
1525 cluster = lu.cfg.GetClusterInfo()
1526 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1527 brlist = [params[constants.NIC_LINK] for params in paramslist
1528 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1530 result = lu.rpc.call_bridges_exist(target_node, brlist)
1531 result.Raise("Error checking bridges on destination node '%s'" %
1532 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1535 def _CheckInstanceBridgesExist(lu, instance, node=None):
1536 """Check that the brigdes needed by an instance exist.
1540 node = instance.primary_node
1541 _CheckNicsBridgesExist(lu, instance.nics, node)
1544 def _CheckOSVariant(os_obj, name):
1545 """Check whether an OS name conforms to the os variants specification.
1547 @type os_obj: L{objects.OS}
1548 @param os_obj: OS object to check
1550 @param name: OS name passed by the user, to check for validity
1553 variant = objects.OS.GetVariant(name)
1554 if not os_obj.supported_variants:
1556 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1557 " passed)" % (os_obj.name, variant),
1561 raise errors.OpPrereqError("OS name must include a variant",
1564 if variant not in os_obj.supported_variants:
1565 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
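# Example (sketch): variant names are appended to the OS name, e.g. for an OS
# advertising supported_variants ["default", "minimal"], a name such as
# "debootstrap+minimal" passes, while plain "debootstrap" raises
# "OS name must include a variant".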
1568 def _GetNodeInstancesInner(cfg, fn):
1569 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1572 def _GetNodeInstances(cfg, node_name):
1573 """Returns a list of all primary and secondary instances on a node.
1577 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1580 def _GetNodePrimaryInstances(cfg, node_name):
1581 """Returns primary instances on a node.
1584 return _GetNodeInstancesInner(cfg,
1585 lambda inst: node_name == inst.primary_node)
1588 def _GetNodeSecondaryInstances(cfg, node_name):
1589 """Returns secondary instances on a node.
1592 return _GetNodeInstancesInner(cfg,
1593 lambda inst: node_name in inst.secondary_nodes)
1596 def _GetStorageTypeArgs(cfg, storage_type):
1597 """Returns the arguments for a storage type.
1600 # Special case for file storage
1601 if storage_type == constants.ST_FILE:
1602 # storage.FileStorage wants a list of storage directories
1603 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1608 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1611 for dev in instance.disks:
1612 cfg.SetDiskID(dev, node_name)
1614 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1616 result.Raise("Failed to get disk status from node %s" % node_name,
1617 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1619 for idx, bdev_status in enumerate(result.payload):
1620 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1626 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1627 """Check the sanity of iallocator and node arguments and use the
1628 cluster-wide iallocator if appropriate.
1630 Check that at most one of (iallocator, node) is specified. If none is
1631 specified, then the LU's opcode's iallocator slot is filled with the
1632 cluster-wide default iallocator.
1634 @type iallocator_slot: string
1635 @param iallocator_slot: the name of the opcode iallocator slot
1636 @type node_slot: string
1637 @param node_slot: the name of the opcode target node slot
1640 node = getattr(lu.op, node_slot, None)
1641 iallocator = getattr(lu.op, iallocator_slot, None)
1643 if node is not None and iallocator is not None:
1644 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1646 elif node is None and iallocator is None:
1647 default_iallocator = lu.cfg.GetDefaultIAllocator()
1648 if default_iallocator:
1649 setattr(lu.op, iallocator_slot, default_iallocator)
1651 raise errors.OpPrereqError("No iallocator or node given and no"
1652 " cluster-wide default iallocator found;"
1653 " please specify either an iallocator or a"
1654 " node, or set a cluster-wide default"
1658 def _GetDefaultIAllocator(cfg, iallocator):
1659 """Decides on which iallocator to use.
1661 @type cfg: L{config.ConfigWriter}
1662 @param cfg: Cluster configuration object
1663 @type iallocator: string or None
1664 @param iallocator: Iallocator specified in opcode
1666 @return: Iallocator name
1670 # Use default iallocator
1671 iallocator = cfg.GetDefaultIAllocator()
1674 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1675 " opcode nor as a cluster-wide default",
1681 class LUClusterPostInit(LogicalUnit):
1682 """Logical unit for running hooks after cluster initialization.
1685 HPATH = "cluster-init"
1686 HTYPE = constants.HTYPE_CLUSTER
1688 def BuildHooksEnv(self):
1693 "OP_TARGET": self.cfg.GetClusterName(),
1696 def BuildHooksNodes(self):
1697 """Build hooks nodes.
1700 return ([], [self.cfg.GetMasterNode()])
1702 def Exec(self, feedback_fn):
1709 class LUClusterDestroy(LogicalUnit):
1710 """Logical unit for destroying the cluster.
1713 HPATH = "cluster-destroy"
1714 HTYPE = constants.HTYPE_CLUSTER
1716 def BuildHooksEnv(self):
1721 "OP_TARGET": self.cfg.GetClusterName(),
1724 def BuildHooksNodes(self):
1725 """Build hooks nodes.
1730 def CheckPrereq(self):
1731 """Check prerequisites.
1733 This checks whether the cluster is empty.
1735 Any errors are signaled by raising errors.OpPrereqError.
1738 master = self.cfg.GetMasterNode()
1740 nodelist = self.cfg.GetNodeList()
1741 if len(nodelist) != 1 or nodelist[0] != master:
1742 raise errors.OpPrereqError("There are still %d node(s) in"
1743 " this cluster." % (len(nodelist) - 1),
1745 instancelist = self.cfg.GetInstanceList()
1747 raise errors.OpPrereqError("There are still %d instance(s) in"
1748 " this cluster." % len(instancelist),
1751 def Exec(self, feedback_fn):
1752 """Destroys the cluster.
1755 master_params = self.cfg.GetMasterNetworkParameters()
1757 # Run post hooks on master node before it's removed
1758 _RunPostHook(self, master_params.name)
1760 ems = self.cfg.GetUseExternalMipScript()
1761 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1764 self.LogWarning("Error disabling the master IP address: %s",
1767 return master_params.name
1770 def _VerifyCertificate(filename):
1771 """Verifies a certificate for L{LUClusterVerifyConfig}.
1773 @type filename: string
1774 @param filename: Path to PEM file
1778 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1779 utils.ReadFile(filename))
1780 except Exception, err: # pylint: disable=W0703
1781 return (LUClusterVerifyConfig.ETYPE_ERROR,
1782 "Failed to load X509 certificate %s: %s" % (filename, err))
1785 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1786 constants.SSL_CERT_EXPIRATION_ERROR)
1789 fnamemsg = "While verifying %s: %s" % (filename, msg)
1794 return (None, fnamemsg)
1795 elif errcode == utils.CERT_WARNING:
1796 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1797 elif errcode == utils.CERT_ERROR:
1798 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1800 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1803 def _GetAllHypervisorParameters(cluster, instances):
1804 """Compute the set of all hypervisor parameters.
1806 @type cluster: L{objects.Cluster}
1807 @param cluster: the cluster object
1808 @type instances: list of L{objects.Instance}
1809 @param instances: additional instances from which to obtain parameters
1810 @rtype: list of (origin, hypervisor, parameters)
1811 @return: a list with all parameters found, indicating the hypervisor they
1812 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1817 for hv_name in cluster.enabled_hypervisors:
1818 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1820 for os_name, os_hvp in cluster.os_hvp.items():
1821 for hv_name, hv_params in os_hvp.items():
1823 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1824 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1826 # TODO: collapse identical parameter values into a single one
1827 for instance in instances:
1828 if instance.hvparams:
1829 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1830 cluster.FillHV(instance)))
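# Shape of the result (sketch, hypothetical names): a flat list of triples,
# e.g.
#   [("cluster", "kvm", {...}),
#    ("os debian-image", "kvm", {...}),
#    ("instance web1.example.com", "kvm", {...})]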
1835 class _VerifyErrors(object):
1836 """Mix-in for cluster/group verify LUs.
1838 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1839 self.op and self._feedback_fn to be available.)
1843 ETYPE_FIELD = "code"
1844 ETYPE_ERROR = "ERROR"
1845 ETYPE_WARNING = "WARNING"
1847 def _Error(self, ecode, item, msg, *args, **kwargs):
1848 """Format an error message.
1850 Based on the opcode's error_codes parameter, either format a
1851 parseable error code, or a simpler error string.
1853 This must be called only from Exec and functions called from Exec.
1856 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1857 itype, etxt, _ = ecode
1858 # first complete the msg
1861 # then format the whole message
1862 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1863 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1869 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1870 # and finally report it via the feedback_fn
1871 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1873 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1874 """Log an error message if the passed condition is True.
1878 or self.op.debug_simulate_errors) # pylint: disable=E1101
1880 # If the error code is in the list of ignored errors, demote the error to a
1882 (_, etxt, _) = ecode
1883 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1884 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1887 self._Error(ecode, *args, **kwargs)
1889 # do not mark the operation as failed for WARN cases only
1890 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1891 self.bad = self.bad or cond
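# Usage sketch: the verify LUs below typically alias this method locally,
#   _ErrorIf = self._ErrorIf  # pylint: disable=C0103
# and then report findings, e.g.
#   _ErrorIf(bool(problems), constants.CV_ECLUSTERCFG, None,
#            "configuration problems found: %s", utils.CommaJoin(problems))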
1894 class LUClusterVerify(NoHooksLU):
1895 """Submits all jobs necessary to verify the cluster.
1900 def ExpandNames(self):
1901 self.needed_locks = {}
1903 def Exec(self, feedback_fn):
1906 if self.op.group_name:
1907 groups = [self.op.group_name]
1908 depends_fn = lambda: None
1910 groups = self.cfg.GetNodeGroupList()
1912 # Verify global configuration
1914 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1917 # Always depend on global verification
1918 depends_fn = lambda: [(-len(jobs), [])]
1920 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1921 ignore_errors=self.op.ignore_errors,
1922 depends=depends_fn())]
1923 for group in groups)
1925 # Fix up all parameters
1926 for op in itertools.chain(*jobs): # pylint: disable=W0142
1927 op.debug_simulate_errors = self.op.debug_simulate_errors
1928 op.verbose = self.op.verbose
1929 op.error_codes = self.op.error_codes
1931 op.skip_checks = self.op.skip_checks
1932 except AttributeError:
1933 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1935 return ResultWithJobs(jobs)
1938 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1939 """Verifies the cluster config.
1944 def _VerifyHVP(self, hvp_data):
1945 """Verifies locally the syntax of the hypervisor parameters.
1948 for item, hv_name, hv_params in hvp_data:
1949 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1952 hv_class = hypervisor.GetHypervisor(hv_name)
1953 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1954 hv_class.CheckParameterSyntax(hv_params)
1955 except errors.GenericError, err:
1956 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1958 def ExpandNames(self):
1959 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1960 self.share_locks = _ShareAll()
1962 def CheckPrereq(self):
1963 """Check prerequisites.
1966 # Retrieve all information
1967 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1968 self.all_node_info = self.cfg.GetAllNodesInfo()
1969 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1971 def Exec(self, feedback_fn):
1972 """Verify integrity of cluster, performing various test on nodes.
1976 self._feedback_fn = feedback_fn
1978 feedback_fn("* Verifying cluster config")
1980 for msg in self.cfg.VerifyConfig():
1981 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1983 feedback_fn("* Verifying cluster certificate files")
1985 for cert_filename in constants.ALL_CERT_FILES:
1986 (errcode, msg) = _VerifyCertificate(cert_filename)
1987 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1989 feedback_fn("* Verifying hypervisor parameters")
1991 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1992 self.all_inst_info.values()))
1994 feedback_fn("* Verifying all nodes belong to an existing group")
1996 # We do this verification here because, should this bogus circumstance
1997 # occur, it would never be caught by VerifyGroup, which only acts on
1998 # nodes/instances reachable from existing node groups.
2000 dangling_nodes = set(node.name for node in self.all_node_info.values()
2001 if node.group not in self.all_group_info)
2003 dangling_instances = {}
2004 no_node_instances = []
2006 for inst in self.all_inst_info.values():
2007 if inst.primary_node in dangling_nodes:
2008 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2009 elif inst.primary_node not in self.all_node_info:
2010 no_node_instances.append(inst.name)
2015 utils.CommaJoin(dangling_instances.get(node.name,
2017 for node in dangling_nodes]
2019 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2021 "the following nodes (and their instances) belong to a non"
2022 " existing group: %s", utils.CommaJoin(pretty_dangling))
2024 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2026 "the following instances have a non-existing primary-node:"
2027 " %s", utils.CommaJoin(no_node_instances))
2032 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2033 """Verifies the status of a node group.
2036 HPATH = "cluster-verify"
2037 HTYPE = constants.HTYPE_CLUSTER
2040 _HOOKS_INDENT_RE = re.compile("^", re.M)
2042 class NodeImage(object):
2043 """A class representing the logical and physical status of a node.
2046 @ivar name: the node name to which this object refers
2047 @ivar volumes: a structure as returned from
2048 L{ganeti.backend.GetVolumeList} (runtime)
2049 @ivar instances: a list of running instances (runtime)
2050 @ivar pinst: list of configured primary instances (config)
2051 @ivar sinst: list of configured secondary instances (config)
2052 @ivar sbp: dictionary of {primary-node: list of instances} for all
2053 instances for which this node is secondary (config)
2054 @ivar mfree: free memory, as reported by hypervisor (runtime)
2055 @ivar dfree: free disk, as reported by the node (runtime)
2056 @ivar offline: the offline status (config)
2057 @type rpc_fail: boolean
2058 @ivar rpc_fail: whether the overall RPC verify call failed (not whether
2059 the individual keys were correct) (runtime)
2060 @type lvm_fail: boolean
2061 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2062 @type hyp_fail: boolean
2063 @ivar hyp_fail: whether the RPC call didn't return the instance list
2064 @type ghost: boolean
2065 @ivar ghost: whether this node is unknown to the configuration (config)
2066 @type os_fail: boolean
2067 @ivar os_fail: whether the RPC call didn't return valid OS data
2069 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2070 @type vm_capable: boolean
2071 @ivar vm_capable: whether the node can host instances
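Note (descriptive): the rpc_fail/lvm_fail/hyp_fail/os_fail flags start out
False in __init__ below and are only switched to True by the group
verification code when the corresponding RPC data is missing or invalid.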
2074 def __init__(self, offline=False, name=None, vm_capable=True):
2083 self.offline = offline
2084 self.vm_capable = vm_capable
2085 self.rpc_fail = False
2086 self.lvm_fail = False
2087 self.hyp_fail = False
2089 self.os_fail = False
2092 def ExpandNames(self):
2093 # This raises errors.OpPrereqError on its own:
2094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2096 # Get instances in node group; this is unsafe and needs verification later
2098 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2100 self.needed_locks = {
2101 locking.LEVEL_INSTANCE: inst_names,
2102 locking.LEVEL_NODEGROUP: [self.group_uuid],
2103 locking.LEVEL_NODE: [],
2106 self.share_locks = _ShareAll()
2108 def DeclareLocks(self, level):
2109 if level == locking.LEVEL_NODE:
2110 # Get members of node group; this is unsafe and needs verification later
2111 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2113 all_inst_info = self.cfg.GetAllInstancesInfo()
2115 # In Exec(), we warn about mirrored instances that have primary and
2116 # secondary living in separate node groups. To fully verify that
2117 # volumes for these instances are healthy, we will need to do an
2118 # extra call to their secondaries. We ensure here those nodes will
2120 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2121 # Important: access only the instances whose lock is owned
2122 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2123 nodes.update(all_inst_info[inst].secondary_nodes)
2125 self.needed_locks[locking.LEVEL_NODE] = nodes
2127 def CheckPrereq(self):
2128 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2129 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2131 group_nodes = set(self.group_info.members)
2133 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2136 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2138 unlocked_instances = \
2139 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2142 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2143 utils.CommaJoin(unlocked_nodes),
2146 if unlocked_instances:
2147 raise errors.OpPrereqError("Missing lock for instances: %s" %
2148 utils.CommaJoin(unlocked_instances),
2151 self.all_node_info = self.cfg.GetAllNodesInfo()
2152 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2154 self.my_node_names = utils.NiceSort(group_nodes)
2155 self.my_inst_names = utils.NiceSort(group_instances)
2157 self.my_node_info = dict((name, self.all_node_info[name])
2158 for name in self.my_node_names)
2160 self.my_inst_info = dict((name, self.all_inst_info[name])
2161 for name in self.my_inst_names)
2163 # We detect here the nodes that will need the extra RPC calls for verifying
2164 # split LV volumes; they should be locked.
2165 extra_lv_nodes = set()
2167 for inst in self.my_inst_info.values():
2168 if inst.disk_template in constants.DTS_INT_MIRROR:
2169 for nname in inst.all_nodes:
2170 if self.all_node_info[nname].group != self.group_uuid:
2171 extra_lv_nodes.add(nname)
2173 unlocked_lv_nodes = \
2174 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2176 if unlocked_lv_nodes:
2177 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2178 utils.CommaJoin(unlocked_lv_nodes),
2180 self.extra_lv_nodes = list(extra_lv_nodes)
2182 def _VerifyNode(self, ninfo, nresult):
2183 """Perform some basic validation on data returned from a node.
2185 - check the result data structure is well formed and has all the
2187 - check ganeti version
2189 @type ninfo: L{objects.Node}
2190 @param ninfo: the node to check
2191 @param nresult: the results from the node
2193 @return: whether overall this call was successful (and we can expect
2194 reasonable values in the response)
2198 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2200 # main result, nresult should be a non-empty dict
2201 test = not nresult or not isinstance(nresult, dict)
2202 _ErrorIf(test, constants.CV_ENODERPC, node,
2203 "unable to verify node: no data returned")
2207 # compares ganeti version
2208 local_version = constants.PROTOCOL_VERSION
2209 remote_version = nresult.get("version", None)
2210 test = not (remote_version and
2211 isinstance(remote_version, (list, tuple)) and
2212 len(remote_version) == 2)
2213 _ErrorIf(test, constants.CV_ENODERPC, node,
2214 "connection to node returned invalid data")
2218 test = local_version != remote_version[0]
2219 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2220 "incompatible protocol versions: master %s,"
2221 " node %s", local_version, remote_version[0])
2225 # node seems compatible, we can actually try to look into its results
2227 # full package version
2228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2229 constants.CV_ENODEVERSION, node,
2230 "software version mismatch: master %s, node %s",
2231 constants.RELEASE_VERSION, remote_version[1],
2232 code=self.ETYPE_WARNING)
2234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2235 if ninfo.vm_capable and isinstance(hyp_result, dict):
2236 for hv_name, hv_result in hyp_result.iteritems():
2237 test = hv_result is not None
2238 _ErrorIf(test, constants.CV_ENODEHV, node,
2239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2241 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2242 if ninfo.vm_capable and isinstance(hvp_result, list):
2243 for item, hv_name, hv_result in hvp_result:
2244 _ErrorIf(True, constants.CV_ENODEHV, node,
2245 "hypervisor %s parameter verify failure (source %s): %s",
2246 hv_name, item, hv_result)
2248 test = nresult.get(constants.NV_NODESETUP,
2249 ["Missing NODESETUP results"])
2250 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2255 def _VerifyNodeTime(self, ninfo, nresult,
2256 nvinfo_starttime, nvinfo_endtime):
2257 """Check the node time.
2259 @type ninfo: L{objects.Node}
2260 @param ninfo: the node to check
2261 @param nresult: the remote results for the node
2262 @param nvinfo_starttime: the start time of the RPC call
2263 @param nvinfo_endtime: the end time of the RPC call
2267 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2269 ntime = nresult.get(constants.NV_TIME, None)
2271 ntime_merged = utils.MergeTime(ntime)
2272 except (ValueError, TypeError):
2273 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2276 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2277 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2278 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2279 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2283 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2284 "Node time diverges by at least %s from master node time",
2287 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2288 """Check the node LVM results.
2290 @type ninfo: L{objects.Node}
2291 @param ninfo: the node to check
2292 @param nresult: the remote results for the node
2293 @param vg_name: the configured VG name
2300 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2302 # checks vg existence and size > 20G
2303 vglist = nresult.get(constants.NV_VGLIST, None)
2305 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2307 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2308 constants.MIN_VG_SIZE)
2309 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2312 pvlist = nresult.get(constants.NV_PVLIST, None)
2313 test = pvlist is None
2314 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2316 # check that ':' is not present in PV names, since it's a
2317 # special character for lvcreate (denotes the range of PEs to
2319 for _, pvname, owner_vg in pvlist:
2320 test = ":" in pvname
2321 _ErrorIf(test, constants.CV_ENODELVM, node,
2322 "Invalid character ':' in PV '%s' of VG '%s'",
2325 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2326 """Check the node bridges.
2328 @type ninfo: L{objects.Node}
2329 @param ninfo: the node to check
2330 @param nresult: the remote results for the node
2331 @param bridges: the expected list of bridges
2338 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 missing = nresult.get(constants.NV_BRIDGES, None)
2341 test = not isinstance(missing, list)
2342 _ErrorIf(test, constants.CV_ENODENET, node,
2343 "did not return valid bridge information")
2345 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2346 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2348 def _VerifyNodeUserScripts(self, ninfo, nresult):
2349 """Check the results of user scripts presence and executability on the node
2351 @type ninfo: L{objects.Node}
2352 @param ninfo: the node to check
2353 @param nresult: the remote results for the node
2358 test = constants.NV_USERSCRIPTS not in nresult
2359 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2360 "did not return user scripts information")
2362 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2364 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2365 "user scripts not present or not executable: %s" %
2366 utils.CommaJoin(sorted(broken_scripts)))
2368 def _VerifyNodeNetwork(self, ninfo, nresult):
2369 """Check the node network connectivity results.
2371 @type ninfo: L{objects.Node}
2372 @param ninfo: the node to check
2373 @param nresult: the remote results for the node
2377 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2379 test = constants.NV_NODELIST not in nresult
2380 _ErrorIf(test, constants.CV_ENODESSH, node,
2381 "node hasn't returned node ssh connectivity data")
2383 if nresult[constants.NV_NODELIST]:
2384 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2385 _ErrorIf(True, constants.CV_ENODESSH, node,
2386 "ssh communication with node '%s': %s", a_node, a_msg)
2388 test = constants.NV_NODENETTEST not in nresult
2389 _ErrorIf(test, constants.CV_ENODENET, node,
2390 "node hasn't returned node tcp connectivity data")
2392 if nresult[constants.NV_NODENETTEST]:
2393 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2395 _ErrorIf(True, constants.CV_ENODENET, node,
2396 "tcp communication with node '%s': %s",
2397 anode, nresult[constants.NV_NODENETTEST][anode])
2399 test = constants.NV_MASTERIP not in nresult
2400 _ErrorIf(test, constants.CV_ENODENET, node,
2401 "node hasn't returned node master IP reachability data")
2403 if not nresult[constants.NV_MASTERIP]:
2404 if node == self.master_node:
2405 msg = "the master node cannot reach the master IP (not configured?)"
2407 msg = "cannot reach the master IP"
2408 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2410 def _VerifyInstance(self, instance, instanceconfig, node_image,
2412 """Verify an instance.
2414 This function checks to see if the required block devices are
2415 available on the instance's node.
2418 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2419 node_current = instanceconfig.primary_node
2421 node_vol_should = {}
2422 instanceconfig.MapLVsByNode(node_vol_should)
2424 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2425 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2426 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2428 for node in node_vol_should:
2429 n_img = node_image[node]
2430 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2431 # ignore missing volumes on offline or broken nodes
2433 for volume in node_vol_should[node]:
2434 test = volume not in n_img.volumes
2435 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2436 "volume %s missing on node %s", volume, node)
2438 if instanceconfig.admin_state == constants.ADMINST_UP:
2439 pri_img = node_image[node_current]
2440 test = instance not in pri_img.instances and not pri_img.offline
2441 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2442 "instance not running on its primary node %s",
2445 diskdata = [(nname, success, status, idx)
2446 for (nname, disks) in diskstatus.items()
2447 for idx, (success, status) in enumerate(disks)]
2449 for nname, success, bdev_status, idx in diskdata:
2450 # the 'ghost node' construction in Exec() ensures that we have a
2452 snode = node_image[nname]
2453 bad_snode = snode.ghost or snode.offline
2454 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2455 not success and not bad_snode,
2456 constants.CV_EINSTANCEFAULTYDISK, instance,
2457 "couldn't retrieve status for disk/%s on %s: %s",
2458 idx, nname, bdev_status)
2459 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2460 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2461 constants.CV_EINSTANCEFAULTYDISK, instance,
2462 "disk/%s on %s is faulty", idx, nname)
2464 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2465 """Verify if there are any unknown volumes in the cluster.
2467 The .os, .swap and backup volumes are ignored. All other volumes are
2468 reported as unknown.
2470 @type reserved: L{ganeti.utils.FieldSet}
2471 @param reserved: a FieldSet of reserved volume names
2474 for node, n_img in node_image.items():
2475 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2476 self.all_node_info[node].group != self.group_uuid):
2477 # skip non-healthy nodes
2479 for volume in n_img.volumes:
2480 test = ((node not in node_vol_should or
2481 volume not in node_vol_should[node]) and
2482 not reserved.Matches(volume))
2483 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2484 "volume %s is unknown", volume)
2486 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2487 """Verify N+1 Memory Resilience.
2489 Check that if one single node dies we can still start all the
2490 instances it was primary for.
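Illustrative example (assumed numbers): if this node is secondary for two
auto-balanced instances whose primary is node A and whose minimum memory is
2048 and 4096 MiB, then this node must report at least 6144 MiB of free
memory, otherwise a CV_ENODEN1 error is raised for it.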
2493 cluster_info = self.cfg.GetClusterInfo()
2494 for node, n_img in node_image.items():
2495 # This code checks that every node which is now listed as
2496 # secondary has enough memory to host all the instances it would
2497 # have to take over, should a single other node in the cluster fail.
2498 # FIXME: not ready for failover to an arbitrary node
2499 # FIXME: does not support file-backed instances
2500 # WARNING: we currently take into account down instances as well
2501 # as up ones, considering that even if they're down someone
2502 # might want to start them even in the event of a node failure.
2503 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2504 # we're skipping nodes marked offline and nodes in other groups from
2505 # the N+1 warning, since most likely we don't have good memory
2506 # information from them; we already list instances living on such
2507 # nodes, and that's enough warning
2509 #TODO(dynmem): also consider ballooning out other instances
2510 for prinode, instances in n_img.sbp.items():
2512 for instance in instances:
2513 bep = cluster_info.FillBE(instance_cfg[instance])
2514 if bep[constants.BE_AUTO_BALANCE]:
2515 needed_mem += bep[constants.BE_MINMEM]
2516 test = n_img.mfree < needed_mem
2517 self._ErrorIf(test, constants.CV_ENODEN1, node,
2518 "not enough memory to accomodate instance failovers"
2519 " should node %s fail (%dMiB needed, %dMiB available)",
2520 prinode, needed_mem, n_img.mfree)
2523 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2524 (files_all, files_opt, files_mc, files_vm)):
2525 """Verifies file checksums collected from all nodes.
2527 @param errorif: Callback for reporting errors
2528 @param nodeinfo: List of L{objects.Node} objects
2529 @param master_node: Name of master node
2530 @param all_nvinfo: RPC results
2533 # Define functions determining which nodes to consider for a file
2536 (files_mc, lambda node: (node.master_candidate or
2537 node.name == master_node)),
2538 (files_vm, lambda node: node.vm_capable),
2541 # Build mapping from filename to list of nodes which should have the file
2543 for (files, fn) in files2nodefn:
2545 filenodes = nodeinfo
2547 filenodes = filter(fn, nodeinfo)
2548 nodefiles.update((filename,
2549 frozenset(map(operator.attrgetter("name"), filenodes)))
2550 for filename in files)
2552 assert set(nodefiles) == (files_all | files_mc | files_vm)
2554 fileinfo = dict((filename, {}) for filename in nodefiles)
2555 ignore_nodes = set()
2557 for node in nodeinfo:
2559 ignore_nodes.add(node.name)
2562 nresult = all_nvinfo[node.name]
2564 if nresult.fail_msg or not nresult.payload:
2567 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2569 test = not (node_files and isinstance(node_files, dict))
2570 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2571 "Node did not return file checksum data")
2573 ignore_nodes.add(node.name)
2576 # Build per-checksum mapping from filename to nodes having it
2577 for (filename, checksum) in node_files.items():
2578 assert filename in nodefiles
2579 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2581 for (filename, checksums) in fileinfo.items():
2582 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2584 # Nodes having the file
2585 with_file = frozenset(node_name
2586 for nodes in fileinfo[filename].values()
2587 for node_name in nodes) - ignore_nodes
2589 expected_nodes = nodefiles[filename] - ignore_nodes
2591 # Nodes missing file
2592 missing_file = expected_nodes - with_file
2594 if filename in files_opt:
2596 errorif(missing_file and missing_file != expected_nodes,
2597 constants.CV_ECLUSTERFILECHECK, None,
2598 "File %s is optional, but it must exist on all or no"
2599 " nodes (not found on %s)",
2600 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2602 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2603 "File %s is missing from node(s) %s", filename,
2604 utils.CommaJoin(utils.NiceSort(missing_file)))
2606 # Warn if a node has a file it shouldn't
2607 unexpected = with_file - expected_nodes
2609 constants.CV_ECLUSTERFILECHECK, None,
2610 "File %s should not exist on node(s) %s",
2611 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2613 # See if there are multiple versions of the file
2614 test = len(checksums) > 1
2616 variants = ["variant %s on %s" %
2617 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2618 for (idx, (checksum, nodes)) in
2619 enumerate(sorted(checksums.items()))]
2623 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s found with %s different checksums (%s)",
2625 filename, len(checksums), "; ".join(variants))
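# (explanatory, hypothetical file name) the resulting message reads e.g.:
# "File /etc/ganeti/foo found with 2 different checksums (variant 1 on
# nodeA; variant 2 on nodeB, nodeC)"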
2627 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2629 """Verifies and the node DRBD status.
2631 @type ninfo: L{objects.Node}
2632 @param ninfo: the node to check
2633 @param nresult: the remote results for the node
2634 @param instanceinfo: the dict of instances
2635 @param drbd_helper: the configured DRBD usermode helper
2636 @param drbd_map: the DRBD map as returned by
2637 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
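(judging from its use below, drbd_map maps node names to dictionaries of
{minor-number: instance-name})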
2641 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2645 test = (helper_result is None)
2646 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2647 "no drbd usermode helper returned")
2649 status, payload = helper_result
2651 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2652 "drbd usermode helper check unsuccessful: %s", payload)
2653 test = status and (payload != drbd_helper)
2654 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2655 "wrong drbd usermode helper: %s", payload)
2657 # compute the DRBD minors
2659 for minor, instance in drbd_map[node].items():
2660 test = instance not in instanceinfo
2661 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2662 "ghost instance '%s' in temporary DRBD map", instance)
2663 # ghost instance should not be running, but otherwise we
2664 # don't give double warnings (both ghost instance and
2665 # unallocated minor in use)
2667 node_drbd[minor] = (instance, False)
2669 instance = instanceinfo[instance]
2670 node_drbd[minor] = (instance.name,
2671 instance.admin_state == constants.ADMINST_UP)
2673 # and now check them
2674 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2675 test = not isinstance(used_minors, (tuple, list))
2676 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2677 "cannot parse drbd status file: %s", str(used_minors))
2679 # we cannot check drbd status
2682 for minor, (iname, must_exist) in node_drbd.items():
2683 test = minor not in used_minors and must_exist
2684 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2685 "drbd minor %d of instance %s is not active", minor, iname)
2686 for minor in used_minors:
2687 test = minor not in node_drbd
2688 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2689 "unallocated drbd minor %d is in use", minor)
2691 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2692 """Builds the node OS structures.
2694 @type ninfo: L{objects.Node}
2695 @param ninfo: the node to check
2696 @param nresult: the remote results for the node
2697 @param nimg: the node image object
2701 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2703 remote_os = nresult.get(constants.NV_OSLIST, None)
2704 test = (not isinstance(remote_os, list) or
2705 not compat.all(isinstance(v, list) and len(v) == 7
2706 for v in remote_os))
2708 _ErrorIf(test, constants.CV_ENODEOS, node,
2709 "node hasn't returned valid OS data")
2718 for (name, os_path, status, diagnose,
2719 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2721 if name not in os_dict:
2724 # parameters is a list of lists instead of list of tuples due to
2725 # JSON lacking a real tuple type, fix it:
2726 parameters = [tuple(v) for v in parameters]
2727 os_dict[name].append((os_path, status, diagnose,
2728 set(variants), set(parameters), set(api_ver)))
2730 nimg.oslist = os_dict
2732 def _VerifyNodeOS(self, ninfo, nimg, base):
2733 """Verifies the node OS list.
2735 @type ninfo: L{objects.Node}
2736 @param ninfo: the node to check
2737 @param nimg: the node image object
2738 @param base: the 'template' node we match against (e.g. from the master)
2742 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2744 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2746 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
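# (explanatory) e.g. beautify_params([("size", "10G")]) == ["size: 10G"]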
2747 for os_name, os_data in nimg.oslist.items():
2748 assert os_data, "Empty OS status for OS %s?!" % os_name
2749 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2750 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2751 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2752 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2753 "OS '%s' has multiple entries (first one shadows the rest): %s",
2754 os_name, utils.CommaJoin([v[0] for v in os_data]))
2755 # comparisons with the 'base' image
2756 test = os_name not in base.oslist
2757 _ErrorIf(test, constants.CV_ENODEOS, node,
2758 "Extra OS %s not present on reference node (%s)",
2762 assert base.oslist[os_name], "Base node has empty OS status?"
2763 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2765 # base OS is invalid, skipping
2767 for kind, a, b in [("API version", f_api, b_api),
2768 ("variants list", f_var, b_var),
2769 ("parameters", beautify_params(f_param),
2770 beautify_params(b_param))]:
2771 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2772 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2773 kind, os_name, base.name,
2774 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2776 # check any missing OSes
2777 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2778 _ErrorIf(missing, constants.CV_ENODEOS, node,
2779 "OSes present on reference node %s but missing on this node: %s",
2780 base.name, utils.CommaJoin(missing))
2782 def _VerifyOob(self, ninfo, nresult):
2783 """Verifies out of band functionality of a node.
2785 @type ninfo: L{objects.Node}
2786 @param ninfo: the node to check
2787 @param nresult: the remote results for the node
2791 # We just have to verify the paths on master and/or master candidates
2792 # as the oob helper is invoked on the master
2793 if ((ninfo.master_candidate or ninfo.master_capable) and
2794 constants.NV_OOB_PATHS in nresult):
2795 for path_result in nresult[constants.NV_OOB_PATHS]:
2796 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2798 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2799 """Verifies and updates the node volume data.
2801 This function will update a L{NodeImage}'s internal structures
2802 with data from the remote call.
2804 @type ninfo: L{objects.Node}
2805 @param ninfo: the node to check
2806 @param nresult: the remote results for the node
2807 @param nimg: the node image object
2808 @param vg_name: the configured VG name
2812 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2814 nimg.lvm_fail = True
2815 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2818 elif isinstance(lvdata, basestring):
2819 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2820 utils.SafeEncode(lvdata))
2821 elif not isinstance(lvdata, dict):
2822 _ErrorIf(True, constants.CV_ENODELVM, node,
2823 "rpc call to node failed (lvlist)")
2825 nimg.volumes = lvdata
2826 nimg.lvm_fail = False
2828 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2829 """Verifies and updates the node instance list.
2831 If the listing was successful, then updates this node's instance
2832 list. Otherwise, it marks the RPC call as failed for the instance
2835 @type ninfo: L{objects.Node}
2836 @param ninfo: the node to check
2837 @param nresult: the remote results for the node
2838 @param nimg: the node image object
2841 idata = nresult.get(constants.NV_INSTANCELIST, None)
2842 test = not isinstance(idata, list)
2843 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2844 "rpc call to node failed (instancelist): %s",
2845 utils.SafeEncode(str(idata)))
2847 nimg.hyp_fail = True
2849 nimg.instances = idata
2851 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2852 """Verifies and computes a node information map
2854 @type ninfo: L{objects.Node}
2855 @param ninfo: the node to check
2856 @param nresult: the remote results for the node
2857 @param nimg: the node image object
2858 @param vg_name: the configured VG name
2862 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2864 # try to read free memory (from the hypervisor)
2865 hv_info = nresult.get(constants.NV_HVINFO, None)
2866 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2867 _ErrorIf(test, constants.CV_ENODEHV, node,
2868 "rpc call to node failed (hvinfo)")
2871 nimg.mfree = int(hv_info["memory_free"])
2872 except (ValueError, TypeError):
2873 _ErrorIf(True, constants.CV_ENODERPC, node,
2874 "node returned invalid nodeinfo, check hypervisor")
2876 # FIXME: devise a free space model for file based instances as well
2877 if vg_name is not None:
2878 test = (constants.NV_VGLIST not in nresult or
2879 vg_name not in nresult[constants.NV_VGLIST])
2880 _ErrorIf(test, constants.CV_ENODELVM, node,
2881 "node didn't return data for the volume group '%s'"
2882 " - it is either missing or broken", vg_name)
2885 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2886 except (ValueError, TypeError):
2887 _ErrorIf(True, constants.CV_ENODERPC, node,
2888 "node returned invalid LVM info, check LVM status")
2890 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2891 """Gets per-disk status information for all instances.
2893 @type nodelist: list of strings
2894 @param nodelist: Node names
2895 @type node_image: dict of (name, C{NodeImage})
2896 @param node_image: NodeImage objects for the nodes
2897 @type instanceinfo: dict of (name, L{objects.Instance})
2898 @param instanceinfo: Instance objects
2899 @rtype: {instance: {node: [(success, payload)]}}
2900 @return: a dictionary of per-instance dictionaries with nodes as
2901 keys and disk information as values; the disk information is a
2902 list of tuples (success, payload)
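Illustrative example (hypothetical names): a returned value could look like
{"inst1": {"nodeA": [(True, <status0>), (True, <status1>)],
"nodeB": [(False, "node offline")]}}, with one (success, payload) pair per
disk and per node the instance uses.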
2905 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2908 node_disks_devonly = {}
2909 diskless_instances = set()
2910 diskless = constants.DT_DISKLESS
2912 for nname in nodelist:
2913 node_instances = list(itertools.chain(node_image[nname].pinst,
2914 node_image[nname].sinst))
2915 diskless_instances.update(inst for inst in node_instances
2916 if instanceinfo[inst].disk_template == diskless)
2917 disks = [(inst, disk)
2918 for inst in node_instances
2919 for disk in instanceinfo[inst].disks]
2922 # No need to collect data
2925 node_disks[nname] = disks
2927 # _AnnotateDiskParams makes already copies of the disks
2929 for (inst, dev) in disks:
2930 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2931 self.cfg.SetDiskID(anno_disk, nname)
2932 devonly.append(anno_disk)
2934 node_disks_devonly[nname] = devonly
2936 assert len(node_disks) == len(node_disks_devonly)
2938 # Collect data from all nodes with disks
2939 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2942 assert len(result) == len(node_disks)
2946 for (nname, nres) in result.items():
2947 disks = node_disks[nname]
2950 # No data from this node
2951 data = len(disks) * [(False, "node offline")]
2954 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2955 "while getting disk information: %s", msg)
2957 # No data from this node
2958 data = len(disks) * [(False, msg)]
2961 for idx, i in enumerate(nres.payload):
2962 if isinstance(i, (tuple, list)) and len(i) == 2:
2965 logging.warning("Invalid result from node %s, entry %d: %s",
2967 data.append((False, "Invalid result from the remote node"))
2969 for ((inst, _), status) in zip(disks, data):
2970 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2972 # Add empty entries for diskless instances.
2973 for inst in diskless_instances:
2974 assert inst not in instdisk
2977 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2978 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2979 compat.all(isinstance(s, (tuple, list)) and
2980 len(s) == 2 for s in statuses)
2981 for inst, nnames in instdisk.items()
2982 for nname, statuses in nnames.items())
2983 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2988 def _SshNodeSelector(group_uuid, all_nodes):
2989 """Create endless iterators for all potential SSH check hosts.
2992 nodes = [node for node in all_nodes
2993 if (node.group != group_uuid and
2995 keyfunc = operator.attrgetter("group")
2997 return map(itertools.cycle,
2998 [sorted(map(operator.attrgetter("name"), names))
2999 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3003 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3004 """Choose which nodes should talk to which other nodes.
3006 We will make nodes contact all nodes in their group, and one node from
3009 @warning: This algorithm has a known issue if one node group is much
3010 smaller than others (e.g. just one node). In such a case all other
3011 nodes will talk to the single node.
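Illustrative example (hypothetical groups): if this group is {a1, a2} and
there is one other group {b1, b2}, then a1 and a2 each check the nodes of
their own group plus one node drawn round-robin from {b1, b2}.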
3014 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3015 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3017 return (online_nodes,
3018 dict((name, sorted([i.next() for i in sel]))
3019 for name in online_nodes))
3021 def BuildHooksEnv(self):
3024 Cluster-Verify hooks run only in the post phase; when they fail, their
3025 output is logged in the verify output and the verification fails.
3029 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3032 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3033 for node in self.my_node_info.values())
3037 def BuildHooksNodes(self):
3038 """Build hooks nodes.
3041 return ([], self.my_node_names)
3043 def Exec(self, feedback_fn):
3044 """Verify integrity of the node group, performing various test on nodes.
3047 # This method has too many local variables. pylint: disable=R0914
3048 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3050 if not self.my_node_names:
3052 feedback_fn("* Empty node group, skipping verification")
3056 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3057 verbose = self.op.verbose
3058 self._feedback_fn = feedback_fn
3060 vg_name = self.cfg.GetVGName()
3061 drbd_helper = self.cfg.GetDRBDHelper()
3062 cluster = self.cfg.GetClusterInfo()
3063 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3064 hypervisors = cluster.enabled_hypervisors
3065 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3067 i_non_redundant = [] # Non redundant instances
3068 i_non_a_balanced = [] # Non auto-balanced instances
3069 i_offline = 0 # Count of offline instances
3070 n_offline = 0 # Count of offline nodes
3071 n_drained = 0 # Count of nodes being drained
3072 node_vol_should = {}
3074 # FIXME: verify OS list
3077 filemap = _ComputeAncillaryFiles(cluster, False)
3079 # do local checksums
3080 master_node = self.master_node = self.cfg.GetMasterNode()
3081 master_ip = self.cfg.GetMasterIP()
3083 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3086 if self.cfg.GetUseExternalMipScript():
3087 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3089 node_verify_param = {
3090 constants.NV_FILELIST:
3091 utils.UniqueSequence(filename
3092 for files in filemap
3093 for filename in files),
3094 constants.NV_NODELIST:
3095 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3096 self.all_node_info.values()),
3097 constants.NV_HYPERVISOR: hypervisors,
3098 constants.NV_HVPARAMS:
3099 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3100 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3101 for node in node_data_list
3102 if not node.offline],
3103 constants.NV_INSTANCELIST: hypervisors,
3104 constants.NV_VERSION: None,
3105 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3106 constants.NV_NODESETUP: None,
3107 constants.NV_TIME: None,
3108 constants.NV_MASTERIP: (master_node, master_ip),
3109 constants.NV_OSLIST: None,
3110 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3111 constants.NV_USERSCRIPTS: user_scripts,
3114 if vg_name is not None:
3115 node_verify_param[constants.NV_VGLIST] = None
3116 node_verify_param[constants.NV_LVLIST] = vg_name
3117 node_verify_param[constants.NV_PVLIST] = [vg_name]
3118 node_verify_param[constants.NV_DRBDLIST] = None
3121 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3124 # FIXME: this needs to be changed per node-group, not cluster-wide
3126 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3127 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3128 bridges.add(default_nicpp[constants.NIC_LINK])
3129 for instance in self.my_inst_info.values():
3130 for nic in instance.nics:
3131 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3132 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3133 bridges.add(full_nic[constants.NIC_LINK])
3136 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3138 # Build our expected cluster state
3139 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3141 vm_capable=node.vm_capable))
3142 for node in node_data_list)
3146 for node in self.all_node_info.values():
3147 path = _SupportsOob(self.cfg, node)
3148 if path and path not in oob_paths:
3149 oob_paths.append(path)
3152 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3154 for instance in self.my_inst_names:
3155 inst_config = self.my_inst_info[instance]
3156 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3159 for nname in inst_config.all_nodes:
3160 if nname not in node_image:
3161 gnode = self.NodeImage(name=nname)
3162 gnode.ghost = (nname not in self.all_node_info)
3163 node_image[nname] = gnode
3165 inst_config.MapLVsByNode(node_vol_should)
3167 pnode = inst_config.primary_node
3168 node_image[pnode].pinst.append(instance)
3170 for snode in inst_config.secondary_nodes:
3171 nimg = node_image[snode]
3172 nimg.sinst.append(instance)
3173 if pnode not in nimg.sbp:
3174 nimg.sbp[pnode] = []
3175 nimg.sbp[pnode].append(instance)
3177 # At this point, we have the in-memory data structures complete,
3178 # except for the runtime information, which we'll gather next
3180 # Due to the way our RPC system works, exact response times cannot be
3181 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3182 # time before and after executing the request, we can at least have a time
3184 nvinfo_starttime = time.time()
3185 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3187 self.cfg.GetClusterName())
3188 nvinfo_endtime = time.time()
3190 if self.extra_lv_nodes and vg_name is not None:
3192 self.rpc.call_node_verify(self.extra_lv_nodes,
3193 {constants.NV_LVLIST: vg_name},
3194 self.cfg.GetClusterName())
3196 extra_lv_nvinfo = {}
3198 all_drbd_map = self.cfg.ComputeDRBDMap()
3200 feedback_fn("* Gathering disk information (%s nodes)" %
3201 len(self.my_node_names))
3202 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3205 feedback_fn("* Verifying configuration file consistency")
3207 # If not all nodes are being checked, we need to make sure the master node
3208 # and a non-checked vm_capable node are in the list.
3209 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3211 vf_nvinfo = all_nvinfo.copy()
3212 vf_node_info = list(self.my_node_info.values())
3213 additional_nodes = []
3214 if master_node not in self.my_node_info:
3215 additional_nodes.append(master_node)
3216 vf_node_info.append(self.all_node_info[master_node])
3217 # Add the first vm_capable node we find which is not included,
3218 # excluding the master node (which we already have)
3219 for node in absent_nodes:
3220 nodeinfo = self.all_node_info[node]
3221 if (nodeinfo.vm_capable and not nodeinfo.offline and
3222 node != master_node):
3223 additional_nodes.append(node)
3224 vf_node_info.append(self.all_node_info[node])
3226 key = constants.NV_FILELIST
3227 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3228 {key: node_verify_param[key]},
3229 self.cfg.GetClusterName()))
3231 vf_nvinfo = all_nvinfo
3232 vf_node_info = self.my_node_info.values()
3234 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3236 feedback_fn("* Verifying node status")
3240 for node_i in node_data_list:
3242 nimg = node_image[node]
3246 feedback_fn("* Skipping offline node %s" % (node,))
3250 if node == master_node:
3252 elif node_i.master_candidate:
3253 ntype = "master candidate"
3254 elif node_i.drained:
3260 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3262 msg = all_nvinfo[node].fail_msg
3263 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3266 nimg.rpc_fail = True
3269 nresult = all_nvinfo[node].payload
3271 nimg.call_ok = self._VerifyNode(node_i, nresult)
3272 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3273 self._VerifyNodeNetwork(node_i, nresult)
3274 self._VerifyNodeUserScripts(node_i, nresult)
3275 self._VerifyOob(node_i, nresult)
3278 self._VerifyNodeLVM(node_i, nresult, vg_name)
3279 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3282 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3283 self._UpdateNodeInstances(node_i, nresult, nimg)
3284 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3285 self._UpdateNodeOS(node_i, nresult, nimg)
3287 if not nimg.os_fail:
3288 if refos_img is None:
3290 self._VerifyNodeOS(node_i, nimg, refos_img)
3291 self._VerifyNodeBridges(node_i, nresult, bridges)
3293 # Check whether all running instances are primary for the node. (This
3294 # can no longer be done from _VerifyInstance below, since some of the
3295 # wrong instances could be from other node groups.)
3296 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3298 for inst in non_primary_inst:
3299 test = inst in self.all_inst_info
3300 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3301 "instance should not run on node %s", node_i.name)
3302 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3303 "node is running unknown instance %s", inst)
3305 for node, result in extra_lv_nvinfo.items():
3306 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3307 node_image[node], vg_name)
3309 feedback_fn("* Verifying instance status")
3310 for instance in self.my_inst_names:
3312 feedback_fn("* Verifying instance %s" % instance)
3313 inst_config = self.my_inst_info[instance]
3314 self._VerifyInstance(instance, inst_config, node_image,
3316 inst_nodes_offline = []
3318 pnode = inst_config.primary_node
3319 pnode_img = node_image[pnode]
3320 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3321 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3322 " primary node failed", instance)
3324 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3326 constants.CV_EINSTANCEBADNODE, instance,
3327 "instance is marked as running and lives on offline node %s",
3328 inst_config.primary_node)
3330 # If the instance is non-redundant we cannot survive losing its primary
3331 # node, so we are not N+1 compliant. On the other hand we have no disk
3332 # templates with more than one secondary so that situation is not well
3334 # FIXME: does not support file-backed instances
3335 if not inst_config.secondary_nodes:
3336 i_non_redundant.append(instance)
3338 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3339 constants.CV_EINSTANCELAYOUT,
3340 instance, "instance has multiple secondary nodes: %s",
3341 utils.CommaJoin(inst_config.secondary_nodes),
3342 code=self.ETYPE_WARNING)
3344 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3345 pnode = inst_config.primary_node
3346 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3347 instance_groups = {}
3349 for node in instance_nodes:
3350 instance_groups.setdefault(self.all_node_info[node].group,
3354 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3355 # Sort so that we always list the primary node first.
3356 for group, nodes in sorted(instance_groups.items(),
3357 key=lambda (_, nodes): pnode in nodes,
3360 self._ErrorIf(len(instance_groups) > 1,
3361 constants.CV_EINSTANCESPLITGROUPS,
3362 instance, "instance has primary and secondary nodes in"
3363 " different groups: %s", utils.CommaJoin(pretty_list),
3364 code=self.ETYPE_WARNING)
3366 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3367 i_non_a_balanced.append(instance)
3369 for snode in inst_config.secondary_nodes:
3370 s_img = node_image[snode]
3371 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3372 snode, "instance %s, connection to secondary node failed",
3376 inst_nodes_offline.append(snode)
3378 # warn that the instance lives on offline nodes
3379 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3380 "instance has offline secondary node(s) %s",
3381 utils.CommaJoin(inst_nodes_offline))
3382 # ... or ghost/non-vm_capable nodes
3383 for node in inst_config.all_nodes:
3384 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3385 instance, "instance lives on ghost node %s", node)
3386 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3387 instance, "instance lives on non-vm_capable node %s", node)
3389 feedback_fn("* Verifying orphan volumes")
3390 reserved = utils.FieldSet(*cluster.reserved_lvs)
3392 # We will get spurious "unknown volume" warnings if any node of this group
3393 # is secondary for an instance whose primary is in another group. To avoid
3394 # them, we find these instances and add their volumes to node_vol_should.
3395 for inst in self.all_inst_info.values():
3396 for secondary in inst.secondary_nodes:
3397 if (secondary in self.my_node_info
3398 and inst.name not in self.my_inst_info):
3399 inst.MapLVsByNode(node_vol_should)
3402 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3404 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3405 feedback_fn("* Verifying N+1 Memory redundancy")
3406 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3408 feedback_fn("* Other Notes")
3410 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3411 % len(i_non_redundant))
3413 if i_non_a_balanced:
3414 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3415 % len(i_non_a_balanced))
3418 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3421 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3424 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3428 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3429 """Analyze the post-hooks' result
3431 This method analyzes the hook result, handles it, and sends some
3432 nicely-formatted feedback back to the user.
3434 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3435 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3436 @param hooks_results: the results of the multi-node hooks rpc call
3437 @param feedback_fn: function used to send feedback back to the caller
3438 @param lu_result: previous Exec result
3439 @return: the new Exec result, based on the previous result
3443 # We only really run POST phase hooks, only for non-empty groups,
3444 # and are only interested in their results
3445 if not self.my_node_names:
3448 elif phase == constants.HOOKS_PHASE_POST:
3449 # Used to change hooks' output to proper indentation
3450 feedback_fn("* Hooks Results")
3451 assert hooks_results, "invalid result from hooks"
3453 for node_name in hooks_results:
3454 res = hooks_results[node_name]
3456 test = msg and not res.offline
3457 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3458 "Communication failure in hooks execution: %s", msg)
3459 if res.offline or msg:
3460 # No need to investigate payload if node is offline or gave
3463 for script, hkr, output in res.payload:
3464 test = hkr == constants.HKR_FAIL
3465 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3466 "Script %s failed, output:", script)
3468 output = self._HOOKS_INDENT_RE.sub(" ", output)
3469 feedback_fn("%s" % output)
3475 class LUClusterVerifyDisks(NoHooksLU):
3476 """Verifies the cluster disks status.
3481 def ExpandNames(self):
3482 self.share_locks = _ShareAll()
3483 self.needed_locks = {
3484 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3487 def Exec(self, feedback_fn):
3488 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3490 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3491 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3492 for group in group_names])
3495 class LUGroupVerifyDisks(NoHooksLU):
3496 """Verifies the status of all disks in a node group.
3501 def ExpandNames(self):
3502 # Raises errors.OpPrereqError on its own if group can't be found
3503 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3505 self.share_locks = _ShareAll()
3506 self.needed_locks = {
3507 locking.LEVEL_INSTANCE: [],
3508 locking.LEVEL_NODEGROUP: [],
3509 locking.LEVEL_NODE: [],
3512 def DeclareLocks(self, level):
3513 if level == locking.LEVEL_INSTANCE:
3514 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3516 # Lock instances optimistically, needs verification once node and group
3517 # locks have been acquired
3518 self.needed_locks[locking.LEVEL_INSTANCE] = \
3519 self.cfg.GetNodeGroupInstances(self.group_uuid)
3521 elif level == locking.LEVEL_NODEGROUP:
3522 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3524 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3525 set([self.group_uuid] +
3526 # Lock all groups used by instances optimistically; this requires
3527 # going via the node before it's locked, requiring verification
3530 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3531 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3533 elif level == locking.LEVEL_NODE:
3534 # This will only lock the nodes in the group to be verified which contain
3536 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3537 self._LockInstancesNodes()
3539 # Lock all nodes in group to be verified
3540 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3541 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3542 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3544 def CheckPrereq(self):
3545 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3546 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3547 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3549 assert self.group_uuid in owned_groups
3551 # Check if locked instances are still correct
3552 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3554 # Get instance information
3555 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3557 # Check if node groups for locked instances are still correct
3558 _CheckInstancesNodeGroups(self.cfg, self.instances,
3559 owned_groups, owned_nodes, self.group_uuid)
3561 def Exec(self, feedback_fn):
3562 """Verify integrity of cluster disks.
3564 @rtype: tuple of three items
3565 @return: a tuple of (dict of node-to-node_error, list of instances
3566 which need activate-disks, dict of instance: (node, volume) for
3571 res_instances = set()
3574 nv_dict = _MapInstanceDisksToNodes([inst
3575 for inst in self.instances.values()
3576 if inst.admin_state == constants.ADMINST_UP])
3579 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3580 set(self.cfg.GetVmCapableNodeList()))
3582 node_lvs = self.rpc.call_lv_list(nodes, [])
3584 for (node, node_res) in node_lvs.items():
3585 if node_res.offline:
3588 msg = node_res.fail_msg
3590 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3591 res_nodes[node] = msg
3594 for lv_name, (_, _, lv_online) in node_res.payload.items():
3595 inst = nv_dict.pop((node, lv_name), None)
3596 if not (lv_online or inst is None):
3597 res_instances.add(inst)
3599 # any leftover items in nv_dict are missing LVs, let's arrange the data
3601 for key, inst in nv_dict.iteritems():
3602 res_missing.setdefault(inst, []).append(list(key))
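# (explanatory) the result is roughly of the form ({node: error-message},
# [instances needing activate-disks], {instance: [[node, volume], ...]})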
3604 return (res_nodes, list(res_instances), res_missing)
3607 class LUClusterRepairDiskSizes(NoHooksLU):
3608 """Verifies the cluster disks sizes.
3613 def ExpandNames(self):
3614 if self.op.instances:
3615 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3616 self.needed_locks = {
3617 locking.LEVEL_NODE_RES: [],
3618 locking.LEVEL_INSTANCE: self.wanted_names,
3620 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3622 self.wanted_names = None
3623 self.needed_locks = {
3624 locking.LEVEL_NODE_RES: locking.ALL_SET,
3625 locking.LEVEL_INSTANCE: locking.ALL_SET,
3627 self.share_locks = {
3628 locking.LEVEL_NODE_RES: 1,
3629 locking.LEVEL_INSTANCE: 0,
3632 def DeclareLocks(self, level):
3633 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3634 self._LockInstancesNodes(primary_only=True, level=level)
3636 def CheckPrereq(self):
3637 """Check prerequisites.
3639 This only checks the optional instance list against the existing names.
3642 if self.wanted_names is None:
3643 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3645 self.wanted_instances = \
3646 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3648 def _EnsureChildSizes(self, disk):
3649 """Ensure children of the disk have the needed disk size.
3651 This is valid mainly for DRBD8 and fixes an issue where the
3652 children have smaller disk size.
3654 @param disk: an L{ganeti.objects.Disk} object
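Illustrative example (assumed sizes): for a DRBD8 disk recorded with size
10240 MiB whose data child is recorded with 10112 MiB, the child's size is
bumped to 10240 MiB in the configuration and True is returned so the caller
knows to write the change back.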
3657 if disk.dev_type == constants.LD_DRBD8:
3658 assert disk.children, "Empty children for DRBD8?"
3659 fchild = disk.children[0]
3660 mismatch = fchild.size < disk.size
3662 self.LogInfo("Child disk has size %d, parent %d, fixing",
3663 fchild.size, disk.size)
3664 fchild.size = disk.size
3666 # and we recurse on this child only, not on the metadev
3667 return self._EnsureChildSizes(fchild) or mismatch
3671 def Exec(self, feedback_fn):
3672 """Verify the size of cluster disks.
3675 # TODO: check child disks too
3676 # TODO: check differences in size between primary/secondary nodes
3678 for instance in self.wanted_instances:
3679 pnode = instance.primary_node
3680 if pnode not in per_node_disks:
3681 per_node_disks[pnode] = []
3682 for idx, disk in enumerate(instance.disks):
3683 per_node_disks[pnode].append((instance, idx, disk))
3685 assert not (frozenset(per_node_disks.keys()) -
3686 self.owned_locks(locking.LEVEL_NODE_RES)), \
3687 "Not owning correct locks"
3688 assert not self.owned_locks(locking.LEVEL_NODE)
3691 for node, dskl in per_node_disks.items():
3692 newl = [v[2].Copy() for v in dskl]
3694 self.cfg.SetDiskID(dsk, node)
3695 result = self.rpc.call_blockdev_getsize(node, newl)
3697 self.LogWarning("Failure in blockdev_getsize call to node"
3698 " %s, ignoring", node)
3700 if len(result.payload) != len(dskl):
3701 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3702 " result.payload=%s", node, len(dskl), result.payload)
3703 self.LogWarning("Invalid result from node %s, ignoring node results",
3706 for ((instance, idx, disk), size) in zip(dskl, result.payload):
if size is None:
3708 self.LogWarning("Disk %d of instance %s did not return size"
3709 " information, ignoring", idx, instance.name)
continue
3711 if not isinstance(size, (int, long)):
3712 self.LogWarning("Disk %d of instance %s did not return valid"
3713 " size information, ignoring", idx, instance.name)
continue
3716 if size != disk.size:
3717 self.LogInfo("Disk %d of instance %s has mismatched size,"
3718 " correcting: recorded %d, actual %d", idx,
3719 instance.name, disk.size, size)
disk.size = size
3721 self.cfg.Update(instance, feedback_fn)
3722 changed.append((instance.name, idx, size))
3723 if self._EnsureChildSizes(disk):
3724 self.cfg.Update(instance, feedback_fn)
3725 changed.append((instance.name, idx, disk.size))
3729 class LUClusterRename(LogicalUnit):
3730 """Rename the cluster.
3733 HPATH = "cluster-rename"
3734 HTYPE = constants.HTYPE_CLUSTER
3736 def BuildHooksEnv(self):
3741 "OP_TARGET": self.cfg.GetClusterName(),
3742 "NEW_NAME": self.op.name,
3745 def BuildHooksNodes(self):
3746 """Build hooks nodes.
3749 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3751 def CheckPrereq(self):
3752 """Verify that the passed name is a valid one.
3755 hostname = netutils.GetHostname(name=self.op.name,
3756 family=self.cfg.GetPrimaryIPFamily())
3758 new_name = hostname.name
3759 self.ip = new_ip = hostname.ip
3760 old_name = self.cfg.GetClusterName()
3761 old_ip = self.cfg.GetMasterIP()
3762 if new_name == old_name and new_ip == old_ip:
3763 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3764 " cluster has changed",
3766 if new_ip != old_ip:
3767 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3768 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3769 " reachable on the network" %
3770 new_ip, errors.ECODE_NOTUNIQUE)
3772 self.op.name = new_name
3774 def Exec(self, feedback_fn):
3775 """Rename the cluster.
3778 clustername = self.op.name
new_ip = self.ip
3781 # shutdown the master IP
3782 master_params = self.cfg.GetMasterNetworkParameters()
3783 ems = self.cfg.GetUseExternalMipScript()
3784 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3786 result.Raise("Could not disable the master role")
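# The master IP has to stay down while the new name/IP is written to the
# configuration; it is re-activated with the new address further below.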
3789 cluster = self.cfg.GetClusterInfo()
3790 cluster.cluster_name = clustername
3791 cluster.master_ip = new_ip
3792 self.cfg.Update(cluster, feedback_fn)
3794 # update the known hosts file
3795 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3796 node_list = self.cfg.GetOnlineNodeList()
3798 node_list.remove(master_params.name)
3801 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3803 master_params.ip = new_ip
3804 result = self.rpc.call_node_activate_master_ip(master_params.name,
3806 msg = result.fail_msg
if msg:
3808 self.LogWarning("Could not re-enable the master role on"
3809 " the master, please restart manually: %s", msg)
3814 def _ValidateNetmask(cfg, netmask):
3815 """Checks if a netmask is valid.
3817 @type cfg: L{config.ConfigWriter}
3818 @param cfg: The cluster configuration
3820 @param netmask: the netmask to be verified
3821 @raise errors.OpPrereqError: if the validation fails
3824 ip_family = cfg.GetPrimaryIPFamily()
3826 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3827 except errors.ProgrammerError:
3828 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3830 if not ipcls.ValidateNetmask(netmask):
3831 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3835 class LUClusterSetParams(LogicalUnit):
3836 """Change the parameters of the cluster.
3839 HPATH = "cluster-modify"
3840 HTYPE = constants.HTYPE_CLUSTER
3843 def CheckArguments(self):
3847 if self.op.uid_pool:
3848 uidpool.CheckUidPool(self.op.uid_pool)
3850 if self.op.add_uids:
3851 uidpool.CheckUidPool(self.op.add_uids)
3853 if self.op.remove_uids:
3854 uidpool.CheckUidPool(self.op.remove_uids)
3856 if self.op.master_netmask is not None:
3857 _ValidateNetmask(self.cfg, self.op.master_netmask)
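# Disk template parameters are type-checked per template and then verified
# against the known per-template defaults.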
3859 if self.op.diskparams:
3860 for dt_params in self.op.diskparams.values():
3861 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3863 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3864 except errors.OpPrereqError, err:
3865 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3868 def ExpandNames(self):
3869 # FIXME: in the future maybe other cluster params won't require checking on
3870 # all nodes to be modified.
3871 self.needed_locks = {
3872 locking.LEVEL_NODE: locking.ALL_SET,
3873 locking.LEVEL_INSTANCE: locking.ALL_SET,
3874 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3876 self.share_locks = {
3877 locking.LEVEL_NODE: 1,
3878 locking.LEVEL_INSTANCE: 1,
3879 locking.LEVEL_NODEGROUP: 1,
3882 def BuildHooksEnv(self):
3887 "OP_TARGET": self.cfg.GetClusterName(),
3888 "NEW_VG_NAME": self.op.vg_name,
3891 def BuildHooksNodes(self):
3892 """Build hooks nodes.
3895 mn = self.cfg.GetMasterNode()
3898 def CheckPrereq(self):
3899 """Check prerequisites.
3901 This checks whether the given params don't conflict and
3902 if the given volume group is valid.
3905 if self.op.vg_name is not None and not self.op.vg_name:
3906 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3907 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3908 " instances exist", errors.ECODE_INVAL)
3910 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3911 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3912 raise errors.OpPrereqError("Cannot disable drbd helper while"
3913 " drbd-based instances exist",
3916 node_list = self.owned_locks(locking.LEVEL_NODE)
3918 # if vg_name not None, checks given volume group on all nodes
if self.op.vg_name:
3920 vglist = self.rpc.call_vg_list(node_list)
3921 for node in node_list:
3922 msg = vglist[node].fail_msg
if msg:
3924 # ignoring down node
3925 self.LogWarning("Error while gathering data on node %s"
3926 " (ignoring node): %s", node, msg)
continue
3928 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3930 constants.MIN_VG_SIZE)
if vgstatus:
3932 raise errors.OpPrereqError("Error on node '%s': %s" %
3933 (node, vgstatus), errors.ECODE_ENVIRON)
3935 if self.op.drbd_helper:
3936 # checks given drbd helper on all nodes
3937 helpers = self.rpc.call_drbd_helper(node_list)
3938 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
if ninfo.offline:
3940 self.LogInfo("Not checking drbd helper on offline node %s", node)
continue
3942 msg = helpers[node].fail_msg
if msg:
3944 raise errors.OpPrereqError("Error checking drbd helper on node"
3945 " '%s': %s" % (node, msg),
3946 errors.ECODE_ENVIRON)
3947 node_helper = helpers[node].payload
3948 if node_helper != self.op.drbd_helper:
3949 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3950 (node, node_helper), errors.ECODE_ENVIRON)
3952 self.cluster = cluster = self.cfg.GetClusterInfo()
3953 # validate params changes
3954 if self.op.beparams:
3955 objects.UpgradeBeParams(self.op.beparams)
3956 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3957 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3959 if self.op.ndparams:
3960 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3961 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3963 # TODO: we need a more general way to handle resetting
3964 # cluster-level parameters to default values
3965 if self.new_ndparams["oob_program"] == "":
3966 self.new_ndparams["oob_program"] = \
3967 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
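# Static hypervisor and disk state is merged with the currently recorded
# state and normalised here; it is stored on the cluster object in Exec.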
3969 if self.op.hv_state:
3970 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3971 self.cluster.hv_state_static)
3972 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3973 for hv, values in new_hv_state.items())
3975 if self.op.disk_state:
3976 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3977 self.cluster.disk_state_static)
3978 self.new_disk_state = \
3979 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3980 for name, values in svalues.items()))
3981 for storage, svalues in new_disk_state.items())
3984 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3987 all_instances = self.cfg.GetAllInstancesInfo().values()
3989 for group in self.cfg.GetAllNodeGroupsInfo().values():
3990 instances = frozenset([inst for inst in all_instances
3991 if compat.any(node in group.members
3992 for node in inst.all_nodes)])
3993 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3994 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3996 new_ipolicy, instances)
3998 violations.update(new)
4001 self.LogWarning("After the ipolicy change the following instances"
4002 " violate them: %s",
4003 utils.CommaJoin(utils.NiceSort(violations)))
4005 if self.op.nicparams:
4006 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4007 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4008 objects.NIC.CheckParameterSyntax(self.new_nicparams)
nic_errors = []
4011 # check all instances for consistency
4012 for instance in self.cfg.GetAllInstancesInfo().values():
4013 for nic_idx, nic in enumerate(instance.nics):
4014 params_copy = copy.deepcopy(nic.nicparams)
4015 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4017 # check parameter syntax
4019 objects.NIC.CheckParameterSyntax(params_filled)
4020 except errors.ConfigurationError, err:
4021 nic_errors.append("Instance %s, nic/%d: %s" %
4022 (instance.name, nic_idx, err))
4024 # if we're moving instances to routed, check that they have an ip
4025 target_mode = params_filled[constants.NIC_MODE]
4026 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4027 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4028 " address" % (instance.name, nic_idx))
if nic_errors:
4030 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4031 "\n".join(nic_errors))
4033 # hypervisor list/parameters
4034 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
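# Start from a copy of the current cluster hvparams; per-hypervisor dicts
# given in the opcode are merged into it (updated, not replaced) below.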
4035 if self.op.hvparams:
4036 for hv_name, hv_dict in self.op.hvparams.items():
4037 if hv_name not in self.new_hvparams:
4038 self.new_hvparams[hv_name] = hv_dict
else:
4040 self.new_hvparams[hv_name].update(hv_dict)
4042 # disk template parameters
4043 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4044 if self.op.diskparams:
4045 for dt_name, dt_params in self.op.diskparams.items():
4046 if dt_name not in self.new_diskparams:
4047 self.new_diskparams[dt_name] = dt_params
else:
4049 self.new_diskparams[dt_name].update(dt_params)
4051 # os hypervisor parameters
4052 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
if self.op.os_hvp:
4054 for os_name, hvs in self.op.os_hvp.items():
4055 if os_name not in self.new_os_hvp:
4056 self.new_os_hvp[os_name] = hvs
else:
4058 for hv_name, hv_dict in hvs.items():
4059 if hv_name not in self.new_os_hvp[os_name]:
4060 self.new_os_hvp[os_name][hv_name] = hv_dict
else:
4062 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4065 self.new_osp = objects.FillDict(cluster.osparams, {})
4066 if self.op.osparams:
4067 for os_name, osp in self.op.osparams.items():
4068 if os_name not in self.new_osp:
4069 self.new_osp[os_name] = {}
4071 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4074 if not self.new_osp[os_name]:
4075 # we removed all parameters
4076 del self.new_osp[os_name]
4078 # check the parameter validity (remote check)
4079 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4080 os_name, self.new_osp[os_name])
4082 # changes to the hypervisor list
4083 if self.op.enabled_hypervisors is not None:
4084 self.hv_list = self.op.enabled_hypervisors
4085 for hv in self.hv_list:
4086 # if the hypervisor doesn't already exist in the cluster
4087 # hvparams, we initialize it to empty, and then (in both
4088 # cases) we make sure to fill the defaults, as we might not
4089 # have a complete defaults list if the hypervisor wasn't
# enabled before
4091 if hv not in new_hvp:
new_hvp[hv] = {}
4093 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4094 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4096 self.hv_list = cluster.enabled_hypervisors
4098 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4099 # either the enabled list has changed, or the parameters have, validate
4100 for hv_name, hv_params in self.new_hvparams.items():
4101 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4102 (self.op.enabled_hypervisors and
4103 hv_name in self.op.enabled_hypervisors)):
4104 # either this is a new hypervisor, or its parameters have changed
4105 hv_class = hypervisor.GetHypervisor(hv_name)
4106 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4107 hv_class.CheckParameterSyntax(hv_params)
4108 _CheckHVParams(self, node_list, hv_name, hv_params)
4111 # no need to check any newly-enabled hypervisors, since the
4112 # defaults have already been checked in the above code-block
4113 for os_name, os_hvp in self.new_os_hvp.items():
4114 for hv_name, hv_params in os_hvp.items():
4115 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4116 # we need to fill in the new os_hvp on top of the actual hv_p
4117 cluster_defaults = self.new_hvparams.get(hv_name, {})
4118 new_osp = objects.FillDict(cluster_defaults, hv_params)
4119 hv_class = hypervisor.GetHypervisor(hv_name)
4120 hv_class.CheckParameterSyntax(new_osp)
4121 _CheckHVParams(self, node_list, hv_name, new_osp)
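# A default iallocator given by name must resolve to an existing script in
# the iallocator search path, otherwise the opcode is rejected.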
4123 if self.op.default_iallocator:
4124 alloc_script = utils.FindFile(self.op.default_iallocator,
4125 constants.IALLOCATOR_SEARCH_PATH,
4127 if alloc_script is None:
4128 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4129 " specified" % self.op.default_iallocator,
4132 def Exec(self, feedback_fn):
4133 """Change the parameters of the cluster.
4136 if self.op.vg_name is not None:
4137 new_volume = self.op.vg_name
4140 if new_volume != self.cfg.GetVGName():
4141 self.cfg.SetVGName(new_volume)
else:
4143 feedback_fn("Cluster LVM configuration already in desired"
4144 " state, not changing")
4145 if self.op.drbd_helper is not None:
4146 new_helper = self.op.drbd_helper
4149 if new_helper != self.cfg.GetDRBDHelper():
4150 self.cfg.SetDRBDHelper(new_helper)
else:
4152 feedback_fn("Cluster DRBD helper already in desired state,"
" not changing")
4154 if self.op.hvparams:
4155 self.cluster.hvparams = self.new_hvparams
if self.op.os_hvp:
4157 self.cluster.os_hvp = self.new_os_hvp
4158 if self.op.enabled_hypervisors is not None:
4159 self.cluster.hvparams = self.new_hvparams
4160 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4161 if self.op.beparams:
4162 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4163 if self.op.nicparams:
4164 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
if self.op.ipolicy:
4166 self.cluster.ipolicy = self.new_ipolicy
4167 if self.op.osparams:
4168 self.cluster.osparams = self.new_osp
4169 if self.op.ndparams:
4170 self.cluster.ndparams = self.new_ndparams
4171 if self.op.diskparams:
4172 self.cluster.diskparams = self.new_diskparams
4173 if self.op.hv_state:
4174 self.cluster.hv_state_static = self.new_hv_state
4175 if self.op.disk_state:
4176 self.cluster.disk_state_static = self.new_disk_state
4178 if self.op.candidate_pool_size is not None:
4179 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4180 # we need to update the pool size here, otherwise the save will fail
4181 _AdjustCandidatePool(self, [])
4183 if self.op.maintain_node_health is not None:
4184 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4185 feedback_fn("Note: CONFD was disabled at build time, node health"
4186 " maintenance is not useful (still enabling it)")
4187 self.cluster.maintain_node_health = self.op.maintain_node_health
4189 if self.op.prealloc_wipe_disks is not None:
4190 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4192 if self.op.add_uids is not None:
4193 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4195 if self.op.remove_uids is not None:
4196 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4198 if self.op.uid_pool is not None:
4199 self.cluster.uid_pool = self.op.uid_pool
4201 if self.op.default_iallocator is not None:
4202 self.cluster.default_iallocator = self.op.default_iallocator
4204 if self.op.reserved_lvs is not None:
4205 self.cluster.reserved_lvs = self.op.reserved_lvs
4207 if self.op.use_external_mip_script is not None:
4208 self.cluster.use_external_mip_script = self.op.use_external_mip_script
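# helper_os applies DDM_ADD/DDM_REMOVE modifications to the hidden and
# blacklisted OS lists kept on the cluster object.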
4210 def helper_os(aname, mods, desc):
4212 lst = getattr(self.cluster, aname)
4213 for key, val in mods:
4214 if key == constants.DDM_ADD:
4216 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4219 elif key == constants.DDM_REMOVE:
4223 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4225 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4227 if self.op.hidden_os:
4228 helper_os("hidden_os", self.op.hidden_os, "hidden")
4230 if self.op.blacklisted_os:
4231 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4233 if self.op.master_netdev:
4234 master_params = self.cfg.GetMasterNetworkParameters()
4235 ems = self.cfg.GetUseExternalMipScript()
4236 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4237 self.cluster.master_netdev)
4238 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4240 result.Raise("Could not disable the master ip")
4241 feedback_fn("Changing master_netdev from %s to %s" %
4242 (master_params.netdev, self.op.master_netdev))
4243 self.cluster.master_netdev = self.op.master_netdev
4245 if self.op.master_netmask:
4246 master_params = self.cfg.GetMasterNetworkParameters()
4247 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4248 result = self.rpc.call_node_change_master_netmask(master_params.name,
4249 master_params.netmask,
4250 self.op.master_netmask,
4252 master_params.netdev)
if result.fail_msg:
4254 msg = "Could not change the master IP netmask: %s" % result.fail_msg
feedback_fn(msg)
4257 self.cluster.master_netmask = self.op.master_netmask
4259 self.cfg.Update(self.cluster, feedback_fn)
4261 if self.op.master_netdev:
4262 master_params = self.cfg.GetMasterNetworkParameters()
4263 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4264 self.op.master_netdev)
4265 ems = self.cfg.GetUseExternalMipScript()
4266 result = self.rpc.call_node_activate_master_ip(master_params.name,
4269 self.LogWarning("Could not re-enable the master ip on"
4270 " the master, please restart manually: %s",
4274 def _UploadHelper(lu, nodes, fname):
4275 """Helper for uploading a file and showing warnings.
4278 if os.path.exists(fname):
4279 result = lu.rpc.call_upload_file(nodes, fname)
4280 for to_node, to_result in result.items():
4281 msg = to_result.fail_msg
4283 msg = ("Copy of file %s to node %s failed: %s" %
4284 (fname, to_node, msg))
4285 lu.proc.LogWarning(msg)
4288 def _ComputeAncillaryFiles(cluster, redist):
4289 """Compute files external to Ganeti which need to be consistent.
4291 @type redist: boolean
4292 @param redist: Whether to include files which need to be redistributed
4295 # Compute files for all nodes
4297 constants.SSH_KNOWN_HOSTS_FILE,
4298 constants.CONFD_HMAC_KEY,
4299 constants.CLUSTER_DOMAIN_SECRET_FILE,
4300 constants.SPICE_CERT_FILE,
4301 constants.SPICE_CACERT_FILE,
4302 constants.RAPI_USERS_FILE,
4306 files_all.update(constants.ALL_CERT_FILES)
4307 files_all.update(ssconf.SimpleStore().GetFileList())
4309 # we need to ship at least the RAPI certificate
4310 files_all.add(constants.RAPI_CERT_FILE)
4312 if cluster.modify_etc_hosts:
4313 files_all.add(constants.ETC_HOSTS)
4315 if cluster.use_external_mip_script:
4316 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4318 # Files which are optional, these must:
4319 # - be present in one other category as well
4320 # - either exist or not exist on all nodes of that category (mc, vm all)
4322 constants.RAPI_USERS_FILE,
4325 # Files which should only be on master candidates
4329 files_mc.add(constants.CLUSTER_CONF_FILE)
4331 # Files which should only be on VM-capable nodes
4332 files_vm = set(filename
4333 for hv_name in cluster.enabled_hypervisors
4334 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4336 files_opt |= set(filename
4337 for hv_name in cluster.enabled_hypervisors
4338 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4340 # Filenames in each category must be unique
4341 all_files_set = files_all | files_mc | files_vm
4342 assert (len(all_files_set) ==
4343 sum(map(len, [files_all, files_mc, files_vm]))), \
4344 "Found file listed in more than one file list"
4346 # Optional files must be present in one other category
4347 assert all_files_set.issuperset(files_opt), \
4348 "Optional file not in a different required list"
4350 return (files_all, files_opt, files_mc, files_vm)
4353 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4354 """Distribute additional files which are part of the cluster configuration.
4356 ConfigWriter takes care of distributing the config and ssconf files, but
4357 there are more files which should be distributed to all nodes. This function
4358 makes sure those are copied.
4360 @param lu: calling logical unit
4361 @param additional_nodes: list of nodes not in the config to distribute to
4362 @type additional_vm: boolean
4363 @param additional_vm: whether the additional nodes are vm-capable or not
4366 # Gather target nodes
4367 cluster = lu.cfg.GetClusterInfo()
4368 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4370 online_nodes = lu.cfg.GetOnlineNodeList()
4371 online_set = frozenset(online_nodes)
4372 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4374 if additional_nodes is not None:
4375 online_nodes.extend(additional_nodes)
4377 vm_nodes.extend(additional_nodes)
4379 # Never distribute to master node
4380 for nodelist in [online_nodes, vm_nodes]:
4381 if master_info.name in nodelist:
4382 nodelist.remove(master_info.name)
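# The cluster configuration itself is distributed by the config writer and
# master candidate files are not handled here; only the remaining
# ancillary files are pushed below.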
4385 (files_all, _, files_mc, files_vm) = \
4386 _ComputeAncillaryFiles(cluster, True)
4388 # Never re-distribute configuration file from here
4389 assert not (constants.CLUSTER_CONF_FILE in files_all or
4390 constants.CLUSTER_CONF_FILE in files_vm)
4391 assert not files_mc, "Master candidates not handled in this function"
4394 (online_nodes, files_all),
4395 (vm_nodes, files_vm),
4399 for (node_list, files) in filemap:
4401 _UploadHelper(lu, node_list, fname)
4404 class LUClusterRedistConf(NoHooksLU):
4405 """Force the redistribution of cluster configuration.
4407 This is a very simple LU.
4412 def ExpandNames(self):
4413 self.needed_locks = {
4414 locking.LEVEL_NODE: locking.ALL_SET,
4416 self.share_locks[locking.LEVEL_NODE] = 1
4418 def Exec(self, feedback_fn):
4419 """Redistribute the configuration.
4422 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4423 _RedistributeAncillaryFiles(self)
4426 class LUClusterActivateMasterIp(NoHooksLU):
4427 """Activate the master IP on the master node.
4430 def Exec(self, feedback_fn):
4431 """Activate the master IP.
4434 master_params = self.cfg.GetMasterNetworkParameters()
4435 ems = self.cfg.GetUseExternalMipScript()
4436 result = self.rpc.call_node_activate_master_ip(master_params.name,
4438 result.Raise("Could not activate the master IP")
4441 class LUClusterDeactivateMasterIp(NoHooksLU):
4442 """Deactivate the master IP on the master node.
4445 def Exec(self, feedback_fn):
4446 """Deactivate the master IP.
4449 master_params = self.cfg.GetMasterNetworkParameters()
4450 ems = self.cfg.GetUseExternalMipScript()
4451 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4453 result.Raise("Could not deactivate the master IP")
4456 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4457 """Sleep and poll for an instance's disk to sync.
4460 if not instance.disks or disks is not None and not disks:
4463 disks = _ExpandCheckDisks(instance, disks)
4466 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4468 node = instance.primary_node
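# Mirror status is always polled on the instance's primary node, where the
# disks are assembled.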
4471 lu.cfg.SetDiskID(dev, node)
4473 # TODO: Convert to utils.Retry
4476 degr_retries = 10 # in seconds, as we sleep 1 second each time
4480 cumul_degraded = False
4481 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4482 msg = rstats.fail_msg
4484 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4487 raise errors.RemoteError("Can't contact node %s for mirror data,"
4488 " aborting." % node)
4491 rstats = rstats.payload
4493 for i, mstat in enumerate(rstats):
4495 lu.LogWarning("Can't compute data for node %s/%s",
4496 node, disks[i].iv_name)
4499 cumul_degraded = (cumul_degraded or
4500 (mstat.is_degraded and mstat.sync_percent is None))
4501 if mstat.sync_percent is not None:
4503 if mstat.estimated_time is not None:
4504 rem_time = ("%s remaining (estimated)" %
4505 utils.FormatSeconds(mstat.estimated_time))
4506 max_time = mstat.estimated_time
4508 rem_time = "no time estimate"
4509 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4510 (disks[i].iv_name, mstat.sync_percent, rem_time))
4512 # if we're done but degraded, let's do a few small retries, to
4513 # make sure we see a stable and not transient situation; therefore
4514 # we force restart of the loop
4515 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4516 logging.info("Degraded disks found, %d retries left", degr_retries)
4524 time.sleep(min(60, max_time))
4527 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4528 return not cumul_degraded
4531 def _BlockdevFind(lu, node, dev, instance):
4532 """Wrapper around call_blockdev_find to annotate diskparams.
4534 @param lu: A reference to the lu object
4535 @param node: The node to call out
4536 @param dev: The device to find
4537 @param instance: The instance object the device belongs to
4538 @returns The result of the rpc call
4541 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4542 return lu.rpc.call_blockdev_find(node, disk)
4545 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4546 """Wrapper around L{_CheckDiskConsistencyInner}.
4549 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4550 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4554 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4556 """Check that mirrors are not degraded.
4558 @attention: The device has to be annotated already.
4560 The ldisk parameter, if True, will change the test from the
4561 is_degraded attribute (which represents overall non-ok status for
4562 the device(s)) to the ldisk (representing the local storage status).
4565 lu.cfg.SetDiskID(dev, node)
result = True
4569 if on_primary or dev.AssembleOnSecondary():
4570 rstats = lu.rpc.call_blockdev_find(node, dev)
4571 msg = rstats.fail_msg
4573 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4575 elif not rstats.payload:
4576 lu.LogWarning("Can't find disk on node %s", node)
4580 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4582 result = result and not rstats.payload.is_degraded
4585 for child in dev.children:
4586 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4592 class LUOobCommand(NoHooksLU):
4593 """Logical unit for OOB handling.
4597 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4599 def ExpandNames(self):
4600 """Gather locks we need.
4603 if self.op.node_names:
4604 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4605 lock_names = self.op.node_names
4607 lock_names = locking.ALL_SET
4609 self.needed_locks = {
4610 locking.LEVEL_NODE: lock_names,
4613 def CheckPrereq(self):
4614 """Check prerequisites.
4617 - the node exists in the configuration
4620 Any errors are signaled by raising errors.OpPrereqError.
4624 self.master_node = self.cfg.GetMasterNode()
4626 assert self.op.power_delay >= 0.0
4628 if self.op.node_names:
4629 if (self.op.command in self._SKIP_MASTER and
4630 self.master_node in self.op.node_names):
4631 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4632 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4634 if master_oob_handler:
4635 additional_text = ("run '%s %s %s' if you want to operate on the"
4636 " master regardless") % (master_oob_handler,
4640 additional_text = "it does not support out-of-band operations"
4642 raise errors.OpPrereqError(("Operating on the master node %s is not"
4643 " allowed for %s; %s") %
4644 (self.master_node, self.op.command,
4645 additional_text), errors.ECODE_INVAL)
4647 self.op.node_names = self.cfg.GetNodeList()
4648 if self.op.command in self._SKIP_MASTER:
4649 self.op.node_names.remove(self.master_node)
4651 if self.op.command in self._SKIP_MASTER:
4652 assert self.master_node not in self.op.node_names
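# Resolve the node objects; powering off a node that is not marked offline
# is refused unless ignore_status is set.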
4654 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4656 raise errors.OpPrereqError("Node %s not found" % node_name,
4659 self.nodes.append(node)
4661 if (not self.op.ignore_status and
4662 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4663 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4664 " not marked offline") % node_name,
4667 def Exec(self, feedback_fn):
4668 """Execute OOB and return result if we expect any.
4671 master_node = self.master_node
4674 for idx, node in enumerate(utils.NiceSort(self.nodes,
4675 key=lambda node: node.name)):
4676 node_entry = [(constants.RS_NORMAL, node.name)]
4677 ret.append(node_entry)
4679 oob_program = _SupportsOob(self.cfg, node)
if not oob_program:
4682 node_entry.append((constants.RS_UNAVAIL, None))
continue
4685 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4686 self.op.command, oob_program, node.name)
4687 result = self.rpc.call_run_oob(master_node, oob_program,
4688 self.op.command, node.name,
4692 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4693 node.name, result.fail_msg)
4694 node_entry.append((constants.RS_NODATA, None))
4697 self._CheckPayload(result)
4698 except errors.OpExecError, err:
4699 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4701 node_entry.append((constants.RS_NODATA, None))
4703 if self.op.command == constants.OOB_HEALTH:
4704 # For health we should log important events
4705 for item, status in result.payload:
4706 if status in [constants.OOB_STATUS_WARNING,
4707 constants.OOB_STATUS_CRITICAL]:
4708 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4709 item, node.name, status)
4711 if self.op.command == constants.OOB_POWER_ON:
node.powered = True
4713 elif self.op.command == constants.OOB_POWER_OFF:
4714 node.powered = False
4715 elif self.op.command == constants.OOB_POWER_STATUS:
4716 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4717 if powered != node.powered:
4718 logging.warning(("Recorded power state (%s) of node '%s' does not"
4719 " match actual power state (%s)"), node.powered,
4722 # For configuration changing commands we should update the node
4723 if self.op.command in (constants.OOB_POWER_ON,
4724 constants.OOB_POWER_OFF):
4725 self.cfg.Update(node, feedback_fn)
4727 node_entry.append((constants.RS_NORMAL, result.payload))
4729 if (self.op.command == constants.OOB_POWER_ON and
4730 idx < len(self.nodes) - 1):
4731 time.sleep(self.op.power_delay)
4735 def _CheckPayload(self, result):
4736 """Checks if the payload is valid.
4738 @param result: RPC result
4739 @raises errors.OpExecError: If payload is not valid
4743 if self.op.command == constants.OOB_HEALTH:
4744 if not isinstance(result.payload, list):
4745 errs.append("command 'health' is expected to return a list but got %s" %
4746 type(result.payload))
4748 for item, status in result.payload:
4749 if status not in constants.OOB_STATUSES:
4750 errs.append("health item '%s' has invalid status '%s'" %
4753 if self.op.command == constants.OOB_POWER_STATUS:
4754 if not isinstance(result.payload, dict):
4755 errs.append("power-status is expected to return a dict but got %s" %
4756 type(result.payload))
4758 if self.op.command in [
4759 constants.OOB_POWER_ON,
4760 constants.OOB_POWER_OFF,
4761 constants.OOB_POWER_CYCLE,
4763 if result.payload is not None:
4764 errs.append("%s is expected to not return payload but got '%s'" %
4765 (self.op.command, result.payload))
4768 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4769 utils.CommaJoin(errs))
4772 class _OsQuery(_QueryBase):
4773 FIELDS = query.OS_FIELDS
4775 def ExpandNames(self, lu):
4776 # Lock all nodes in shared mode
4777 # Temporary removal of locks, should be reverted later
4778 # TODO: reintroduce locks when they are lighter-weight
4779 lu.needed_locks = {}
4780 #self.share_locks[locking.LEVEL_NODE] = 1
4781 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4783 # The following variables interact with _QueryBase._GetNames
4785 self.wanted = self.names
4787 self.wanted = locking.ALL_SET
4789 self.do_locking = self.use_locking
4791 def DeclareLocks(self, lu, level):
4795 def _DiagnoseByOS(rlist):
4796 """Remaps a per-node return list into a per-os per-node dictionary
4798 @param rlist: a map with node names as keys and OS objects as values
4801 @return: a dictionary with osnames as keys and as value another
4802 map, with nodes as keys and tuples of (path, status, diagnose,
4803 variants, parameters, api_versions) as values, eg::
4805 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4806 (/srv/..., False, "invalid api")],
4807 "node2": [(/srv/..., True, "", [], [])]}
4812 # we build here the list of nodes that didn't fail the RPC (at RPC
4813 # level), so that nodes with a non-responding node daemon don't
4814 # make all OSes invalid
4815 good_nodes = [node_name for node_name in rlist
4816 if not rlist[node_name].fail_msg]
4817 for node_name, nr in rlist.items():
4818 if nr.fail_msg or not nr.payload:
4820 for (name, path, status, diagnose, variants,
4821 params, api_versions) in nr.payload:
4822 if name not in all_os:
4823 # build a list of nodes for this os containing empty lists
4824 # for each node in node_list
4826 for nname in good_nodes:
4827 all_os[name][nname] = []
4828 # convert params from [name, help] to (name, help)
4829 params = [tuple(v) for v in params]
4830 all_os[name][node_name].append((path, status, diagnose,
4831 variants, params, api_versions))
4834 def _GetQueryData(self, lu):
4835 """Computes the list of nodes and their attributes.
4838 # Locking is not used
4839 assert not (compat.any(lu.glm.is_owned(level)
4840 for level in locking.LEVELS
4841 if level != locking.LEVEL_CLUSTER) or
4842 self.do_locking or self.use_locking)
4844 valid_nodes = [node.name
4845 for node in lu.cfg.GetAllNodesInfo().values()
4846 if not node.offline and node.vm_capable]
4847 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4848 cluster = lu.cfg.GetClusterInfo()
4852 for (os_name, os_data) in pol.items():
4853 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4854 hidden=(os_name in cluster.hidden_os),
4855 blacklisted=(os_name in cluster.blacklisted_os))
4859 api_versions = set()
4861 for idx, osl in enumerate(os_data.values()):
4862 info.valid = bool(info.valid and osl and osl[0][1])
4866 (node_variants, node_params, node_api) = osl[0][3:6]
4869 variants.update(node_variants)
4870 parameters.update(node_params)
4871 api_versions.update(node_api)
4873 # Filter out inconsistent values
4874 variants.intersection_update(node_variants)
4875 parameters.intersection_update(node_params)
4876 api_versions.intersection_update(node_api)
4878 info.variants = list(variants)
4879 info.parameters = list(parameters)
4880 info.api_versions = list(api_versions)
4882 data[os_name] = info
4884 # Prepare data in requested order
4885 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4889 class LUOsDiagnose(NoHooksLU):
4890 """Logical unit for OS diagnose/query.
4896 def _BuildFilter(fields, names):
4897 """Builds a filter for querying OSes.
4900 name_filter = qlang.MakeSimpleFilter("name", names)
4902 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4903 # respective field is not requested
4904 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4905 for fname in ["hidden", "blacklisted"]
4906 if fname not in fields]
4907 if "valid" not in fields:
4908 status_filter.append([qlang.OP_TRUE, "valid"])
4911 status_filter.insert(0, qlang.OP_AND)
4913 status_filter = None
4915 if name_filter and status_filter:
4916 return [qlang.OP_AND, name_filter, status_filter]
4920 return status_filter
4922 def CheckArguments(self):
4923 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4924 self.op.output_fields, False)
4926 def ExpandNames(self):
4927 self.oq.ExpandNames(self)
4929 def Exec(self, feedback_fn):
4930 return self.oq.OldStyleQuery(self)
4933 class LUNodeRemove(LogicalUnit):
4934 """Logical unit for removing a node.
4937 HPATH = "node-remove"
4938 HTYPE = constants.HTYPE_NODE
4940 def BuildHooksEnv(self):
4945 "OP_TARGET": self.op.node_name,
4946 "NODE_NAME": self.op.node_name,
4949 def BuildHooksNodes(self):
4950 """Build hooks nodes.
4952 This doesn't run on the target node in the pre phase as a failed
4953 node would then be impossible to remove.
4956 all_nodes = self.cfg.GetNodeList()
4958 all_nodes.remove(self.op.node_name)
4961 return (all_nodes, all_nodes)
4963 def CheckPrereq(self):
4964 """Check prerequisites.
4967 - the node exists in the configuration
4968 - it does not have primary or secondary instances
4969 - it's not the master
4971 Any errors are signaled by raising errors.OpPrereqError.
4974 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4975 node = self.cfg.GetNodeInfo(self.op.node_name)
4976 assert node is not None
4978 masternode = self.cfg.GetMasterNode()
4979 if node.name == masternode:
4980 raise errors.OpPrereqError("Node is the master node, failover to another"
4981 " node is required", errors.ECODE_INVAL)
4983 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4984 if node.name in instance.all_nodes:
4985 raise errors.OpPrereqError("Instance %s is still running on the node,"
4986 " please remove it first" % instance_name,
4988 self.op.node_name = node.name
4991 def Exec(self, feedback_fn):
4992 """Removes the node from the cluster.
"""
node = self.cfg.GetNodeInfo(self.op.node_name)
4996 logging.info("Stopping the node daemon and removing configs from node %s",
node.name)
4999 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5001 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5004 # Promote nodes to master candidate as needed
5005 _AdjustCandidatePool(self, exceptions=[node.name])
5006 self.context.RemoveNode(node.name)
5008 # Run post hooks on the node before it's removed
5009 _RunPostHook(self, node.name)
5011 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5012 msg = result.fail_msg
5014 self.LogWarning("Errors encountered on the remote node while leaving"
5015 " the cluster: %s", msg)
5017 # Remove node from our /etc/hosts
5018 if self.cfg.GetClusterInfo().modify_etc_hosts:
5019 master_node = self.cfg.GetMasterNode()
5020 result = self.rpc.call_etc_hosts_modify(master_node,
5021 constants.ETC_HOSTS_REMOVE,
5023 result.Raise("Can't update hosts file with new host data")
5024 _RedistributeAncillaryFiles(self)
5027 class _NodeQuery(_QueryBase):
5028 FIELDS = query.NODE_FIELDS
5030 def ExpandNames(self, lu):
5031 lu.needed_locks = {}
5032 lu.share_locks = _ShareAll()
5035 self.wanted = _GetWantedNodes(lu, self.names)
5037 self.wanted = locking.ALL_SET
5039 self.do_locking = (self.use_locking and
5040 query.NQ_LIVE in self.requested_data)
5043 # If any non-static field is requested we need to lock the nodes
5044 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5046 def DeclareLocks(self, lu, level):
5049 def _GetQueryData(self, lu):
5050 """Computes the list of nodes and their attributes.
5053 all_info = lu.cfg.GetAllNodesInfo()
5055 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5057 # Gather data as requested
5058 if query.NQ_LIVE in self.requested_data:
5059 # filter out non-vm_capable nodes
5060 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5062 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5063 [lu.cfg.GetHypervisorType()])
5064 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5065 for (name, nresult) in node_data.items()
5066 if not nresult.fail_msg and nresult.payload)
5070 if query.NQ_INST in self.requested_data:
5071 node_to_primary = dict([(name, set()) for name in nodenames])
5072 node_to_secondary = dict([(name, set()) for name in nodenames])
5074 inst_data = lu.cfg.GetAllInstancesInfo()
5076 for inst in inst_data.values():
5077 if inst.primary_node in node_to_primary:
5078 node_to_primary[inst.primary_node].add(inst.name)
5079 for secnode in inst.secondary_nodes:
5080 if secnode in node_to_secondary:
5081 node_to_secondary[secnode].add(inst.name)
5083 node_to_primary = None
5084 node_to_secondary = None
5086 if query.NQ_OOB in self.requested_data:
5087 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5088 for name, node in all_info.iteritems())
5092 if query.NQ_GROUP in self.requested_data:
5093 groups = lu.cfg.GetAllNodeGroupsInfo()
5097 return query.NodeQueryData([all_info[name] for name in nodenames],
5098 live_data, lu.cfg.GetMasterNode(),
5099 node_to_primary, node_to_secondary, groups,
5100 oob_support, lu.cfg.GetClusterInfo())
5103 class LUNodeQuery(NoHooksLU):
5104 """Logical unit for querying nodes.
5107 # pylint: disable=W0142
5110 def CheckArguments(self):
5111 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5112 self.op.output_fields, self.op.use_locking)
5114 def ExpandNames(self):
5115 self.nq.ExpandNames(self)
5117 def DeclareLocks(self, level):
5118 self.nq.DeclareLocks(self, level)
5120 def Exec(self, feedback_fn):
5121 return self.nq.OldStyleQuery(self)
5124 class LUNodeQueryvols(NoHooksLU):
5125 """Logical unit for getting volumes on node(s).
5129 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5130 _FIELDS_STATIC = utils.FieldSet("node")
5132 def CheckArguments(self):
5133 _CheckOutputFields(static=self._FIELDS_STATIC,
5134 dynamic=self._FIELDS_DYNAMIC,
5135 selected=self.op.output_fields)
5137 def ExpandNames(self):
5138 self.share_locks = _ShareAll()
5139 self.needed_locks = {}
5141 if not self.op.nodes:
5142 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5144 self.needed_locks[locking.LEVEL_NODE] = \
5145 _GetWantedNodes(self, self.op.nodes)
5147 def Exec(self, feedback_fn):
5148 """Computes the list of nodes and their attributes.
5151 nodenames = self.owned_locks(locking.LEVEL_NODE)
5152 volumes = self.rpc.call_node_volumes(nodenames)
5154 ilist = self.cfg.GetAllInstancesInfo()
5155 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5158 for node in nodenames:
5159 nresult = volumes[node]
5162 msg = nresult.fail_msg
5164 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5167 node_vols = sorted(nresult.payload,
5168 key=operator.itemgetter("dev"))
5170 for vol in node_vols:
5172 for field in self.op.output_fields:
5175 elif field == "phys":
5179 elif field == "name":
5181 elif field == "size":
5182 val = int(float(vol["size"]))
5183 elif field == "instance":
5184 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5186 raise errors.ParameterError(field)
5187 node_output.append(str(val))
5189 output.append(node_output)
5194 class LUNodeQueryStorage(NoHooksLU):
5195 """Logical unit for getting information on storage units on node(s).
5198 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5201 def CheckArguments(self):
5202 _CheckOutputFields(static=self._FIELDS_STATIC,
5203 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5204 selected=self.op.output_fields)
5206 def ExpandNames(self):
5207 self.share_locks = _ShareAll()
5208 self.needed_locks = {}
5211 self.needed_locks[locking.LEVEL_NODE] = \
5212 _GetWantedNodes(self, self.op.nodes)
5214 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5216 def Exec(self, feedback_fn):
5217 """Computes the list of nodes and their attributes.
5220 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5222 # Always get name to sort by
5223 if constants.SF_NAME in self.op.output_fields:
5224 fields = self.op.output_fields[:]
5226 fields = [constants.SF_NAME] + self.op.output_fields
5228 # Never ask for node or type as it's only known to the LU
5229 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5230 while extra in fields:
5231 fields.remove(extra)
5233 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5234 name_idx = field_idx[constants.SF_NAME]
5236 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5237 data = self.rpc.call_storage_list(self.nodes,
5238 self.op.storage_type, st_args,
5239 self.op.name, fields)
5243 for node in utils.NiceSort(self.nodes):
5244 nresult = data[node]
5248 msg = nresult.fail_msg
5250 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5253 rows = dict([(row[name_idx], row) for row in nresult.payload])
5255 for name in utils.NiceSort(rows.keys()):
5260 for field in self.op.output_fields:
5261 if field == constants.SF_NODE:
5263 elif field == constants.SF_TYPE:
5264 val = self.op.storage_type
5265 elif field in field_idx:
5266 val = row[field_idx[field]]
5268 raise errors.ParameterError(field)
5277 class _InstanceQuery(_QueryBase):
5278 FIELDS = query.INSTANCE_FIELDS
5280 def ExpandNames(self, lu):
5281 lu.needed_locks = {}
5282 lu.share_locks = _ShareAll()
5285 self.wanted = _GetWantedInstances(lu, self.names)
5287 self.wanted = locking.ALL_SET
5289 self.do_locking = (self.use_locking and
5290 query.IQ_LIVE in self.requested_data)
5292 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5293 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5294 lu.needed_locks[locking.LEVEL_NODE] = []
5295 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5297 self.do_grouplocks = (self.do_locking and
5298 query.IQ_NODES in self.requested_data)
5300 def DeclareLocks(self, lu, level):
5302 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5303 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5305 # Lock all groups used by instances optimistically; this requires going
5306 # via the node before it's locked, requiring verification later on
5307 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5309 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5310 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5311 elif level == locking.LEVEL_NODE:
5312 lu._LockInstancesNodes() # pylint: disable=W0212
5315 def _CheckGroupLocks(lu):
5316 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5317 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5319 # Check if node groups for locked instances are still correct
5320 for instance_name in owned_instances:
5321 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5323 def _GetQueryData(self, lu):
5324 """Computes the list of instances and their attributes.
5327 if self.do_grouplocks:
5328 self._CheckGroupLocks(lu)
5330 cluster = lu.cfg.GetClusterInfo()
5331 all_info = lu.cfg.GetAllInstancesInfo()
5333 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5335 instance_list = [all_info[name] for name in instance_names]
5336 nodes = frozenset(itertools.chain(*(inst.all_nodes
5337 for inst in instance_list)))
5338 hv_list = list(set([inst.hypervisor for inst in instance_list]))
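# Instances reported as running on a node other than their configured
# primary node are collected in wrongnode_inst.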
5341 wrongnode_inst = set()
5343 # Gather data as requested
5344 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5346 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5348 result = node_data[name]
5350 # offline nodes will be in both lists
5351 assert result.fail_msg
5352 offline_nodes.append(name)
5354 bad_nodes.append(name)
5355 elif result.payload:
5356 for inst in result.payload:
5357 if inst in all_info:
5358 if all_info[inst].primary_node == name:
5359 live_data.update(result.payload)
5361 wrongnode_inst.add(inst)
5363 # orphan instance; we don't list it here as we don't
5364 # handle this case yet in the output of instance listing
5365 logging.warning("Orphan instance '%s' found on node %s",
5367 # else no instance is alive
5371 if query.IQ_DISKUSAGE in self.requested_data:
5372 disk_usage = dict((inst.name,
5373 _ComputeDiskSize(inst.disk_template,
5374 [{constants.IDISK_SIZE: disk.size}
5375 for disk in inst.disks]))
5376 for inst in instance_list)
5380 if query.IQ_CONSOLE in self.requested_data:
5382 for inst in instance_list:
5383 if inst.name in live_data:
5384 # Instance is running
5385 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5387 consinfo[inst.name] = None
5388 assert set(consinfo.keys()) == set(instance_names)
5392 if query.IQ_NODES in self.requested_data:
5393 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5395 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5396 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5397 for uuid in set(map(operator.attrgetter("group"),
5403 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5404 disk_usage, offline_nodes, bad_nodes,
5405 live_data, wrongnode_inst, consinfo,
5409 class LUQuery(NoHooksLU):
5410 """Query for resources/items of a certain kind.
5413 # pylint: disable=W0142
5416 def CheckArguments(self):
5417 qcls = _GetQueryImplementation(self.op.what)
5419 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5421 def ExpandNames(self):
5422 self.impl.ExpandNames(self)
5424 def DeclareLocks(self, level):
5425 self.impl.DeclareLocks(self, level)
5427 def Exec(self, feedback_fn):
5428 return self.impl.NewStyleQuery(self)
5431 class LUQueryFields(NoHooksLU):
5432 """Query for resources/items of a certain kind.
5435 # pylint: disable=W0142
5438 def CheckArguments(self):
5439 self.qcls = _GetQueryImplementation(self.op.what)
5441 def ExpandNames(self):
5442 self.needed_locks = {}
5444 def Exec(self, feedback_fn):
5445 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5448 class LUNodeModifyStorage(NoHooksLU):
5449 """Logical unit for modifying a storage volume on a node.
5454 def CheckArguments(self):
5455 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5457 storage_type = self.op.storage_type
5460 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5462 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5463 " modified" % storage_type,
5466 diff = set(self.op.changes.keys()) - modifiable
5468 raise errors.OpPrereqError("The following fields can not be modified for"
5469 " storage units of type '%s': %r" %
5470 (storage_type, list(diff)),
5473 def ExpandNames(self):
5474 self.needed_locks = {
5475 locking.LEVEL_NODE: self.op.node_name,
5478 def Exec(self, feedback_fn):
5479 """Computes the list of nodes and their attributes.
5482 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5483 result = self.rpc.call_storage_modify(self.op.node_name,
5484 self.op.storage_type, st_args,
5485 self.op.name, self.op.changes)
5486 result.Raise("Failed to modify storage unit '%s' on %s" %
5487 (self.op.name, self.op.node_name))
5490 class LUNodeAdd(LogicalUnit):
5491 """Logical unit for adding node to the cluster.
5495 HTYPE = constants.HTYPE_NODE
5496 _NFLAGS = ["master_capable", "vm_capable"]
5498 def CheckArguments(self):
5499 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5500 # validate/normalize the node name
5501 self.hostname = netutils.GetHostname(name=self.op.node_name,
5502 family=self.primary_ip_family)
5503 self.op.node_name = self.hostname.name
5505 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5506 raise errors.OpPrereqError("Cannot readd the master node",
5509 if self.op.readd and self.op.group:
5510 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5511 " being readded", errors.ECODE_INVAL)
5513 def BuildHooksEnv(self):
5516 This will run on all nodes before, and on all nodes + the new node after.
5520 "OP_TARGET": self.op.node_name,
5521 "NODE_NAME": self.op.node_name,
5522 "NODE_PIP": self.op.primary_ip,
5523 "NODE_SIP": self.op.secondary_ip,
5524 "MASTER_CAPABLE": str(self.op.master_capable),
5525 "VM_CAPABLE": str(self.op.vm_capable),
5528 def BuildHooksNodes(self):
5529 """Build hooks nodes.
5532 # Exclude added node
5533 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5534 post_nodes = pre_nodes + [self.op.node_name, ]
5536 return (pre_nodes, post_nodes)
5538 def CheckPrereq(self):
5539 """Check prerequisites.
5542 - the new node is not already in the config
5544 - its parameters (single/dual homed) matches the cluster
5546 Any errors are signaled by raising errors.OpPrereqError.
5550 hostname = self.hostname
5551 node = hostname.name
5552 primary_ip = self.op.primary_ip = hostname.ip
5553 if self.op.secondary_ip is None:
5554 if self.primary_ip_family == netutils.IP6Address.family:
5555 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5556 " IPv4 address must be given as secondary",
5558 self.op.secondary_ip = primary_ip
5560 secondary_ip = self.op.secondary_ip
5561 if not netutils.IP4Address.IsValid(secondary_ip):
5562 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5563 " address" % secondary_ip, errors.ECODE_INVAL)
5565 node_list = cfg.GetNodeList()
5566 if not self.op.readd and node in node_list:
5567 raise errors.OpPrereqError("Node %s is already in the configuration" %
5568 node, errors.ECODE_EXISTS)
5569 elif self.op.readd and node not in node_list:
5570 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5573 self.changed_primary_ip = False
5575 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5576 if self.op.readd and node == existing_node_name:
5577 if existing_node.secondary_ip != secondary_ip:
5578 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5579 " address configuration as before",
5581 if existing_node.primary_ip != primary_ip:
5582 self.changed_primary_ip = True
5586 if (existing_node.primary_ip == primary_ip or
5587 existing_node.secondary_ip == primary_ip or
5588 existing_node.primary_ip == secondary_ip or
5589 existing_node.secondary_ip == secondary_ip):
5590 raise errors.OpPrereqError("New node ip address(es) conflict with"
5591 " existing node %s" % existing_node.name,
5592 errors.ECODE_NOTUNIQUE)
5594 # After this 'if' block, None is no longer a valid value for the
5595 # _capable op attributes
5597 old_node = self.cfg.GetNodeInfo(node)
5598 assert old_node is not None, "Can't retrieve locked node %s" % node
5599 for attr in self._NFLAGS:
5600 if getattr(self.op, attr) is None:
5601 setattr(self.op, attr, getattr(old_node, attr))
5603 for attr in self._NFLAGS:
5604 if getattr(self.op, attr) is None:
5605 setattr(self.op, attr, True)
5607 if self.op.readd and not self.op.vm_capable:
5608 pri, sec = cfg.GetNodeInstances(node)
5610 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5611 " flag set to false, but it already holds"
5612 " instances" % node,
5615 # check that the type of the node (single versus dual homed) is the
5616 # same as for the master
5617 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5618 master_singlehomed = myself.secondary_ip == myself.primary_ip
5619 newbie_singlehomed = secondary_ip == primary_ip
5620 if master_singlehomed != newbie_singlehomed:
5621 if master_singlehomed:
5622 raise errors.OpPrereqError("The master has no secondary ip but the"
5623 " new node has one",
5626 raise errors.OpPrereqError("The master has a secondary ip but the"
5627 " new node doesn't have one",
5630 # checks reachability
5631 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5632 raise errors.OpPrereqError("Node not reachable by ping",
5633 errors.ECODE_ENVIRON)
5635 if not newbie_singlehomed:
5636 # check reachability from my secondary ip to newbie's secondary ip
5637 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5638 source=myself.secondary_ip):
5639 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5640 " based ping to node daemon port",
5641 errors.ECODE_ENVIRON)
5648 if self.op.master_capable:
5649 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5651 self.master_candidate = False
5654 self.new_node = old_node
5656 node_group = cfg.LookupNodeGroup(self.op.group)
5657 self.new_node = objects.Node(name=node,
5658 primary_ip=primary_ip,
5659 secondary_ip=secondary_ip,
5660 master_candidate=self.master_candidate,
5661 offline=False, drained=False,
5664 if self.op.ndparams:
5665 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5667 if self.op.hv_state:
5668 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5670 if self.op.disk_state:
5671 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5673 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5674 # it a property on the base class.
5675 result = rpc.DnsOnlyRunner().call_version([node])[node]
5676 result.Raise("Can't get version information from node %s" % node)
5677 if constants.PROTOCOL_VERSION == result.payload:
5678 logging.info("Communication to node %s fine, sw version %s match",
5679 node, result.payload)
5681 raise errors.OpPrereqError("Version mismatch master version %s,"
5682 " node version %s" %
5683 (constants.PROTOCOL_VERSION, result.payload),
5684 errors.ECODE_ENVIRON)
5686 def Exec(self, feedback_fn):
5687 """Adds the new node to the cluster.
5690 new_node = self.new_node
5691 node = new_node.name
5693 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5696 # We are adding a new node, so we assume it is powered
5697 new_node.powered = True
5699 # for re-adds, reset the offline/drained/master-candidate flags;
5700 # we need to reset here, otherwise offline would prevent RPC calls
5701 # later in the procedure; this also means that if the re-add
5702 # fails, we are left with a non-offlined, broken node
5704 new_node.drained = new_node.offline = False # pylint: disable=W0201
5705 self.LogInfo("Readding a node, the offline/drained flags were reset")
5706 # if we demote the node, we do cleanup later in the procedure
5707 new_node.master_candidate = self.master_candidate
5708 if self.changed_primary_ip:
5709 new_node.primary_ip = self.op.primary_ip
5711 # copy the master/vm_capable flags
5712 for attr in self._NFLAGS:
5713 setattr(new_node, attr, getattr(self.op, attr))
5715 # notify the user about any possible mc promotion
5716 if new_node.master_candidate:
5717 self.LogInfo("Node will be a master candidate")
5719 if self.op.ndparams:
5720 new_node.ndparams = self.op.ndparams
5722 new_node.ndparams = {}
5724 if self.op.hv_state:
5725 new_node.hv_state_static = self.new_hv_state
5727 if self.op.disk_state:
5728 new_node.disk_state_static = self.new_disk_state
5730 # Add node to our /etc/hosts, and add key to known_hosts
5731 if self.cfg.GetClusterInfo().modify_etc_hosts:
5732 master_node = self.cfg.GetMasterNode()
5733 result = self.rpc.call_etc_hosts_modify(master_node,
5734 constants.ETC_HOSTS_ADD,
5737 result.Raise("Can't update hosts file with new host data")
5739 if new_node.secondary_ip != new_node.primary_ip:
5740 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5743 node_verify_list = [self.cfg.GetMasterNode()]
5744 node_verify_param = {
5745 constants.NV_NODELIST: ([node], {}),
5746 # TODO: do a node-net-test as well?
5749 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5750 self.cfg.GetClusterName())
5751 for verifier in node_verify_list:
5752 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5753 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5755 for failed in nl_payload:
5756 feedback_fn("ssh/hostname verification failed"
5757 " (checking from %s): %s" %
5758 (verifier, nl_payload[failed]))
5759 raise errors.OpExecError("ssh/hostname verification failed")
5762 _RedistributeAncillaryFiles(self)
5763 self.context.ReaddNode(new_node)
5764 # make sure we redistribute the config
5765 self.cfg.Update(new_node, feedback_fn)
5766 # and make sure the new node will not have old files around
5767 if not new_node.master_candidate:
5768 result = self.rpc.call_node_demote_from_mc(new_node.name)
5769 msg = result.fail_msg
5771 self.LogWarning("Node failed to demote itself from master"
5772 " candidate status: %s" % msg)
5774 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5775 additional_vm=self.op.vm_capable)
5776 self.context.AddNode(new_node, self.proc.GetECId())
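# Illustrative sketch (helper and field names assumed, not used below): the
# LU above is driven by an OpNodeAdd opcode submitted through the client
# layer, roughly along the lines of:
#
#   op = opcodes.OpNodeAdd(node_name="node4.example.com", group="default",
#                          readd=False, master_capable=True, vm_capable=True)
#   cli.SubmitOpCode(op)   # client helper assumed; normally run via "gnt-node add"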
5779 class LUNodeSetParams(LogicalUnit):
5780 """Modifies the parameters of a node.
5782 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5783 to the node role (as _ROLE_*)
5784 @cvar _R2F: a dictionary from node role to tuples of flags
5785 @cvar _FLAGS: a list of attribute names corresponding to the flags
5788 HPATH = "node-modify"
5789 HTYPE = constants.HTYPE_NODE
5791 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5793 (True, False, False): _ROLE_CANDIDATE,
5794 (False, True, False): _ROLE_DRAINED,
5795 (False, False, True): _ROLE_OFFLINE,
5796 (False, False, False): _ROLE_REGULAR,
5798 _R2F = dict((v, k) for k, v in _F2R.items())
5799 _FLAGS = ["master_candidate", "drained", "offline"]
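  # Illustrative example of the mappings above (role constants as defined in
  # this class): a node with flags (master_candidate=False, drained=True,
  # offline=False) maps to _ROLE_DRAINED via _F2R, and _R2F yields the same
  # tuple back:
  #
  #   _F2R[(False, True, False)] == _ROLE_DRAINED
  #   _R2F[_ROLE_DRAINED] == (False, True, False)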
5801 def CheckArguments(self):
5802 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5803 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5804 self.op.master_capable, self.op.vm_capable,
5805 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5807 if all_mods.count(None) == len(all_mods):
5808 raise errors.OpPrereqError("Please pass at least one modification",
5810 if all_mods.count(True) > 1:
5811 raise errors.OpPrereqError("Can't set the node into more than one"
5812 " state at the same time",
5815 # Boolean value that tells us whether we might be demoting from MC
5816 self.might_demote = (self.op.master_candidate == False or
5817 self.op.offline == True or
5818 self.op.drained == True or
5819 self.op.master_capable == False)
5821 if self.op.secondary_ip:
5822 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5823 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5824 " address" % self.op.secondary_ip,
5827 self.lock_all = self.op.auto_promote and self.might_demote
5828 self.lock_instances = self.op.secondary_ip is not None
5830 def _InstanceFilter(self, instance):
5831 """Filter for getting affected instances.
5834 return (instance.disk_template in constants.DTS_INT_MIRROR and
5835 self.op.node_name in instance.all_nodes)
5837 def ExpandNames(self):
5839 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5841 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5843 # Since modifying a node can have severe effects on currently running
5844 # operations the resource lock is at least acquired in shared mode
5845 self.needed_locks[locking.LEVEL_NODE_RES] = \
5846 self.needed_locks[locking.LEVEL_NODE]
5848 # Get node resource and instance locks in shared mode; they are not used
5849 # for anything but read-only access
5850 self.share_locks[locking.LEVEL_NODE_RES] = 1
5851 self.share_locks[locking.LEVEL_INSTANCE] = 1
5853 if self.lock_instances:
5854 self.needed_locks[locking.LEVEL_INSTANCE] = \
5855 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5857 def BuildHooksEnv(self):
5860 This runs on the master node.
5864 "OP_TARGET": self.op.node_name,
5865 "MASTER_CANDIDATE": str(self.op.master_candidate),
5866 "OFFLINE": str(self.op.offline),
5867 "DRAINED": str(self.op.drained),
5868 "MASTER_CAPABLE": str(self.op.master_capable),
5869 "VM_CAPABLE": str(self.op.vm_capable),
5872 def BuildHooksNodes(self):
5873 """Build hooks nodes.
5876 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5879 def CheckPrereq(self):
5880 """Check prerequisites.
5882 This checks the requested modifications against the node's and cluster's current state.
5885 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5887 if self.lock_instances:
5888 affected_instances = \
5889 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5891 # Verify instance locks
5892 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5893 wanted_instances = frozenset(affected_instances.keys())
5894 if wanted_instances - owned_instances:
5895 raise errors.OpPrereqError("Instances affected by changing node %s's"
5896 " secondary IP address have changed since"
5897 " locks were acquired, wanted '%s', have"
5898 " '%s'; retry the operation" %
5900 utils.CommaJoin(wanted_instances),
5901 utils.CommaJoin(owned_instances)),
5904 affected_instances = None
5906 if (self.op.master_candidate is not None or
5907 self.op.drained is not None or
5908 self.op.offline is not None):
5909 # we can't change the master's node flags
5910 if self.op.node_name == self.cfg.GetMasterNode():
5911 raise errors.OpPrereqError("The master role can be changed"
5912 " only via master-failover",
5915 if self.op.master_candidate and not node.master_capable:
5916 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5917 " it a master candidate" % node.name,
5920 if self.op.vm_capable == False:
5921 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5923 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5924 " the vm_capable flag" % node.name,
5927 if node.master_candidate and self.might_demote and not self.lock_all:
5928 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5929 # check if, after removing the current node, we would be missing master candidates
5931 (mc_remaining, mc_should, _) = \
5932 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5933 if mc_remaining < mc_should:
5934 raise errors.OpPrereqError("Not enough master candidates, please"
5935 " pass auto promote option to allow"
5936 " promotion (--auto-promote or RAPI"
5937 " auto_promote=True)", errors.ECODE_STATE)
5939 self.old_flags = old_flags = (node.master_candidate,
5940 node.drained, node.offline)
5941 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5942 self.old_role = old_role = self._F2R[old_flags]
5944 # Check for ineffective changes
5945 for attr in self._FLAGS:
5946 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5947 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5948 setattr(self.op, attr, None)
5950 # Past this point, any flag change to False means a transition
5951 # away from the respective state, as only real changes are kept
5953 # TODO: We might query the real power state if it supports OOB
5954 if _SupportsOob(self.cfg, node):
5955 if self.op.offline is False and not (node.powered or
5956 self.op.powered == True):
5957 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5958 " offline status can be reset") %
5960 elif self.op.powered is not None:
5961 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5962 " as it does not support out-of-band"
5963 " handling") % self.op.node_name)
5965 # If we're being de-offlined or un-drained, promote ourselves to master candidate if needed
5966 if (self.op.drained == False or self.op.offline == False or
5967 (self.op.master_capable and not node.master_capable)):
5968 if _DecideSelfPromotion(self):
5969 self.op.master_candidate = True
5970 self.LogInfo("Auto-promoting node to master candidate")
5972 # If we're no longer master capable, we'll demote ourselves from MC
5973 if self.op.master_capable == False and node.master_candidate:
5974 self.LogInfo("Demoting from master candidate")
5975 self.op.master_candidate = False
5978 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5979 if self.op.master_candidate:
5980 new_role = self._ROLE_CANDIDATE
5981 elif self.op.drained:
5982 new_role = self._ROLE_DRAINED
5983 elif self.op.offline:
5984 new_role = self._ROLE_OFFLINE
5985 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5986 # False is still in the new flags, which means we're un-setting (clearing) the current state
5988 new_role = self._ROLE_REGULAR
5989 else: # no new flags, nothing, keep old role
5992 self.new_role = new_role
5994 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5995 # Trying to transition out of offline status
5996 result = self.rpc.call_version([node.name])[node.name]
5998 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5999 " to report its version: %s" %
6000 (node.name, result.fail_msg),
6003 self.LogWarning("Transitioning node from offline to online state"
6004 " without using re-add. Please make sure the node"
6007 if self.op.secondary_ip:
6008 # Ok even without locking, because this can't be changed by any LU
6009 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6010 master_singlehomed = master.secondary_ip == master.primary_ip
6011 if master_singlehomed and self.op.secondary_ip:
6012 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6013 " homed cluster", errors.ECODE_INVAL)
6015 assert not (frozenset(affected_instances) -
6016 self.owned_locks(locking.LEVEL_INSTANCE))
6019 if affected_instances:
6020 raise errors.OpPrereqError("Cannot change secondary IP address:"
6021 " offline node has instances (%s)"
6022 " configured to use it" %
6023 utils.CommaJoin(affected_instances.keys()))
6025 # On online nodes, check that no instances are running, and that
6026 # the node has the new ip and we can reach it.
6027 for instance in affected_instances.values():
6028 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6029 msg="cannot change secondary ip")
6031 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6032 if master.name != node.name:
6033 # check reachability from master secondary ip to new secondary ip
6034 if not netutils.TcpPing(self.op.secondary_ip,
6035 constants.DEFAULT_NODED_PORT,
6036 source=master.secondary_ip):
6037 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6038 " based ping to node daemon port",
6039 errors.ECODE_ENVIRON)
6041 if self.op.ndparams:
6042 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6043 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6044 self.new_ndparams = new_ndparams
6046 if self.op.hv_state:
6047 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6048 self.node.hv_state_static)
6050 if self.op.disk_state:
6051 self.new_disk_state = \
6052 _MergeAndVerifyDiskState(self.op.disk_state,
6053 self.node.disk_state_static)
6055 def Exec(self, feedback_fn):
6060 old_role = self.old_role
6061 new_role = self.new_role
6065 if self.op.ndparams:
6066 node.ndparams = self.new_ndparams
6068 if self.op.powered is not None:
6069 node.powered = self.op.powered
6071 if self.op.hv_state:
6072 node.hv_state_static = self.new_hv_state
6074 if self.op.disk_state:
6075 node.disk_state_static = self.new_disk_state
6077 for attr in ["master_capable", "vm_capable"]:
6078 val = getattr(self.op, attr)
6080 setattr(node, attr, val)
6081 result.append((attr, str(val)))
6083 if new_role != old_role:
6084 # Tell the node to demote itself, if no longer MC and not offline
6085 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6086 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6088 self.LogWarning("Node failed to demote itself: %s", msg)
6090 new_flags = self._R2F[new_role]
6091 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6093 result.append((desc, str(nf)))
6094 (node.master_candidate, node.drained, node.offline) = new_flags
6096 # we locked all nodes, so we adjust the candidate pool (CP) before updating this node
6098 _AdjustCandidatePool(self, [node.name])
6100 if self.op.secondary_ip:
6101 node.secondary_ip = self.op.secondary_ip
6102 result.append(("secondary_ip", self.op.secondary_ip))
6104 # this will trigger configuration file update, if needed
6105 self.cfg.Update(node, feedback_fn)
6107 # this will trigger job queue propagation or cleanup if the master candidate flag changed (i.e. exactly one of the old/new roles is candidate)
6109 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6110 self.context.ReaddNode(node)
6115 class LUNodePowercycle(NoHooksLU):
6116 """Powercycles a node.
6121 def CheckArguments(self):
6122 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6123 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6124 raise errors.OpPrereqError("The node is the master and the force"
6125 " parameter was not set",
6128 def ExpandNames(self):
6129 """Locking for PowercycleNode.
6131 This is a last-resort option and shouldn't block on other
6132 jobs. Therefore, we grab no locks.
6135 self.needed_locks = {}
6137 def Exec(self, feedback_fn):
6141 result = self.rpc.call_node_powercycle(self.op.node_name,
6142 self.cfg.GetHypervisorType())
6143 result.Raise("Failed to schedule the reboot")
6144 return result.payload
6147 class LUClusterQuery(NoHooksLU):
6148 """Query cluster configuration.
6153 def ExpandNames(self):
6154 self.needed_locks = {}
6156 def Exec(self, feedback_fn):
6157 """Return cluster config.
6160 cluster = self.cfg.GetClusterInfo()
6163 # Filter just for enabled hypervisors
6164 for os_name, hv_dict in cluster.os_hvp.items():
6165 os_hvp[os_name] = {}
6166 for hv_name, hv_params in hv_dict.items():
6167 if hv_name in cluster.enabled_hypervisors:
6168 os_hvp[os_name][hv_name] = hv_params
6170 # Convert ip_family to ip_version
6171 primary_ip_version = constants.IP4_VERSION
6172 if cluster.primary_ip_family == netutils.IP6Address.family:
6173 primary_ip_version = constants.IP6_VERSION
6176 "software_version": constants.RELEASE_VERSION,
6177 "protocol_version": constants.PROTOCOL_VERSION,
6178 "config_version": constants.CONFIG_VERSION,
6179 "os_api_version": max(constants.OS_API_VERSIONS),
6180 "export_version": constants.EXPORT_VERSION,
6181 "architecture": runtime.GetArchInfo(),
6182 "name": cluster.cluster_name,
6183 "master": cluster.master_node,
6184 "default_hypervisor": cluster.primary_hypervisor,
6185 "enabled_hypervisors": cluster.enabled_hypervisors,
6186 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6187 for hypervisor_name in cluster.enabled_hypervisors]),
6189 "beparams": cluster.beparams,
6190 "osparams": cluster.osparams,
6191 "ipolicy": cluster.ipolicy,
6192 "nicparams": cluster.nicparams,
6193 "ndparams": cluster.ndparams,
6194 "diskparams": cluster.diskparams,
6195 "candidate_pool_size": cluster.candidate_pool_size,
6196 "master_netdev": cluster.master_netdev,
6197 "master_netmask": cluster.master_netmask,
6198 "use_external_mip_script": cluster.use_external_mip_script,
6199 "volume_group_name": cluster.volume_group_name,
6200 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6201 "file_storage_dir": cluster.file_storage_dir,
6202 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6203 "maintain_node_health": cluster.maintain_node_health,
6204 "ctime": cluster.ctime,
6205 "mtime": cluster.mtime,
6206 "uuid": cluster.uuid,
6207 "tags": list(cluster.GetTags()),
6208 "uid_pool": cluster.uid_pool,
6209 "default_iallocator": cluster.default_iallocator,
6210 "reserved_lvs": cluster.reserved_lvs,
6211 "primary_ip_version": primary_ip_version,
6212 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6213 "hidden_os": cluster.hidden_os,
6214 "blacklisted_os": cluster.blacklisted_os,
6220 class LUClusterConfigQuery(NoHooksLU):
6221 """Return configuration values.
6226 def CheckArguments(self):
6227 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6229 def ExpandNames(self):
6230 self.cq.ExpandNames(self)
6232 def DeclareLocks(self, level):
6233 self.cq.DeclareLocks(self, level)
6235 def Exec(self, feedback_fn):
6236 result = self.cq.OldStyleQuery(self)
6238 assert len(result) == 1
6243 class _ClusterQuery(_QueryBase):
6244 FIELDS = query.CLUSTER_FIELDS
6246 #: Do not sort (there is only one item)
6249 def ExpandNames(self, lu):
6250 lu.needed_locks = {}
6252 # The following variables interact with _QueryBase._GetNames
6253 self.wanted = locking.ALL_SET
6254 self.do_locking = self.use_locking
6257 raise errors.OpPrereqError("Can not use locking for cluster queries",
6260 def DeclareLocks(self, lu, level):
6263 def _GetQueryData(self, lu):
6264 """Computes the list of nodes and their attributes.
6267 # Locking is not used
6268 assert not (compat.any(lu.glm.is_owned(level)
6269 for level in locking.LEVELS
6270 if level != locking.LEVEL_CLUSTER) or
6271 self.do_locking or self.use_locking)
6273 if query.CQ_CONFIG in self.requested_data:
6274 cluster = lu.cfg.GetClusterInfo()
6276 cluster = NotImplemented
6278 if query.CQ_QUEUE_DRAINED in self.requested_data:
6279 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6281 drain_flag = NotImplemented
6283 if query.CQ_WATCHER_PAUSE in self.requested_data:
6284 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6286 watcher_pause = NotImplemented
6288 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6291 class LUInstanceActivateDisks(NoHooksLU):
6292 """Bring up an instance's disks.
6297 def ExpandNames(self):
6298 self._ExpandAndLockInstance()
6299 self.needed_locks[locking.LEVEL_NODE] = []
6300 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6302 def DeclareLocks(self, level):
6303 if level == locking.LEVEL_NODE:
6304 self._LockInstancesNodes()
6306 def CheckPrereq(self):
6307 """Check prerequisites.
6309 This checks that the instance is in the cluster.
6312 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6313 assert self.instance is not None, \
6314 "Cannot retrieve locked instance %s" % self.op.instance_name
6315 _CheckNodeOnline(self, self.instance.primary_node)
6317 def Exec(self, feedback_fn):
6318 """Activate the disks.
6321 disks_ok, disks_info = \
6322 _AssembleInstanceDisks(self, self.instance,
6323 ignore_size=self.op.ignore_size)
6325 raise errors.OpExecError("Cannot activate block devices")
6330 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6331 ignore_size=False, check=True):
6332 """Prepare the block devices for an instance.
6334 This sets up the block devices on all nodes.
6336 @type lu: L{LogicalUnit}
6337 @param lu: the logical unit on whose behalf we execute
6338 @type instance: L{objects.Instance}
6339 @param instance: the instance for whose disks we assemble
6340 @type disks: list of L{objects.Disk} or None
6341 @param disks: which disks to assemble (or all, if None)
6342 @type ignore_secondaries: boolean
6343 @param ignore_secondaries: if true, errors on secondary nodes
6344 won't result in an error return from the function
6345 @type ignore_size: boolean
6346 @param ignore_size: if true, the current known size of the disk
6347 will not be used during the disk activation, useful for cases
6348 when the size is wrong
6349 @return: a tuple (disks_ok, device_info); disks_ok is False if the operation failed,
6350     and device_info is a list of (node_name, instance_visible_name, node_device_path)
6351     tuples mapping instance devices to their device paths on the primary node
6356 iname = instance.name
6358 disks = _ExpandCheckDisks(instance, disks)
6360 # With the two-pass mechanism we try to reduce the window of
6361 # opportunity for the race condition of switching DRBD to primary
6362 # before the handshake has occurred, but we do not eliminate it
6364 # The proper fix would be to wait (with some limits) until the
6365 # connection has been made and drbd transitions from WFConnection
6366 # into any other network-connected state (Connected, SyncTarget,
6369 # 1st pass, assemble on all nodes in secondary mode
6370 for idx, inst_disk in enumerate(disks):
6371 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6373 node_disk = node_disk.Copy()
6374 node_disk.UnsetSize()
6375 lu.cfg.SetDiskID(node_disk, node)
6376 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6378 msg = result.fail_msg
6380 is_offline_secondary = (node in instance.secondary_nodes and
6382 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6383 " (is_primary=False, pass=1): %s",
6384 inst_disk.iv_name, node, msg)
6385 if not (ignore_secondaries or is_offline_secondary):
6388 # FIXME: race condition on drbd migration to primary
6390 # 2nd pass, do only the primary node
6391 for idx, inst_disk in enumerate(disks):
6394 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6395 if node != instance.primary_node:
6398 node_disk = node_disk.Copy()
6399 node_disk.UnsetSize()
6400 lu.cfg.SetDiskID(node_disk, node)
6401 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6403 msg = result.fail_msg
6405 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6406 " (is_primary=True, pass=2): %s",
6407 inst_disk.iv_name, node, msg)
6410 dev_path = result.payload
6412 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6414 # leave the disks configured for the primary node
6415 # this is a workaround that would be fixed better by
6416 # improving the logical/physical id handling
6418 lu.cfg.SetDiskID(disk, instance.primary_node)
6420 return disks_ok, device_info
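# Illustrative example of the return value (host and device names assumed):
# for a DRBD instance with a single disk and primary node "node1.example.com",
# a successful call returns something like
#
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
#
# i.e. disks_ok plus one (node, instance-visible name, node device path)
# entry per disk, collected during the primary-node pass above.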
6423 def _StartInstanceDisks(lu, instance, force):
6424 """Start the disks of an instance.
6427 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6428 ignore_secondaries=force)
6430 _ShutdownInstanceDisks(lu, instance)
6431 if force is not None and not force:
6432 lu.proc.LogWarning("", hint="If the message above refers to a"
6434 " you can retry the operation using '--force'.")
6435 raise errors.OpExecError("Disk consistency error")
6438 class LUInstanceDeactivateDisks(NoHooksLU):
6439 """Shutdown an instance's disks.
6444 def ExpandNames(self):
6445 self._ExpandAndLockInstance()
6446 self.needed_locks[locking.LEVEL_NODE] = []
6447 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6449 def DeclareLocks(self, level):
6450 if level == locking.LEVEL_NODE:
6451 self._LockInstancesNodes()
6453 def CheckPrereq(self):
6454 """Check prerequisites.
6456 This checks that the instance is in the cluster.
6459 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6460 assert self.instance is not None, \
6461 "Cannot retrieve locked instance %s" % self.op.instance_name
6463 def Exec(self, feedback_fn):
6464 """Deactivate the disks
6467 instance = self.instance
6469 _ShutdownInstanceDisks(self, instance)
6471 _SafeShutdownInstanceDisks(self, instance)
6474 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6475 """Shutdown block devices of an instance.
6477 This function checks that the instance is marked down before calling
6478 _ShutdownInstanceDisks.
6481 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6482 _ShutdownInstanceDisks(lu, instance, disks=disks)
6485 def _ExpandCheckDisks(instance, disks):
6486 """Return the instance disks selected by the disks list
6488 @type disks: list of L{objects.Disk} or None
6489 @param disks: selected disks
6490 @rtype: list of L{objects.Disk}
6491 @return: selected instance disks to act on
6495 return instance.disks
6497 if not set(disks).issubset(instance.disks):
6498 raise errors.ProgrammerError("Can only act on disks belonging to the"
6503 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6504 """Shutdown block devices of an instance.
6506 This does the shutdown on all nodes of the instance.
6508 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed.
6513 disks = _ExpandCheckDisks(instance, disks)
6516 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6517 lu.cfg.SetDiskID(top_disk, node)
6518 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6519 msg = result.fail_msg
6521 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6522 disk.iv_name, node, msg)
6523 if ((node == instance.primary_node and not ignore_primary) or
6524 (node != instance.primary_node and not result.offline)):
6529 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6530 """Checks if a node has enough free memory.
6532 This function checks if a given node has the needed amount of free
6533 memory. If the node has less memory, or if we cannot obtain the
6534 information from the node, this function raises an OpPrereqError
6537 @type lu: C{LogicalUnit}
6538 @param lu: a logical unit from which we get configuration data
6540 @param node: the node to check
6541 @type reason: C{str}
6542 @param reason: string to use in the error message
6543 @type requested: C{int}
6544 @param requested: the amount of memory in MiB to check for
6545 @type hypervisor_name: C{str}
6546 @param hypervisor_name: the hypervisor to ask for memory stats
6548 @return: node current free memory
6549 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6550 we cannot check the node
6553 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6554 nodeinfo[node].Raise("Can't get data from node %s" % node,
6555 prereq=True, ecode=errors.ECODE_ENVIRON)
6556 (_, _, (hv_info, )) = nodeinfo[node].payload
6558 free_mem = hv_info.get("memory_free", None)
6559 if not isinstance(free_mem, int):
6560 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6561 " was '%s'" % (node, free_mem),
6562 errors.ECODE_ENVIRON)
6563 if requested > free_mem:
6564 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6565 " needed %s MiB, available %s MiB" %
6566 (node, reason, requested, free_mem),
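# Illustrative usage sketch (the 1024 MiB figure is assumed): an LU wanting
# to make sure a node can accommodate another 1024 MiB before starting an
# instance would call
#
#   free = _CheckNodeFreeMemory(self, instance.primary_node,
#                               "starting instance %s" % instance.name,
#                               1024, instance.hypervisor)
#
# and get back the node's current free memory, or an OpPrereqError if the
# check fails; see LUInstanceStartup.CheckPrereq for the real call site.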
6571 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6572 """Checks if nodes have enough free disk space in all the VGs.
6574 This function checks if all given nodes have the needed amount of
6575 free disk. If any node has less disk space, or if we cannot obtain
6576 the information from the node, this function raises an OpPrereqError
6579 @type lu: C{LogicalUnit}
6580 @param lu: a logical unit from which we get configuration data
6581 @type nodenames: C{list}
6582 @param nodenames: the list of node names to check
6583 @type req_sizes: C{dict}
6584 @param req_sizes: the hash of vg and corresponding amount of disk in
6586 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6587 or we cannot check the node
6590 for vg, req_size in req_sizes.items():
6591 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
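# Illustrative example of the req_sizes argument (VG names and sizes
# assumed): requesting 10 GiB in "xenvg" and 512 MiB in "metavg" is passed
# as a dict of MiB values per volume group, e.g.
#
#   _CheckNodesFreeDiskPerVG(self, [pnode, snode],
#                            {"xenvg": 10240, "metavg": 512})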
6594 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6595 """Checks if nodes have enough free disk space in the specified VG.
6597 This function checks if all given nodes have the needed amount of
6598 free disk. If any node has less disk space, or if we cannot obtain
6599 the information from the node, this function raises an OpPrereqError
6602 @type lu: C{LogicalUnit}
6603 @param lu: a logical unit from which we get configuration data
6604 @type nodenames: C{list}
6605 @param nodenames: the list of node names to check
6607 @param vg: the volume group to check
6608 @type requested: C{int}
6609 @param requested: the amount of disk in MiB to check for
6610 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6611 or we cannot check the node
6614 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6615 for node in nodenames:
6616 info = nodeinfo[node]
6617 info.Raise("Cannot get current information from node %s" % node,
6618 prereq=True, ecode=errors.ECODE_ENVIRON)
6619 (_, (vg_info, ), _) = info.payload
6620 vg_free = vg_info.get("vg_free", None)
6621 if not isinstance(vg_free, int):
6622 raise errors.OpPrereqError("Can't compute free disk space on node"
6623 " %s for vg %s, result was '%s'" %
6624 (node, vg, vg_free), errors.ECODE_ENVIRON)
6625 if requested > vg_free:
6626 raise errors.OpPrereqError("Not enough disk space on target node %s"
6627 " vg %s: required %d MiB, available %d MiB" %
6628 (node, vg, requested, vg_free),
6632 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6633 """Checks if nodes have enough physical CPUs
6635 This function checks if all given nodes have the needed number of
6636 physical CPUs. If any node has fewer CPUs, or if we cannot obtain the
6637 information from the node, this function raises an OpPrereqError
6640 @type lu: C{LogicalUnit}
6641 @param lu: a logical unit from which we get configuration data
6642 @type nodenames: C{list}
6643 @param nodenames: the list of node names to check
6644 @type requested: C{int}
6645 @param requested: the minimum acceptable number of physical CPUs
6646 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6647 or we cannot check the node
6650 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6651 for node in nodenames:
6652 info = nodeinfo[node]
6653 info.Raise("Cannot get current information from node %s" % node,
6654 prereq=True, ecode=errors.ECODE_ENVIRON)
6655 (_, _, (hv_info, )) = info.payload
6656 num_cpus = hv_info.get("cpu_total", None)
6657 if not isinstance(num_cpus, int):
6658 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6659 " on node %s, result was '%s'" %
6660 (node, num_cpus), errors.ECODE_ENVIRON)
6661 if requested > num_cpus:
6662 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6663 "required" % (node, num_cpus, requested),
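# Illustrative usage sketch (the CPU count is assumed): requiring at least
# four physical CPUs on every candidate node for a given hypervisor would be
#
#   _CheckNodesPhysicalCPUs(self, nodenames, 4, instance.hypervisor)
#
# and any node reporting fewer CPUs (or failing to report) raises an
# OpPrereqError as described above.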
6667 class LUInstanceStartup(LogicalUnit):
6668 """Starts an instance.
6671 HPATH = "instance-start"
6672 HTYPE = constants.HTYPE_INSTANCE
6675 def CheckArguments(self):
6677 if self.op.beparams:
6678 # fill the beparams dict
6679 objects.UpgradeBeParams(self.op.beparams)
6680 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6682 def ExpandNames(self):
6683 self._ExpandAndLockInstance()
6684 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6686 def DeclareLocks(self, level):
6687 if level == locking.LEVEL_NODE_RES:
6688 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6690 def BuildHooksEnv(self):
6693 This runs on master, primary and secondary nodes of the instance.
6697 "FORCE": self.op.force,
6700 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6704 def BuildHooksNodes(self):
6705 """Build hooks nodes.
6708 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6711 def CheckPrereq(self):
6712 """Check prerequisites.
6714 This checks that the instance is in the cluster.
6717 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6718 assert self.instance is not None, \
6719 "Cannot retrieve locked instance %s" % self.op.instance_name
6722 if self.op.hvparams:
6723 # check hypervisor parameter syntax (locally)
6724 cluster = self.cfg.GetClusterInfo()
6725 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6726 filled_hvp = cluster.FillHV(instance)
6727 filled_hvp.update(self.op.hvparams)
6728 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6729 hv_type.CheckParameterSyntax(filled_hvp)
6730 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6732 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6734 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6736 if self.primary_offline and self.op.ignore_offline_nodes:
6737 self.proc.LogWarning("Ignoring offline primary node")
6739 if self.op.hvparams or self.op.beparams:
6740 self.proc.LogWarning("Overridden parameters are ignored")
6742 _CheckNodeOnline(self, instance.primary_node)
6744 bep = self.cfg.GetClusterInfo().FillBE(instance)
6745 bep.update(self.op.beparams)
6747 # check bridges existence
6748 _CheckInstanceBridgesExist(self, instance)
6750 remote_info = self.rpc.call_instance_info(instance.primary_node,
6752 instance.hypervisor)
6753 remote_info.Raise("Error checking node %s" % instance.primary_node,
6754 prereq=True, ecode=errors.ECODE_ENVIRON)
6755 if not remote_info.payload: # not running already
6756 _CheckNodeFreeMemory(self, instance.primary_node,
6757 "starting instance %s" % instance.name,
6758 bep[constants.BE_MINMEM], instance.hypervisor)
6760 def Exec(self, feedback_fn):
6761 """Start the instance.
6764 instance = self.instance
6765 force = self.op.force
6767 if not self.op.no_remember:
6768 self.cfg.MarkInstanceUp(instance.name)
6770 if self.primary_offline:
6771 assert self.op.ignore_offline_nodes
6772 self.proc.LogInfo("Primary node offline, marked instance as started")
6774 node_current = instance.primary_node
6776 _StartInstanceDisks(self, instance, force)
6779 self.rpc.call_instance_start(node_current,
6780 (instance, self.op.hvparams,
6782 self.op.startup_paused)
6783 msg = result.fail_msg
6785 _ShutdownInstanceDisks(self, instance)
6786 raise errors.OpExecError("Could not start instance: %s" % msg)
6789 class LUInstanceReboot(LogicalUnit):
6790 """Reboot an instance.
6793 HPATH = "instance-reboot"
6794 HTYPE = constants.HTYPE_INSTANCE
6797 def ExpandNames(self):
6798 self._ExpandAndLockInstance()
6800 def BuildHooksEnv(self):
6803 This runs on master, primary and secondary nodes of the instance.
6807 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6808 "REBOOT_TYPE": self.op.reboot_type,
6809 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6812 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6816 def BuildHooksNodes(self):
6817 """Build hooks nodes.
6820 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6823 def CheckPrereq(self):
6824 """Check prerequisites.
6826 This checks that the instance is in the cluster.
6829 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6830 assert self.instance is not None, \
6831 "Cannot retrieve locked instance %s" % self.op.instance_name
6832 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6833 _CheckNodeOnline(self, instance.primary_node)
6835 # check bridges existence
6836 _CheckInstanceBridgesExist(self, instance)
6838 def Exec(self, feedback_fn):
6839 """Reboot the instance.
6842 instance = self.instance
6843 ignore_secondaries = self.op.ignore_secondaries
6844 reboot_type = self.op.reboot_type
6846 remote_info = self.rpc.call_instance_info(instance.primary_node,
6848 instance.hypervisor)
6849 remote_info.Raise("Error checking node %s" % instance.primary_node)
6850 instance_running = bool(remote_info.payload)
6852 node_current = instance.primary_node
6854 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6855 constants.INSTANCE_REBOOT_HARD]:
6856 for disk in instance.disks:
6857 self.cfg.SetDiskID(disk, node_current)
6858 result = self.rpc.call_instance_reboot(node_current, instance,
6860 self.op.shutdown_timeout)
6861 result.Raise("Could not reboot instance")
6863 if instance_running:
6864 result = self.rpc.call_instance_shutdown(node_current, instance,
6865 self.op.shutdown_timeout)
6866 result.Raise("Could not shutdown instance for full reboot")
6867 _ShutdownInstanceDisks(self, instance)
6869 self.LogInfo("Instance %s was already stopped, starting now",
6871 _StartInstanceDisks(self, instance, ignore_secondaries)
6872 result = self.rpc.call_instance_start(node_current,
6873 (instance, None, None), False)
6874 msg = result.fail_msg
6876 _ShutdownInstanceDisks(self, instance)
6877 raise errors.OpExecError("Could not start instance for"
6878 " full reboot: %s" % msg)
6880 self.cfg.MarkInstanceUp(instance.name)
6883 class LUInstanceShutdown(LogicalUnit):
6884 """Shutdown an instance.
6887 HPATH = "instance-stop"
6888 HTYPE = constants.HTYPE_INSTANCE
6891 def ExpandNames(self):
6892 self._ExpandAndLockInstance()
6894 def BuildHooksEnv(self):
6897 This runs on master, primary and secondary nodes of the instance.
6900 env = _BuildInstanceHookEnvByObject(self, self.instance)
6901 env["TIMEOUT"] = self.op.timeout
6904 def BuildHooksNodes(self):
6905 """Build hooks nodes.
6908 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6911 def CheckPrereq(self):
6912 """Check prerequisites.
6914 This checks that the instance is in the cluster.
6917 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6918 assert self.instance is not None, \
6919 "Cannot retrieve locked instance %s" % self.op.instance_name
6921 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6923 self.primary_offline = \
6924 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6926 if self.primary_offline and self.op.ignore_offline_nodes:
6927 self.proc.LogWarning("Ignoring offline primary node")
6929 _CheckNodeOnline(self, self.instance.primary_node)
6931 def Exec(self, feedback_fn):
6932 """Shutdown the instance.
6935 instance = self.instance
6936 node_current = instance.primary_node
6937 timeout = self.op.timeout
6939 if not self.op.no_remember:
6940 self.cfg.MarkInstanceDown(instance.name)
6942 if self.primary_offline:
6943 assert self.op.ignore_offline_nodes
6944 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6946 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6947 msg = result.fail_msg
6949 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6951 _ShutdownInstanceDisks(self, instance)
6954 class LUInstanceReinstall(LogicalUnit):
6955 """Reinstall an instance.
6958 HPATH = "instance-reinstall"
6959 HTYPE = constants.HTYPE_INSTANCE
6962 def ExpandNames(self):
6963 self._ExpandAndLockInstance()
6965 def BuildHooksEnv(self):
6968 This runs on master, primary and secondary nodes of the instance.
6971 return _BuildInstanceHookEnvByObject(self, self.instance)
6973 def BuildHooksNodes(self):
6974 """Build hooks nodes.
6977 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6980 def CheckPrereq(self):
6981 """Check prerequisites.
6983 This checks that the instance is in the cluster and is not running.
6986 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6987 assert instance is not None, \
6988 "Cannot retrieve locked instance %s" % self.op.instance_name
6989 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6990 " offline, cannot reinstall")
6992 if instance.disk_template == constants.DT_DISKLESS:
6993 raise errors.OpPrereqError("Instance '%s' has no disks" %
6994 self.op.instance_name,
6996 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6998 if self.op.os_type is not None:
7000 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7001 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7002 instance_os = self.op.os_type
7004 instance_os = instance.os
7006 nodelist = list(instance.all_nodes)
7008 if self.op.osparams:
7009 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7010 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7011 self.os_inst = i_osdict # the new dict (without defaults)
7015 self.instance = instance
7017 def Exec(self, feedback_fn):
7018 """Reinstall the instance.
7021 inst = self.instance
7023 if self.op.os_type is not None:
7024 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7025 inst.os = self.op.os_type
7026 # Write to configuration
7027 self.cfg.Update(inst, feedback_fn)
7029 _StartInstanceDisks(self, inst, None)
7031 feedback_fn("Running the instance OS create scripts...")
7032 # FIXME: pass debug option from opcode to backend
7033 result = self.rpc.call_instance_os_add(inst.primary_node,
7034 (inst, self.os_inst), True,
7035 self.op.debug_level)
7036 result.Raise("Could not install OS for instance %s on node %s" %
7037 (inst.name, inst.primary_node))
7039 _ShutdownInstanceDisks(self, inst)
7042 class LUInstanceRecreateDisks(LogicalUnit):
7043 """Recreate an instance's missing disks.
7046 HPATH = "instance-recreate-disks"
7047 HTYPE = constants.HTYPE_INSTANCE
7050 _MODIFYABLE = frozenset([
7051 constants.IDISK_SIZE,
7052 constants.IDISK_MODE,
7055 # New or changed disk parameters may have different semantics
7056 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7057 constants.IDISK_ADOPT,
7059 # TODO: Implement support for changing the VG while recreating
7061 constants.IDISK_METAVG,
7064 def CheckArguments(self):
7065 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7066 # Normalize and convert deprecated list of disk indices
7067 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
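    # Illustrative example of the normalization above (input assumed): the
    # deprecated index list [2, 0] becomes [(0, {}), (2, {})], i.e. sorted,
    # de-duplicated (index, parameter-overrides) pairs with empty overrides.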
7069 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7071 raise errors.OpPrereqError("Some disks have been specified more than"
7072 " once: %s" % utils.CommaJoin(duplicates),
7075 for (idx, params) in self.op.disks:
7076 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7077 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7079 raise errors.OpPrereqError("Parameters for disk %s try to change"
7080 " unmodifiable parameter(s): %s" %
7081 (idx, utils.CommaJoin(unsupported)),
7084 def ExpandNames(self):
7085 self._ExpandAndLockInstance()
7086 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7088 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7089 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7091 self.needed_locks[locking.LEVEL_NODE] = []
7092 self.needed_locks[locking.LEVEL_NODE_RES] = []
7094 def DeclareLocks(self, level):
7095 if level == locking.LEVEL_NODE:
7096 # if we replace the nodes, we only need to lock the old primary,
7097 # otherwise we need to lock all nodes for disk re-creation
7098 primary_only = bool(self.op.nodes)
7099 self._LockInstancesNodes(primary_only=primary_only)
7100 elif level == locking.LEVEL_NODE_RES:
7102 self.needed_locks[locking.LEVEL_NODE_RES] = \
7103 self.needed_locks[locking.LEVEL_NODE][:]
7105 def BuildHooksEnv(self):
7108 This runs on master, primary and secondary nodes of the instance.
7111 return _BuildInstanceHookEnvByObject(self, self.instance)
7113 def BuildHooksNodes(self):
7114 """Build hooks nodes.
7117 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7120 def CheckPrereq(self):
7121 """Check prerequisites.
7123 This checks that the instance is in the cluster and is not running.
7126 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7127 assert instance is not None, \
7128 "Cannot retrieve locked instance %s" % self.op.instance_name
7130 if len(self.op.nodes) != len(instance.all_nodes):
7131 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7132 " %d replacement nodes were specified" %
7133 (instance.name, len(instance.all_nodes),
7134 len(self.op.nodes)),
7136 assert instance.disk_template != constants.DT_DRBD8 or \
7137 len(self.op.nodes) == 2
7138 assert instance.disk_template != constants.DT_PLAIN or \
7139 len(self.op.nodes) == 1
7140 primary_node = self.op.nodes[0]
7142 primary_node = instance.primary_node
7143 _CheckNodeOnline(self, primary_node)
7145 if instance.disk_template == constants.DT_DISKLESS:
7146 raise errors.OpPrereqError("Instance '%s' has no disks" %
7147 self.op.instance_name, errors.ECODE_INVAL)
7149 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7151 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7152 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7153 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7154 if not (self.op.nodes and old_pnode.offline):
7155 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7156 msg="cannot recreate disks")
7159 self.disks = dict(self.op.disks)
7161 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7163 maxidx = max(self.disks.keys())
7164 if maxidx >= len(instance.disks):
7165 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7168 if (self.op.nodes and
7169 sorted(self.disks.keys()) != range(len(instance.disks))):
7170 raise errors.OpPrereqError("Can't recreate disks partially and"
7171 " change the nodes at the same time",
7174 self.instance = instance
7176 def Exec(self, feedback_fn):
7177 """Recreate the disks.
7180 instance = self.instance
7182 assert (self.owned_locks(locking.LEVEL_NODE) ==
7183 self.owned_locks(locking.LEVEL_NODE_RES))
7186 mods = [] # keeps track of needed changes
7188 for idx, disk in enumerate(instance.disks):
7190 changes = self.disks[idx]
7192 # Disk should not be recreated
7196 # update secondaries for disks, if needed
7197 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7198 # need to update the nodes and minors
7199 assert len(self.op.nodes) == 2
7200 assert len(disk.logical_id) == 6 # otherwise disk internals have changed
7202 (_, _, old_port, _, _, old_secret) = disk.logical_id
7203 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7204 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7205 new_minors[0], new_minors[1], old_secret)
7206 assert len(disk.logical_id) == len(new_id)
7210 mods.append((idx, new_id, changes))
7212 # now that we have passed all asserts above, we can apply the mods
7213 # in a single run (to avoid partial changes)
7214 for idx, new_id, changes in mods:
7215 disk = instance.disks[idx]
7216 if new_id is not None:
7217 assert disk.dev_type == constants.LD_DRBD8
7218 disk.logical_id = new_id
7220 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7221 mode=changes.get(constants.IDISK_MODE, None))
7223 # change primary node, if needed
7225 instance.primary_node = self.op.nodes[0]
7226 self.LogWarning("Changing the instance's nodes, you will have to"
7227 " remove any disks left on the older nodes manually")
7230 self.cfg.Update(instance, feedback_fn)
7232 _CreateDisks(self, instance, to_skip=to_skip)
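# Illustrative example of the "mods" list built in Exec above (node names,
# port, minors and secret are assumed): recreating a DRBD disk on two new
# nodes could produce an entry such as
#
#   (0, ("node3.example.com", "node4.example.com", 11000, 2, 3, "secret"),
#    {"size": 10240})
#
# i.e. (disk index, new DRBD logical_id, requested changes), applied in a
# single pass afterwards to avoid leaving the configuration half-updated.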
7235 class LUInstanceRename(LogicalUnit):
7236 """Rename an instance.
7239 HPATH = "instance-rename"
7240 HTYPE = constants.HTYPE_INSTANCE
7242 def CheckArguments(self):
7246 if self.op.ip_check and not self.op.name_check:
7247 # TODO: make the ip check more flexible and not depend on the name check
7248 raise errors.OpPrereqError("IP address check requires a name check",
7251 def BuildHooksEnv(self):
7254 This runs on master, primary and secondary nodes of the instance.
7257 env = _BuildInstanceHookEnvByObject(self, self.instance)
7258 env["INSTANCE_NEW_NAME"] = self.op.new_name
7261 def BuildHooksNodes(self):
7262 """Build hooks nodes.
7265 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7268 def CheckPrereq(self):
7269 """Check prerequisites.
7271 This checks that the instance is in the cluster and is not running.
7274 self.op.instance_name = _ExpandInstanceName(self.cfg,
7275 self.op.instance_name)
7276 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7277 assert instance is not None
7278 _CheckNodeOnline(self, instance.primary_node)
7279 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7280 msg="cannot rename")
7281 self.instance = instance
7283 new_name = self.op.new_name
7284 if self.op.name_check:
7285 hostname = netutils.GetHostname(name=new_name)
7286 if hostname.name != new_name:
7287 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7289 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7290 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7291 " same as given hostname '%s'") %
7292 (hostname.name, self.op.new_name),
7294 new_name = self.op.new_name = hostname.name
7295 if (self.op.ip_check and
7296 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7297 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7298 (hostname.ip, new_name),
7299 errors.ECODE_NOTUNIQUE)
7301 instance_list = self.cfg.GetInstanceList()
7302 if new_name in instance_list and new_name != instance.name:
7303 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7304 new_name, errors.ECODE_EXISTS)
7306 def Exec(self, feedback_fn):
7307 """Rename the instance.
7310 inst = self.instance
7311 old_name = inst.name
7313 rename_file_storage = False
7314 if (inst.disk_template in constants.DTS_FILEBASED and
7315 self.op.new_name != inst.name):
7316 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7317 rename_file_storage = True
7319 self.cfg.RenameInstance(inst.name, self.op.new_name)
7320 # Change the instance lock. This is definitely safe while we hold the BGL.
7321 # Otherwise the new lock would have to be added in acquired mode.
7323 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7324 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7326 # re-read the instance from the configuration after rename
7327 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7329 if rename_file_storage:
7330 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7331 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7332 old_file_storage_dir,
7333 new_file_storage_dir)
7334 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7335 " (but the instance has been renamed in Ganeti)" %
7336 (inst.primary_node, old_file_storage_dir,
7337 new_file_storage_dir))
7339 _StartInstanceDisks(self, inst, None)
7341 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7342 old_name, self.op.debug_level)
7343 msg = result.fail_msg
7345 msg = ("Could not run OS rename script for instance %s on node %s"
7346 " (but the instance has been renamed in Ganeti): %s" %
7347 (inst.name, inst.primary_node, msg))
7348 self.proc.LogWarning(msg)
7350 _ShutdownInstanceDisks(self, inst)
7355 class LUInstanceRemove(LogicalUnit):
7356 """Remove an instance.
7359 HPATH = "instance-remove"
7360 HTYPE = constants.HTYPE_INSTANCE
7363 def ExpandNames(self):
7364 self._ExpandAndLockInstance()
7365 self.needed_locks[locking.LEVEL_NODE] = []
7366 self.needed_locks[locking.LEVEL_NODE_RES] = []
7367 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7369 def DeclareLocks(self, level):
7370 if level == locking.LEVEL_NODE:
7371 self._LockInstancesNodes()
7372 elif level == locking.LEVEL_NODE_RES:
7374 self.needed_locks[locking.LEVEL_NODE_RES] = \
7375 self.needed_locks[locking.LEVEL_NODE][:]
7377 def BuildHooksEnv(self):
7380 This runs on master, primary and secondary nodes of the instance.
7383 env = _BuildInstanceHookEnvByObject(self, self.instance)
7384 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7387 def BuildHooksNodes(self):
7388 """Build hooks nodes.
7391 nl = [self.cfg.GetMasterNode()]
7392 nl_post = list(self.instance.all_nodes) + nl
7393 return (nl, nl_post)
7395 def CheckPrereq(self):
7396 """Check prerequisites.
7398 This checks that the instance is in the cluster.
7401 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7402 assert self.instance is not None, \
7403 "Cannot retrieve locked instance %s" % self.op.instance_name
7405 def Exec(self, feedback_fn):
7406 """Remove the instance.
7409 instance = self.instance
7410 logging.info("Shutting down instance %s on node %s",
7411 instance.name, instance.primary_node)
7413 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7414 self.op.shutdown_timeout)
7415 msg = result.fail_msg
7417 if self.op.ignore_failures:
7418 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7420 raise errors.OpExecError("Could not shutdown instance %s on"
7422 (instance.name, instance.primary_node, msg))
7424 assert (self.owned_locks(locking.LEVEL_NODE) ==
7425 self.owned_locks(locking.LEVEL_NODE_RES))
7426 assert not (set(instance.all_nodes) -
7427 self.owned_locks(locking.LEVEL_NODE)), \
7428 "Not owning correct locks"
7430 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7433 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7434 """Utility function to remove an instance.
7437 logging.info("Removing block devices for instance %s", instance.name)
7439 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7440 if not ignore_failures:
7441 raise errors.OpExecError("Can't remove instance's disks")
7442 feedback_fn("Warning: can't remove instance's disks")
7444 logging.info("Removing instance %s out of cluster config", instance.name)
7446 lu.cfg.RemoveInstance(instance.name)
7448 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7449 "Instance lock removal conflict"
7451 # Remove lock for the instance
7452 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7455 class LUInstanceQuery(NoHooksLU):
7456 """Logical unit for querying instances.
7459 # pylint: disable=W0142
7462 def CheckArguments(self):
7463 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7464 self.op.output_fields, self.op.use_locking)
7466 def ExpandNames(self):
7467 self.iq.ExpandNames(self)
7469 def DeclareLocks(self, level):
7470 self.iq.DeclareLocks(self, level)
7472 def Exec(self, feedback_fn):
7473 return self.iq.OldStyleQuery(self)
7476 class LUInstanceFailover(LogicalUnit):
7477 """Failover an instance.
7480 HPATH = "instance-failover"
7481 HTYPE = constants.HTYPE_INSTANCE
7484 def CheckArguments(self):
7485 """Check the arguments.
7488 self.iallocator = getattr(self.op, "iallocator", None)
7489 self.target_node = getattr(self.op, "target_node", None)
7491 def ExpandNames(self):
7492 self._ExpandAndLockInstance()
7494 if self.op.target_node is not None:
7495 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7497 self.needed_locks[locking.LEVEL_NODE] = []
7498 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7500 self.needed_locks[locking.LEVEL_NODE_RES] = []
7501 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7503 ignore_consistency = self.op.ignore_consistency
7504 shutdown_timeout = self.op.shutdown_timeout
7505 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7508 ignore_consistency=ignore_consistency,
7509 shutdown_timeout=shutdown_timeout,
7510 ignore_ipolicy=self.op.ignore_ipolicy)
7511 self.tasklets = [self._migrater]
7513 def DeclareLocks(self, level):
7514 if level == locking.LEVEL_NODE:
7515 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7516 if instance.disk_template in constants.DTS_EXT_MIRROR:
7517 if self.op.target_node is None:
7518 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7520 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7521 self.op.target_node]
7522 del self.recalculate_locks[locking.LEVEL_NODE]
7524 self._LockInstancesNodes()
7525 elif level == locking.LEVEL_NODE_RES:
7527 self.needed_locks[locking.LEVEL_NODE_RES] = \
7528 self.needed_locks[locking.LEVEL_NODE][:]
7530 def BuildHooksEnv(self):
7533 This runs on master, primary and secondary nodes of the instance.
7536 instance = self._migrater.instance
7537 source_node = instance.primary_node
7538 target_node = self.op.target_node
7540 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7541 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7542 "OLD_PRIMARY": source_node,
7543 "NEW_PRIMARY": target_node,
7546 if instance.disk_template in constants.DTS_INT_MIRROR:
7547 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7548 env["NEW_SECONDARY"] = source_node
7550 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7552 env.update(_BuildInstanceHookEnvByObject(self, instance))
7556 def BuildHooksNodes(self):
7557 """Build hooks nodes.
7560 instance = self._migrater.instance
7561 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7562 return (nl, nl + [instance.primary_node])
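# A minimal sketch of how a caller might build the matching opcode; the field
# names mirror the op attributes used above (ignore_consistency,
# shutdown_timeout, target_node, iallocator, ignore_ipolicy) and the values
# are purely illustrative:
#
#   op = opcodes.OpInstanceFailover(instance_name="inst1.example.com",
#                                   ignore_consistency=False,
#                                   shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT)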
7565 class LUInstanceMigrate(LogicalUnit):
7566 """Migrate an instance.
7568 This is migration without shutting down, compared to the failover,
7569 which is done with shutdown.
7572 HPATH = "instance-migrate"
7573 HTYPE = constants.HTYPE_INSTANCE
7576 def ExpandNames(self):
7577 self._ExpandAndLockInstance()
7579 if self.op.target_node is not None:
7580 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7582 self.needed_locks[locking.LEVEL_NODE] = []
7583 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7585 self.needed_locks[locking.LEVEL_NODE] = []
7586 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7589 TLMigrateInstance(self, self.op.instance_name,
7590 cleanup=self.op.cleanup,
7592 fallback=self.op.allow_failover,
7593 allow_runtime_changes=self.op.allow_runtime_changes,
7594 ignore_ipolicy=self.op.ignore_ipolicy)
7595 self.tasklets = [self._migrater]
7597 def DeclareLocks(self, level):
7598 if level == locking.LEVEL_NODE:
7599 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7600 if instance.disk_template in constants.DTS_EXT_MIRROR:
7601 if self.op.target_node is None:
7602 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7604 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7605 self.op.target_node]
7606 del self.recalculate_locks[locking.LEVEL_NODE]
7608 self._LockInstancesNodes()
7609 elif level == locking.LEVEL_NODE_RES:
7611 self.needed_locks[locking.LEVEL_NODE_RES] = \
7612 self.needed_locks[locking.LEVEL_NODE][:]
7614 def BuildHooksEnv(self):
7617 This runs on master, primary and secondary nodes of the instance.
7620 instance = self._migrater.instance
7621 source_node = instance.primary_node
7622 target_node = self.op.target_node
7623 env = _BuildInstanceHookEnvByObject(self, instance)
7625 "MIGRATE_LIVE": self._migrater.live,
7626 "MIGRATE_CLEANUP": self.op.cleanup,
7627 "OLD_PRIMARY": source_node,
7628 "NEW_PRIMARY": target_node,
7629 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7632 if instance.disk_template in constants.DTS_INT_MIRROR:
7633 env["OLD_SECONDARY"] = target_node
7634 env["NEW_SECONDARY"] = source_node
7636 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7640 def BuildHooksNodes(self):
7641 """Build hooks nodes.
7644 instance = self._migrater.instance
7645 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7646 return (nl, nl + [instance.primary_node])
7649 class LUInstanceMove(LogicalUnit):
7650 """Move an instance by data-copying.
7653 HPATH = "instance-move"
7654 HTYPE = constants.HTYPE_INSTANCE
7657 def ExpandNames(self):
7658 self._ExpandAndLockInstance()
7659 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7660 self.op.target_node = target_node
7661 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7662 self.needed_locks[locking.LEVEL_NODE_RES] = []
7663 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7665 def DeclareLocks(self, level):
7666 if level == locking.LEVEL_NODE:
7667 self._LockInstancesNodes(primary_only=True)
7668 elif level == locking.LEVEL_NODE_RES:
7670 self.needed_locks[locking.LEVEL_NODE_RES] = \
7671 self.needed_locks[locking.LEVEL_NODE][:]
7673 def BuildHooksEnv(self):
7676 This runs on master, primary and secondary nodes of the instance.
7680 "TARGET_NODE": self.op.target_node,
7681 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7683 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7686 def BuildHooksNodes(self):
7687 """Build hooks nodes.
7691 self.cfg.GetMasterNode(),
7692 self.instance.primary_node,
7693 self.op.target_node,
7697 def CheckPrereq(self):
7698 """Check prerequisites.
7700 This checks that the instance is in the cluster.
7703 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7704 assert self.instance is not None, \
7705 "Cannot retrieve locked instance %s" % self.op.instance_name
7707 node = self.cfg.GetNodeInfo(self.op.target_node)
7708 assert node is not None, \
7709 "Cannot retrieve locked node %s" % self.op.target_node
7711 self.target_node = target_node = node.name
7713 if target_node == instance.primary_node:
7714 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7715 (instance.name, target_node),
7718 bep = self.cfg.GetClusterInfo().FillBE(instance)
7720 for idx, dsk in enumerate(instance.disks):
7721 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7722 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7723 " cannot copy" % idx, errors.ECODE_STATE)
7725 _CheckNodeOnline(self, target_node)
7726 _CheckNodeNotDrained(self, target_node)
7727 _CheckNodeVmCapable(self, target_node)
7728 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7729 self.cfg.GetNodeGroup(node.group))
7730 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7731 ignore=self.op.ignore_ipolicy)
7733 if instance.admin_state == constants.ADMINST_UP:
7734 # check memory requirements on the secondary node
7735 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7736 instance.name, bep[constants.BE_MAXMEM],
7737 instance.hypervisor)
7739 self.LogInfo("Not checking memory on the secondary node as"
7740 " instance will not be started")
7742 # check bridge existence
7743 _CheckInstanceBridgesExist(self, instance, node=target_node)
7745 def Exec(self, feedback_fn):
7746 """Move an instance.
7748 The move is done by shutting it down on its present node, copying
7749 the data over (slow) and starting it on the new node.
7752 instance = self.instance
7754 source_node = instance.primary_node
7755 target_node = self.target_node
7757 self.LogInfo("Shutting down instance %s on source node %s",
7758 instance.name, source_node)
7760 assert (self.owned_locks(locking.LEVEL_NODE) ==
7761 self.owned_locks(locking.LEVEL_NODE_RES))
7763 result = self.rpc.call_instance_shutdown(source_node, instance,
7764 self.op.shutdown_timeout)
7765 msg = result.fail_msg
7766 if msg:
7767 if self.op.ignore_consistency:
7768 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7769 " Proceeding anyway. Please make sure node"
7770 " %s is down. Error details: %s",
7771 instance.name, source_node, source_node, msg)
7772 else:
7773 raise errors.OpExecError("Could not shutdown instance %s on"
7774 " node %s: %s" %
7775 (instance.name, source_node, msg))
7777 # create the target disks
7779 _CreateDisks(self, instance, target_node=target_node)
7780 except errors.OpExecError:
7781 self.LogWarning("Device creation failed, reverting...")
7783 _RemoveDisks(self, instance, target_node=target_node)
7785 self.cfg.ReleaseDRBDMinors(instance.name)
7788 cluster_name = self.cfg.GetClusterInfo().cluster_name
7791 # activate, get path, copy the data over
7792 for idx, disk in enumerate(instance.disks):
7793 self.LogInfo("Copying data for disk %d", idx)
7794 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7795 instance.name, True, idx)
7797 self.LogWarning("Can't assemble newly created disk %d: %s",
7798 idx, result.fail_msg)
7799 errs.append(result.fail_msg)
7801 dev_path = result.payload
7802 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7803 target_node, dev_path,
7806 self.LogWarning("Can't copy data over for disk %d: %s",
7807 idx, result.fail_msg)
7808 errs.append(result.fail_msg)
7812 self.LogWarning("Some disks failed to copy, aborting")
7814 _RemoveDisks(self, instance, target_node=target_node)
7816 self.cfg.ReleaseDRBDMinors(instance.name)
7817 raise errors.OpExecError("Errors during disk copy: %s" %
7820 instance.primary_node = target_node
7821 self.cfg.Update(instance, feedback_fn)
7823 self.LogInfo("Removing the disks on the original node")
7824 _RemoveDisks(self, instance, target_node=source_node)
7826 # Only start the instance if it's marked as up
7827 if instance.admin_state == constants.ADMINST_UP:
7828 self.LogInfo("Starting instance %s on node %s",
7829 instance.name, target_node)
7831 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7832 ignore_secondaries=True)
7834 _ShutdownInstanceDisks(self, instance)
7835 raise errors.OpExecError("Can't activate the instance's disks")
7837 result = self.rpc.call_instance_start(target_node,
7838 (instance, None, None), False)
7839 msg = result.fail_msg
7841 _ShutdownInstanceDisks(self, instance)
7842 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7843 (instance.name, target_node, msg))
7846 class LUNodeMigrate(LogicalUnit):
7847 """Migrate all instances from a node.
7850 HPATH = "node-migrate"
7851 HTYPE = constants.HTYPE_NODE
7854 def CheckArguments(self):
7857 def ExpandNames(self):
7858 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7860 self.share_locks = _ShareAll()
7861 self.needed_locks = {
7862 locking.LEVEL_NODE: [self.op.node_name],
7865 def BuildHooksEnv(self):
7868 This runs on the master, the primary and all the secondaries.
7872 "NODE_NAME": self.op.node_name,
7873 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7876 def BuildHooksNodes(self):
7877 """Build hooks nodes.
7880 nl = [self.cfg.GetMasterNode()]
7883 def CheckPrereq(self):
7886 def Exec(self, feedback_fn):
7887 # Prepare jobs for migration instances
7888 allow_runtime_changes = self.op.allow_runtime_changes
7890 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7893 iallocator=self.op.iallocator,
7894 target_node=self.op.target_node,
7895 allow_runtime_changes=allow_runtime_changes,
7896 ignore_ipolicy=self.op.ignore_ipolicy)]
7897 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7900 # TODO: Run iallocator in this opcode and pass correct placement options to
7901 # OpInstanceMigrate. Since other jobs can modify the cluster between
7902 # running the iallocator and the actual migration, a good consistency model
7903 # will have to be found.
7905 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7906 frozenset([self.op.node_name]))
7908 return ResultWithJobs(jobs)
7911 class TLMigrateInstance(Tasklet):
7912 """Tasklet class for instance migration.
7915 @ivar live: whether the migration will be done live or non-live;
7916 this variable is initialized only after CheckPrereq has run
7917 @type cleanup: boolean
7918 @ivar cleanup: Whether we are cleaning up after a failed migration
7919 @type iallocator: string
7920 @ivar iallocator: The iallocator used to determine target_node
7921 @type target_node: string
7922 @ivar target_node: If given, the target_node to reallocate the instance to
7923 @type failover: boolean
7924 @ivar failover: Whether operation results in failover or migration
7925 @type fallback: boolean
7926 @ivar fallback: Whether fallback to failover is allowed if migration not
7928 @type ignore_consistency: boolean
7929 @ivar ignore_consistency: Whether we should ignore consistency between source
7931 @type shutdown_timeout: int
7932 @ivar shutdown_timeout: In case of failover, the timeout of the shutdown
7933 @type ignore_ipolicy: bool
7934 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7939 _MIGRATION_POLL_INTERVAL = 1 # seconds
7940 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
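# TLMigrateInstance is never used as an LU on its own: LUInstanceFailover and
# LUInstanceMigrate above instantiate it and register it via self.tasklets, so
# its CheckPrereq and Exec are run on their behalf.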
7942 def __init__(self, lu, instance_name, cleanup=False,
7943 failover=False, fallback=False,
7944 ignore_consistency=False,
7945 allow_runtime_changes=True,
7946 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7947 ignore_ipolicy=False):
7948 """Initializes this class.
7951 Tasklet.__init__(self, lu)
7954 self.instance_name = instance_name
7955 self.cleanup = cleanup
7956 self.live = False # will be overridden later
7957 self.failover = failover
7958 self.fallback = fallback
7959 self.ignore_consistency = ignore_consistency
7960 self.shutdown_timeout = shutdown_timeout
7961 self.ignore_ipolicy = ignore_ipolicy
7962 self.allow_runtime_changes = allow_runtime_changes
7964 def CheckPrereq(self):
7965 """Check prerequisites.
7967 This checks that the instance is in the cluster.
7970 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7971 instance = self.cfg.GetInstanceInfo(instance_name)
7972 assert instance is not None
7973 self.instance = instance
7974 cluster = self.cfg.GetClusterInfo()
7976 if (not self.cleanup and
7977 not instance.admin_state == constants.ADMINST_UP and
7978 not self.failover and self.fallback):
7979 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7980 " switching to failover")
7981 self.failover = True
7983 if instance.disk_template not in constants.DTS_MIRRORED:
7984 if self.failover:
7985 text = "failover"
7986 else:
7987 text = "migration"
7988 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7989 " %s" % (instance.disk_template, text),
7992 if instance.disk_template in constants.DTS_EXT_MIRROR:
7993 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7995 if self.lu.op.iallocator:
7996 self._RunAllocator()
7998 # We set self.target_node, as it is required by
8000 self.target_node = self.lu.op.target_node
8002 # Check that the target node is correct in terms of instance policy
8003 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8004 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8005 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8006 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8007 ignore=self.ignore_ipolicy)
8009 # self.target_node is already populated, either directly or by the
8011 target_node = self.target_node
8012 if self.target_node == instance.primary_node:
8013 raise errors.OpPrereqError("Cannot migrate instance %s"
8014 " to its primary (%s)" %
8015 (instance.name, instance.primary_node))
8017 if len(self.lu.tasklets) == 1:
8018 # It is safe to release locks only when we're the only tasklet
8020 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8021 keep=[instance.primary_node, self.target_node])
8024 secondary_nodes = instance.secondary_nodes
8025 if not secondary_nodes:
8026 raise errors.ConfigurationError("No secondary node but using"
8027 " %s disk template" %
8028 instance.disk_template)
8029 target_node = secondary_nodes[0]
8030 if self.lu.op.iallocator or (self.lu.op.target_node and
8031 self.lu.op.target_node != target_node):
8032 if self.failover:
8033 text = "failed over"
8034 else:
8035 text = "migrated"
8036 raise errors.OpPrereqError("Instances with disk template %s cannot"
8037 " be %s to arbitrary nodes"
8038 " (neither an iallocator nor a target"
8039 " node can be passed)" %
8040 (instance.disk_template, text),
8042 nodeinfo = self.cfg.GetNodeInfo(target_node)
8043 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8044 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8045 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8046 ignore=self.ignore_ipolicy)
8048 i_be = cluster.FillBE(instance)
8050 # check memory requirements on the secondary node
8051 if (not self.cleanup and
8052 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8053 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8054 "migrating instance %s" %
8056 i_be[constants.BE_MINMEM],
8057 instance.hypervisor)
8059 self.lu.LogInfo("Not checking memory on the secondary node as"
8060 " instance will not be started")
8062 # check if failover must be forced instead of migration
8063 if (not self.cleanup and not self.failover and
8064 i_be[constants.BE_ALWAYS_FAILOVER]):
8066 self.lu.LogInfo("Instance configured to always failover; fallback"
8068 self.failover = True
8070 raise errors.OpPrereqError("This instance has been configured to"
8071 " always failover, please allow failover",
8074 # check bridge existence
8075 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8077 if not self.cleanup:
8078 _CheckNodeNotDrained(self.lu, target_node)
8079 if not self.failover:
8080 result = self.rpc.call_instance_migratable(instance.primary_node,
8082 if result.fail_msg and self.fallback:
8083 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8085 self.failover = True
8087 result.Raise("Can't migrate, please use failover",
8088 prereq=True, ecode=errors.ECODE_STATE)
8090 assert not (self.failover and self.cleanup)
8092 if not self.failover:
8093 if self.lu.op.live is not None and self.lu.op.mode is not None:
8094 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8095 " parameters are accepted",
8097 if self.lu.op.live is not None:
8099 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8101 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8102 # reset the 'live' parameter to None so that repeated
8103 # invocations of CheckPrereq do not raise an exception
8104 self.lu.op.live = None
8105 elif self.lu.op.mode is None:
8106 # read the default value from the hypervisor
8107 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8108 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8110 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8112 # Failover is never live
8115 if not (self.failover or self.cleanup):
8116 remote_info = self.rpc.call_instance_info(instance.primary_node,
8118 instance.hypervisor)
8119 remote_info.Raise("Error checking instance on node %s" %
8120 instance.primary_node)
8121 instance_running = bool(remote_info.payload)
8122 if instance_running:
8123 self.current_mem = int(remote_info.payload["memory"])
8125 def _RunAllocator(self):
8126 """Run the allocator based on input opcode.
8129 # FIXME: add a self.ignore_ipolicy option
8130 ial = IAllocator(self.cfg, self.rpc,
8131 mode=constants.IALLOCATOR_MODE_RELOC,
8132 name=self.instance_name,
8133 relocate_from=[self.instance.primary_node],
8136 ial.Run(self.lu.op.iallocator)
8139 raise errors.OpPrereqError("Can't compute nodes using"
8140 " iallocator '%s': %s" %
8141 (self.lu.op.iallocator, ial.info),
8143 if len(ial.result) != ial.required_nodes:
8144 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8145 " of nodes (%s), required %s" %
8146 (self.lu.op.iallocator, len(ial.result),
8147 ial.required_nodes), errors.ECODE_FAULT)
8148 self.target_node = ial.result[0]
8149 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8150 self.instance_name, self.lu.op.iallocator,
8151 utils.CommaJoin(ial.result))
8153 def _WaitUntilSync(self):
8154 """Poll with custom rpc for disk sync.
8156 This uses our own step-based rpc call.
8159 self.feedback_fn("* wait until resync is done")
8163 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8165 (self.instance.disks,
8168 for node, nres in result.items():
8169 nres.Raise("Cannot resync disks on node %s" % node)
8170 node_done, node_percent = nres.payload
8171 all_done = all_done and node_done
8172 if node_percent is not None:
8173 min_percent = min(min_percent, node_percent)
8175 if min_percent < 100:
8176 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8179 def _EnsureSecondary(self, node):
8180 """Demote a node to secondary.
8183 self.feedback_fn("* switching node %s to secondary mode" % node)
8185 for dev in self.instance.disks:
8186 self.cfg.SetDiskID(dev, node)
8188 result = self.rpc.call_blockdev_close(node, self.instance.name,
8189 self.instance.disks)
8190 result.Raise("Cannot change disk to secondary on node %s" % node)
8192 def _GoStandalone(self):
8193 """Disconnect from the network.
8196 self.feedback_fn("* changing into standalone mode")
8197 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8198 self.instance.disks)
8199 for node, nres in result.items():
8200 nres.Raise("Cannot disconnect disks node %s" % node)
8202 def _GoReconnect(self, multimaster):
8203 """Reconnect to the network.
8206 if multimaster:
8207 msg = "dual-master"
8208 else:
8209 msg = "single-master"
8210 self.feedback_fn("* changing disks into %s mode" % msg)
8211 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8212 (self.instance.disks, self.instance),
8213 self.instance.name, multimaster)
8214 for node, nres in result.items():
8215 nres.Raise("Cannot change disks config on node %s" % node)
8217 def _ExecCleanup(self):
8218 """Try to cleanup after a failed migration.
8220 The cleanup is done by:
8221 - check that the instance is running only on one node
8222 (and update the config if needed)
8223 - change disks on its secondary node to secondary
8224 - wait until disks are fully synchronized
8225 - disconnect from the network
8226 - change disks into single-master mode
8227 - wait again until disks are fully synchronized
8230 instance = self.instance
8231 target_node = self.target_node
8232 source_node = self.source_node
8234 # check running on only one node
8235 self.feedback_fn("* checking where the instance actually runs"
8236 " (if this hangs, the hypervisor might be in"
8238 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8239 for node, result in ins_l.items():
8240 result.Raise("Can't contact node %s" % node)
8242 runningon_source = instance.name in ins_l[source_node].payload
8243 runningon_target = instance.name in ins_l[target_node].payload
8245 if runningon_source and runningon_target:
8246 raise errors.OpExecError("Instance seems to be running on two nodes,"
8247 " or the hypervisor is confused; you will have"
8248 " to ensure manually that it runs only on one"
8249 " and restart this operation")
8251 if not (runningon_source or runningon_target):
8252 raise errors.OpExecError("Instance does not seem to be running at all;"
8253 " in this case it's safer to repair by"
8254 " running 'gnt-instance stop' to ensure disk"
8255 " shutdown, and then restarting it")
8257 if runningon_target:
8258 # the migration has actually succeeded, we need to update the config
8259 self.feedback_fn("* instance running on secondary node (%s),"
8260 " updating config" % target_node)
8261 instance.primary_node = target_node
8262 self.cfg.Update(instance, self.feedback_fn)
8263 demoted_node = source_node
8265 self.feedback_fn("* instance confirmed to be running on its"
8266 " primary node (%s)" % source_node)
8267 demoted_node = target_node
8269 if instance.disk_template in constants.DTS_INT_MIRROR:
8270 self._EnsureSecondary(demoted_node)
8272 self._WaitUntilSync()
8273 except errors.OpExecError:
8274 # we ignore errors here, since if the device is standalone, it
8275 # won't be able to sync
8277 self._GoStandalone()
8278 self._GoReconnect(False)
8279 self._WaitUntilSync()
8281 self.feedback_fn("* done")
8283 def _RevertDiskStatus(self):
8284 """Try to revert the disk status after a failed migration.
8287 target_node = self.target_node
8288 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8292 self._EnsureSecondary(target_node)
8293 self._GoStandalone()
8294 self._GoReconnect(False)
8295 self._WaitUntilSync()
8296 except errors.OpExecError, err:
8297 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8298 " please try to recover the instance manually;"
8299 " error '%s'" % str(err))
8301 def _AbortMigration(self):
8302 """Call the hypervisor code to abort a started migration.
8305 instance = self.instance
8306 target_node = self.target_node
8307 source_node = self.source_node
8308 migration_info = self.migration_info
8310 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8314 abort_msg = abort_result.fail_msg
8316 logging.error("Aborting migration failed on target node %s: %s",
8317 target_node, abort_msg)
8318 # Don't raise an exception here, as we still have to try to revert the
8319 # disk status, even if this step failed.
8321 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8322 instance, False, self.live)
8323 abort_msg = abort_result.fail_msg
8325 logging.error("Aborting migration failed on source node %s: %s",
8326 source_node, abort_msg)
8328 def _ExecMigration(self):
8329 """Migrate an instance.
8331 The migrate is done by:
8332 - change the disks into dual-master mode
8333 - wait until disks are fully synchronized again
8334 - migrate the instance
8335 - change disks on the new secondary node (the old primary) to secondary
8336 - wait until disks are fully synchronized
8337 - change disks into single-master mode
8340 instance = self.instance
8341 target_node = self.target_node
8342 source_node = self.source_node
8344 # Check for hypervisor version mismatch and warn the user.
8345 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8346 None, [self.instance.hypervisor])
8347 for ninfo in nodeinfo.values():
8348 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8350 (_, _, (src_info, )) = nodeinfo[source_node].payload
8351 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8353 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8354 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8355 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8356 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8357 if src_version != dst_version:
8358 self.feedback_fn("* warning: hypervisor version mismatch between"
8359 " source (%s) and target (%s) node" %
8360 (src_version, dst_version))
8362 self.feedback_fn("* checking disk consistency between source and target")
8363 for (idx, dev) in enumerate(instance.disks):
8364 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8365 raise errors.OpExecError("Disk %s is degraded or not fully"
8366 " synchronized on target node,"
8367 " aborting migration" % idx)
8369 if self.current_mem > self.tgt_free_mem:
8370 if not self.allow_runtime_changes:
8371 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8372 " free memory to fit instance %s on target"
8373 " node %s (have %dMB, need %dMB)" %
8374 (instance.name, target_node,
8375 self.tgt_free_mem, self.current_mem))
8376 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8377 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8380 rpcres.Raise("Cannot modify instance runtime memory")
8382 # First get the migration information from the remote node
8383 result = self.rpc.call_migration_info(source_node, instance)
8384 msg = result.fail_msg
8386 log_err = ("Failed fetching source migration information from %s: %s" %
8388 logging.error(log_err)
8389 raise errors.OpExecError(log_err)
8391 self.migration_info = migration_info = result.payload
8393 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8394 # Then switch the disks to master/master mode
8395 self._EnsureSecondary(target_node)
8396 self._GoStandalone()
8397 self._GoReconnect(True)
8398 self._WaitUntilSync()
8400 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8401 result = self.rpc.call_accept_instance(target_node,
8404 self.nodes_ip[target_node])
8406 msg = result.fail_msg
8408 logging.error("Instance pre-migration failed, trying to revert"
8409 " disk status: %s", msg)
8410 self.feedback_fn("Pre-migration failed, aborting")
8411 self._AbortMigration()
8412 self._RevertDiskStatus()
8413 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8414 (instance.name, msg))
8416 self.feedback_fn("* migrating instance to %s" % target_node)
8417 result = self.rpc.call_instance_migrate(source_node, instance,
8418 self.nodes_ip[target_node],
8420 msg = result.fail_msg
8422 logging.error("Instance migration failed, trying to revert"
8423 " disk status: %s", msg)
8424 self.feedback_fn("Migration failed, aborting")
8425 self._AbortMigration()
8426 self._RevertDiskStatus()
8427 raise errors.OpExecError("Could not migrate instance %s: %s" %
8428 (instance.name, msg))
8430 self.feedback_fn("* starting memory transfer")
8431 last_feedback = time.time()
8433 result = self.rpc.call_instance_get_migration_status(source_node,
8435 msg = result.fail_msg
8436 ms = result.payload # MigrationStatus instance
8437 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8438 logging.error("Instance migration failed, trying to revert"
8439 " disk status: %s", msg)
8440 self.feedback_fn("Migration failed, aborting")
8441 self._AbortMigration()
8442 self._RevertDiskStatus()
8443 raise errors.OpExecError("Could not migrate instance %s: %s" %
8444 (instance.name, msg))
8446 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8447 self.feedback_fn("* memory transfer complete")
8450 if (utils.TimeoutExpired(last_feedback,
8451 self._MIGRATION_FEEDBACK_INTERVAL) and
8452 ms.transferred_ram is not None):
8453 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8454 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8455 last_feedback = time.time()
8457 time.sleep(self._MIGRATION_POLL_INTERVAL)
8459 result = self.rpc.call_instance_finalize_migration_src(source_node,
8463 msg = result.fail_msg
8465 logging.error("Instance migration succeeded, but finalization failed"
8466 " on the source node: %s", msg)
8467 raise errors.OpExecError("Could not finalize instance migration: %s" %
8470 instance.primary_node = target_node
8472 # distribute new instance config to the other nodes
8473 self.cfg.Update(instance, self.feedback_fn)
8475 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8479 msg = result.fail_msg
8481 logging.error("Instance migration succeeded, but finalization failed"
8482 " on the target node: %s", msg)
8483 raise errors.OpExecError("Could not finalize instance migration: %s" %
8486 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8487 self._EnsureSecondary(source_node)
8488 self._WaitUntilSync()
8489 self._GoStandalone()
8490 self._GoReconnect(False)
8491 self._WaitUntilSync()
8493 # If the instance's disk template is `rbd' and there was a successful
8494 # migration, unmap the device from the source node.
8495 if self.instance.disk_template == constants.DT_RBD:
8496 disks = _ExpandCheckDisks(instance, instance.disks)
8497 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8499 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8500 msg = result.fail_msg
8502 logging.error("Migration was successful, but couldn't unmap the"
8503 " block device %s on source node %s: %s",
8504 disk.iv_name, source_node, msg)
8505 logging.error("You need to unmap the device %s manually on %s",
8506 disk.iv_name, source_node)
8508 self.feedback_fn("* done")
8510 def _ExecFailover(self):
8511 """Failover an instance.
8513 The failover is done by shutting it down on its present node and
8514 starting it on the secondary.
8517 instance = self.instance
8518 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8520 source_node = instance.primary_node
8521 target_node = self.target_node
8523 if instance.admin_state == constants.ADMINST_UP:
8524 self.feedback_fn("* checking disk consistency between source and target")
8525 for (idx, dev) in enumerate(instance.disks):
8526 # for drbd, these are drbd over lvm
8527 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8529 if primary_node.offline:
8530 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8532 (primary_node.name, idx, target_node))
8533 elif not self.ignore_consistency:
8534 raise errors.OpExecError("Disk %s is degraded on target node,"
8535 " aborting failover" % idx)
8537 self.feedback_fn("* not checking disk consistency as instance is not"
8540 self.feedback_fn("* shutting down instance on source node")
8541 logging.info("Shutting down instance %s on node %s",
8542 instance.name, source_node)
8544 result = self.rpc.call_instance_shutdown(source_node, instance,
8545 self.shutdown_timeout)
8546 msg = result.fail_msg
8547 if msg:
8548 if self.ignore_consistency or primary_node.offline:
8549 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8550 " proceeding anyway; please make sure node"
8551 " %s is down; error details: %s",
8552 instance.name, source_node, source_node, msg)
8553 else:
8554 raise errors.OpExecError("Could not shutdown instance %s on"
8555 " node %s: %s" %
8556 (instance.name, source_node, msg))
8558 self.feedback_fn("* deactivating the instance's disks on source node")
8559 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8560 raise errors.OpExecError("Can't shut down the instance's disks")
8562 instance.primary_node = target_node
8563 # distribute new instance config to the other nodes
8564 self.cfg.Update(instance, self.feedback_fn)
8566 # Only start the instance if it's marked as up
8567 if instance.admin_state == constants.ADMINST_UP:
8568 self.feedback_fn("* activating the instance's disks on target node %s" %
8570 logging.info("Starting instance %s on node %s",
8571 instance.name, target_node)
8573 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8574 ignore_secondaries=True)
8576 _ShutdownInstanceDisks(self.lu, instance)
8577 raise errors.OpExecError("Can't activate the instance's disks")
8579 self.feedback_fn("* starting the instance on the target node %s" %
8581 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8583 msg = result.fail_msg
8585 _ShutdownInstanceDisks(self.lu, instance)
8586 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8587 (instance.name, target_node, msg))
8589 def Exec(self, feedback_fn):
8590 """Perform the migration.
8593 self.feedback_fn = feedback_fn
8594 self.source_node = self.instance.primary_node
8596 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8597 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8598 self.target_node = self.instance.secondary_nodes[0]
8599 # Otherwise self.target_node has been populated either
8600 # directly, or through an iallocator.
8602 self.all_nodes = [self.source_node, self.target_node]
8603 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8604 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8607 feedback_fn("Failover instance %s" % self.instance.name)
8608 self._ExecFailover()
8610 feedback_fn("Migrating instance %s" % self.instance.name)
8613 return self._ExecCleanup()
8615 return self._ExecMigration()
8618 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8620 """Wrapper around L{_CreateBlockDevInner}.
8622 This method annotates the root device first.
8625 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8626 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8630 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8632 """Create a tree of block devices on a given node.
8634 If this device type has to be created on secondaries, create it and
8637 If not, just recurse to children keeping the same 'force' value.
8639 @attention: The device has to be annotated already.
8641 @param lu: the lu on whose behalf we execute
8642 @param node: the node on which to create the device
8643 @type instance: L{objects.Instance}
8644 @param instance: the instance which owns the device
8645 @type device: L{objects.Disk}
8646 @param device: the device to create
8647 @type force_create: boolean
8648 @param force_create: whether to force creation of this device; this
8649 will be changed to True whenever we find a device which has
8650 CreateOnSecondary() attribute
8651 @param info: the extra 'metadata' we should attach to the device
8652 (this will be represented as a LVM tag)
8653 @type force_open: boolean
8654 @param force_open: this parameter will be passed to the
8655 L{backend.BlockdevCreate} function where it specifies
8656 whether we run on primary or not, and it affects both
8657 the child assembly and the device's own Open() execution
8660 if device.CreateOnSecondary():
8664 for child in device.children:
8665 _CreateBlockDevInner(lu, node, instance, child, force_create,
8668 if not force_create:
8671 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8674 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8675 """Create a single block device on a given node.
8677 This will not recurse over children of the device, so they must be
8680 @param lu: the lu on whose behalf we execute
8681 @param node: the node on which to create the device
8682 @type instance: L{objects.Instance}
8683 @param instance: the instance which owns the device
8684 @type device: L{objects.Disk}
8685 @param device: the device to create
8686 @param info: the extra 'metadata' we should attach to the device
8687 (this will be represented as a LVM tag)
8688 @type force_open: boolean
8689 @param force_open: this parameter will be passed to the
8690 L{backend.BlockdevCreate} function where it specifies
8691 whether we run on primary or not, and it affects both
8692 the child assembly and the device's own Open() execution
8695 lu.cfg.SetDiskID(device, node)
8696 result = lu.rpc.call_blockdev_create(node, device, device.size,
8697 instance.name, force_open, info)
8698 result.Raise("Can't create block device %s on"
8699 " node %s for instance %s" % (device, node, instance.name))
8700 if device.physical_id is None:
8701 device.physical_id = result.payload
8704 def _GenerateUniqueNames(lu, exts):
8705 """Generate a suitable LV name.
8707 This will generate a logical volume name for the given instance.
8710 results = []
8711 for val in exts:
8712 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8713 results.append("%s%s" % (new_id, val))
8714 return results
8716 def _GetPCIInfo(lu, dev_type):
8719 if hasattr(lu, 'hotplug_info'):
8720 info = lu.hotplug_info
8721 elif hasattr(lu, 'instance') and hasattr(lu.instance, 'hotplug_info'):
8722 return lu.cfg.GetPCIInfo(lu.instance.name, dev_type)
8725 idx = getattr(info, dev_type)
8726 setattr(info, dev_type, idx+1)
8727 pci = info.pci_pool.pop()
8733 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8734 iv_name, p_minor, s_minor):
8735 """Generate a drbd8 device complete with its children.
8738 assert len(vgnames) == len(names) == 2
8739 port = lu.cfg.AllocatePort()
8740 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8742 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8743 logical_id=(vgnames[0], names[0]),
8745 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8746 logical_id=(vgnames[1], names[1]),
8749 disk_idx, pci = _GetPCIInfo(lu, 'disks')
8750 drbd_dev = objects.Disk(idx=disk_idx, pci=pci,
8751 dev_type=constants.LD_DRBD8, size=size,
8752 logical_id=(primary, secondary, port,
8755 children=[dev_data, dev_meta],
8756 iv_name=iv_name, params={})
8757 return drbd_dev
8760 _DISK_TEMPLATE_NAME_PREFIX = {
8761 constants.DT_PLAIN: "",
8762 constants.DT_RBD: ".rbd",
8766 _DISK_TEMPLATE_DEVICE_TYPE = {
8767 constants.DT_PLAIN: constants.LD_LV,
8768 constants.DT_FILE: constants.LD_FILE,
8769 constants.DT_SHARED_FILE: constants.LD_FILE,
8770 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8771 constants.DT_RBD: constants.LD_RBD,
8775 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8776 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8777 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8778 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8779 """Generate the entire disk layout for a given template type.
8782 #TODO: compute space requirements
8784 vgname = lu.cfg.GetVGName()
8785 disk_count = len(disk_info)
8788 if template_name == constants.DT_DISKLESS:
8790 elif template_name == constants.DT_DRBD8:
8791 if len(secondary_nodes) != 1:
8792 raise errors.ProgrammerError("Wrong template configuration")
8793 remote_node = secondary_nodes[0]
8794 minors = lu.cfg.AllocateDRBDMinor(
8795 [primary_node, remote_node] * len(disk_info), instance_name)
8797 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8799 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8802 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8803 for i in range(disk_count)]):
8804 names.append(lv_prefix + "_data")
8805 names.append(lv_prefix + "_meta")
8806 for idx, disk in enumerate(disk_info):
8807 disk_index = idx + base_index
8808 data_vg = disk.get(constants.IDISK_VG, vgname)
8809 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8810 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8811 disk[constants.IDISK_SIZE],
8813 names[idx * 2:idx * 2 + 2],
8814 "disk/%d" % disk_index,
8815 minors[idx * 2], minors[idx * 2 + 1])
8816 disk_dev.mode = disk[constants.IDISK_MODE]
8817 disks.append(disk_dev)
8820 raise errors.ProgrammerError("Wrong template configuration")
8822 if template_name == constants.DT_FILE:
8824 elif template_name == constants.DT_SHARED_FILE:
8825 _req_shr_file_storage()
8827 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8828 if name_prefix is None:
8831 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8832 (name_prefix, base_index + i)
8833 for i in range(disk_count)])
8835 if template_name == constants.DT_PLAIN:
8836 def logical_id_fn(idx, _, disk):
8837 vg = disk.get(constants.IDISK_VG, vgname)
8838 return (vg, names[idx])
8839 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8841 lambda _, disk_index, disk: (file_driver,
8842 "%s/disk%d" % (file_storage_dir,
8844 elif template_name == constants.DT_BLOCK:
8846 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8847 disk[constants.IDISK_ADOPT])
8848 elif template_name == constants.DT_RBD:
8849 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8851 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8853 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8855 for idx, disk in enumerate(disk_info):
8856 disk_index = idx + base_index
8857 size = disk[constants.IDISK_SIZE]
8858 feedback_fn("* disk %s, size %s" %
8859 (disk_index, utils.FormatUnit(size, "h")))
8861 disk_idx, pci = _GetPCIInfo(lu, 'disks')
8863 disks.append(objects.Disk(dev_type=dev_type, size=size,
8864 logical_id=logical_id_fn(idx, disk_index, disk),
8865 iv_name="disk/%d" % disk_index,
8866 mode=disk[constants.IDISK_MODE],
8867 params={}, idx=disk_idx, pci=pci))
8872 def _GetInstanceInfoText(instance):
8873 """Compute that text that should be added to the disk's metadata.
8876 return "originstname+%s" % instance.name
8879 def _CalcEta(time_taken, written, total_size):
8880 """Calculates the ETA based on size written and total size.
8882 @param time_taken: The time taken so far
8883 @param written: amount written so far
8884 @param total_size: The total size of data to be written
8885 @return: The remaining time in seconds
8888 avg_time = time_taken / float(written)
8889 return (total_size - written) * avg_time
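# Worked example (illustrative only): with 1024 MiB written out of 4096 MiB
# total in 256 seconds, avg_time is 0.25 s/MiB and the returned ETA is
# (4096 - 1024) * 0.25 = 768 seconds.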
8892 def _WipeDisks(lu, instance):
8893 """Wipes instance disks.
8895 @type lu: L{LogicalUnit}
8896 @param lu: the logical unit on whose behalf we execute
8897 @type instance: L{objects.Instance}
8898 @param instance: the instance whose disks we should wipe
8899 @return: the success of the wipe
8902 node = instance.primary_node
8904 for device in instance.disks:
8905 lu.cfg.SetDiskID(device, node)
8907 logging.info("Pause sync of instance %s disks", instance.name)
8908 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8909 (instance.disks, instance),
8911 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8913 for idx, success in enumerate(result.payload):
8915 logging.warn("pause-sync of instance %s for disks %d failed",
8919 for idx, device in enumerate(instance.disks):
8920 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8921 # MAX_WIPE_CHUNK at max
8922 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8923 constants.MIN_WIPE_CHUNK_PERCENT)
8924 # we _must_ make this an int, otherwise rounding errors will
8926 wipe_chunk_size = int(wipe_chunk_size)
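# Illustrative numbers only, assuming the usual values of MAX_WIPE_CHUNK =
# 1024 MiB and MIN_WIPE_CHUNK_PERCENT = 10 (both defined in constants.py, not
# shown here): a 4096 MiB disk is wiped in chunks of min(1024, 409.6) -> 409
# MiB, while disks of 10240 MiB and larger are capped at 1024 MiB per chunk.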
8928 lu.LogInfo("* Wiping disk %d", idx)
8929 logging.info("Wiping disk %d for instance %s, node %s using"
8930 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8935 start_time = time.time()
8937 while offset < size:
8938 wipe_size = min(wipe_chunk_size, size - offset)
8939 logging.debug("Wiping disk %d, offset %s, chunk %s",
8940 idx, offset, wipe_size)
8941 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8943 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8944 (idx, offset, wipe_size))
8947 if now - last_output >= 60:
8948 eta = _CalcEta(now - start_time, offset, size)
8949 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8950 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8953 logging.info("Resume sync of instance %s disks", instance.name)
8955 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8956 (instance.disks, instance),
8960 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8961 " please have a look at the status and troubleshoot"
8962 " the issue: %s", node, result.fail_msg)
8964 for idx, success in enumerate(result.payload):
8966 lu.LogWarning("Resume sync of disk %d failed, please have a"
8967 " look at the status and troubleshoot the issue", idx)
8968 logging.warn("resume-sync of instance %s for disks %d failed",
8972 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8973 """Create all disks for an instance.
8975 This abstracts away some work from AddInstance.
8977 @type lu: L{LogicalUnit}
8978 @param lu: the logical unit on whose behalf we execute
8979 @type instance: L{objects.Instance}
8980 @param instance: the instance whose disks we should create
8982 @param to_skip: list of indices to skip
8983 @type target_node: string
8984 @param target_node: if passed, overrides the target node for creation
8986 @return: the success of the creation
8989 info = _GetInstanceInfoText(instance)
8990 if target_node is None:
8991 pnode = instance.primary_node
8992 all_nodes = instance.all_nodes
8997 if instance.disk_template in constants.DTS_FILEBASED:
8998 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8999 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9001 result.Raise("Failed to create directory '%s' on"
9002 " node %s" % (file_storage_dir, pnode))
9004 # Note: this needs to be kept in sync with adding of disks in
9005 # LUInstanceSetParams
9006 for idx, device in enumerate(instance.disks):
9007 if to_skip and idx in to_skip:
9009 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9011 for node in all_nodes:
9012 f_create = node == pnode
9013 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9016 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9017 """Remove all disks for an instance.
9019 This abstracts away some work from `AddInstance()` and
9020 `RemoveInstance()`. Note that in case some of the devices couldn't
9021 be removed, the removal will continue with the other ones (compare
9022 with `_CreateDisks()`).
9024 @type lu: L{LogicalUnit}
9025 @param lu: the logical unit on whose behalf we execute
9026 @type instance: L{objects.Instance}
9027 @param instance: the instance whose disks we should remove
9028 @type target_node: string
9029 @param target_node: used to override the node on which to remove the disks
9031 @return: the success of the removal
9034 logging.info("Removing block devices for instance %s", instance.name)
9037 ports_to_release = set()
9038 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9039 for (idx, device) in enumerate(anno_disks):
9041 edata = [(target_node, device)]
9043 edata = device.ComputeNodeTree(instance.primary_node)
9044 for node, disk in edata:
9045 lu.cfg.SetDiskID(disk, node)
9046 result = lu.rpc.call_blockdev_remove(node, disk)
9048 lu.LogWarning("Could not remove disk %s on node %s,"
9049 " continuing anyway: %s", idx, node, result.fail_msg)
9050 if not (result.offline and node != instance.primary_node):
9053 # if this is a DRBD disk, return its port to the pool
9054 if device.dev_type in constants.LDS_DRBD:
9055 ports_to_release.add(device.logical_id[2])
9057 if all_result or ignore_failures:
9058 for port in ports_to_release:
9059 lu.cfg.AddTcpUdpPort(port)
9061 if instance.disk_template == constants.DT_FILE:
9062 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9066 tgt = instance.primary_node
9067 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9069 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9070 file_storage_dir, instance.primary_node, result.fail_msg)
9076 def _ComputeDiskSizePerVG(disk_template, disks):
9077 """Compute disk size requirements in the volume group
9080 def _compute(disks, payload):
9081 """Universal algorithm.
9084 vgs = {}
9085 for disk in disks:
9086 vgs[disk[constants.IDISK_VG]] = \
9087 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9089 return vgs
9091 # Required free disk space as a function of disk and swap space
9093 constants.DT_DISKLESS: {},
9094 constants.DT_PLAIN: _compute(disks, 0),
9095 # 128 MB are added for drbd metadata for each disk
9096 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9097 constants.DT_FILE: {},
9098 constants.DT_SHARED_FILE: {},
9101 if disk_template not in req_size_dict:
9102 raise errors.ProgrammerError("Disk template '%s' size requirement"
9103 " is unknown" % disk_template)
9105 return req_size_dict[disk_template]
9108 def _ComputeDiskSize(disk_template, disks):
9109 """Compute disk size requirements according to disk template
9112 # Required free disk space as a function of disk and swap space
9114 constants.DT_DISKLESS: None,
9115 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9116 # 128 MB are added for drbd metadata for each disk
9118 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9119 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9120 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9121 constants.DT_BLOCK: 0,
9122 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9125 if disk_template not in req_size_dict:
9126 raise errors.ProgrammerError("Disk template '%s' size requirement"
9127 " is unknown" % disk_template)
9129 return req_size_dict[disk_template]
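# Worked example: two 10240 MiB disks with the DT_DRBD8 template require
# 2 * (10240 + 128) = 20736 MiB, taking DRBD_META_SIZE to be the 128 MB of
# per-disk DRBD metadata mentioned in the comment above.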
9132 def _FilterVmNodes(lu, nodenames):
9133 """Filters out non-vm_capable nodes from a list.
9135 @type lu: L{LogicalUnit}
9136 @param lu: the logical unit for which we check
9137 @type nodenames: list
9138 @param nodenames: the list of nodes on which we should check
9140 @return: the list of vm-capable nodes
9143 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9144 return [name for name in nodenames if name not in vm_nodes]
9147 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9148 """Hypervisor parameter validation.
9150 This function abstracts the hypervisor parameter validation to be
9151 used in both instance create and instance modify.
9153 @type lu: L{LogicalUnit}
9154 @param lu: the logical unit for which we check
9155 @type nodenames: list
9156 @param nodenames: the list of nodes on which we should check
9157 @type hvname: string
9158 @param hvname: the name of the hypervisor we should use
9159 @type hvparams: dict
9160 @param hvparams: the parameters which we need to check
9161 @raise errors.OpPrereqError: if the parameters are not valid
9164 nodenames = _FilterVmNodes(lu, nodenames)
9166 cluster = lu.cfg.GetClusterInfo()
9167 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9169 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9170 for node in nodenames:
9174 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9177 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9178 """OS parameters validation.
9180 @type lu: L{LogicalUnit}
9181 @param lu: the logical unit for which we check
9182 @type required: boolean
9183 @param required: whether the validation should fail if the OS is not
9185 @type nodenames: list
9186 @param nodenames: the list of nodes on which we should check
9187 @type osname: string
9188 @param osname: the name of the OS we should use
9189 @type osparams: dict
9190 @param osparams: the parameters which we need to check
9191 @raise errors.OpPrereqError: if the parameters are not valid
9194 nodenames = _FilterVmNodes(lu, nodenames)
9195 result = lu.rpc.call_os_validate(nodenames, required, osname,
9196 [constants.OS_VALIDATE_PARAMETERS],
9198 for node, nres in result.items():
9199 # we don't check for offline cases since this should be run only
9200 # against the master node and/or an instance's nodes
9201 nres.Raise("OS Parameters validation failed on node %s" % node)
9202 if not nres.payload:
9203 lu.LogInfo("OS %s not found on node %s, validation skipped",
9207 class LUInstanceCreate(LogicalUnit):
9208 """Create an instance.
9211 HPATH = "instance-add"
9212 HTYPE = constants.HTYPE_INSTANCE
9215 def CheckArguments(self):
9219 # do not require name_check to ease forward/backward compatibility
9221 if self.op.no_install and self.op.start:
9222 self.LogInfo("No-installation mode selected, disabling startup")
9223 self.op.start = False
9224 # validate/normalize the instance name
9225 self.op.instance_name = \
9226 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9228 if self.op.ip_check and not self.op.name_check:
9229 # TODO: make the ip check more flexible and not depend on the name check
9230 raise errors.OpPrereqError("Cannot do IP address check without a name"
9231 " check", errors.ECODE_INVAL)
9233 # check nics' parameter names
9234 for nic in self.op.nics:
9235 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9237 # check disks. parameter names and consistent adopt/no-adopt strategy
9238 has_adopt = has_no_adopt = False
9239 for disk in self.op.disks:
9240 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9241 if constants.IDISK_ADOPT in disk:
9245 if has_adopt and has_no_adopt:
9246 raise errors.OpPrereqError("Either all disks are adopted or none is",
9249 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9250 raise errors.OpPrereqError("Disk adoption is not supported for the"
9251 " '%s' disk template" %
9252 self.op.disk_template,
9254 if self.op.iallocator is not None:
9255 raise errors.OpPrereqError("Disk adoption not allowed with an"
9256 " iallocator script", errors.ECODE_INVAL)
9257 if self.op.mode == constants.INSTANCE_IMPORT:
9258 raise errors.OpPrereqError("Disk adoption not allowed for"
9259 " instance import", errors.ECODE_INVAL)
9261 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9262 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9263 " but no 'adopt' parameter given" %
9264 self.op.disk_template,
9267 self.adopt_disks = has_adopt
9269 # instance name verification
9270 if self.op.name_check:
9271 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9272 self.op.instance_name = self.hostname1.name
9273 # used in CheckPrereq for ip ping check
9274 self.check_ip = self.hostname1.ip
9276 self.check_ip = None
9278 # file storage checks
9279 if (self.op.file_driver and
9280 not self.op.file_driver in constants.FILE_DRIVER):
9281 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9282 self.op.file_driver, errors.ECODE_INVAL)
9284 if self.op.disk_template == constants.DT_FILE:
9285 opcodes.RequireFileStorage()
9286 elif self.op.disk_template == constants.DT_SHARED_FILE:
9287 opcodes.RequireSharedFileStorage()
9289 ### Node/iallocator related checks
9290 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9292 if self.op.pnode is not None:
9293 if self.op.disk_template in constants.DTS_INT_MIRROR:
9294 if self.op.snode is None:
9295 raise errors.OpPrereqError("The networked disk templates need"
9296 " a mirror node", errors.ECODE_INVAL)
9298 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9300 self.op.snode = None
9302 self._cds = _GetClusterDomainSecret()
9304 if self.op.mode == constants.INSTANCE_IMPORT:
9305 # On import force_variant must be True, because if we forced it at
9306 # initial install, our only chance when importing it back is that it
9308 self.op.force_variant = True
9310 if self.op.no_install:
9311 self.LogInfo("No-installation mode has no effect during import")
9313 elif self.op.mode == constants.INSTANCE_CREATE:
9314 if self.op.os_type is None:
9315 raise errors.OpPrereqError("No guest OS specified",
9317 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9318 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9319 " installation" % self.op.os_type,
9321 if self.op.disk_template is None:
9322 raise errors.OpPrereqError("No disk template specified",
9325 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9326 # Check handshake to ensure both clusters have the same domain secret
9327 src_handshake = self.op.source_handshake
9328 if not src_handshake:
9329 raise errors.OpPrereqError("Missing source handshake",
9332 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
src_handshake)
if errmsg:
9335 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9338 # Load and check source CA
9339 self.source_x509_ca_pem = self.op.source_x509_ca
9340 if not self.source_x509_ca_pem:
9341 raise errors.OpPrereqError("Missing source X509 CA",
try:
9345 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
self._cds)
9347 except OpenSSL.crypto.Error, err:
9348 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9349 (err, ), errors.ECODE_INVAL)
9351 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9352 if errcode is not None:
9353 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9356 self.source_x509_ca = cert
9358 src_instance_name = self.op.source_instance_name
9359 if not src_instance_name:
9360 raise errors.OpPrereqError("Missing source instance name",
9363 self.source_instance_name = \
9364 netutils.GetHostname(name=src_instance_name).name
else:
9367 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9368 self.op.mode, errors.ECODE_INVAL)
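# For illustration only (values invented, a sketch rather than part of the LU):
# a plain DRBD-based creation reaches this LU via an opcode roughly like
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}], os_type="debian-image",
#                                 pnode="node1", snode="node2")
# All of the argument checks above run before any node locks are acquired.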
9370 def ExpandNames(self):
9371 """ExpandNames for CreateInstance.
9373 Figure out the right locks for instance creation.
9376 self.needed_locks = {}
9378 instance_name = self.op.instance_name
9379 # this is just a preventive check, but someone might still add this
9380 # instance in the meantime, and creation will fail at lock-add time
9381 if instance_name in self.cfg.GetInstanceList():
9382 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9383 instance_name, errors.ECODE_EXISTS)
9385 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9387 if self.op.iallocator:
9388 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9389 # specifying a group on instance creation and then selecting nodes from
# that group
9391 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9392 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9394 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9395 nodelist = [self.op.pnode]
9396 if self.op.snode is not None:
9397 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9398 nodelist.append(self.op.snode)
9399 self.needed_locks[locking.LEVEL_NODE] = nodelist
9400 # Lock resources of instance's primary and secondary nodes (copy to
9401 # prevent accidental modification)
9402 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9404 # in case of import lock the source node too
9405 if self.op.mode == constants.INSTANCE_IMPORT:
9406 src_node = self.op.src_node
9407 src_path = self.op.src_path
9409 if src_path is None:
9410 self.op.src_path = src_path = self.op.instance_name
9412 if src_node is None:
9413 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9414 self.op.src_node = None
9415 if os.path.isabs(src_path):
9416 raise errors.OpPrereqError("Importing an instance from a path"
9417 " requires a source node option",
errors.ECODE_INVAL)
else:
9420 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9421 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9422 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9423 if not os.path.isabs(src_path):
9424 self.op.src_path = src_path = \
9425 utils.PathJoin(constants.EXPORT_DIR, src_path)
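# For illustration: with an explicit primary/secondary the lock declaration
# built above ends up roughly as
#   {locking.LEVEL_NODE: ["node1", "node2"],
#    locking.LEVEL_NODE_RES: ["node1", "node2"]}
# while iallocator-based creation (and imports without a source node) fall
# back to locking.ALL_SET because the target nodes are not known yet.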
9427 def _RunAllocator(self):
9428 """Run the allocator based on input opcode.
9431 nics = [n.ToDict() for n in self.nics]
9432 ial = IAllocator(self.cfg, self.rpc,
9433 mode=constants.IALLOCATOR_MODE_ALLOC,
9434 name=self.op.instance_name,
9435 disk_template=self.op.disk_template,
9438 vcpus=self.be_full[constants.BE_VCPUS],
9439 memory=self.be_full[constants.BE_MAXMEM],
9440 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9443 hypervisor=self.op.hypervisor,
9446 ial.Run(self.op.iallocator)
9449 raise errors.OpPrereqError("Can't compute nodes using"
9450 " iallocator '%s': %s" %
9451 (self.op.iallocator, ial.info),
9453 if len(ial.result) != ial.required_nodes:
9454 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9455 " of nodes (%s), required %s" %
9456 (self.op.iallocator, len(ial.result),
9457 ial.required_nodes), errors.ECODE_FAULT)
9458 self.op.pnode = ial.result[0]
9459 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9460 self.op.instance_name, self.op.iallocator,
9461 utils.CommaJoin(ial.result))
9462 if ial.required_nodes == 2:
9463 self.op.snode = ial.result[1]
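# For illustration: ial.result is a list of node names chosen by the
# allocator, e.g. ["node3", "node4"] for a mirrored disk template; element 0
# becomes the primary node and element 1 (if requested) the secondary.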
9465 def BuildHooksEnv(self):
9468 This runs on master, primary and secondary nodes of the instance.
env = {
9472 "ADD_MODE": self.op.mode,
}
9474 if self.op.mode == constants.INSTANCE_IMPORT:
9475 env["SRC_NODE"] = self.op.src_node
9476 env["SRC_PATH"] = self.op.src_path
9477 env["SRC_IMAGES"] = self.src_images
9479 env.update(_BuildInstanceHookEnv(
9480 name=self.op.instance_name,
9481 primary_node=self.op.pnode,
9482 secondary_nodes=self.secondaries,
9483 status=self.op.start,
9484 os_type=self.op.os_type,
9485 minmem=self.be_full[constants.BE_MINMEM],
9486 maxmem=self.be_full[constants.BE_MAXMEM],
9487 vcpus=self.be_full[constants.BE_VCPUS],
9488 nics=_NICListToTuple(self, self.nics),
9489 disk_template=self.op.disk_template,
9490 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9491 for d in self.disks],
9494 hypervisor_name=self.op.hypervisor,
9500 def BuildHooksNodes(self):
9501 """Build hooks nodes.
9504 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
return nl, nl
9507 def _ReadExportInfo(self):
9508 """Reads the export information from disk.
9510 It will override the opcode source node and path with the actual
9511 information, if these two were not specified before.
9513 @return: the export information
9516 assert self.op.mode == constants.INSTANCE_IMPORT
9518 src_node = self.op.src_node
9519 src_path = self.op.src_path
9521 if src_node is None:
9522 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9523 exp_list = self.rpc.call_export_list(locked_nodes)
found = False
9525 for node in exp_list:
9526 if exp_list[node].fail_msg:
continue
9528 if src_path in exp_list[node].payload:
found = True
9530 self.op.src_node = src_node = node
9531 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
src_path)
break
if not found:
9535 raise errors.OpPrereqError("No export found for relative path %s" %
9536 src_path, errors.ECODE_INVAL)
9538 _CheckNodeOnline(self, src_node)
9539 result = self.rpc.call_export_info(src_node, src_path)
9540 result.Raise("No export or invalid export found in dir %s" % src_path)
9542 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9543 if not export_info.has_section(constants.INISECT_EXP):
9544 raise errors.ProgrammerError("Corrupted export config",
9545 errors.ECODE_ENVIRON)
9547 ei_version = export_info.get(constants.INISECT_EXP, "version")
9548 if (int(ei_version) != constants.EXPORT_VERSION):
9549 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9550 (ei_version, constants.EXPORT_VERSION),
9551 errors.ECODE_ENVIRON)
return export_info
9554 def _ReadExportParams(self, einfo):
9555 """Use export parameters as defaults.
9557 In case the opcode doesn't specify (as in override) some instance
9558 parameters, then try to use them from the export information, if
9562 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9564 if self.op.disk_template is None:
9565 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9566 self.op.disk_template = einfo.get(constants.INISECT_INS,
"disk_template")
9568 if self.op.disk_template not in constants.DISK_TEMPLATES:
9569 raise errors.OpPrereqError("Disk template specified in configuration"
9570 " file is not one of the allowed values:"
9571 " %s" % " ".join(constants.DISK_TEMPLATES))
else:
9573 raise errors.OpPrereqError("No disk template specified and the export"
9574 " is missing the disk_template information",
9577 if not self.op.disks:
disks = []
9579 # TODO: import the disk iv_name too
9580 for idx in range(constants.MAX_DISKS):
9581 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9582 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9583 disks.append({constants.IDISK_SIZE: disk_sz})
9584 self.op.disks = disks
9585 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9586 raise errors.OpPrereqError("No disk info specified and the export"
9587 " is missing the disk information",
9590 if not self.op.nics:
9592 for idx in range(constants.MAX_NICS):
9593 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9595 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9596 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9603 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9604 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9606 if (self.op.hypervisor is None and
9607 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9608 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9610 if einfo.has_section(constants.INISECT_HYP):
9611 # use the export parameters but do not override the ones
9612 # specified by the user
9613 for name, value in einfo.items(constants.INISECT_HYP):
9614 if name not in self.op.hvparams:
9615 self.op.hvparams[name] = value
9617 if einfo.has_section(constants.INISECT_BEP):
9618 # use the parameters, without overriding
9619 for name, value in einfo.items(constants.INISECT_BEP):
9620 if name not in self.op.beparams:
9621 self.op.beparams[name] = value
9622 # Compatibility for the old "memory" be param
9623 if name == constants.BE_MEMORY:
9624 if constants.BE_MAXMEM not in self.op.beparams:
9625 self.op.beparams[constants.BE_MAXMEM] = value
9626 if constants.BE_MINMEM not in self.op.beparams:
9627 self.op.beparams[constants.BE_MINMEM] = value
else:
9629 # try to read the parameters old style, from the main section
9630 for name in constants.BES_PARAMETERS:
9631 if (name not in self.op.beparams and
9632 einfo.has_option(constants.INISECT_INS, name)):
9633 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9635 if einfo.has_section(constants.INISECT_OSP):
9636 # use the parameters, without overriding
9637 for name, value in einfo.items(constants.INISECT_OSP):
9638 if name not in self.op.osparams:
9639 self.op.osparams[name] = value
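# For illustration only (values invented): the export info consumed above is a
# ConfigParser-style file roughly along the lines of
#   [export]
#   version = 0
#   [instance]
#   name = inst1.example.com
#   disk_template = plain
#   disk0_size = 10240
#   nic0_mac = aa:00:00:12:34:56
#   tags = prod web
# with optional hypervisor/backend/OS parameter sections merged in above
# without overriding anything given explicitly in the opcode.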
9641 def _RevertToDefaults(self, cluster):
9642 """Revert the instance parameters to the default values.
9646 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9647 for name in self.op.hvparams.keys():
9648 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9649 del self.op.hvparams[name]
9651 be_defs = cluster.SimpleFillBE({})
9652 for name in self.op.beparams.keys():
9653 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9654 del self.op.beparams[name]
9656 nic_defs = cluster.SimpleFillNIC({})
9657 for nic in self.op.nics:
9658 for name in constants.NICS_PARAMETERS:
9659 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9662 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9663 for name in self.op.osparams.keys():
9664 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9665 del self.op.osparams[name]
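# Example of the effect: if the cluster default for a backend parameter is 1
# and the opcode also asked for 1, the key is dropped here so the instance
# keeps following the cluster default instead of pinning the value.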
9667 def _CalculateFileStorageDir(self):
9668 """Calculate final instance file storage dir.
9671 # file storage dir calculation/check
9672 self.instance_file_storage_dir = None
9673 if self.op.disk_template in constants.DTS_FILEBASED:
9674 # build the full file storage dir path
joinargs = []
9677 if self.op.disk_template == constants.DT_SHARED_FILE:
9678 get_fsd_fn = self.cfg.GetSharedFileStorageDir
else:
9680 get_fsd_fn = self.cfg.GetFileStorageDir
9682 cfg_storagedir = get_fsd_fn()
9683 if not cfg_storagedir:
9684 raise errors.OpPrereqError("Cluster file storage dir not defined")
9685 joinargs.append(cfg_storagedir)
9687 if self.op.file_storage_dir is not None:
9688 joinargs.append(self.op.file_storage_dir)
9690 joinargs.append(self.op.instance_name)
9692 # pylint: disable=W0142
9693 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
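# For illustration (paths invented): with a cluster file storage dir of
# /srv/ganeti/file-storage and an opcode file_storage_dir of "web", the
# resulting path is /srv/ganeti/file-storage/web/<instance name>.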
9695 def CheckPrereq(self): # pylint: disable=R0914
9696 """Check prerequisites.
9699 self._CalculateFileStorageDir()
9701 if self.op.mode == constants.INSTANCE_IMPORT:
9702 export_info = self._ReadExportInfo()
9703 self._ReadExportParams(export_info)
9704 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
else:
9706 self._old_instance_name = None
9708 if (not self.cfg.GetVGName() and
9709 self.op.disk_template not in constants.DTS_NOT_LVM):
9710 raise errors.OpPrereqError("Cluster does not support lvm-based"
9711 " instances", errors.ECODE_STATE)
9713 if (self.op.hypervisor is None or
9714 self.op.hypervisor == constants.VALUE_AUTO):
9715 self.op.hypervisor = self.cfg.GetHypervisorType()
9717 cluster = self.cfg.GetClusterInfo()
9718 enabled_hvs = cluster.enabled_hypervisors
9719 if self.op.hypervisor not in enabled_hvs:
9720 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9721 " cluster (%s)" % (self.op.hypervisor,
9722 ",".join(enabled_hvs)),
9725 # Check tag validity
9726 for tag in self.op.tags:
9727 objects.TaggableObject.ValidateTag(tag)
9729 # check hypervisor parameter syntax (locally)
9730 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9731 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9733 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9734 hv_type.CheckParameterSyntax(filled_hvp)
9735 self.hv_full = filled_hvp
9736 # check that we don't specify global parameters on an instance
9737 _CheckGlobalHvParams(self.op.hvparams)
9739 # fill and remember the beparams dict
9740 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9741 for param, value in self.op.beparams.iteritems():
9742 if value == constants.VALUE_AUTO:
9743 self.op.beparams[param] = default_beparams[param]
9744 objects.UpgradeBeParams(self.op.beparams)
9745 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9746 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9748 # build os parameters
9749 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9751 # now that hvp/bep are in final format, let's reset to defaults,
9753 if self.op.identify_defaults:
9754 self._RevertToDefaults(cluster)
9756 self.hotplug_info = None
9758 self.hotplug_info = objects.HotplugInfo(disks=0, nics=0,
9759 pci_pool=list(range(16,32)))
self.nics = []
9762 for idx, nic in enumerate(self.op.nics):
9763 nic_mode_req = nic.get(constants.INIC_MODE, None)
9764 nic_mode = nic_mode_req
9765 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9766 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9768 # in routed mode, for the first nic, the default ip is 'auto'
9769 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9770 default_ip_mode = constants.VALUE_AUTO
9772 default_ip_mode = constants.VALUE_NONE
9774 # ip validity checks
9775 ip = nic.get(constants.INIC_IP, default_ip_mode)
9776 if ip is None or ip.lower() == constants.VALUE_NONE:
nic_ip = None
9778 elif ip.lower() == constants.VALUE_AUTO:
9779 if not self.op.name_check:
9780 raise errors.OpPrereqError("IP address set to auto but name checks"
9781 " have been skipped",
9783 nic_ip = self.hostname1.ip
else:
9785 if not netutils.IPAddress.IsValid(ip):
9786 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
errors.ECODE_INVAL)
nic_ip = ip
9790 # TODO: check the ip address for uniqueness
9791 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9792 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9795 # MAC address verification
9796 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9797 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9798 mac = utils.NormalizeAndValidateMac(mac)
try:
9801 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9802 except errors.ReservationError:
9803 raise errors.OpPrereqError("MAC address %s already in use"
9804 " in cluster" % mac,
9805 errors.ECODE_NOTUNIQUE)
9807 # Build nic parameters
9808 link = nic.get(constants.INIC_LINK, None)
9809 if link == constants.VALUE_AUTO:
9810 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
nicparams = {}
9813 nicparams[constants.NIC_MODE] = nic_mode
9815 nicparams[constants.NIC_LINK] = link
9817 check_params = cluster.SimpleFillNIC(nicparams)
9818 objects.NIC.CheckParameterSyntax(check_params)
9819 nic_idx, pci = _GetPCIInfo(self, 'nics')
9820 self.nics.append(objects.NIC(idx=nic_idx, pci=pci,
9822 nicparams=check_params))
9824 # disk checks/pre-build
9825 default_vg = self.cfg.GetVGName()
self.disks = []
9827 for disk in self.op.disks:
9828 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9829 if mode not in constants.DISK_ACCESS_SET:
9830 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9831 mode, errors.ECODE_INVAL)
9832 size = disk.get(constants.IDISK_SIZE, None)
if size is None:
9834 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
try:
size = int(size)
9837 except (TypeError, ValueError):
9838 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
errors.ECODE_INVAL)
9841 data_vg = disk.get(constants.IDISK_VG, default_vg)
new_disk = {
9843 constants.IDISK_SIZE: size,
9844 constants.IDISK_MODE: mode,
9845 constants.IDISK_VG: data_vg,
}
9847 if constants.IDISK_METAVG in disk:
9848 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9849 if constants.IDISK_ADOPT in disk:
9850 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9851 self.disks.append(new_disk)
9853 if self.op.mode == constants.INSTANCE_IMPORT:
disk_images = []
9855 for idx in range(len(self.disks)):
9856 option = "disk%d_dump" % idx
9857 if export_info.has_option(constants.INISECT_INS, option):
9858 # FIXME: are the old os-es, disk sizes, etc. useful?
9859 export_name = export_info.get(constants.INISECT_INS, option)
9860 image = utils.PathJoin(self.op.src_path, export_name)
9861 disk_images.append(image)
else:
9863 disk_images.append(False)
9865 self.src_images = disk_images
9867 if self.op.instance_name == self._old_instance_name:
9868 for idx, nic in enumerate(self.nics):
9869 if nic.mac == constants.VALUE_AUTO:
9870 nic_mac_ini = "nic%d_mac" % idx
9871 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9873 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9875 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9876 if self.op.ip_check:
9877 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9878 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9879 (self.check_ip, self.op.instance_name),
9880 errors.ECODE_NOTUNIQUE)
9882 #### mac address generation
9883 # By generating here the mac address both the allocator and the hooks get
9884 # the real final mac address rather than the 'auto' or 'generate' value.
9885 # There is a race condition between the generation and the instance object
9886 # creation, which means that we know the mac is valid now, but we're not
9887 # sure it will be when we actually add the instance. If things go bad
9888 # adding the instance will abort because of a duplicate mac, and the
9889 # creation job will fail.
9890 for nic in self.nics:
9891 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9892 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9896 if self.op.iallocator is not None:
9897 self._RunAllocator()
9899 # Release all unneeded node locks
9900 _ReleaseLocks(self, locking.LEVEL_NODE,
9901 keep=filter(None, [self.op.pnode, self.op.snode,
self.op.src_node]))
9903 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9904 keep=filter(None, [self.op.pnode, self.op.snode,
self.op.src_node]))
9907 #### node related checks
9909 # check primary node
9910 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9911 assert self.pnode is not None, \
9912 "Cannot retrieve locked node %s" % self.op.pnode
if pnode.offline:
9914 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9915 pnode.name, errors.ECODE_STATE)
if pnode.drained:
9917 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9918 pnode.name, errors.ECODE_STATE)
9919 if not pnode.vm_capable:
9920 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9921 " '%s'" % pnode.name, errors.ECODE_STATE)
9923 self.secondaries = []
9925 # mirror node verification
9926 if self.op.disk_template in constants.DTS_INT_MIRROR:
9927 if self.op.snode == pnode.name:
9928 raise errors.OpPrereqError("The secondary node cannot be the"
9929 " primary node", errors.ECODE_INVAL)
9930 _CheckNodeOnline(self, self.op.snode)
9931 _CheckNodeNotDrained(self, self.op.snode)
9932 _CheckNodeVmCapable(self, self.op.snode)
9933 self.secondaries.append(self.op.snode)
9935 snode = self.cfg.GetNodeInfo(self.op.snode)
9936 if pnode.group != snode.group:
9937 self.LogWarning("The primary and secondary nodes are in two"
9938 " different node groups; the disk parameters"
9939 " from the first disk's node group will be"
" used")
9942 nodenames = [pnode.name] + self.secondaries
9944 # Verify instance specs
9945 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
ispec = {
9947 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9948 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9949 constants.ISPEC_DISK_COUNT: len(self.disks),
9950 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9951 constants.ISPEC_NIC_COUNT: len(self.nics),
9952 constants.ISPEC_SPINDLE_USE: spindle_use,
}
9955 group_info = self.cfg.GetNodeGroup(pnode.group)
9956 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9957 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9958 if not self.op.ignore_ipolicy and res:
9959 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9960 " policy: %s") % (pnode.group,
9961 utils.CommaJoin(res)),
9964 if not self.adopt_disks:
9965 if self.op.disk_template == constants.DT_RBD:
9966 # _CheckRADOSFreeSpace() is just a placeholder.
9967 # Any function that checks prerequisites can be placed here.
9968 # Check if there is enough space on the RADOS cluster.
9969 _CheckRADOSFreeSpace()
9971 # Check lv size requirements, if not adopting
9972 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9973 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9975 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9976 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9977 disk[constants.IDISK_ADOPT])
9978 for disk in self.disks])
9979 if len(all_lvs) != len(self.disks):
9980 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9982 for lv_name in all_lvs:
9984 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9985 # to ReserveLV uses the same syntax
9986 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9987 except errors.ReservationError:
9988 raise errors.OpPrereqError("LV named %s used by another instance" %
9989 lv_name, errors.ECODE_NOTUNIQUE)
9991 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9992 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9994 node_lvs = self.rpc.call_lv_list([pnode.name],
9995 vg_names.payload.keys())[pnode.name]
9996 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9997 node_lvs = node_lvs.payload
9999 delta = all_lvs.difference(node_lvs.keys())
if delta:
10001 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10002 utils.CommaJoin(delta),
10003 errors.ECODE_INVAL)
10004 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
if online_lvs:
10006 raise errors.OpPrereqError("Online logical volumes found, cannot"
10007 " adopt: %s" % utils.CommaJoin(online_lvs),
10008 errors.ECODE_STATE)
10009 # update the size of disk based on what is found
10010 for dsk in self.disks:
10011 dsk[constants.IDISK_SIZE] = \
10012 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10013 dsk[constants.IDISK_ADOPT])][0]))
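# Note: node_lvs maps "vg/lv" names to LV information in which, as used above,
# field 0 carries the size (overwriting the requested disk size) and field 2
# the "online" (in use) flag.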
10015 elif self.op.disk_template == constants.DT_BLOCK:
10016 # Normalize and de-duplicate device paths
10017 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10018 for disk in self.disks])
10019 if len(all_disks) != len(self.disks):
10020 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10021 errors.ECODE_INVAL)
10022 baddisks = [d for d in all_disks
10023 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
if baddisks:
10025 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10026 " cannot be adopted" %
10027 (", ".join(baddisks),
10028 constants.ADOPTABLE_BLOCKDEV_ROOT),
10029 errors.ECODE_INVAL)
10031 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10032 list(all_disks))[pnode.name]
10033 node_disks.Raise("Cannot get block device information from node %s" %
10035 node_disks = node_disks.payload
10036 delta = all_disks.difference(node_disks.keys())
if delta:
10038 raise errors.OpPrereqError("Missing block device(s): %s" %
10039 utils.CommaJoin(delta),
10040 errors.ECODE_INVAL)
10041 for dsk in self.disks:
10042 dsk[constants.IDISK_SIZE] = \
10043 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
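# Note: for block device adoption the payload of call_bdev_sizes maps each
# device path to its size, which likewise overwrites the requested disk size.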
10045 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10047 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10048 # check OS parameters (remotely)
10049 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10051 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10053 # memory check on primary node
10054 #TODO(dynmem): use MINMEM for checking
10056 _CheckNodeFreeMemory(self, self.pnode.name,
10057 "creating instance %s" % self.op.instance_name,
10058 self.be_full[constants.BE_MAXMEM],
10059 self.op.hypervisor)
10061 self.dry_run_result = list(nodenames)
10063 def Exec(self, feedback_fn):
10064 """Create and add the instance to the cluster.
10067 instance = self.op.instance_name
10068 pnode_name = self.pnode.name
10070 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10071 self.owned_locks(locking.LEVEL_NODE)), \
10072 "Node locks differ from node resource locks"
10074 ht_kind = self.op.hypervisor
10075 if ht_kind in constants.HTS_REQ_PORT:
10076 network_port = self.cfg.AllocatePort()
else:
10078 network_port = None
10080 # This is ugly, but we have a chicken-and-egg problem here:
10081 # We can only take the group disk parameters, as the instance
10082 # has no disks yet (we are generating them right here).
10083 node = self.cfg.GetNodeInfo(pnode_name)
10084 nodegroup = self.cfg.GetNodeGroup(node.group)
10085 disks = _GenerateDiskTemplate(self,
10086 self.op.disk_template,
10087 instance, pnode_name,
10090 self.instance_file_storage_dir,
10091 self.op.file_driver,
10094 self.cfg.GetGroupDiskParams(nodegroup))
10096 iobj = objects.Instance(name=instance, os=self.op.os_type,
10097 primary_node=pnode_name,
10098 nics=self.nics, disks=disks,
10099 disk_template=self.op.disk_template,
10100 admin_state=constants.ADMINST_DOWN,
10101 network_port=network_port,
10102 beparams=self.op.beparams,
10103 hvparams=self.op.hvparams,
10104 hypervisor=self.op.hypervisor,
10105 osparams=self.op.osparams,
10106 hotplug_info=self.hotplug_info,
)
10110 for tag in self.op.tags:
iobj.AddTag(tag)
10113 if self.adopt_disks:
10114 if self.op.disk_template == constants.DT_PLAIN:
10115 # rename LVs to the newly-generated names; we need to construct
10116 # 'fake' LV disks with the old data, plus the new unique_id
10117 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10119 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10120 rename_to.append(t_dsk.logical_id)
10121 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10122 self.cfg.SetDiskID(t_dsk, pnode_name)
10123 result = self.rpc.call_blockdev_rename(pnode_name,
10124 zip(tmp_disks, rename_to))
10125 result.Raise("Failed to rename adopted LVs")
10127 feedback_fn("* creating instance disks...")
try:
10129 _CreateDisks(self, iobj)
10130 except errors.OpExecError:
10131 self.LogWarning("Device creation failed, reverting...")
try:
10133 _RemoveDisks(self, iobj)
finally:
10135 self.cfg.ReleaseDRBDMinors(instance)
raise
10138 feedback_fn("adding instance %s to cluster config" % instance)
10140 self.cfg.AddInstance(iobj, self.proc.GetECId())
10142 # Declare that we don't want to remove the instance lock anymore, as we've
10143 # added the instance to the config
10144 del self.remove_locks[locking.LEVEL_INSTANCE]
10146 if self.op.mode == constants.INSTANCE_IMPORT:
10147 # Release unused nodes
10148 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10150 # Release all nodes
10151 _ReleaseLocks(self, locking.LEVEL_NODE)
10154 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10155 feedback_fn("* wiping instance disks...")
try:
10157 _WipeDisks(self, iobj)
10158 except errors.OpExecError, err:
10159 logging.exception("Wiping disks failed")
10160 self.LogWarning("Wiping instance disks failed (%s)", err)
disk_abort = True
if disk_abort:
10164 # Something is already wrong with the disks, don't do anything else
pass
10166 elif self.op.wait_for_sync:
10167 disk_abort = not _WaitForSync(self, iobj)
10168 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10169 # make sure the disks are not degraded (still sync-ing is ok)
10170 feedback_fn("* checking mirrors status")
10171 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
else:
disk_abort = False
if disk_abort:
10176 _RemoveDisks(self, iobj)
10177 self.cfg.RemoveInstance(iobj.name)
10178 # Make sure the instance lock gets removed
10179 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10180 raise errors.OpExecError("There are some degraded disks for"
" this instance")
10183 # Release all node resource locks
10184 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10186 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10187 # we need to set the disks ID to the primary node, since the
10188 # preceding code might or might have not done it, depending on
10189 # disk template and other options
10190 for disk in iobj.disks:
10191 self.cfg.SetDiskID(disk, pnode_name)
10192 if self.op.mode == constants.INSTANCE_CREATE:
10193 if not self.op.no_install:
10194 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10195 not self.op.wait_for_sync)
if pause_sync:
10197 feedback_fn("* pausing disk sync to install instance OS")
10198 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10201 for idx, success in enumerate(result.payload):
if not success:
10203 logging.warn("pause-sync of instance %s for disk %d failed",
instance, idx)
10206 feedback_fn("* running the instance OS create scripts...")
10207 # FIXME: pass debug option from opcode to backend
os_add_result = \
10209 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10210 self.op.debug_level)
10212 feedback_fn("* resuming disk sync")
10213 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10216 for idx, success in enumerate(result.payload):
if not success:
10218 logging.warn("resume-sync of instance %s for disk %d failed",
instance, idx)
10221 os_add_result.Raise("Could not add os for instance %s"
10222 " on node %s" % (instance, pnode_name))
10225 if self.op.mode == constants.INSTANCE_IMPORT:
10226 feedback_fn("* running the instance OS import scripts...")
transfers = []
10230 for idx, image in enumerate(self.src_images):
10234 # FIXME: pass debug option from opcode to backend
10235 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10236 constants.IEIO_FILE, (image, ),
10237 constants.IEIO_SCRIPT,
10238 (iobj.disks[idx], idx),
10240 transfers.append(dt)
import_result = \
10243 masterd.instance.TransferInstanceData(self, feedback_fn,
10244 self.op.src_node, pnode_name,
10245 self.pnode.secondary_ip,
10247 if not compat.all(import_result):
10248 self.LogWarning("Some disks for instance %s on node %s were not"
10249 " imported successfully" % (instance, pnode_name))
10251 rename_from = self._old_instance_name
10253 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10254 feedback_fn("* preparing remote import...")
10255 # The source cluster will stop the instance before attempting to make
10256 # a connection. In some cases stopping an instance can take a long
10257 # time, hence the shutdown timeout is added to the connection
10259 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10260 self.op.source_shutdown_timeout)
10261 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10263 assert iobj.primary_node == self.pnode.name
disk_results = \
10265 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10266 self.source_x509_ca,
10267 self._cds, timeouts)
10268 if not compat.all(disk_results):
10269 # TODO: Should the instance still be started, even if some disks
10270 # failed to import (valid for local imports, too)?
10271 self.LogWarning("Some disks for instance %s on node %s were not"
10272 " imported successfully" % (instance, pnode_name))
10274 rename_from = self.source_instance_name
else:
10277 # also checked in the prereq part
10278 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
% self.op.mode)
10281 # Run rename script on newly imported instance
10282 assert iobj.name == instance
10283 feedback_fn("Running rename script for %s" % instance)
10284 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
rename_from,
10286 self.op.debug_level)
10287 if result.fail_msg:
10288 self.LogWarning("Failed to run rename script for %s on node"
10289 " %s: %s" % (instance, pnode_name, result.fail_msg))
10291 assert not self.owned_locks(locking.LEVEL_NODE_RES)
if self.op.start:
10294 iobj.admin_state = constants.ADMINST_UP
10295 self.cfg.Update(iobj, feedback_fn)
10296 logging.info("Starting instance %s on node %s", instance, pnode_name)
10297 feedback_fn("* starting instance...")
10298 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10300 result.Raise("Could not start instance")
10302 return list(iobj.all_nodes)
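# For illustration only (option spelling assumed, may differ between
# versions): this LU is what ultimately serves a command line such as
#   gnt-instance add -t drbd -n node1:node2 -o debian-image \
#     --disk 0:size=10g inst1.example.com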
10305 def _CheckRADOSFreeSpace():
10306 """Compute disk size requirements inside the RADOS cluster.
10309 # For the RADOS cluster we assume there is always enough space.
pass
10313 class LUInstanceConsole(NoHooksLU):
10314 """Connect to an instance's console.
10316 This is somewhat special in that it returns the command line that
10317 you need to run on the master node in order to connect to the
console.
10323 def ExpandNames(self):
10324 self.share_locks = _ShareAll()
10325 self._ExpandAndLockInstance()
10327 def CheckPrereq(self):
10328 """Check prerequisites.
10330 This checks that the instance is in the cluster.
10333 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10334 assert self.instance is not None, \
10335 "Cannot retrieve locked instance %s" % self.op.instance_name
10336 _CheckNodeOnline(self, self.instance.primary_node)
10338 def Exec(self, feedback_fn):
10339 """Connect to the console of an instance
10342 instance = self.instance
10343 node = instance.primary_node
10345 node_insts = self.rpc.call_instance_list([node],
10346 [instance.hypervisor])[node]
10347 node_insts.Raise("Can't get node information from %s" % node)
10349 if instance.name not in node_insts.payload:
10350 if instance.admin_state == constants.ADMINST_UP:
10351 state = constants.INSTST_ERRORDOWN
10352 elif instance.admin_state == constants.ADMINST_DOWN:
10353 state = constants.INSTST_ADMINDOWN
10355 state = constants.INSTST_ADMINOFFLINE
10356 raise errors.OpExecError("Instance %s is not running (state %s)" %
10357 (instance.name, state))
10359 logging.debug("Connecting to console of %s on %s", instance.name, node)
10361 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10364 def _GetInstanceConsole(cluster, instance):
10365 """Returns console information for an instance.
10367 @type cluster: L{objects.Cluster}
10368 @type instance: L{objects.Instance}
10372 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10373 # beparams and hvparams are passed separately, to avoid editing the
10374 # instance and then saving the defaults in the instance itself.
10375 hvparams = cluster.FillHV(instance)
10376 beparams = cluster.FillBE(instance)
10377 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10379 assert console.instance == instance.name
10380 assert console.Validate()
10382 return console.ToDict()
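# Note: the exact fields of the returned dict depend on the hypervisor's
# GetInstanceConsole implementation; roughly, an SSH-based console carries the
# command to run, while a VNC-style console carries the host/port to attach to.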
10385 class LUInstanceReplaceDisks(LogicalUnit):
10386 """Replace the disks of an instance.
10389 HPATH = "mirrors-replace"
10390 HTYPE = constants.HTYPE_INSTANCE
10393 def CheckArguments(self):
10394 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10395 self.op.iallocator)
10397 def ExpandNames(self):
10398 self._ExpandAndLockInstance()
10400 assert locking.LEVEL_NODE not in self.needed_locks
10401 assert locking.LEVEL_NODE_RES not in self.needed_locks
10402 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10404 assert self.op.iallocator is None or self.op.remote_node is None, \
10405 "Conflicting options"
10407 if self.op.remote_node is not None:
10408 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10410 # Warning: do not remove the locking of the new secondary here
10411 # unless DRBD8.AddChildren is changed to work in parallel;
10412 # currently it doesn't since parallel invocations of
10413 # FindUnusedMinor will conflict
10414 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10415 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10417 self.needed_locks[locking.LEVEL_NODE] = []
10418 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10420 if self.op.iallocator is not None:
10421 # iallocator will select a new node in the same group
10422 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10424 self.needed_locks[locking.LEVEL_NODE_RES] = []
10426 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10427 self.op.iallocator, self.op.remote_node,
10428 self.op.disks, False, self.op.early_release,
10429 self.op.ignore_ipolicy)
10431 self.tasklets = [self.replacer]
10433 def DeclareLocks(self, level):
10434 if level == locking.LEVEL_NODEGROUP:
10435 assert self.op.remote_node is None
10436 assert self.op.iallocator is not None
10437 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10439 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10440 # Lock all groups used by instance optimistically; this requires going
10441 # via the node before it's locked, requiring verification later on
10442 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10443 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10445 elif level == locking.LEVEL_NODE:
10446 if self.op.iallocator is not None:
10447 assert self.op.remote_node is None
10448 assert not self.needed_locks[locking.LEVEL_NODE]
10450 # Lock member nodes of all locked groups
10451 self.needed_locks[locking.LEVEL_NODE] = [node_name
10452 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10453 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10455 self._LockInstancesNodes()
10456 elif level == locking.LEVEL_NODE_RES:
10458 self.needed_locks[locking.LEVEL_NODE_RES] = \
10459 self.needed_locks[locking.LEVEL_NODE]
10461 def BuildHooksEnv(self):
10462 """Build hooks env.
10464 This runs on the master, the primary and all the secondaries.
10467 instance = self.replacer.instance
env = {
10469 "MODE": self.op.mode,
10470 "NEW_SECONDARY": self.op.remote_node,
10471 "OLD_SECONDARY": instance.secondary_nodes[0],
}
10473 env.update(_BuildInstanceHookEnvByObject(self, instance))
return env
10476 def BuildHooksNodes(self):
10477 """Build hooks nodes.
10480 instance = self.replacer.instance
nl = [
10482 self.cfg.GetMasterNode(),
10483 instance.primary_node,
]
10485 if self.op.remote_node is not None:
10486 nl.append(self.op.remote_node)
return nl, nl
10489 def CheckPrereq(self):
10490 """Check prerequisites.
10493 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10494 self.op.iallocator is None)
10496 # Verify if node group locks are still correct
10497 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10499 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10501 return LogicalUnit.CheckPrereq(self)
10504 class TLReplaceDisks(Tasklet):
10505 """Replaces disks for an instance.
10507 Note: Locking is not within the scope of this class.
10510 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10511 disks, delay_iallocator, early_release, ignore_ipolicy):
10512 """Initializes this class.
10515 Tasklet.__init__(self, lu)
10518 self.instance_name = instance_name
10520 self.iallocator_name = iallocator_name
10521 self.remote_node = remote_node
10523 self.delay_iallocator = delay_iallocator
10524 self.early_release = early_release
10525 self.ignore_ipolicy = ignore_ipolicy
10528 self.instance = None
10529 self.new_node = None
10530 self.target_node = None
10531 self.other_node = None
10532 self.remote_node_info = None
10533 self.node_secondary_ip = None
10536 def CheckArguments(mode, remote_node, iallocator):
10537 """Helper function for users of this class.
10540 # check for valid parameter combination
10541 if mode == constants.REPLACE_DISK_CHG:
10542 if remote_node is None and iallocator is None:
10543 raise errors.OpPrereqError("When changing the secondary either an"
10544 " iallocator script must be used or the"
10545 " new node given", errors.ECODE_INVAL)
10547 if remote_node is not None and iallocator is not None:
10548 raise errors.OpPrereqError("Give either the iallocator or the new"
10549 " secondary, not both", errors.ECODE_INVAL)
10551 elif remote_node is not None or iallocator is not None:
10552 # Not replacing the secondary
10553 raise errors.OpPrereqError("The iallocator and new node options can"
10554 " only be used when changing the"
10555 " secondary node", errors.ECODE_INVAL)
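# For illustration, the combinations accepted above are:
#   mode in (REPLACE_DISK_PRI, REPLACE_DISK_SEC, REPLACE_DISK_AUTO):
#       neither remote_node nor iallocator may be given
#   mode == REPLACE_DISK_CHG:
#       exactly one of remote_node / iallocator must be given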
10558 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10559 """Compute a new secondary node using an IAllocator.
10562 ial = IAllocator(lu.cfg, lu.rpc,
10563 mode=constants.IALLOCATOR_MODE_RELOC,
10564 name=instance_name,
10565 relocate_from=list(relocate_from))
10567 ial.Run(iallocator_name)
10569 if not ial.success:
10570 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10571 " %s" % (iallocator_name, ial.info),
10572 errors.ECODE_NORES)
10574 if len(ial.result) != ial.required_nodes:
10575 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10576 " of nodes (%s), required %s" %
10578 len(ial.result), ial.required_nodes),
10579 errors.ECODE_FAULT)
10581 remote_node_name = ial.result[0]
10583 lu.LogInfo("Selected new secondary for instance '%s': %s",
10584 instance_name, remote_node_name)
10586 return remote_node_name
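# For illustration: in relocate mode the allocator returns a single node name,
# e.g. ["node4"], which becomes the new secondary for the instance.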
10588 def _FindFaultyDisks(self, node_name):
10589 """Wrapper for L{_FindFaultyInstanceDisks}.
10592 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10595 def _CheckDisksActivated(self, instance):
10596 """Checks if the instance disks are activated.
10598 @param instance: The instance to check disks
10599 @return: True if they are activated, False otherwise
10602 nodes = instance.all_nodes
10604 for idx, dev in enumerate(instance.disks):
10606 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10607 self.cfg.SetDiskID(dev, node)
10609 result = _BlockdevFind(self, node, dev, instance)
if result.offline:
continue
10613 elif result.fail_msg or not result.payload:
return False
return True
10618 def CheckPrereq(self):
10619 """Check prerequisites.
10621 This checks that the instance is in the cluster.
10624 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10625 assert instance is not None, \
10626 "Cannot retrieve locked instance %s" % self.instance_name
10628 if instance.disk_template != constants.DT_DRBD8:
10629 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10630 " instances", errors.ECODE_INVAL)
10632 if len(instance.secondary_nodes) != 1:
10633 raise errors.OpPrereqError("The instance has a strange layout,"
10634 " expected one secondary but found %d" %
10635 len(instance.secondary_nodes),
10636 errors.ECODE_FAULT)
10638 if not self.delay_iallocator:
10639 self._CheckPrereq2()
10641 def _CheckPrereq2(self):
10642 """Check prerequisites, second part.
10644 This function should always be part of CheckPrereq. It was separated and is
10645 now called from Exec because during node evacuation iallocator was only
10646 called with an unmodified cluster model, not taking planned changes into
account.
10650 instance = self.instance
10651 secondary_node = instance.secondary_nodes[0]
10653 if self.iallocator_name is None:
10654 remote_node = self.remote_node
10656 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10657 instance.name, instance.secondary_nodes)
10659 if remote_node is None:
10660 self.remote_node_info = None
10662 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10663 "Remote node '%s' is not locked" % remote_node
10665 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10666 assert self.remote_node_info is not None, \
10667 "Cannot retrieve locked node %s" % remote_node
10669 if remote_node == self.instance.primary_node:
10670 raise errors.OpPrereqError("The specified node is the primary node of"
10671 " the instance", errors.ECODE_INVAL)
10673 if remote_node == secondary_node:
10674 raise errors.OpPrereqError("The specified node is already the"
10675 " secondary node of the instance",
10676 errors.ECODE_INVAL)
10678 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10679 constants.REPLACE_DISK_CHG):
10680 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10681 errors.ECODE_INVAL)
10683 if self.mode == constants.REPLACE_DISK_AUTO:
10684 if not self._CheckDisksActivated(instance):
10685 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10686 " first" % self.instance_name,
10687 errors.ECODE_STATE)
10688 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10689 faulty_secondary = self._FindFaultyDisks(secondary_node)
10691 if faulty_primary and faulty_secondary:
10692 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10693 " one node and can not be repaired"
10694 " automatically" % self.instance_name,
10695 errors.ECODE_STATE)
if faulty_primary:
10698 self.disks = faulty_primary
10699 self.target_node = instance.primary_node
10700 self.other_node = secondary_node
10701 check_nodes = [self.target_node, self.other_node]
10702 elif faulty_secondary:
10703 self.disks = faulty_secondary
10704 self.target_node = secondary_node
10705 self.other_node = instance.primary_node
10706 check_nodes = [self.target_node, self.other_node]
10712 # Non-automatic modes
10713 if self.mode == constants.REPLACE_DISK_PRI:
10714 self.target_node = instance.primary_node
10715 self.other_node = secondary_node
10716 check_nodes = [self.target_node, self.other_node]
10718 elif self.mode == constants.REPLACE_DISK_SEC:
10719 self.target_node = secondary_node
10720 self.other_node = instance.primary_node
10721 check_nodes = [self.target_node, self.other_node]
10723 elif self.mode == constants.REPLACE_DISK_CHG:
10724 self.new_node = remote_node
10725 self.other_node = instance.primary_node
10726 self.target_node = secondary_node
10727 check_nodes = [self.new_node, self.other_node]
10729 _CheckNodeNotDrained(self.lu, remote_node)
10730 _CheckNodeVmCapable(self.lu, remote_node)
10732 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10733 assert old_node_info is not None
10734 if old_node_info.offline and not self.early_release:
10735 # doesn't make sense to delay the release
10736 self.early_release = True
10737 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10738 " early-release mode", secondary_node)
else:
10741 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
self.mode)
10744 # If not specified all disks should be replaced
if not self.disks:
10746 self.disks = range(len(self.instance.disks))
10748 # TODO: This is ugly, but right now we can't distinguish between an
10749 # internally submitted opcode and an external one. We should fix that.
10750 if self.remote_node_info:
10751 # We change the node, lets verify it still meets instance policy
10752 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10753 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10755 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10756 ignore=self.ignore_ipolicy)
10758 for node in check_nodes:
10759 _CheckNodeOnline(self.lu, node)
10761 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10764 if node_name is not None)
10766 # Release unneeded node and node resource locks
10767 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10768 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10770 # Release any owned node group
10771 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10772 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10774 # Check whether disks are valid
10775 for disk_idx in self.disks:
10776 instance.FindDisk(disk_idx)
10778 # Get secondary node IP addresses
10779 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10780 in self.cfg.GetMultiNodeInfo(touched_nodes))
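# For illustration (addresses invented): node_secondary_ip ends up as a
# mapping like {"node1": "192.0.2.1", "node2": "192.0.2.2"}, later used when
# addressing the DRBD endpoints over the secondary network.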
10782 def Exec(self, feedback_fn):
10783 """Execute disk replacement.
10785 This dispatches the disk replacement to the appropriate handler.
10788 if self.delay_iallocator:
10789 self._CheckPrereq2()
10792 # Verify owned locks before starting operation
10793 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10794 assert set(owned_nodes) == set(self.node_secondary_ip), \
10795 ("Incorrect node locks, owning %s, expected %s" %
10796 (owned_nodes, self.node_secondary_ip.keys()))
10797 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10798 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10800 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10801 assert list(owned_instances) == [self.instance_name], \
10802 "Instance '%s' not locked" % self.instance_name
10804 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10805 "Should not own any node group lock at this point"
if not self.disks:
10808 feedback_fn("No disks need replacement")
return
10811 feedback_fn("Replacing disk(s) %s for %s" %
10812 (utils.CommaJoin(self.disks), self.instance.name))
10814 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10816 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
10818 _StartInstanceDisks(self.lu, self.instance, True)
try:
10821 # Should we replace the secondary node?
10822 if self.new_node is not None:
10823 fn = self._ExecDrbd8Secondary
else:
10825 fn = self._ExecDrbd8DiskOnly
10827 result = fn(feedback_fn)
finally:
10829 # Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
10832 _SafeShutdownInstanceDisks(self.lu, self.instance)
10834 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10837 # Verify owned locks
10838 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10839 nodes = frozenset(self.node_secondary_ip)
10840 assert ((self.early_release and not owned_nodes) or
10841 (not self.early_release and not (set(owned_nodes) - nodes))), \
10842 ("Not owning the correct locks, early_release=%s, owned=%r,"
10843 " nodes=%r" % (self.early_release, owned_nodes, nodes))
return result
10847 def _CheckVolumeGroup(self, nodes):
10848 self.lu.LogInfo("Checking volume groups")
10850 vgname = self.cfg.GetVGName()
10852 # Make sure volume group exists on all involved nodes
10853 results = self.rpc.call_vg_list(nodes)
if not results:
10855 raise errors.OpExecError("Can't list volume groups on the nodes")
for node in nodes:
10858 res = results[node]
10859 res.Raise("Error checking node %s" % node)
10860 if vgname not in res.payload:
10861 raise errors.OpExecError("Volume group '%s' not found on node %s" %
(vgname, node))
10864 def _CheckDisksExistence(self, nodes):
10865 # Check disk existence
10866 for idx, dev in enumerate(self.instance.disks):
10867 if idx not in self.disks:
continue
for node in nodes:
10871 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10872 self.cfg.SetDiskID(dev, node)
10874 result = _BlockdevFind(self, node, dev, self.instance)
10876 msg = result.fail_msg
10877 if msg or not result.payload:
if not msg:
10879 msg = "disk not found"
10880 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
(idx, node, msg))
10883 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10884 for idx, dev in enumerate(self.instance.disks):
10885 if idx not in self.disks:
continue
10888 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
(idx, node_name))
10891 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10892 on_primary, ldisk=ldisk):
10893 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10894 " replace disks for instance %s" %
10895 (node_name, self.instance.name))
10897 def _CreateNewStorage(self, node_name):
10898 """Create new storage on the primary or secondary node.
10900 This is only used for same-node replaces, not for changing the
10901 secondary node, hence we don't want to modify the existing disk.
iv_names = {}
10906 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10907 for idx, dev in enumerate(disks):
10908 if idx not in self.disks:
continue
10911 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10913 self.cfg.SetDiskID(dev, node_name)
10915 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10916 names = _GenerateUniqueNames(self.lu, lv_names)
10918 (data_disk, meta_disk) = dev.children
10919 vg_data = data_disk.logical_id[0]
10920 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10921 logical_id=(vg_data, names[0]),
10922 params=data_disk.params)
10923 vg_meta = meta_disk.logical_id[0]
10924 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10925 logical_id=(vg_meta, names[1]),
10926 params=meta_disk.params)
10928 new_lvs = [lv_data, lv_meta]
10929 old_lvs = [child.Copy() for child in dev.children]
10930 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
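# iv_names thus maps the DRBD volume name (e.g. "disk/0") to a
# (drbd_device, old_lvs, new_lvs) tuple used by the later rename and
# cleanup steps.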
10932 # we pass force_create=True to force the LVM creation
10933 for new_lv in new_lvs:
10934 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10935 _GetInstanceInfoText(self.instance), False)
return iv_names
10939 def _CheckDevices(self, node_name, iv_names):
10940 for name, (dev, _, _) in iv_names.iteritems():
10941 self.cfg.SetDiskID(dev, node_name)
10943 result = _BlockdevFind(self, node_name, dev, self.instance)
10945 msg = result.fail_msg
10946 if msg or not result.payload:
if not msg:
10948 msg = "disk not found"
10949 raise errors.OpExecError("Can't find DRBD device %s: %s" %
(name, msg))
10952 if result.payload.is_degraded:
10953 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10955 def _RemoveOldStorage(self, node_name, iv_names):
10956 for name, (_, old_lvs, _) in iv_names.iteritems():
10957 self.lu.LogInfo("Remove logical volumes for %s" % name)
for lv in old_lvs:
10960 self.cfg.SetDiskID(lv, node_name)
10962 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
if msg:
10964 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10965 hint="remove unused LVs manually")
10967 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10968 """Replace a disk on the primary or secondary for DRBD 8.
10970 The algorithm for replace is quite complicated:
10972 1. for each disk to be replaced:
10974 1. create new LVs on the target node with unique names
10975 1. detach old LVs from the drbd device
10976 1. rename old LVs to name_replaced.<time_t>
10977 1. rename new LVs to old LVs
10978 1. attach the new LVs (with the old names now) to the drbd device
10980 1. wait for sync across all devices
10982 1. for each modified disk:
10984 1. remove old LVs (which have the name name_replaced.<time_t>)
10986 Failures are not very well handled.
steps_total = 6
10991 # Step: check device activation
10992 self.lu.LogStep(1, steps_total, "Check device existence")
10993 self._CheckDisksExistence([self.other_node, self.target_node])
10994 self._CheckVolumeGroup([self.target_node, self.other_node])
10996 # Step: check other node consistency
10997 self.lu.LogStep(2, steps_total, "Check peer consistency")
10998 self._CheckDisksConsistency(self.other_node,
10999 self.other_node == self.instance.primary_node,
11002 # Step: create new storage
11003 self.lu.LogStep(3, steps_total, "Allocate new storage")
11004 iv_names = self._CreateNewStorage(self.target_node)
11006 # Step: for each lv, detach+rename*2+attach
11007 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11008 for dev, old_lvs, new_lvs in iv_names.itervalues():
11009 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11011 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
old_lvs)
11013 result.Raise("Can't detach drbd from local storage on node"
11014 " %s for device %s" % (self.target_node, dev.iv_name))
11016 #cfg.Update(instance)
11018 # ok, we created the new LVs, so now we know we have the needed
11019 # storage; as such, we proceed on the target node to rename
11020 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11021 # using the assumption that logical_id == physical_id (which in
11022 # turn is the unique_id on that node)
11024 # FIXME(iustin): use a better name for the replaced LVs
11025 temp_suffix = int(time.time())
11026 ren_fn = lambda d, suff: (d.physical_id[0],
11027 d.physical_id[1] + "_replaced-%s" % suff)
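# illustrative only: an LV with physical_id ("xenvg", "disk0_data") and
# suffix 1400000000 would be renamed by ren_fn to
# ("xenvg", "disk0_data_replaced-1400000000")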
11029 # Build the rename list based on what LVs exist on the node
11030 rename_old_to_new = []
11031 for to_ren in old_lvs:
11032 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11033 if not result.fail_msg and result.payload:
11035 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11037 self.lu.LogInfo("Renaming the old LVs on the target node")
result = self.rpc.call_blockdev_rename(self.target_node,
                                       rename_old_to_new)
11040 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11042 # Now we rename the new LVs to the old LVs
11043 self.lu.LogInfo("Renaming the new LVs on the target node")
11044 rename_new_to_old = [(new, old.physical_id)
11045 for old, new in zip(old_lvs, new_lvs)]
result = self.rpc.call_blockdev_rename(self.target_node,
                                       rename_new_to_old)
11048 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11050 # Intermediate steps of in memory modifications
11051 for old, new in zip(old_lvs, new_lvs):
11052 new.logical_id = old.logical_id
11053 self.cfg.SetDiskID(new, self.target_node)
11055 # We need to modify old_lvs so that removal later removes the
# right LVs, not the newly added ones; note that old_lvs is a
# copy (built via Copy() in _CreateNewStorage above)
11058 for disk in old_lvs:
11059 disk.logical_id = ren_fn(disk, temp_suffix)
11060 self.cfg.SetDiskID(disk, self.target_node)
11062 # Now that the new lvs have the old name, we can add them to the device
11063 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11064 result = self.rpc.call_blockdev_addchildren(self.target_node,
11065 (dev, self.instance), new_lvs)
11066 msg = result.fail_msg
11068 for new_lv in new_lvs:
11069 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11072 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11073 hint=("cleanup manually the unused logical"
11075 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11077 cstep = itertools.count(5)
11079 if self.early_release:
11080 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11081 self._RemoveOldStorage(self.target_node, iv_names)
11082 # TODO: Check if releasing locks early still makes sense
11083 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11085 # Release all resource locks except those used by the instance
11086 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11087 keep=self.node_secondary_ip.keys())
11089 # Release all node locks while waiting for sync
11090 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11092 # TODO: Can the instance lock be downgraded here? Take the optional disk
11093 # shutdown in the caller into consideration.
11096 # This can fail as the old devices are degraded and _WaitForSync
11097 # does a combined result over all disks, so we don't check its return value
11098 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11099 _WaitForSync(self.lu, self.instance)
11101 # Check all devices manually
11102 self._CheckDevices(self.instance.primary_node, iv_names)
11104 # Step: remove old storage
11105 if not self.early_release:
11106 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11107 self._RemoveOldStorage(self.target_node, iv_names)
11109 def _ExecDrbd8Secondary(self, feedback_fn):
11110 """Replace the secondary node for DRBD 8.
11112 The algorithm for replace is quite complicated:
11113 - for all disks of the instance:
11114 - create new LVs on the new node with same names
11115 - shutdown the drbd device on the old secondary
11116 - disconnect the drbd network on the primary
11117 - create the drbd device on the new secondary
11118 - network attach the drbd on the primary, using an artifice:
11119 the drbd code for Attach() will connect to the network if it
11120 finds a device which is connected to the good local disks but
11121 not network enabled
11122 - wait for sync across all devices
11123 - remove all disks from the old secondary
11125 Failures are not very well handled.
steps_total = 6

pnode = self.instance.primary_node
11132 # Step: check device activation
11133 self.lu.LogStep(1, steps_total, "Check device existence")
11134 self._CheckDisksExistence([self.instance.primary_node])
11135 self._CheckVolumeGroup([self.instance.primary_node])
11137 # Step: check other node consistency
11138 self.lu.LogStep(2, steps_total, "Check peer consistency")
11139 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11141 # Step: create new storage
11142 self.lu.LogStep(3, steps_total, "Allocate new storage")
11143 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11144 for idx, dev in enumerate(disks):
11145 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11146 (self.new_node, idx))
11147 # we pass force_create=True to force LVM creation
11148 for new_lv in dev.children:
11149 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11150 True, _GetInstanceInfoText(self.instance), False)
# Step 4: drbd minors and drbd setup changes
11153 # after this, we must manually remove the drbd minors on both the
11154 # error and the success paths
11155 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11156 minors = self.cfg.AllocateDRBDMinor([self.new_node
11157 for dev in self.instance.disks],
11158 self.instance.name)
11159 logging.debug("Allocated minors %r", minors)
11162 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11163 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11164 (self.new_node, idx))
11165 # create new devices on new_node; note that we create two IDs:
11166 # one without port, so the drbd will be activated without
11167 # networking information on the new node at this stage, and one
# with network, for the later activation in step 4
11169 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
if self.instance.primary_node == o_node1:
  p_minor = o_minor1
else:
  assert self.instance.primary_node == o_node2, "Three-node instance?"
  p_minor = o_minor2
11176 new_alone_id = (self.instance.primary_node, self.new_node, None,
11177 p_minor, new_minor, o_secret)
11178 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11179 p_minor, new_minor, o_secret)
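# a DRBD8 logical_id is the 6-tuple (nodeA, nodeB, port, minorA, minorB,
# secret); new_alone_id carries port=None so the device is first brought
# up standalone on the new node, while new_net_id keeps the port and is
# written to the configuration once the old secondary is dropped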
11181 iv_names[idx] = (dev, dev.children, new_net_id)
11182 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11184 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11185 logical_id=new_alone_id,
11186 children=dev.children,
11189 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11192 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11194 _GetInstanceInfoText(self.instance), False)
11195 except errors.GenericError:
11196 self.cfg.ReleaseDRBDMinors(self.instance.name)
11199 # We have new devices, shutdown the drbd on the old secondary
11200 for idx, dev in enumerate(self.instance.disks):
11201 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11202 self.cfg.SetDiskID(dev, self.target_node)
11203 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11204 (dev, self.instance)).fail_msg
11206 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11207 "node: %s" % (idx, msg),
11208 hint=("Please cleanup this device manually as"
11209 " soon as possible"))
11211 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11212 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11213 self.instance.disks)[pnode]
11215 msg = result.fail_msg
11217 # detaches didn't succeed (unlikely)
11218 self.cfg.ReleaseDRBDMinors(self.instance.name)
11219 raise errors.OpExecError("Can't detach the disks from the network on"
11220 " old node: %s" % (msg,))
11222 # if we managed to detach at least one, we update all the disks of
11223 # the instance to point to the new secondary
11224 self.lu.LogInfo("Updating instance configuration")
11225 for dev, _, new_logical_id in iv_names.itervalues():
11226 dev.logical_id = new_logical_id
11227 self.cfg.SetDiskID(dev, self.instance.primary_node)
11229 self.cfg.Update(self.instance, feedback_fn)
11231 # Release all node locks (the configuration has been updated)
11232 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11234 # and now perform the drbd attach
11235 self.lu.LogInfo("Attaching primary drbds to new secondary"
11236 " (standalone => connected)")
11237 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11239 self.node_secondary_ip,
11240 (self.instance.disks, self.instance),
11241 self.instance.name,
11243 for to_node, to_result in result.items():
11244 msg = to_result.fail_msg
11246 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11248 hint=("please do a gnt-instance info to see the"
11249 " status of disks"))
11251 cstep = itertools.count(5)
11253 if self.early_release:
11254 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11255 self._RemoveOldStorage(self.target_node, iv_names)
11256 # TODO: Check if releasing locks early still makes sense
11257 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11259 # Release all resource locks except those used by the instance
11260 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11261 keep=self.node_secondary_ip.keys())
11263 # TODO: Can the instance lock be downgraded here? Take the optional disk
11264 # shutdown in the caller into consideration.
11267 # This can fail as the old devices are degraded and _WaitForSync
11268 # does a combined result over all disks, so we don't check its return value
11269 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11270 _WaitForSync(self.lu, self.instance)
11272 # Check all devices manually
11273 self._CheckDevices(self.instance.primary_node, iv_names)
11275 # Step: remove old storage
11276 if not self.early_release:
11277 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11278 self._RemoveOldStorage(self.target_node, iv_names)
11281 class LURepairNodeStorage(NoHooksLU):
11282 """Repairs the volume group on a node.
11287 def CheckArguments(self):
11288 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11290 storage_type = self.op.storage_type
11292 if (constants.SO_FIX_CONSISTENCY not in
11293 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11294 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11295 " repaired" % storage_type,
11296 errors.ECODE_INVAL)
11298 def ExpandNames(self):
11299 self.needed_locks = {
11300 locking.LEVEL_NODE: [self.op.node_name],
11303 def _CheckFaultyDisks(self, instance, node_name):
11304 """Ensure faulty disks abort the opcode or at least warn."""
11306 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11308 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11309 " node '%s'" % (instance.name, node_name),
11310 errors.ECODE_STATE)
11311 except errors.OpPrereqError, err:
11312 if self.op.ignore_consistency:
11313 self.proc.LogWarning(str(err.args[0]))
11317 def CheckPrereq(self):
11318 """Check prerequisites.
11321 # Check whether any instance on this node has faulty disks
11322 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11323 if inst.admin_state != constants.ADMINST_UP:
11325 check_nodes = set(inst.all_nodes)
11326 check_nodes.discard(self.op.node_name)
11327 for inst_node_name in check_nodes:
11328 self._CheckFaultyDisks(inst, inst_node_name)
11330 def Exec(self, feedback_fn):
11331 feedback_fn("Repairing storage unit '%s' on %s ..." %
11332 (self.op.name, self.op.node_name))
11334 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11335 result = self.rpc.call_storage_execute(self.op.node_name,
11336 self.op.storage_type, st_args,
11338 constants.SO_FIX_CONSISTENCY)
11339 result.Raise("Failed to repair storage unit '%s' on %s" %
11340 (self.op.name, self.op.node_name))
11343 class LUNodeEvacuate(NoHooksLU):
11344 """Evacuates instances off a list of nodes.
11349 _MODE2IALLOCATOR = {
11350 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11351 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11352 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11354 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11355 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11356 constants.IALLOCATOR_NEVAC_MODES)
11358 def CheckArguments(self):
11359 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11361 def ExpandNames(self):
11362 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11364 if self.op.remote_node is not None:
11365 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11366 assert self.op.remote_node
11368 if self.op.remote_node == self.op.node_name:
11369 raise errors.OpPrereqError("Can not use evacuated node as a new"
11370 " secondary node", errors.ECODE_INVAL)
11372 if self.op.mode != constants.NODE_EVAC_SEC:
11373 raise errors.OpPrereqError("Without the use of an iallocator only"
11374 " secondary instances can be evacuated",
11375 errors.ECODE_INVAL)
11378 self.share_locks = _ShareAll()
11379 self.needed_locks = {
11380 locking.LEVEL_INSTANCE: [],
11381 locking.LEVEL_NODEGROUP: [],
11382 locking.LEVEL_NODE: [],
11385 # Determine nodes (via group) optimistically, needs verification once locks
11386 # have been acquired
11387 self.lock_nodes = self._DetermineNodes()
11389 def _DetermineNodes(self):
11390 """Gets the list of nodes to operate on.
11393 if self.op.remote_node is None:
11394 # Iallocator will choose any node(s) in the same group
11395 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11397 group_nodes = frozenset([self.op.remote_node])
11399 # Determine nodes to be locked
11400 return set([self.op.node_name]) | group_nodes
11402 def _DetermineInstances(self):
11403 """Builds list of instances to operate on.
11406 assert self.op.mode in constants.NODE_EVAC_MODES
11408 if self.op.mode == constants.NODE_EVAC_PRI:
11409 # Primary instances only
11410 inst_fn = _GetNodePrimaryInstances
11411 assert self.op.remote_node is None, \
11412 "Evacuating primary instances requires iallocator"
11413 elif self.op.mode == constants.NODE_EVAC_SEC:
11414 # Secondary instances only
11415 inst_fn = _GetNodeSecondaryInstances
11418 assert self.op.mode == constants.NODE_EVAC_ALL
11419 inst_fn = _GetNodeInstances
11420 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11422 raise errors.OpPrereqError("Due to an issue with the iallocator"
11423 " interface it is not possible to evacuate"
11424 " all instances at once; specify explicitly"
11425 " whether to evacuate primary or secondary"
11427 errors.ECODE_INVAL)
11429 return inst_fn(self.cfg, self.op.node_name)
11431 def DeclareLocks(self, level):
11432 if level == locking.LEVEL_INSTANCE:
11433 # Lock instances optimistically, needs verification once node and group
11434 # locks have been acquired
11435 self.needed_locks[locking.LEVEL_INSTANCE] = \
11436 set(i.name for i in self._DetermineInstances())
11438 elif level == locking.LEVEL_NODEGROUP:
11439 # Lock node groups for all potential target nodes optimistically, needs
11440 # verification once nodes have been acquired
11441 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11442 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11444 elif level == locking.LEVEL_NODE:
11445 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11447 def CheckPrereq(self):
11449 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11450 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11451 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11453 need_nodes = self._DetermineNodes()
11455 if not owned_nodes.issuperset(need_nodes):
11456 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11457 " locks were acquired, current nodes are"
11458 " are '%s', used to be '%s'; retry the"
11460 (self.op.node_name,
11461 utils.CommaJoin(need_nodes),
11462 utils.CommaJoin(owned_nodes)),
11463 errors.ECODE_STATE)
11465 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11466 if owned_groups != wanted_groups:
11467 raise errors.OpExecError("Node groups changed since locks were acquired,"
11468 " current groups are '%s', used to be '%s';"
11469 " retry the operation" %
11470 (utils.CommaJoin(wanted_groups),
11471 utils.CommaJoin(owned_groups)))
11473 # Determine affected instances
11474 self.instances = self._DetermineInstances()
11475 self.instance_names = [i.name for i in self.instances]
11477 if set(self.instance_names) != owned_instances:
11478 raise errors.OpExecError("Instances on node '%s' changed since locks"
11479 " were acquired, current instances are '%s',"
11480 " used to be '%s'; retry the operation" %
11481 (self.op.node_name,
11482 utils.CommaJoin(self.instance_names),
11483 utils.CommaJoin(owned_instances)))
11485 if self.instance_names:
11486 self.LogInfo("Evacuating instances from node '%s': %s",
11488 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11490 self.LogInfo("No instances to evacuate from node '%s'",
11493 if self.op.remote_node is not None:
11494 for i in self.instances:
11495 if i.primary_node == self.op.remote_node:
11496 raise errors.OpPrereqError("Node %s is the primary node of"
11497 " instance %s, cannot use it as"
11499 (self.op.remote_node, i.name),
11500 errors.ECODE_INVAL)
11502 def Exec(self, feedback_fn):
11503 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11505 if not self.instance_names:
11506 # No instances to evacuate
11509 elif self.op.iallocator is not None:
11510 # TODO: Implement relocation to other group
11511 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11512 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11513 instances=list(self.instance_names))
11515 ial.Run(self.op.iallocator)
11517 if not ial.success:
11518 raise errors.OpPrereqError("Can't compute node evacuation using"
11519 " iallocator '%s': %s" %
11520 (self.op.iallocator, ial.info),
11521 errors.ECODE_NORES)
11523 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11525 elif self.op.remote_node is not None:
11526 assert self.op.mode == constants.NODE_EVAC_SEC
11528 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11529 remote_node=self.op.remote_node,
11531 mode=constants.REPLACE_DISK_CHG,
11532 early_release=self.op.early_release)]
11533 for instance_name in self.instance_names
11537 raise errors.ProgrammerError("No iallocator or remote node")
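# jobs is a list of jobs, each of which is a list of opcodes: the
# iallocator branch returns whatever _LoadNodeEvacResult unpacked, the
# remote_node branch one single-opcode OpInstanceReplaceDisks job per
# evacuated instance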
11539 return ResultWithJobs(jobs)
11542 def _SetOpEarlyRelease(early_release, op):
11543 """Sets C{early_release} flag on opcodes if available.
try:
  op.early_release = early_release
except AttributeError:
  assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

return op
11554 def _NodeEvacDest(use_nodes, group, nodes):
11555 """Returns group or nodes depending on caller's choice.
if use_nodes:
  return utils.CommaJoin(nodes)
else:
  return group
11564 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11565 """Unpacks the result of change-group and node-evacuate iallocator requests.
11567 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11568 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11570 @type lu: L{LogicalUnit}
11571 @param lu: Logical unit instance
11572 @type alloc_result: tuple/list
11573 @param alloc_result: Result from iallocator
11574 @type early_release: bool
11575 @param early_release: Whether to release locks early if possible
11576 @type use_nodes: bool
11577 @param use_nodes: Whether to display node names instead of groups
11580 (moved, failed, jobs) = alloc_result
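# roughly: moved is a list of (instance_name, group, [node, ...]) tuples,
# failed a list of (instance_name, reason) tuples and jobs a list of
# per-job lists of serialized opcodes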
11583 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11584 for (name, reason) in failed)
11585 lu.LogWarning("Unable to evacuate instances %s", failreason)
11586 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11589 lu.LogInfo("Instances to be moved: %s",
11590 utils.CommaJoin("%s (to %s)" %
11591 (name, _NodeEvacDest(use_nodes, group, nodes))
11592 for (name, group, nodes) in moved))
11594 return [map(compat.partial(_SetOpEarlyRelease, early_release),
map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
11599 class LUInstanceGrowDisk(LogicalUnit):
11600 """Grow a disk of an instance.
11603 HPATH = "disk-grow"
11604 HTYPE = constants.HTYPE_INSTANCE
11607 def ExpandNames(self):
11608 self._ExpandAndLockInstance()
11609 self.needed_locks[locking.LEVEL_NODE] = []
11610 self.needed_locks[locking.LEVEL_NODE_RES] = []
11611 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11612 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11614 def DeclareLocks(self, level):
11615 if level == locking.LEVEL_NODE:
11616 self._LockInstancesNodes()
11617 elif level == locking.LEVEL_NODE_RES:
11619 self.needed_locks[locking.LEVEL_NODE_RES] = \
11620 self.needed_locks[locking.LEVEL_NODE][:]
11622 def BuildHooksEnv(self):
11623 """Build hooks env.
11625 This runs on the master, the primary and all the secondaries.
11629 "DISK": self.op.disk,
11630 "AMOUNT": self.op.amount,
11631 "ABSOLUTE": self.op.absolute,
11633 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11636 def BuildHooksNodes(self):
11637 """Build hooks nodes.
11640 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11643 def CheckPrereq(self):
11644 """Check prerequisites.
11646 This checks that the instance is in the cluster.
11649 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11650 assert instance is not None, \
11651 "Cannot retrieve locked instance %s" % self.op.instance_name
11652 nodenames = list(instance.all_nodes)
11653 for node in nodenames:
11654 _CheckNodeOnline(self, node)
11656 self.instance = instance
11658 if instance.disk_template not in constants.DTS_GROWABLE:
11659 raise errors.OpPrereqError("Instance's disk layout does not support"
11660 " growing", errors.ECODE_INVAL)
11662 self.disk = instance.FindDisk(self.op.disk)
11664 if self.op.absolute:
11665 self.target = self.op.amount
11666 self.delta = self.target - self.disk.size
11668 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11669 "current disk size (%s)" %
11670 (utils.FormatUnit(self.target, "h"),
11671 utils.FormatUnit(self.disk.size, "h")),
11672 errors.ECODE_STATE)
11674 self.delta = self.op.amount
11675 self.target = self.disk.size + self.delta
11677 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11678 utils.FormatUnit(self.delta, "h"),
11679 errors.ECODE_INVAL)
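# for example (illustrative numbers): growing a 10240 MB disk with
# amount=2048 in relative mode gives delta=2048 and target=12288, while
# the same request in absolute mode would use amount=12288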
11681 if instance.disk_template not in (constants.DT_FILE,
11682 constants.DT_SHARED_FILE,
11684 # TODO: check the free disk space for file, when that feature will be
11686 _CheckNodesFreeDiskPerVG(self, nodenames,
11687 self.disk.ComputeGrowth(self.delta))
11689 def Exec(self, feedback_fn):
11690 """Execute disk grow.
instance = self.instance
disk = self.disk
11696 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11697 assert (self.owned_locks(locking.LEVEL_NODE) ==
11698 self.owned_locks(locking.LEVEL_NODE_RES))
11700 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11702 raise errors.OpExecError("Cannot activate block device to grow")
11704 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11705 (self.op.disk, instance.name,
11706 utils.FormatUnit(self.delta, "h"),
11707 utils.FormatUnit(self.target, "h")))
11709 # First run all grow ops in dry-run mode
11710 for node in instance.all_nodes:
11711 self.cfg.SetDiskID(disk, node)
11712 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11714 result.Raise("Grow request failed to node %s" % node)
11716 # We know that (as far as we can test) operations across different
11717 # nodes will succeed, time to run it for real
11718 for node in instance.all_nodes:
11719 self.cfg.SetDiskID(disk, node)
11720 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11722 result.Raise("Grow request failed to node %s" % node)
11724 # TODO: Rewrite code to work properly
11725 # DRBD goes into sync mode for a short amount of time after executing the
11726 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11727 # calling "resize" in sync mode fails. Sleeping for a short amount of
11728 # time is a work-around.
11731 disk.RecordGrow(self.delta)
11732 self.cfg.Update(instance, feedback_fn)
11734 # Changes have been recorded, release node lock
11735 _ReleaseLocks(self, locking.LEVEL_NODE)
11737 # Downgrade lock while waiting for sync
11738 self.glm.downgrade(locking.LEVEL_INSTANCE)
11740 if self.op.wait_for_sync:
11741 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11743 self.proc.LogWarning("Disk sync-ing has not returned a good"
11744 " status; please check the instance")
11745 if instance.admin_state != constants.ADMINST_UP:
11746 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11747 elif instance.admin_state != constants.ADMINST_UP:
11748 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11749 " not supposed to be running because no wait for"
11750 " sync mode was requested")
11752 assert self.owned_locks(locking.LEVEL_NODE_RES)
11753 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11756 class LUInstanceQueryData(NoHooksLU):
11757 """Query runtime instance data.
11762 def ExpandNames(self):
11763 self.needed_locks = {}
11765 # Use locking if requested or when non-static information is wanted
11766 if not (self.op.static or self.op.use_locking):
11767 self.LogWarning("Non-static data requested, locks need to be acquired")
11768 self.op.use_locking = True
11770 if self.op.instances or not self.op.use_locking:
11771 # Expand instance names right here
11772 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11774 # Will use acquired locks
11775 self.wanted_names = None
11777 if self.op.use_locking:
11778 self.share_locks = _ShareAll()
11780 if self.wanted_names is None:
11781 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11783 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11785 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11786 self.needed_locks[locking.LEVEL_NODE] = []
11787 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11789 def DeclareLocks(self, level):
11790 if self.op.use_locking:
11791 if level == locking.LEVEL_NODEGROUP:
11792 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11794 # Lock all groups used by instances optimistically; this requires going
11795 # via the node before it's locked, requiring verification later on
11796 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11797 frozenset(group_uuid
11798 for instance_name in owned_instances
11800 self.cfg.GetInstanceNodeGroups(instance_name))
11802 elif level == locking.LEVEL_NODE:
11803 self._LockInstancesNodes()
11805 def CheckPrereq(self):
11806 """Check prerequisites.
11808 This only checks the optional instance list against the existing names.
11811 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11812 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11813 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11815 if self.wanted_names is None:
11816 assert self.op.use_locking, "Locking was not used"
11817 self.wanted_names = owned_instances
11819 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11821 if self.op.use_locking:
11822 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11825 assert not (owned_instances or owned_groups or owned_nodes)
11827 self.wanted_instances = instances.values()
11829 def _ComputeBlockdevStatus(self, node, instance, dev):
11830 """Returns the status of a block device
11833 if self.op.static or not node:
11836 self.cfg.SetDiskID(dev, node)
11838 result = self.rpc.call_blockdev_find(node, dev)
11842 result.Raise("Can't compute disk status for %s" % instance.name)
11844 status = result.payload
11848 return (status.dev_path, status.major, status.minor,
11849 status.sync_percent, status.estimated_time,
11850 status.is_degraded, status.ldisk_status)
11852 def _ComputeDiskStatus(self, instance, snode, dev):
11853 """Compute block device status.
11856 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11858 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11860 def _ComputeDiskStatusInner(self, instance, snode, dev):
11861 """Compute block device status.
11863 @attention: The device has to be annotated already.
11866 if dev.dev_type in constants.LDS_DRBD:
11867 # we change the snode then (otherwise we use the one passed in)
11868 if dev.logical_id[0] == instance.primary_node:
11869 snode = dev.logical_id[1]
11871 snode = dev.logical_id[0]
11873 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11875 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11878 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11885 "iv_name": dev.iv_name,
11886 "dev_type": dev.dev_type,
11887 "logical_id": dev.logical_id,
11888 "physical_id": dev.physical_id,
11889 "pstatus": dev_pstatus,
11890 "sstatus": dev_sstatus,
11891 "children": dev_children,
11896 def Exec(self, feedback_fn):
11897 """Gather and return data"""
11900 cluster = self.cfg.GetClusterInfo()
11902 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11903 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11905 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11906 for node in nodes.values()))
11908 group2name_fn = lambda uuid: groups[uuid].name
11910 for instance in self.wanted_instances:
11911 pnode = nodes[instance.primary_node]
11913 if self.op.static or pnode.offline:
11914 remote_state = None
11916 self.LogWarning("Primary node %s is marked offline, returning static"
11917 " information only for instance %s" %
11918 (pnode.name, instance.name))
11920 remote_info = self.rpc.call_instance_info(instance.primary_node,
11922 instance.hypervisor)
11923 remote_info.Raise("Error checking node %s" % instance.primary_node)
11924 remote_info = remote_info.payload
if remote_info and "state" in remote_info:
  remote_state = "up"
else:
  if instance.admin_state == constants.ADMINST_UP:
    remote_state = "down"
  else:
    remote_state = instance.admin_state
11933 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11936 snodes_group_uuids = [nodes[snode_name].group
11937 for snode_name in instance.secondary_nodes]
11939 result[instance.name] = {
11940 "name": instance.name,
11941 "config_state": instance.admin_state,
11942 "run_state": remote_state,
11943 "pnode": instance.primary_node,
11944 "pnode_group_uuid": pnode.group,
11945 "pnode_group_name": group2name_fn(pnode.group),
11946 "snodes": instance.secondary_nodes,
11947 "snodes_group_uuids": snodes_group_uuids,
11948 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11950 # this happens to be the same format used for hooks
11951 "nics": _NICListToTuple(self, instance.nics),
11952 "disk_template": instance.disk_template,
11954 "hypervisor": instance.hypervisor,
11955 "network_port": instance.network_port,
11956 "hv_instance": instance.hvparams,
11957 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11958 "be_instance": instance.beparams,
11959 "be_actual": cluster.FillBE(instance),
11960 "os_instance": instance.osparams,
11961 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11962 "serial_no": instance.serial_no,
11963 "mtime": instance.mtime,
11964 "ctime": instance.ctime,
11965 "uuid": instance.uuid,
11971 def PrepareContainerMods(mods, private_fn):
11972 """Prepares a list of container modifications by adding a private data field.
11974 @type mods: list of tuples; (operation, index, parameters)
11975 @param mods: List of modifications
11976 @type private_fn: callable or None
11977 @param private_fn: Callable for constructing a private data field for a
if private_fn is None:
  fn = lambda: None
else:
  fn = private_fn

return [(op, idx, params, fn()) for (op, idx, params) in mods]
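# illustrative result: mods [(constants.DDM_ADD, -1, {...})] prepared with
# private_fn=_InstNicModPrivate becomes
# [(constants.DDM_ADD, -1, {...}, <_InstNicModPrivate instance>)]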
11990 #: Type description for changes as returned by L{ApplyContainerMods}'s
11992 _TApplyContModsCbChanges = \
11993 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
ht.TNonEmptyString,
ht.TAny,
])))
11999 def ApplyContainerMods(kind, container, chgdesc, mods,
12000 create_fn, modify_fn, remove_fn):
12001 """Applies descriptions in C{mods} to C{container}.
12004 @param kind: One-word item description
12005 @type container: list
12006 @param container: Container to modify
12007 @type chgdesc: None or list
12008 @param chgdesc: List of applied changes
12010 @param mods: Modifications as returned by L{PrepareContainerMods}
12011 @type create_fn: callable
12012 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12013 receives absolute item index, parameters and private data object as added
12014 by L{PrepareContainerMods}, returns tuple containing new item and changes
12016 @type modify_fn: callable
12017 @param modify_fn: Callback for modifying an existing item
12018 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12019 and private data object as added by L{PrepareContainerMods}, returns
12021 @type remove_fn: callable
12022 @param remove_fn: Callback on removing item; receives absolute item index,
12023 item and private data object as added by L{PrepareContainerMods}
12026 for (op, idx, params, private) in mods:
12029 absidx = len(container) - 1
12031 raise IndexError("Not accepting negative indices other than -1")
12032 elif idx > len(container):
12033 raise IndexError("Got %s index %s, but there are only %s" %
12034 (kind, idx, len(container)))
12040 if op == constants.DDM_ADD:
12041 # Calculate where item will be added
12043 addidx = len(container)
12047 if create_fn is None:
12050 (item, changes) = create_fn(addidx, params, private)
12053 container.append(item)
12056 assert idx <= len(container)
12057 # list.insert does so before the specified index
12058 container.insert(idx, item)
12060 # Retrieve existing item
12062 item = container[absidx]
12064 raise IndexError("Invalid %s index %s" % (kind, idx))
12066 if op == constants.DDM_REMOVE:
12069 if remove_fn is not None:
12070 remove_fn(absidx, item, private)
12072 #TODO: include a hotplugged msg in changes
12073 changes = [("%s/%s" % (kind, absidx), "remove")]
12075 assert container[absidx] == item
12076 del container[absidx]
12077 elif op == constants.DDM_MODIFY:
12078 if modify_fn is not None:
12079 #TODO: include a hotplugged msg in changes
12080 changes = modify_fn(absidx, item, params, private)
12083 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12085 assert _TApplyContModsCbChanges(changes)
12087 if not (chgdesc is None or changes is None):
12088 chgdesc.extend(changes)
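# illustrative call, mirroring the NIC handling in LUInstanceSetParams
# below: ApplyContainerMods("NIC", nics, chgdesc, nicmod, create_fn,
# modify_fn, remove_fn) mutates the nics list in place and appends
# (label, value) change pairs to chgdesc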
12091 def _UpdateIvNames(base_index, disks):
12092 """Updates the C{iv_name} attribute of disks.
12094 @type disks: list of L{objects.Disk}
12097 for (idx, disk) in enumerate(disks):
12098 disk.iv_name = "disk/%s" % (base_index + idx, )
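# e.g. _UpdateIvNames(1, disks) relabels a two-element list as "disk/1"
# and "disk/2"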
12101 class _InstNicModPrivate:
12102 """Data structure for network interface modifications.
12104 Used by L{LUInstanceSetParams}.
12107 def __init__(self):
12112 class LUInstanceSetParams(LogicalUnit):
12113 """Modifies an instances's parameters.
12116 HPATH = "instance-modify"
12117 HTYPE = constants.HTYPE_INSTANCE
12121 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12122 assert ht.TList(mods)
12123 assert not mods or len(mods[0]) in (2, 3)
12125 if mods and len(mods[0]) == 2:
12129 for op, params in mods:
12130 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12131 result.append((op, -1, params))
12135 raise errors.OpPrereqError("Only one %s add or remove operation is"
12136 " supported at a time" % kind,
12137 errors.ECODE_INVAL)
12139 result.append((constants.DDM_MODIFY, op, params))
12141 assert verify_fn(result)
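# illustrative upgrade performed by _UpgradeDiskNicMods: old-style
# [("add", {...})] becomes [(constants.DDM_ADD, -1, {...})], while
# [(2, {"mode": "ro"})] becomes [(constants.DDM_MODIFY, 2, {"mode": "ro"})]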
12148 def _CheckMods(kind, mods, key_types, item_fn):
12149 """Ensures requested disk/NIC modifications are valid.
12152 for (op, _, params) in mods:
12153 assert ht.TDict(params)
12155 utils.ForceDictType(params, key_types)
12157 if op == constants.DDM_REMOVE:
12159 raise errors.OpPrereqError("No settings should be passed when"
12160 " removing a %s" % kind,
12161 errors.ECODE_INVAL)
12162 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12163 item_fn(op, params)
12165 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12168 def _VerifyDiskModification(op, params):
12169 """Verifies a disk modification.
12172 if op == constants.DDM_ADD:
12173 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12174 if mode not in constants.DISK_ACCESS_SET:
12175 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12176 errors.ECODE_INVAL)
12178 size = params.get(constants.IDISK_SIZE, None)
12180 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12181 constants.IDISK_SIZE, errors.ECODE_INVAL)
12185 except (TypeError, ValueError), err:
12186 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12187 errors.ECODE_INVAL)
12189 params[constants.IDISK_SIZE] = size
12191 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12192 raise errors.OpPrereqError("Disk size change not possible, use"
12193 " grow-disk", errors.ECODE_INVAL)
12196 def _VerifyNicModification(op, params):
12197 """Verifies a network interface modification.
12200 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12201 ip = params.get(constants.INIC_IP, None)
12204 elif ip.lower() == constants.VALUE_NONE:
12205 params[constants.INIC_IP] = None
12206 elif not netutils.IPAddress.IsValid(ip):
12207 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12208 errors.ECODE_INVAL)
12210 bridge = params.get("bridge", None)
12211 link = params.get(constants.INIC_LINK, None)
12212 if bridge and link:
12213 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12214 " at the same time", errors.ECODE_INVAL)
12215 elif bridge and bridge.lower() == constants.VALUE_NONE:
12216 params["bridge"] = None
12217 elif link and link.lower() == constants.VALUE_NONE:
12218 params[constants.INIC_LINK] = None
12220 if op == constants.DDM_ADD:
12221 macaddr = params.get(constants.INIC_MAC, None)
12222 if macaddr is None:
12223 params[constants.INIC_MAC] = constants.VALUE_AUTO
12225 if constants.INIC_MAC in params:
12226 macaddr = params[constants.INIC_MAC]
12227 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12228 macaddr = utils.NormalizeAndValidateMac(macaddr)
12230 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12231 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12232 " modifying an existing NIC",
12233 errors.ECODE_INVAL)
12235 def CheckArguments(self):
12236 if not (self.op.nics or self.op.disks or self.op.disk_template or
12237 self.op.hvparams or self.op.beparams or self.op.os_name or
12238 self.op.offline is not None or self.op.runtime_mem):
12239 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12241 if self.op.hvparams:
12242 _CheckGlobalHvParams(self.op.hvparams)
12245 self._UpgradeDiskNicMods("disk", self.op.disks,
12246 opcodes.OpInstanceSetParams.TestDiskModifications)
12248 self._UpgradeDiskNicMods("NIC", self.op.nics,
12249 opcodes.OpInstanceSetParams.TestNicModifications)
12251 # Check disk modifications
12252 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12253 self._VerifyDiskModification)
12255 if self.op.disks and self.op.disk_template is not None:
12256 raise errors.OpPrereqError("Disk template conversion and other disk"
12257 " changes not supported at the same time",
12258 errors.ECODE_INVAL)
12260 if (self.op.disk_template and
12261 self.op.disk_template in constants.DTS_INT_MIRROR and
12262 self.op.remote_node is None):
12263 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12264 " one requires specifying a secondary node",
12265 errors.ECODE_INVAL)
12267 # Check NIC modifications
12268 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12269 self._VerifyNicModification)
12271 def ExpandNames(self):
12272 self._ExpandAndLockInstance()
12273 # Can't even acquire node locks in shared mode as upcoming changes in
12274 # Ganeti 2.6 will start to modify the node object on disk conversion
12275 self.needed_locks[locking.LEVEL_NODE] = []
12276 self.needed_locks[locking.LEVEL_NODE_RES] = []
12277 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12279 def DeclareLocks(self, level):
12280 # TODO: Acquire group lock in shared mode (disk parameters)
12281 if level == locking.LEVEL_NODE:
12282 self._LockInstancesNodes()
12283 if self.op.disk_template and self.op.remote_node:
12284 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12285 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12286 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12288 self.needed_locks[locking.LEVEL_NODE_RES] = \
12289 self.needed_locks[locking.LEVEL_NODE][:]
12291 def BuildHooksEnv(self):
12292 """Build hooks env.
12294 This runs on the master, primary and secondaries.
12298 if constants.BE_MINMEM in self.be_new:
12299 args["minmem"] = self.be_new[constants.BE_MINMEM]
12300 if constants.BE_MAXMEM in self.be_new:
12301 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12302 if constants.BE_VCPUS in self.be_new:
12303 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12304 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12305 # information at all.
12307 if self._new_nics is not None:
12310 for nic in self._new_nics:
12311 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12312 mode = nicparams[constants.NIC_MODE]
12313 link = nicparams[constants.NIC_LINK]
12314 nics.append((nic.ip, nic.mac, mode, link))
12316 args["nics"] = nics
12318 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12319 if self.op.disk_template:
12320 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12321 if self.op.runtime_mem:
12322 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12326 def BuildHooksNodes(self):
12327 """Build hooks nodes.
12330 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12333 def _PrepareNicModification(self, params, private, old_ip, old_params,
12335 update_params_dict = dict([(key, params[key])
12336 for key in constants.NICS_PARAMETERS
12339 if "bridge" in params:
12340 update_params_dict[constants.NIC_LINK] = params["bridge"]
12342 new_params = _GetUpdatedParams(old_params, update_params_dict)
12343 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12345 new_filled_params = cluster.SimpleFillNIC(new_params)
12346 objects.NIC.CheckParameterSyntax(new_filled_params)
12348 new_mode = new_filled_params[constants.NIC_MODE]
12349 if new_mode == constants.NIC_MODE_BRIDGED:
12350 bridge = new_filled_params[constants.NIC_LINK]
12351 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12353 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12355 self.warn.append(msg)
12357 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12359 elif new_mode == constants.NIC_MODE_ROUTED:
12360 ip = params.get(constants.INIC_IP, old_ip)
12362 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12363 " on a routed NIC", errors.ECODE_INVAL)
12365 if constants.INIC_MAC in params:
12366 mac = params[constants.INIC_MAC]
12368 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12369 errors.ECODE_INVAL)
12370 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12371 # otherwise generate the MAC address
12372 params[constants.INIC_MAC] = \
12373 self.cfg.GenerateMAC(self.proc.GetECId())
12375 # or validate/reserve the current one
12377 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12378 except errors.ReservationError:
12379 raise errors.OpPrereqError("MAC address '%s' already in use"
12380 " in cluster" % mac,
12381 errors.ECODE_NOTUNIQUE)
12383 logging.info("new_params %s", new_params)
12384 logging.info("new_filled_params %s", new_filled_params)
12385 private.params = new_params
12386 private.filled = new_filled_params
12388 def CheckPrereq(self):
12389 """Check prerequisites.
12391 This only checks the instance list against the existing names.
12394 # checking the new params on the primary/secondary nodes
12396 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12397 cluster = self.cluster = self.cfg.GetClusterInfo()
12398 assert self.instance is not None, \
12399 "Cannot retrieve locked instance %s" % self.op.instance_name
12400 pnode = instance.primary_node
12401 nodelist = list(instance.all_nodes)
12402 pnode_info = self.cfg.GetNodeInfo(pnode)
12403 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12405 # Prepare disk/NIC modifications
12406 self.diskmod = PrepareContainerMods(self.op.disks, None)
12407 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12408 logging.info("nicmod %s", self.nicmod)
12411 if self.op.os_name and not self.op.force:
12412 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12413 self.op.force_variant)
12414 instance_os = self.op.os_name
12416 instance_os = instance.os
12418 assert not (self.op.disk_template and self.op.disks), \
12419 "Can't modify disk template and apply disk changes at the same time"
12421 if self.op.disk_template:
12422 if instance.disk_template == self.op.disk_template:
12423 raise errors.OpPrereqError("Instance already has disk template %s" %
12424 instance.disk_template, errors.ECODE_INVAL)
12426 if (instance.disk_template,
12427 self.op.disk_template) not in self._DISK_CONVERSIONS:
12428 raise errors.OpPrereqError("Unsupported disk template conversion from"
12429 " %s to %s" % (instance.disk_template,
12430 self.op.disk_template),
12431 errors.ECODE_INVAL)
12432 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12433 msg="cannot change disk template")
12434 if self.op.disk_template in constants.DTS_INT_MIRROR:
12435 if self.op.remote_node == pnode:
12436 raise errors.OpPrereqError("Given new secondary node %s is the same"
12437 " as the primary node of the instance" %
12438 self.op.remote_node, errors.ECODE_STATE)
12439 _CheckNodeOnline(self, self.op.remote_node)
12440 _CheckNodeNotDrained(self, self.op.remote_node)
12441 # FIXME: here we assume that the old instance type is DT_PLAIN
12442 assert instance.disk_template == constants.DT_PLAIN
12443 disks = [{constants.IDISK_SIZE: d.size,
12444 constants.IDISK_VG: d.logical_id[0]}
12445 for d in instance.disks]
12446 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12447 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12449 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12450 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12451 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12452 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12453 ignore=self.op.ignore_ipolicy)
12454 if pnode_info.group != snode_info.group:
12455 self.LogWarning("The primary and secondary nodes are in two"
12456 " different node groups; the disk parameters"
12457 " from the first disk's node group will be"
12460 # hvparams processing
12461 if self.op.hvparams:
12462 hv_type = instance.hypervisor
12463 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12464 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12465 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12468 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12469 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12470 self.hv_proposed = self.hv_new = hv_new # the new actual values
12471 self.hv_inst = i_hvdict # the new dict (without defaults)
12473 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12475 self.hv_new = self.hv_inst = {}
12477 # beparams processing
12478 if self.op.beparams:
12479 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12481 objects.UpgradeBeParams(i_bedict)
12482 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12483 be_new = cluster.SimpleFillBE(i_bedict)
12484 self.be_proposed = self.be_new = be_new # the new actual values
12485 self.be_inst = i_bedict # the new dict (without defaults)
12487 self.be_new = self.be_inst = {}
12488 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12489 be_old = cluster.FillBE(instance)
12491 # CPU param validation -- checking every time a parameter is
12492 # changed to cover all cases where either CPU mask or vcpus have
12494 if (constants.BE_VCPUS in self.be_proposed and
12495 constants.HV_CPU_MASK in self.hv_proposed):
12497 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12498 # Verify mask is consistent with number of vCPUs. Can skip this
12499 # test if only 1 entry in the CPU mask, which means same mask
12500 # is applied to all vCPUs.
12501 if (len(cpu_list) > 1 and
12502 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12503 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12505 (self.be_proposed[constants.BE_VCPUS],
12506 self.hv_proposed[constants.HV_CPU_MASK]),
12507 errors.ECODE_INVAL)
12509 # Only perform this test if a new CPU mask is given
12510 if constants.HV_CPU_MASK in self.hv_new:
12511 # Calculate the largest CPU number requested
12512 max_requested_cpu = max(map(max, cpu_list))
12513 # Check that all of the instance's nodes have enough physical CPUs to
12514 # satisfy the requested CPU mask
12515 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12516 max_requested_cpu + 1, instance.hypervisor)
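# e.g. (illustrative, assuming the usual ':'-separated multi-CPU mask
# syntax) with BE_VCPUS=2 a mask like "0-1:2-3" passes both checks on a
# 4-CPU node, while the single-entry mask "0-3" applies to all vCPUs and
# skips the length comparison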
12518 # osparams processing
12519 if self.op.osparams:
12520 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12521 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12522 self.os_inst = i_osdict # the new dict (without defaults)
12528 #TODO(dynmem): do the appropriate check involving MINMEM
12529 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12530 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12531 mem_check_list = [pnode]
12532 if be_new[constants.BE_AUTO_BALANCE]:
12533 # either we changed auto_balance to yes or it was from before
12534 mem_check_list.extend(instance.secondary_nodes)
12535 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12536 instance.hypervisor)
12537 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12538 [instance.hypervisor])
12539 pninfo = nodeinfo[pnode]
12540 msg = pninfo.fail_msg
12542 # Assume the primary node is unreachable and go ahead
12543 self.warn.append("Can't get info from primary node %s: %s" %
12546 (_, _, (pnhvinfo, )) = pninfo.payload
12547 if not isinstance(pnhvinfo.get("memory_free", None), int):
12548 self.warn.append("Node data from primary node %s doesn't contain"
12549 " free memory information" % pnode)
12550 elif instance_info.fail_msg:
12551 self.warn.append("Can't get instance runtime information: %s" %
12552 instance_info.fail_msg)
12554 if instance_info.payload:
12555 current_mem = int(instance_info.payload["memory"])
12557 # Assume instance not running
12558 # (there is a slight race condition here, but it's not very
12559 # probable, and we have no other way to check)
12560 # TODO: Describe race condition
12562 #TODO(dynmem): do the appropriate check involving MINMEM
12563 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12564 pnhvinfo["memory_free"])
12566 raise errors.OpPrereqError("This change will prevent the instance"
12567 " from starting, due to %d MB of memory"
12568 " missing on its primary node" %
12570 errors.ECODE_NORES)
12572 if be_new[constants.BE_AUTO_BALANCE]:
12573 for node, nres in nodeinfo.items():
12574 if node not in instance.secondary_nodes:
12576 nres.Raise("Can't get info from secondary node %s" % node,
12577 prereq=True, ecode=errors.ECODE_STATE)
12578 (_, _, (nhvinfo, )) = nres.payload
12579 if not isinstance(nhvinfo.get("memory_free", None), int):
12580 raise errors.OpPrereqError("Secondary node %s didn't return free"
12581 " memory information" % node,
12582 errors.ECODE_STATE)
12583 #TODO(dynmem): do the appropriate check involving MINMEM
12584 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12585 raise errors.OpPrereqError("This change will prevent the instance"
12586 " from failover to its secondary node"
12587 " %s, due to not enough memory" % node,
12588 errors.ECODE_STATE)
12590 if self.op.runtime_mem:
12591 remote_info = self.rpc.call_instance_info(instance.primary_node,
12593 instance.hypervisor)
12594 remote_info.Raise("Error checking node %s" % instance.primary_node)
12595 if not remote_info.payload: # not running already
12596 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12597 errors.ECODE_STATE)
12599 current_memory = remote_info.payload["memory"]
12600 if (not self.op.force and
12601 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12602 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12603 raise errors.OpPrereqError("Instance %s must have memory between %d"
12604 " and %d MB of memory unless --force is"
12605 " given" % (instance.name,
12606 self.be_proposed[constants.BE_MINMEM],
12607 self.be_proposed[constants.BE_MAXMEM]),
12608 errors.ECODE_INVAL)
12610 if self.op.runtime_mem > current_memory:
12611 _CheckNodeFreeMemory(self, instance.primary_node,
12612 "ballooning memory for instance %s" %
self.op.runtime_mem - current_memory,
12615 instance.hypervisor)
12617 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12618 raise errors.OpPrereqError("Disk operations not supported for"
12619 " diskless instances",
12620 errors.ECODE_INVAL)
12622 def _PrepareNicCreate(_, params, private):
12623 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12624 return (None, None)
12626 def _PrepareNicMod(_, nic, params, private):
12627 self._PrepareNicModification(params, private, nic.ip,
12628 nic.nicparams, cluster, pnode)
12631 # Verify NIC changes (operating on copy)
12632 nics = instance.nics[:]
12633 ApplyContainerMods("NIC", nics, None, self.nicmod,
12634 _PrepareNicCreate, _PrepareNicMod, None)
12635 if len(nics) > constants.MAX_NICS:
12636 raise errors.OpPrereqError("Instance has too many network interfaces"
12637 " (%d), cannot add more" % constants.MAX_NICS,
12638 errors.ECODE_STATE)
12641 # Verify disk changes (operating on a copy)
12642 disks = instance.disks[:]
12643 ApplyContainerMods("disk", disks, None, self.diskmod,
12645 if len(disks) > constants.MAX_DISKS:
12646 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12647 " more" % constants.MAX_DISKS,
12648 errors.ECODE_STATE)
12650 if self.op.offline is not None:
12651 if self.op.offline:
12652 msg = "can't change to offline"
12654 msg = "can't change to online"
12655 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12657 # Pre-compute NIC changes (necessary to use result in hooks)
12658 self._nic_chgdesc = []
12660 # Operate on copies as this is still in prereq
12661 nics = [nic.Copy() for nic in instance.nics]
12662 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12663 self._CreateNewNic, self._ApplyNicMods,
12665 self._new_nics = nics
12667 self._new_nics = None
12670 def _ConvertPlainToDrbd(self, feedback_fn):
12671 """Converts an instance from plain to drbd.
12674 feedback_fn("Converting template to drbd")
12675 instance = self.instance
12676 pnode = instance.primary_node
12677 snode = self.op.remote_node
12679 assert instance.disk_template == constants.DT_PLAIN
12681 # create a fake disk info for _GenerateDiskTemplate
12682 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12683 constants.IDISK_VG: d.logical_id[0]}
12684 for d in instance.disks]
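# illustrative content: a single 10 GB read-write disk in volume group
# "xenvg" would yield
# [{IDISK_SIZE: 10240, IDISK_MODE: "rw", IDISK_VG: "xenvg"}]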
12685 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12686 instance.name, pnode, [snode],
12687 disk_info, None, None, 0, feedback_fn,
12689 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12691 info = _GetInstanceInfoText(instance)
12692 feedback_fn("Creating additional volumes...")
12693 # first, create the missing data and meta devices
12694 for disk in anno_disks:
12695 # unfortunately this is... not too nice
12696 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12698 for child in disk.children:
12699 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12700 # at this stage, all new LVs have been created, we can rename the
12702 feedback_fn("Renaming original volumes...")
12703 rename_list = [(o, n.children[0].logical_id)
12704 for (o, n) in zip(instance.disks, new_disks)]
12705 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12706 result.Raise("Failed to rename original LVs")
12708 feedback_fn("Initializing DRBD devices...")
12709 # all child devices are in place, we can now create the DRBD devices
12710 for disk in anno_disks:
12711 for node in [pnode, snode]:
12712 f_create = node == pnode
12713 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12715 # at this point, the instance has been modified
12716 instance.disk_template = constants.DT_DRBD8
12717 instance.disks = new_disks
12718 self.cfg.Update(instance, feedback_fn)
12720 # Release node locks while waiting for sync
12721 _ReleaseLocks(self, locking.LEVEL_NODE)
12723 # disks are created, waiting for sync
12724 disk_abort = not _WaitForSync(self, instance,
12725                                   oneshot=not self.op.wait_for_sync)
12726     if disk_abort:
12727       raise errors.OpExecError("There are some degraded disks for"
12728 " this instance, please cleanup manually")
12730 # Node resource locks will be released by caller
12732 def _ConvertDrbdToPlain(self, feedback_fn):
12733 """Converts an instance from drbd to plain.
12736 instance = self.instance
12738 assert len(instance.secondary_nodes) == 1
12739 assert instance.disk_template == constants.DT_DRBD8
12741 pnode = instance.primary_node
12742 snode = instance.secondary_nodes[0]
12743 feedback_fn("Converting template to plain")
12745 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12746 new_disks = [d.children[0] for d in instance.disks]
12748 # copy over size and mode
12749 for parent, child in zip(old_disks, new_disks):
12750 child.size = parent.size
12751 child.mode = parent.mode
12753 # this is a DRBD disk, return its port to the pool
12754 # NOTE: this must be done right before the call to cfg.Update!
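    # For DRBD8 disks the logical_id is the tuple (primary_node,
    # secondary_node, port, minor_primary, minor_secondary, secret), so
    # logical_id[2] below is the network port being handed back to the pool.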
12755 for disk in old_disks:
12756 tcp_port = disk.logical_id[2]
12757 self.cfg.AddTcpUdpPort(tcp_port)
12759 # update instance structure
12760 instance.disks = new_disks
12761 instance.disk_template = constants.DT_PLAIN
12762 self.cfg.Update(instance, feedback_fn)
12764 # Release locks in case removing disks takes a while
12765 _ReleaseLocks(self, locking.LEVEL_NODE)
12767 feedback_fn("Removing volumes on the secondary node...")
12768 for disk in old_disks:
12769 self.cfg.SetDiskID(disk, snode)
12770       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12771       if msg:
12772         self.LogWarning("Could not remove block device %s on node %s,"
12773 " continuing anyway: %s", disk.iv_name, snode, msg)
12775 feedback_fn("Removing unneeded volumes on the primary node...")
12776 for idx, disk in enumerate(old_disks):
12777 meta = disk.children[1]
12778 self.cfg.SetDiskID(meta, pnode)
12779       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12780       if msg:
12781         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12782 " continuing anyway: %s", idx, pnode, msg)
12784 def _CreateNewDisk(self, idx, params, _):
12785 """Creates a new disk.
12788 instance = self.instance
12791 if instance.disk_template in constants.DTS_FILEBASED:
12792 (file_driver, file_path) = instance.disks[0].logical_id
12793       file_path = os.path.dirname(file_path)
12794     else:
12795       file_driver = file_path = None
12797     disk = \
12798       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12799 instance.primary_node, instance.secondary_nodes,
12800 [params], file_path, file_driver, idx,
12801 self.Log, self.diskparams)[0]
12803 info = _GetInstanceInfoText(instance)
12805 logging.info("Creating volume %s for instance %s",
12806 disk.iv_name, instance.name)
12807 # Note: this needs to be kept in sync with _CreateDisks
12809 for node in instance.all_nodes:
12810       f_create = (node == instance.primary_node)
12811       try:
12812         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12813 except errors.OpExecError, err:
12814 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12815 disk.iv_name, disk, node, err)
12817 if self.op.hotplug and disk.pci:
12818 disk_ok, device_info = _AssembleInstanceDisks(self, self.instance,
12819 [disk], check=False)
12820 _, _, dev_path = device_info[0]
12821 result = self.rpc.call_hot_add_disk(self.instance.primary_node,
12822                                           self.instance, disk, dev_path, idx)
12823     return (disk, [
12824       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12825       ])
12827   @staticmethod
12828   def _ModifyDisk(idx, disk, params, _):
12829     """Modifies a disk.
12831     """
12832     disk.mode = params[constants.IDISK_MODE]
12834     return [
12835       ("disk.mode/%d" % idx, disk.mode),
12836       ]
12838 def _RemoveDisk(self, idx, root, _):
12842 #TODO: log warning in case hotplug is not possible
12844 if root.pci and not self.op.hotplug:
12845       raise errors.OpPrereqError("Cannot remove a disk that has"
12846                                  " been hotplugged"
12847                                  " without removing it with hotplug",
12848 errors.ECODE_INVAL)
12849 if self.op.hotplug and root.pci:
12850 self.rpc.call_hot_del_disk(self.instance.primary_node,
12851 self.instance, root, idx)
12852 _ShutdownInstanceDisks(self, self.instance, [root])
12853 self.cfg.UpdatePCIInfo(self.instance.name, root.pci)
12855 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12856 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12857 self.cfg.SetDiskID(disk, node)
12858 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12860 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12861 " continuing anyway", idx, node, msg)
12863 # if this is a DRBD disk, return its port to the pool
12864 if root.dev_type in constants.LDS_DRBD:
12865 self.cfg.AddTcpUdpPort(root.logical_id[2])
12867 def _CreateNewNic(self, idx, params, private):
12868 """Creates data structure for a new network interface.
12871 mac = params[constants.INIC_MAC]
12872 ip = params.get(constants.INIC_IP, None)
12873 #TODO: not private.filled?? can a nic be saved without nicparams??
12874 nicparams = private.filled
12876 nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12878 #TODO: log warning in case hotplug is not possible
12881 if self.op.hotplug:
12882 nic_idx, pci = _GetPCIInfo(self, 'nics')
12885 result = self.rpc.call_hot_add_nic(self.instance.primary_node,
12886                                          self.instance, nic, idx)
12887     return (nic, [
12888       ("nic.%d" % idx,
12889        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12890        (mac, ip, private.filled[constants.NIC_MODE],
12891        private.filled[constants.NIC_LINK])),
12892       ])
12895 def _ApplyNicMods(self, idx, nic, params, private):
12896 """Modifies a network interface.
12900     changes = []
12901     for key in [constants.INIC_MAC, constants.INIC_IP]:
12902       if key in params:
12903         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12904         setattr(nic, key, params[key])
12907 nic.nicparams = private.params
12909 for (key, val) in params.items():
12910 changes.append(("nic.%s/%d" % (key, idx), val))
12912 #TODO: log warning in case hotplug is not possible
12914 if self.op.hotplug and nic.pci:
12915 self.rpc.call_hot_del_nic(self.instance.primary_node,
12916 self.instance, nic, idx)
12917 result = self.rpc.call_hot_add_nic(self.instance.primary_node,
12918                                          self.instance, nic, idx)
12919     return changes
12921 def _RemoveNic(self, idx, nic, private):
12922 if nic.pci and not self.op.hotplug:
12923 raise errors.OpPrereqError("Cannot remove a nic that has been hotplugged"
12924 " without removing it with hotplug",
12925 errors.ECODE_INVAL)
12926 #TODO: log warning in case hotplug is not possible
12928 if self.op.hotplug and nic.pci:
12929 self.rpc.call_hot_del_nic(self.instance.primary_node,
12930 self.instance, nic, idx)
12931 self.cfg.UpdatePCIInfo(self.instance.name, nic.pci)
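  # The helpers above share one hotplug pattern: when self.op.hotplug is set
  # and the device has a PCI slot recorded, the change is also applied to the
  # running instance via the hot_add_*/hot_del_* RPCs; otherwise it only takes
  # effect at the next restart, as the Exec() docstring below notes.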
12934 def Exec(self, feedback_fn):
12935 """Modifies an instance.
12937 All parameters take effect only at the next restart of the instance.
12940 # Process here the warnings from CheckPrereq, as we don't have a
12941 # feedback_fn there.
12942 # TODO: Replace with self.LogWarning
12943 for warn in self.warn:
12944 feedback_fn("WARNING: %s" % warn)
12946 assert ((self.op.disk_template is None) ^
12947 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12948       "Not owning any node resource locks"
12950     result = []
12951     instance = self.instance
12954     if self.op.runtime_mem:
12955       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12956                                                      instance,
12957                                                      self.op.runtime_mem)
12958 rpcres.Raise("Cannot modify instance runtime memory")
12959 result.append(("runtime_memory", self.op.runtime_mem))
12961 # Apply disk changes
12962 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12963 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12964 _UpdateIvNames(0, instance.disks)
12966 if self.op.disk_template:
12968 check_nodes = set(instance.all_nodes)
12969 if self.op.remote_node:
12970 check_nodes.add(self.op.remote_node)
12971 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12972 owned = self.owned_locks(level)
12973 assert not (check_nodes - owned), \
12974 ("Not owning the correct locks, owning %r, expected at least %r" %
12975 (owned, check_nodes))
12977       r_shut = _ShutdownInstanceDisks(self, instance)
12978       if not r_shut:
12979         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12980 " proceed with disk template conversion")
12981       mode = (instance.disk_template, self.op.disk_template)
12982       try:
12983         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12984       except:
12985         self.cfg.ReleaseDRBDMinors(instance.name)
12986         raise
12987       result.append(("disk_template", self.op.disk_template))
12989 assert instance.disk_template == self.op.disk_template, \
12990 ("Expected disk template '%s', found '%s'" %
12991 (self.op.disk_template, instance.disk_template))
12993 # Release node and resource locks if there are any (they might already have
12994 # been released during disk conversion)
12995 _ReleaseLocks(self, locking.LEVEL_NODE)
12996 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12998 # Apply NIC changes
12999 if self._new_nics is not None:
13000 instance.nics = self._new_nics
13001 result.extend(self._nic_chgdesc)
13004 if self.op.hvparams:
13005 instance.hvparams = self.hv_inst
13006 for key, val in self.op.hvparams.iteritems():
13007 result.append(("hv/%s" % key, val))
13010 if self.op.beparams:
13011 instance.beparams = self.be_inst
13012 for key, val in self.op.beparams.iteritems():
13013 result.append(("be/%s" % key, val))
13016 if self.op.os_name:
13017 instance.os = self.op.os_name
13020 if self.op.osparams:
13021 instance.osparams = self.os_inst
13022 for key, val in self.op.osparams.iteritems():
13023 result.append(("os/%s" % key, val))
13025     if self.op.offline is None:
13026       # Ignore
13027       pass
13028     elif self.op.offline:
13029       # Mark instance as offline
13030       self.cfg.MarkInstanceOffline(instance.name)
13031       result.append(("admin_state", constants.ADMINST_OFFLINE))
13032     else:
13033       # Mark instance as online, but stopped
13034 self.cfg.MarkInstanceDown(instance.name)
13035 result.append(("admin_state", constants.ADMINST_DOWN))
13037 self.cfg.Update(instance, feedback_fn)
13039 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13040 self.owned_locks(locking.LEVEL_NODE)), \
13041       "All node locks should have been released by now"
13043     return result
13045 _DISK_CONVERSIONS = {
13046 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13047     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13048     }
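  # Maps (current_template, requested_template) to the conversion method
  # invoked from Exec(); only the two plain<->drbd conversions above are
  # supported, other combinations are expected to be rejected before Exec runs.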
13051 class LUInstanceChangeGroup(LogicalUnit):
13052 HPATH = "instance-change-group"
13053 HTYPE = constants.HTYPE_INSTANCE
13056 def ExpandNames(self):
13057 self.share_locks = _ShareAll()
13058 self.needed_locks = {
13059 locking.LEVEL_NODEGROUP: [],
13060       locking.LEVEL_NODE: [],
13061       }
13063     self._ExpandAndLockInstance()
13065 if self.op.target_groups:
13066 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13067                                   self.op.target_groups)
13068     else:
13069       self.req_target_uuids = None
13071 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13073 def DeclareLocks(self, level):
13074 if level == locking.LEVEL_NODEGROUP:
13075 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13077 if self.req_target_uuids:
13078 lock_groups = set(self.req_target_uuids)
13080 # Lock all groups used by instance optimistically; this requires going
13081 # via the node before it's locked, requiring verification later on
13082 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13083         lock_groups.update(instance_groups)
13084       else:
13085         # No target groups, need to lock all of them
13086 lock_groups = locking.ALL_SET
13088 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13090 elif level == locking.LEVEL_NODE:
13091 if self.req_target_uuids:
13092 # Lock all nodes used by instances
13093 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13094 self._LockInstancesNodes()
13096 # Lock all nodes in all potential target groups
13097 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13098 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13099 member_nodes = [node_name
13100 for group in lock_groups
13101 for node_name in self.cfg.GetNodeGroup(group).members]
13102         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13103       else:
13104         # Lock all nodes as all groups are potential targets
13105 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13107 def CheckPrereq(self):
13108 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13109 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13110 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13112 assert (self.req_target_uuids is None or
13113 owned_groups.issuperset(self.req_target_uuids))
13114 assert owned_instances == set([self.op.instance_name])
13116 # Get instance information
13117 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13119 # Check if node groups for locked instance are still correct
13120 assert owned_nodes.issuperset(self.instance.all_nodes), \
13121 ("Instance %s's nodes changed while we kept the lock" %
13122 self.op.instance_name)
13124     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13125                                            owned_groups)
13127     if self.req_target_uuids:
13128 # User requested specific target groups
13129       self.target_uuids = frozenset(self.req_target_uuids)
13130     else:
13131       # All groups except those used by the instance are potential targets
13132 self.target_uuids = owned_groups - inst_groups
13134 conflicting_groups = self.target_uuids & inst_groups
13135 if conflicting_groups:
13136 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13137 " used by the instance '%s'" %
13138 (utils.CommaJoin(conflicting_groups),
13139 self.op.instance_name),
13140 errors.ECODE_INVAL)
13142 if not self.target_uuids:
13143 raise errors.OpPrereqError("There are no possible target groups",
13144 errors.ECODE_INVAL)
13146 def BuildHooksEnv(self):
13147 """Build hooks env.
13150 assert self.target_uuids
13152     env = {
13153       "TARGET_GROUPS": " ".join(self.target_uuids),
13154       }
13156     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13158     return env
13160 def BuildHooksNodes(self):
13161 """Build hooks nodes.
13164 mn = self.cfg.GetMasterNode()
13165 return ([mn], [mn])
13167 def Exec(self, feedback_fn):
13168 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13170 assert instances == [self.op.instance_name], "Instance not locked"
13172 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13173 instances=instances, target_groups=list(self.target_uuids))
13175 ial.Run(self.op.iallocator)
13177 if not ial.success:
13178 raise errors.OpPrereqError("Can't compute solution for changing group of"
13179 " instance '%s' using iallocator '%s': %s" %
13180                                  (self.op.instance_name, self.op.iallocator,
13181                                   ial.info),
13182                                  errors.ECODE_NORES)
13184 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13186 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13187 " instance '%s'", len(jobs), self.op.instance_name)
13189 return ResultWithJobs(jobs)
13192 class LUBackupQuery(NoHooksLU):
13193 """Query the exports list
13198 def CheckArguments(self):
13199 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13200 ["node", "export"], self.op.use_locking)
13202 def ExpandNames(self):
13203 self.expq.ExpandNames(self)
13205 def DeclareLocks(self, level):
13206 self.expq.DeclareLocks(self, level)
13208   def Exec(self, feedback_fn):
13209     result = {}
13211     for (node, expname) in self.expq.OldStyleQuery(self):
13212       if expname is None:
13213         result[node] = False
13214       else:
13215         result.setdefault(node, []).append(expname)
13217     return result
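  # The old-style result maps each node name either to False (the node could
  # not be queried) or to the list of export names found on that node.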
13220 class _ExportQuery(_QueryBase):
13221 FIELDS = query.EXPORT_FIELDS
13223 #: The node name is not a unique key for this query
13224 SORT_FIELD = "node"
13226 def ExpandNames(self, lu):
13227 lu.needed_locks = {}
13229 # The following variables interact with _QueryBase._GetNames
13230     if self.names:
13231       self.wanted = _GetWantedNodes(lu, self.names)
13232     else:
13233       self.wanted = locking.ALL_SET
13235 self.do_locking = self.use_locking
13237 if self.do_locking:
13238 lu.share_locks = _ShareAll()
13239 lu.needed_locks = {
13240         locking.LEVEL_NODE: self.wanted,
13241         }
13243   def DeclareLocks(self, lu, level):
13244     pass
13246 def _GetQueryData(self, lu):
13247 """Computes the list of nodes and their attributes.
13250 # Locking is not used
13252 assert not (compat.any(lu.glm.is_owned(level)
13253 for level in locking.LEVELS
13254 if level != locking.LEVEL_CLUSTER) or
13255 self.do_locking or self.use_locking)
13257     nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13259     result = []
13261     for (node, nres) in lu.rpc.call_export_list(nodes).items():
13262       if nres.fail_msg:
13263         result.append((node, None))
13264       else:
13265         result.extend((node, expname) for expname in nres.payload)
13267     return result
13270 class LUBackupPrepare(NoHooksLU):
13271 """Prepares an instance for an export and returns useful information.
13276 def ExpandNames(self):
13277 self._ExpandAndLockInstance()
13279 def CheckPrereq(self):
13280 """Check prerequisites.
13283 instance_name = self.op.instance_name
13285 self.instance = self.cfg.GetInstanceInfo(instance_name)
13286 assert self.instance is not None, \
13287 "Cannot retrieve locked instance %s" % self.op.instance_name
13288 _CheckNodeOnline(self, self.instance.primary_node)
13290 self._cds = _GetClusterDomainSecret()
13292 def Exec(self, feedback_fn):
13293 """Prepares an instance for an export.
13296 instance = self.instance
13298 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13299 salt = utils.GenerateSecret(8)
13301 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13302 result = self.rpc.call_x509_cert_create(instance.primary_node,
13303 constants.RIE_CERT_VALIDITY)
13304 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13306 (name, cert_pem) = result.payload
13308       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13309                                              cert_pem)
13311       return {
13312         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13313         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13314                           salt),
13315         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13316         }
13318     return None
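    # The Sha1Hmac ties the generated key name to this cluster's domain
    # secret; LUBackupExport.CheckPrereq verifies that HMAC before trusting
    # the x509_key_name handed back in the export opcode.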
13321 class LUBackupExport(LogicalUnit):
13322 """Export an instance to an image in the cluster.
13325 HPATH = "instance-export"
13326 HTYPE = constants.HTYPE_INSTANCE
13329 def CheckArguments(self):
13330 """Check the arguments.
13333 self.x509_key_name = self.op.x509_key_name
13334 self.dest_x509_ca_pem = self.op.destination_x509_ca
13336 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13337 if not self.x509_key_name:
13338 raise errors.OpPrereqError("Missing X509 key name for encryption",
13339 errors.ECODE_INVAL)
13341 if not self.dest_x509_ca_pem:
13342 raise errors.OpPrereqError("Missing destination X509 CA",
13343 errors.ECODE_INVAL)
13345 def ExpandNames(self):
13346 self._ExpandAndLockInstance()
13348 # Lock all nodes for local exports
13349 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13350 # FIXME: lock only instance primary and destination node
13352       # Sad but true, for now we have to lock all nodes, as we don't know where
13353 # the previous export might be, and in this LU we search for it and
13354 # remove it from its current node. In the future we could fix this by:
13355 # - making a tasklet to search (share-lock all), then create the
13356 # new one, then one to remove, after
13357 # - removing the removal operation altogether
13358 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13360 def DeclareLocks(self, level):
13361 """Last minute lock declaration."""
13362 # All nodes are locked anyway, so nothing to do here.
13364 def BuildHooksEnv(self):
13365 """Build hooks env.
13367 This will run on the master, primary node and target node.
13370     env = {
13371       "EXPORT_MODE": self.op.mode,
13372 "EXPORT_NODE": self.op.target_node,
13373 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13374 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13375 # TODO: Generic function for boolean env variables
13376       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13377       }
13379     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13381     return env
13383 def BuildHooksNodes(self):
13384 """Build hooks nodes.
13387 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13389 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13390       nl.append(self.op.target_node)
13392     return (nl, nl)
13394 def CheckPrereq(self):
13395 """Check prerequisites.
13397 This checks that the instance and node names are valid.
13400 instance_name = self.op.instance_name
13402 self.instance = self.cfg.GetInstanceInfo(instance_name)
13403 assert self.instance is not None, \
13404 "Cannot retrieve locked instance %s" % self.op.instance_name
13405 _CheckNodeOnline(self, self.instance.primary_node)
13407 if (self.op.remove_instance and
13408 self.instance.admin_state == constants.ADMINST_UP and
13409 not self.op.shutdown):
13410       raise errors.OpPrereqError("Can not remove instance without shutting it"
13411                                  " down before")
13413     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13414 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13415 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13416 assert self.dst_node is not None
13418 _CheckNodeOnline(self, self.dst_node.name)
13419 _CheckNodeNotDrained(self, self.dst_node.name)
13422 self.dest_disk_info = None
13423 self.dest_x509_ca = None
13425 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13426 self.dst_node = None
13428 if len(self.op.target_node) != len(self.instance.disks):
13429 raise errors.OpPrereqError(("Received destination information for %s"
13430 " disks, but instance %s has %s disks") %
13431 (len(self.op.target_node), instance_name,
13432 len(self.instance.disks)),
13433 errors.ECODE_INVAL)
13435 cds = _GetClusterDomainSecret()
13437       # Check X509 key name
13438       try:
13439         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13440 except (TypeError, ValueError), err:
13441 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13443 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13444 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13445 errors.ECODE_INVAL)
13447       # Load and verify CA
13448       try:
13449         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13450 except OpenSSL.crypto.Error, err:
13451 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13452 (err, ), errors.ECODE_INVAL)
13454 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13455 if errcode is not None:
13456 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13457 (msg, ), errors.ECODE_INVAL)
13459 self.dest_x509_ca = cert
13461       # Verify target information
13462       disk_info = []
13463       for idx, disk_data in enumerate(self.op.target_node):
13464         try:
13465 (host, port, magic) = \
13466 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13467 except errors.GenericError, err:
13468 raise errors.OpPrereqError("Target info for disk %s: %s" %
13469 (idx, err), errors.ECODE_INVAL)
13471 disk_info.append((host, port, magic))
13473 assert len(disk_info) == len(self.op.target_node)
13474       self.dest_disk_info = disk_info
13476     else:
13477       raise errors.ProgrammerError("Unhandled export mode %r" %
13478                                    self.op.mode)
13480 # instance disk type verification
13481 # TODO: Implement export support for file-based disks
13482 for disk in self.instance.disks:
13483 if disk.dev_type == constants.LD_FILE:
13484 raise errors.OpPrereqError("Export not supported for instances with"
13485 " file-based disks", errors.ECODE_INVAL)
13487 def _CleanupExports(self, feedback_fn):
13488 """Removes exports of current instance from all other nodes.
13490 If an instance in a cluster with nodes A..D was exported to node C, its
13491 exports will be removed from the nodes A, B and D.
13494 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13496 nodelist = self.cfg.GetNodeList()
13497 nodelist.remove(self.dst_node.name)
13499 # on one-node clusters nodelist will be empty after the removal
13500 # if we proceed the backup would be removed because OpBackupQuery
13501 # substitutes an empty list with the full cluster node list.
13502     iname = self.instance.name
13503     if nodelist:
13504       feedback_fn("Removing old exports for instance %s" % iname)
13505 exportlist = self.rpc.call_export_list(nodelist)
13506 for node in exportlist:
13507       if exportlist[node].fail_msg:
13508         continue
13509       if iname in exportlist[node].payload:
13510         msg = self.rpc.call_export_remove(node, iname).fail_msg
13511         if msg:
13512           self.LogWarning("Could not remove older export for instance %s"
13513 " on node %s: %s", iname, node, msg)
13515 def Exec(self, feedback_fn):
13516 """Export an instance to an image in the cluster.
13519 assert self.op.mode in constants.EXPORT_MODES
13521 instance = self.instance
13522 src_node = instance.primary_node
13524 if self.op.shutdown:
13525 # shutdown the instance, but not the disks
13526 feedback_fn("Shutting down instance %s" % instance.name)
13527 result = self.rpc.call_instance_shutdown(src_node, instance,
13528 self.op.shutdown_timeout)
13529 # TODO: Maybe ignore failures if ignore_remove_failures is set
13530 result.Raise("Could not shutdown instance %s on"
13531 " node %s" % (instance.name, src_node))
13533 # set the disks ID correctly since call_instance_start needs the
13534 # correct drbd minor to create the symlinks
13535 for disk in instance.disks:
13536 self.cfg.SetDiskID(disk, src_node)
13538     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13540     if activate_disks:
13541       # Activate the instance disks if we're exporting a stopped instance
13542       feedback_fn("Activating disks for %s" % instance.name)
13543       _StartInstanceDisks(self, instance, None)
13545     try:
13546       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13547                                                      instance)
13549       helper.CreateSnapshots()
13550       try:
13551 if (self.op.shutdown and
13552 instance.admin_state == constants.ADMINST_UP and
13553 not self.op.remove_instance):
13554 assert not activate_disks
13555 feedback_fn("Starting instance %s" % instance.name)
13556 result = self.rpc.call_instance_start(src_node,
13557 (instance, None, None), False)
13558           msg = result.fail_msg
13559           if msg:
13560             feedback_fn("Failed to start instance: %s" % msg)
13561 _ShutdownInstanceDisks(self, instance)
13562 raise errors.OpExecError("Could not start instance: %s" % msg)
13564 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13565 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13566 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13567 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13568 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13570         (key_name, _, _) = self.x509_key_name
13572         dest_ca_pem = \
13573           OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13574                                           self.dest_x509_ca)
13576 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13577                                                    key_name, dest_ca_pem,
13578                                                    timeouts)
13579       finally:
13580         helper.Cleanup()
13582       # Check for backwards compatibility
13583 assert len(dresults) == len(instance.disks)
13584 assert compat.all(isinstance(i, bool) for i in dresults), \
13585 "Not all results are boolean: %r" % dresults
13588     if activate_disks:
13589       feedback_fn("Deactivating disks for %s" % instance.name)
13590 _ShutdownInstanceDisks(self, instance)
13592     if not (compat.all(dresults) and fin_resu):
13593       failures = []
13594       if not fin_resu:
13595         failures.append("export finalization")
13596       if not compat.all(dresults):
13597         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13598                                if not dsk)
13599         failures.append("disk export: disk(s) %s" % fdsk)
13601 raise errors.OpExecError("Export failed, errors in %s" %
13602 utils.CommaJoin(failures))
13604 # At this point, the export was successful, we can cleanup/finish
13606 # Remove instance if requested
13607 if self.op.remove_instance:
13608 feedback_fn("Removing instance %s" % instance.name)
13609 _RemoveInstance(self, feedback_fn, instance,
13610 self.op.ignore_remove_failures)
13612 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13613 self._CleanupExports(feedback_fn)
13615 return fin_resu, dresults
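    # fin_resu says whether export finalization succeeded and dresults holds
    # one boolean per instance disk; callers (e.g. the gnt-backup client) use
    # them to report partially failed exports.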
13618 class LUBackupRemove(NoHooksLU):
13619 """Remove exports related to the named instance.
13624 def ExpandNames(self):
13625 self.needed_locks = {}
13626 # We need all nodes to be locked in order for RemoveExport to work, but we
13627 # don't need to lock the instance itself, as nothing will happen to it (and
13628 # we can remove exports also for a removed instance)
13629 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13631 def Exec(self, feedback_fn):
13632 """Remove any export.
13635 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13636 # If the instance was not found we'll try with the name that was passed in.
13637 # This will only work if it was an FQDN, though.
13638     fqdn_warn = False
13639     if not instance_name:
13640       fqdn_warn = True
13641       instance_name = self.op.instance_name
13643 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13644 exportlist = self.rpc.call_export_list(locked_nodes)
13645     found = False
13646     for node in exportlist:
13647       msg = exportlist[node].fail_msg
13648       if msg:
13649         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13650         continue
13651       if instance_name in exportlist[node].payload:
13652         found = True
13653         result = self.rpc.call_export_remove(node, instance_name)
13654         msg = result.fail_msg
13655         if msg:
13656           logging.error("Could not remove export for instance %s"
13657                         " on node %s: %s", instance_name, node, msg)
13659 if fqdn_warn and not found:
13660 feedback_fn("Export not found. If trying to remove an export belonging"
13661                   " to a deleted instance please use its Fully Qualified"
13662                   " Name.")
13665 class LUGroupAdd(LogicalUnit):
13666 """Logical unit for creating node groups.
13669 HPATH = "group-add"
13670 HTYPE = constants.HTYPE_GROUP
13673 def ExpandNames(self):
13674 # We need the new group's UUID here so that we can create and acquire the
13675 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13676 # that it should not check whether the UUID exists in the configuration.
13677 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13678 self.needed_locks = {}
13679 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13681 def CheckPrereq(self):
13682 """Check prerequisites.
13684 This checks that the given group name is not an existing node group
13688     try:
13689       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13690     except errors.OpPrereqError:
13691       pass
13692     else:
13693       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13694 " node group (UUID: %s)" %
13695 (self.op.group_name, existing_uuid),
13696 errors.ECODE_EXISTS)
13698 if self.op.ndparams:
13699 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13701 if self.op.hv_state:
13702       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13703     else:
13704       self.new_hv_state = None
13706 if self.op.disk_state:
13707       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13708     else:
13709       self.new_disk_state = None
13711 if self.op.diskparams:
13712 for templ in constants.DISK_TEMPLATES:
13713 if templ in self.op.diskparams:
13714 utils.ForceDictType(self.op.diskparams[templ],
13715 constants.DISK_DT_TYPES)
13716       self.new_diskparams = self.op.diskparams
13717       try:
13718         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13719       except errors.OpPrereqError, err:
13720         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13721                                    errors.ECODE_INVAL)
13722     else:
13723       self.new_diskparams = {}
13725 if self.op.ipolicy:
13726 cluster = self.cfg.GetClusterInfo()
13727       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13728       try:
13729         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13730 except errors.ConfigurationError, err:
13731 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13732 errors.ECODE_INVAL)
13734 def BuildHooksEnv(self):
13735 """Build hooks env.
13738     return {
13739       "GROUP_NAME": self.op.group_name,
13740       }
13742 def BuildHooksNodes(self):
13743 """Build hooks nodes.
13746 mn = self.cfg.GetMasterNode()
13747 return ([mn], [mn])
13749 def Exec(self, feedback_fn):
13750 """Add the node group to the cluster.
13753 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13754 uuid=self.group_uuid,
13755 alloc_policy=self.op.alloc_policy,
13756 ndparams=self.op.ndparams,
13757 diskparams=self.new_diskparams,
13758 ipolicy=self.op.ipolicy,
13759 hv_state_static=self.new_hv_state,
13760 disk_state_static=self.new_disk_state)
13762 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13763 del self.remove_locks[locking.LEVEL_NODEGROUP]
13766 class LUGroupAssignNodes(NoHooksLU):
13767 """Logical unit for assigning nodes to groups.
13772 def ExpandNames(self):
13773 # These raise errors.OpPrereqError on their own:
13774 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13775 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13777 # We want to lock all the affected nodes and groups. We have readily
13778 # available the list of nodes, and the *destination* group. To gather the
13779 # list of "source" groups, we need to fetch node information later on.
13780 self.needed_locks = {
13781 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13782       locking.LEVEL_NODE: self.op.nodes,
13783       }
13785 def DeclareLocks(self, level):
13786 if level == locking.LEVEL_NODEGROUP:
13787 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13789 # Try to get all affected nodes' groups without having the group or node
13790 # lock yet. Needs verification later in the code flow.
13791 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13793 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13795 def CheckPrereq(self):
13796 """Check prerequisites.
13799 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13800 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13801 frozenset(self.op.nodes))
13803 expected_locks = (set([self.group_uuid]) |
13804 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13805 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13806 if actual_locks != expected_locks:
13807 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13808 " current groups are '%s', used to be '%s'" %
13809 (utils.CommaJoin(expected_locks),
13810 utils.CommaJoin(actual_locks)))
13812 self.node_data = self.cfg.GetAllNodesInfo()
13813 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13814 instance_data = self.cfg.GetAllInstancesInfo()
13816 if self.group is None:
13817 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13818 (self.op.group_name, self.group_uuid))
13820 (new_splits, previous_splits) = \
13821 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13822 for node in self.op.nodes],
13823 self.node_data, instance_data)
13825     if new_splits:
13826       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13828       if not self.op.force:
13829         raise errors.OpExecError("The following instances get split by this"
13830                                  " change and --force was not given: %s" %
13831                                  fmt_new_splits)
13832       else:
13833         self.LogWarning("This operation will split the following instances: %s",
13834                         fmt_new_splits)
13836 if previous_splits:
13837 self.LogWarning("In addition, these already-split instances continue"
13838 " to be split across groups: %s",
13839 utils.CommaJoin(utils.NiceSort(previous_splits)))
13841 def Exec(self, feedback_fn):
13842 """Assign nodes to a new group.
13845 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13847 self.cfg.AssignGroupNodes(mods)
13849   @staticmethod
13850   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13851 """Check for split instances after a node assignment.
13853 This method considers a series of node assignments as an atomic operation,
13854     and returns information about split instances after applying the set of
13855     changes.
13857 In particular, it returns information about newly split instances, and
13858 instances that were already split, and remain so after the change.
13860     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13861     considered.
13863 @type changes: list of (node_name, new_group_uuid) pairs.
13864 @param changes: list of node assignments to consider.
13865 @param node_data: a dict with data for all nodes
13866 @param instance_data: a dict with all instances to consider
13867 @rtype: a two-tuple
13868 @return: a list of instances that were previously okay and result split as a
13869 consequence of this change, and a list of instances that were previously
13870 split and this change does not fix.
13873 changed_nodes = dict((node, group) for node, group in changes
13874 if node_data[node].group != group)
13876 all_split_instances = set()
13877 previously_split_instances = set()
13879 def InstanceNodes(instance):
13880 return [instance.primary_node] + list(instance.secondary_nodes)
13882 for inst in instance_data.values():
13883       if inst.disk_template not in constants.DTS_INT_MIRROR:
13884         continue
13886       instance_nodes = InstanceNodes(inst)
13888 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13889 previously_split_instances.add(inst.name)
13891 if len(set(changed_nodes.get(node, node_data[node].group)
13892 for node in instance_nodes)) > 1:
13893 all_split_instances.add(inst.name)
13895 return (list(all_split_instances - previously_split_instances),
13896 list(previously_split_instances & all_split_instances))
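  # Worked example (hypothetical names): a DRBD instance with its primary on
  # node1 (group G1) and its secondary on node2 (also G1) becomes "newly
  # split" if this assignment moves node2 into G2; had node2 already been in
  # G2, the instance would instead be reported as previously split.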
13899 class _GroupQuery(_QueryBase):
13900 FIELDS = query.GROUP_FIELDS
13902 def ExpandNames(self, lu):
13903 lu.needed_locks = {}
13905 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13906 self._cluster = lu.cfg.GetClusterInfo()
13907 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13909     if not self.names:
13910       self.wanted = [name_to_uuid[name]
13911                      for name in utils.NiceSort(name_to_uuid.keys())]
13912     else:
13913       # Accept names to be either names or UUIDs.
13914       missing = []
13915       self.wanted = []
13916       all_uuid = frozenset(self._all_groups.keys())
13918       for name in self.names:
13919         if name in all_uuid:
13920           self.wanted.append(name)
13921         elif name in name_to_uuid:
13922           self.wanted.append(name_to_uuid[name])
13923         else:
13924           missing.append(name)
13926       if missing:
13927         raise errors.OpPrereqError("Some groups do not exist: %s" %
13928 utils.CommaJoin(missing),
13929 errors.ECODE_NOENT)
13931   def DeclareLocks(self, lu, level):
13932     pass
13934 def _GetQueryData(self, lu):
13935 """Computes the list of node groups and their attributes.
13938 do_nodes = query.GQ_NODE in self.requested_data
13939 do_instances = query.GQ_INST in self.requested_data
13941 group_to_nodes = None
13942 group_to_instances = None
13944 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13945 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13946 # latter GetAllInstancesInfo() is not enough, for we have to go through
13947 # instance->node. Hence, we will need to process nodes even if we only need
13948 # instance information.
13949 if do_nodes or do_instances:
13950 all_nodes = lu.cfg.GetAllNodesInfo()
13951       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13952       node_to_group = {}
13954       for node in all_nodes.values():
13955 if node.group in group_to_nodes:
13956 group_to_nodes[node.group].append(node.name)
13957           node_to_group[node.name] = node.group
13959       if do_instances:
13960         all_instances = lu.cfg.GetAllInstancesInfo()
13961 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13963 for instance in all_instances.values():
13964 node = instance.primary_node
13965 if node in node_to_group:
13966 group_to_instances[node_to_group[node]].append(instance.name)
13968       if not do_nodes:
13969         # Do not pass on node information if it was not requested.
13970         group_to_nodes = None
13972 return query.GroupQueryData(self._cluster,
13973 [self._all_groups[uuid]
13974 for uuid in self.wanted],
13975 group_to_nodes, group_to_instances,
13976 query.GQ_DISKPARAMS in self.requested_data)
13979 class LUGroupQuery(NoHooksLU):
13980 """Logical unit for querying node groups.
13985 def CheckArguments(self):
13986 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13987 self.op.output_fields, False)
13989 def ExpandNames(self):
13990 self.gq.ExpandNames(self)
13992 def DeclareLocks(self, level):
13993 self.gq.DeclareLocks(self, level)
13995 def Exec(self, feedback_fn):
13996 return self.gq.OldStyleQuery(self)
13999 class LUGroupSetParams(LogicalUnit):
14000 """Modifies the parameters of a node group.
14003 HPATH = "group-modify"
14004 HTYPE = constants.HTYPE_GROUP
14007 def CheckArguments(self):
14008     all_changes = [
14009       self.op.ndparams,
14010       self.op.diskparams,
14011       self.op.alloc_policy,
14012       self.op.hv_state,
14013       self.op.disk_state,
14014       self.op.ipolicy,
14015       ]
14017     if all_changes.count(None) == len(all_changes):
14018 raise errors.OpPrereqError("Please pass at least one modification",
14019 errors.ECODE_INVAL)
14021 def ExpandNames(self):
14022 # This raises errors.OpPrereqError on its own:
14023 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14025 self.needed_locks = {
14026 locking.LEVEL_INSTANCE: [],
14027       locking.LEVEL_NODEGROUP: [self.group_uuid],
14028       }
14030     self.share_locks[locking.LEVEL_INSTANCE] = 1
14032 def DeclareLocks(self, level):
14033 if level == locking.LEVEL_INSTANCE:
14034 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14036       # Lock instances optimistically, needs verification once group lock has
14037       # been acquired
14038       self.needed_locks[locking.LEVEL_INSTANCE] = \
14039 self.cfg.GetNodeGroupInstances(self.group_uuid)
14041   @staticmethod
14042   def _UpdateAndVerifyDiskParams(old, new):
14043 """Updates and verifies disk parameters.
14046 new_params = _GetUpdatedParams(old, new)
14047     utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14048     return new_params
14050 def CheckPrereq(self):
14051 """Check prerequisites.
14054 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14056 # Check if locked instances are still correct
14057 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
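    # The instance locks were acquired optimistically in DeclareLocks, based
    # on configuration data read before the group lock was held; the check
    # above verifies the group's instance list has not changed in the meantime.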
14059 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14060 cluster = self.cfg.GetClusterInfo()
14062 if self.group is None:
14063 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14064 (self.op.group_name, self.group_uuid))
14066 if self.op.ndparams:
14067 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14068 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14069 self.new_ndparams = new_ndparams
14071 if self.op.diskparams:
14072 diskparams = self.group.diskparams
14073 uavdp = self._UpdateAndVerifyDiskParams
14074 # For each disktemplate subdict update and verify the values
14075 new_diskparams = dict((dt,
14076 uavdp(diskparams.get(dt, {}),
14077 self.op.diskparams[dt]))
14078 for dt in constants.DISK_TEMPLATES
14079 if dt in self.op.diskparams)
14080 # As we've all subdicts of diskparams ready, lets merge the actual
14081 # dict with all updated subdicts
14082       self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14083       try:
14084         utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14085       except errors.OpPrereqError, err:
14086         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14087 errors.ECODE_INVAL)
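    # Only the per-template subdicts named in self.op.diskparams are updated;
    # objects.FillDict then layers them over the group's existing diskparams,
    # so templates that were not mentioned keep their previous values.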
14089 if self.op.hv_state:
14090 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14091 self.group.hv_state_static)
14093 if self.op.disk_state:
14094 self.new_disk_state = \
14095 _MergeAndVerifyDiskState(self.op.disk_state,
14096 self.group.disk_state_static)
14098 if self.op.ipolicy:
14099       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14100                                             self.op.ipolicy,
14101                                             group_policy=True)
14103       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14104       inst_filter = lambda inst: inst.name in owned_instances
14105       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14106       violations = \
14107         _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14108                                                              self.group),
14109                                       new_ipolicy, instances)
14111       if violations:
14112         self.LogWarning("After the ipolicy change the following instances"
14113 " violate them: %s",
14114 utils.CommaJoin(violations))
14116 def BuildHooksEnv(self):
14117 """Build hooks env.
14120     return {
14121       "GROUP_NAME": self.op.group_name,
14122       "NEW_ALLOC_POLICY": self.op.alloc_policy,
14123       }
14125 def BuildHooksNodes(self):
14126 """Build hooks nodes.
14129 mn = self.cfg.GetMasterNode()
14130 return ([mn], [mn])
14132 def Exec(self, feedback_fn):
14133 """Modifies the node group.
14136     result = []
14138     if self.op.ndparams:
14139 self.group.ndparams = self.new_ndparams
14140 result.append(("ndparams", str(self.group.ndparams)))
14142 if self.op.diskparams:
14143 self.group.diskparams = self.new_diskparams
14144 result.append(("diskparams", str(self.group.diskparams)))
14146 if self.op.alloc_policy:
14147 self.group.alloc_policy = self.op.alloc_policy
14149 if self.op.hv_state:
14150 self.group.hv_state_static = self.new_hv_state
14152 if self.op.disk_state:
14153 self.group.disk_state_static = self.new_disk_state
14155 if self.op.ipolicy:
14156 self.group.ipolicy = self.new_ipolicy
14158     self.cfg.Update(self.group, feedback_fn)
14160     return result
14162 class LUGroupRemove(LogicalUnit):
14163 HPATH = "group-remove"
14164 HTYPE = constants.HTYPE_GROUP
14167 def ExpandNames(self):
14168     # This will raise errors.OpPrereqError on its own:
14169 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14170 self.needed_locks = {
14171       locking.LEVEL_NODEGROUP: [self.group_uuid],
14172       }
14174 def CheckPrereq(self):
14175 """Check prerequisites.
14177 This checks that the given group name exists as a node group, that is
14178     empty (i.e., contains no nodes), and that is not the last group of the
14179     cluster.
14182 # Verify that the group is empty.
14183 group_nodes = [node.name
14184 for node in self.cfg.GetAllNodesInfo().values()
14185                    if node.group == self.group_uuid]
14187     if group_nodes:
14188       raise errors.OpPrereqError("Group '%s' not empty, has the following"
14189                                  " nodes: %s" %
14190                                  (self.op.group_name,
14191 utils.CommaJoin(utils.NiceSort(group_nodes))),
14192 errors.ECODE_STATE)
14194 # Verify the cluster would not be left group-less.
14195 if len(self.cfg.GetNodeGroupList()) == 1:
14196 raise errors.OpPrereqError("Group '%s' is the only group,"
14197 " cannot be removed" %
14198 self.op.group_name,
14199 errors.ECODE_STATE)
14201 def BuildHooksEnv(self):
14202 """Build hooks env.
14205     return {
14206       "GROUP_NAME": self.op.group_name,
14207       }
14209 def BuildHooksNodes(self):
14210 """Build hooks nodes.
14213 mn = self.cfg.GetMasterNode()
14214 return ([mn], [mn])
14216 def Exec(self, feedback_fn):
14217 """Remove the node group.
14220     try:
14221       self.cfg.RemoveNodeGroup(self.group_uuid)
14222 except errors.ConfigurationError:
14223 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14224 (self.op.group_name, self.group_uuid))
14226 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14229 class LUGroupRename(LogicalUnit):
14230 HPATH = "group-rename"
14231 HTYPE = constants.HTYPE_GROUP
14234 def ExpandNames(self):
14235 # This raises errors.OpPrereqError on its own:
14236 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14238 self.needed_locks = {
14239       locking.LEVEL_NODEGROUP: [self.group_uuid],
14240       }
14242 def CheckPrereq(self):
14243 """Check prerequisites.
14245 Ensures requested new name is not yet used.
14248     try:
14249       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14250     except errors.OpPrereqError:
14251       pass
14252     else:
14253       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14254 " node group (UUID: %s)" %
14255 (self.op.new_name, new_name_uuid),
14256 errors.ECODE_EXISTS)
14258 def BuildHooksEnv(self):
14259 """Build hooks env.
14262     return {
14263       "OLD_NAME": self.op.group_name,
14264       "NEW_NAME": self.op.new_name,
14265       }
14267 def BuildHooksNodes(self):
14268 """Build hooks nodes.
14271 mn = self.cfg.GetMasterNode()
14273 all_nodes = self.cfg.GetAllNodesInfo()
14274     all_nodes.pop(mn, None)
14276     run_nodes = [mn]
14277     run_nodes.extend(node.name for node in all_nodes.values()
14278 if node.group == self.group_uuid)
14280 return (run_nodes, run_nodes)
14282 def Exec(self, feedback_fn):
14283 """Rename the node group.
14286     group = self.cfg.GetNodeGroup(self.group_uuid)
14288     if group is None:
14289       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14290 (self.op.group_name, self.group_uuid))
14292 group.name = self.op.new_name
14293 self.cfg.Update(group, feedback_fn)
14295 return self.op.new_name
14298 class LUGroupEvacuate(LogicalUnit):
14299 HPATH = "group-evacuate"
14300 HTYPE = constants.HTYPE_GROUP
14303 def ExpandNames(self):
14304 # This raises errors.OpPrereqError on its own:
14305 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14307 if self.op.target_groups:
14308 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14309                                   self.op.target_groups)
14310     else:
14311       self.req_target_uuids = []
14313 if self.group_uuid in self.req_target_uuids:
14314 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14315                                  " as a target group (targets are %s)" %
14316                                  (self.group_uuid,
14317                                   utils.CommaJoin(self.req_target_uuids)),
14318 errors.ECODE_INVAL)
14320 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14322 self.share_locks = _ShareAll()
14323 self.needed_locks = {
14324 locking.LEVEL_INSTANCE: [],
14325 locking.LEVEL_NODEGROUP: [],
14326       locking.LEVEL_NODE: [],
14327       }
14329 def DeclareLocks(self, level):
14330 if level == locking.LEVEL_INSTANCE:
14331 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14333 # Lock instances optimistically, needs verification once node and group
14334 # locks have been acquired
14335 self.needed_locks[locking.LEVEL_INSTANCE] = \
14336 self.cfg.GetNodeGroupInstances(self.group_uuid)
14338 elif level == locking.LEVEL_NODEGROUP:
14339 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14341 if self.req_target_uuids:
14342 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14344 # Lock all groups used by instances optimistically; this requires going
14345 # via the node before it's locked, requiring verification later on
14346 lock_groups.update(group_uuid
14347 for instance_name in
14348 self.owned_locks(locking.LEVEL_INSTANCE)
14350 self.cfg.GetInstanceNodeGroups(instance_name))
14352 # No target groups, need to lock all of them
14353 lock_groups = locking.ALL_SET
14355 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14357 elif level == locking.LEVEL_NODE:
14358 # This will only lock the nodes in the group to be evacuated which
14359 # contain actual instances
14360 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14361 self._LockInstancesNodes()
14363 # Lock all nodes in group to be evacuated and target groups
14364 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14365 assert self.group_uuid in owned_groups
14366 member_nodes = [node_name
14367 for group in owned_groups
14368 for node_name in self.cfg.GetNodeGroup(group).members]
14369 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14371 def CheckPrereq(self):
14372 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14373 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14374 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14376 assert owned_groups.issuperset(self.req_target_uuids)
14377 assert self.group_uuid in owned_groups
14379 # Check if locked instances are still correct
14380 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14382 # Get instance information
14383 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14385 # Check if node groups for locked instances are still correct
14386 _CheckInstancesNodeGroups(self.cfg, self.instances,
14387 owned_groups, owned_nodes, self.group_uuid)
14389 if self.req_target_uuids:
14390 # User requested specific target groups
14391 self.target_uuids = self.req_target_uuids
14393 # All groups except the one to be evacuated are potential targets
14394 self.target_uuids = [group_uuid for group_uuid in owned_groups
14395 if group_uuid != self.group_uuid]
14397 if not self.target_uuids:
14398 raise errors.OpPrereqError("There are no possible target groups",
14399 errors.ECODE_INVAL)
14401 def BuildHooksEnv(self):
14402 """Build hooks env.
14405     return {
14406       "GROUP_NAME": self.op.group_name,
14407       "TARGET_GROUPS": " ".join(self.target_uuids),
14408       }
14410 def BuildHooksNodes(self):
14411 """Build hooks nodes.
14414 mn = self.cfg.GetMasterNode()
14416 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14418 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14420 return (run_nodes, run_nodes)
14422 def Exec(self, feedback_fn):
14423 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14425 assert self.group_uuid not in self.target_uuids
14427 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14428 instances=instances, target_groups=self.target_uuids)
14430 ial.Run(self.op.iallocator)
14432 if not ial.success:
14433 raise errors.OpPrereqError("Can't compute group evacuation using"
14434 " iallocator '%s': %s" %
14435 (self.op.iallocator, ial.info),
14436 errors.ECODE_NORES)
14438 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14440 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14441 len(jobs), self.op.group_name)
14443 return ResultWithJobs(jobs)
14446 class TagsLU(NoHooksLU): # pylint: disable=W0223
14447 """Generic tags LU.
14449 This is an abstract class which is the parent of all the other tags LUs.
14452 def ExpandNames(self):
14453 self.group_uuid = None
14454 self.needed_locks = {}
14456 if self.op.kind == constants.TAG_NODE:
14457 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14458 lock_level = locking.LEVEL_NODE
14459 lock_name = self.op.name
14460 elif self.op.kind == constants.TAG_INSTANCE:
14461 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14462 lock_level = locking.LEVEL_INSTANCE
14463 lock_name = self.op.name
14464 elif self.op.kind == constants.TAG_NODEGROUP:
14465 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14466 lock_level = locking.LEVEL_NODEGROUP
14467       lock_name = self.group_uuid
14468     else:
14469       lock_level = None
14470       lock_name = None
14472     if lock_level and getattr(self.op, "use_locking", True):
14473 self.needed_locks[lock_level] = lock_name
14475 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14476 # not possible to acquire the BGL based on opcode parameters)
14478 def CheckPrereq(self):
14479 """Check prerequisites.
14482 if self.op.kind == constants.TAG_CLUSTER:
14483 self.target = self.cfg.GetClusterInfo()
14484 elif self.op.kind == constants.TAG_NODE:
14485 self.target = self.cfg.GetNodeInfo(self.op.name)
14486 elif self.op.kind == constants.TAG_INSTANCE:
14487 self.target = self.cfg.GetInstanceInfo(self.op.name)
14488 elif self.op.kind == constants.TAG_NODEGROUP:
14489       self.target = self.cfg.GetNodeGroup(self.group_uuid)
14490     else:
14491       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14492 str(self.op.kind), errors.ECODE_INVAL)
14495 class LUTagsGet(TagsLU):
14496 """Returns the tags of a given object.
14501 def ExpandNames(self):
14502 TagsLU.ExpandNames(self)
14504 # Share locks as this is only a read operation
14505 self.share_locks = _ShareAll()
14507 def Exec(self, feedback_fn):
14508 """Returns the tag list.
14511 return list(self.target.GetTags())
14514 class LUTagsSearch(NoHooksLU):
14515 """Searches the tags for a given pattern.
14520 def ExpandNames(self):
14521 self.needed_locks = {}
14523 def CheckPrereq(self):
14524 """Check prerequisites.
14526 This checks the pattern passed for validity by compiling it.
14529     try:
14530       self.re = re.compile(self.op.pattern)
14531 except re.error, err:
14532 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14533 (self.op.pattern, err), errors.ECODE_INVAL)
14535 def Exec(self, feedback_fn):
14536 """Returns the tag list.
14539     cfg = self.cfg
14540     tgts = [("/cluster", cfg.GetClusterInfo())]
14541 ilist = cfg.GetAllInstancesInfo().values()
14542 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14543 nlist = cfg.GetAllNodesInfo().values()
14544 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14545 tgts.extend(("/nodegroup/%s" % n.name, n)
14546                 for n in cfg.GetAllNodeGroupsInfo().values())
14547     results = []
14548     for path, target in tgts:
14549 for tag in target.GetTags():
14550 if self.re.search(tag):
14551           results.append((path, tag))
14552     return results
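    # The result is a list of (path, tag) pairs, e.g. (hypothetical values)
    # [("/instances/web1.example.com", "staging"), ("/cluster", "prod")],
    # built from the /cluster, /instances/<name>, /nodes/<name> and
    # /nodegroup/<name> prefixes above.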
14555 class LUTagsSet(TagsLU):
14556 """Sets a tag on a given object.
14561 def CheckPrereq(self):
14562 """Check prerequisites.
14564 This checks the type and length of the tag name and value.
14567 TagsLU.CheckPrereq(self)
14568 for tag in self.op.tags:
14569 objects.TaggableObject.ValidateTag(tag)
14571 def Exec(self, feedback_fn):
14575     try:
14576       for tag in self.op.tags:
14577 self.target.AddTag(tag)
14578 except errors.TagError, err:
14579 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14580 self.cfg.Update(self.target, feedback_fn)
14583 class LUTagsDel(TagsLU):
14584 """Delete a list of tags from a given object.
14589 def CheckPrereq(self):
14590 """Check prerequisites.
14592 This checks that we have the given tag.
14595 TagsLU.CheckPrereq(self)
14596 for tag in self.op.tags:
14597 objects.TaggableObject.ValidateTag(tag)
14598 del_tags = frozenset(self.op.tags)
14599 cur_tags = self.target.GetTags()
14601     diff_tags = del_tags - cur_tags
14602     if diff_tags:
14603       diff_names = ("'%s'" % i for i in sorted(diff_tags))
14604 raise errors.OpPrereqError("Tag(s) %s not found" %
14605 (utils.CommaJoin(diff_names), ),
14606 errors.ECODE_NOENT)
14608 def Exec(self, feedback_fn):
14609 """Remove the tag from the object.
14612 for tag in self.op.tags:
14613 self.target.RemoveTag(tag)
14614 self.cfg.Update(self.target, feedback_fn)
14617 class LUTestDelay(NoHooksLU):
14618 """Sleep for a specified amount of time.
14620 This LU sleeps on the master and/or nodes for a specified amount of time.
14626 def ExpandNames(self):
14627 """Expand names and set required locks.
14629 This expands the node list, if any.
14632 self.needed_locks = {}
14633 if self.op.on_nodes:
14634 # _GetWantedNodes can be used here, but it is not always appropriate to
14635 # call it this way in ExpandNames; see the LogicalUnit.ExpandNames
14636 # docstring for more information.
14637 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14638 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14640 def _TestDelay(self):
14641 """Do the actual sleep.
14644 if self.op.on_master:
14645 if not utils.TestDelay(self.op.duration):
14646 raise errors.OpExecError("Error during master delay test")
14647 if self.op.on_nodes:
14648 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14649 for node, node_result in result.items():
14650 node_result.Raise("Failure during rpc call to node %s" % node)
14652 def Exec(self, feedback_fn):
14653 """Execute the test delay opcode, with the wanted repetitions.
14656 if self.op.repeat == 0:
14659 top_value = self.op.repeat - 1
14660 for i in range(self.op.repeat):
14661 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14665 class LUTestJqueue(NoHooksLU):
14666 """Utility LU to test some aspects of the job queue.
14671 # Must be lower than default timeout for WaitForJobChange to see whether it
14672 # notices changed jobs
14673 _CLIENT_CONNECT_TIMEOUT = 20.0
14674 _CLIENT_CONFIRM_TIMEOUT = 60.0
14677 def _NotifyUsingSocket(cls, cb, errcls):
14678 """Opens a Unix socket and waits for another program to connect.
14681 @param cb: Callback to send socket name to client
14682 @type errcls: class
14683 @param errcls: Exception class to use for errors
14686 # Using a temporary directory as there's no easy way to create temporary
14687 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
14689 tmpdir = tempfile.mkdtemp()
14691 tmpsock = utils.PathJoin(tmpdir, "sock")
14693 logging.debug("Creating temporary socket at %s", tmpsock)
14694 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14699 # Send details to client
14702 # Wait for client to connect before continuing
14703 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14705 (conn, _) = sock.accept()
14706 except socket.error, err:
14707 raise errcls("Client didn't connect in time (%s)" % err)
14711 # Remove as soon as client is connected
14712 shutil.rmtree(tmpdir)
14714 # Wait for client to close
14717 # pylint: disable=E1101
14718 # Instance of '_socketobject' has no ... member
14719 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14721 except socket.error, err:
14722 raise errcls("Client failed to confirm notification (%s)" % err)
14726 def _SendNotification(self, test, arg, sockname):
14727 """Sends a notification to the client.
14730 @param test: Test name
14731 @param arg: Test argument (depends on test)
14732 @type sockname: string
14733 @param sockname: Socket path
14736 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14738 def _Notify(self, prereq, test, arg):
14739 """Notifies the client of a test.
14742 @param prereq: Whether this is a prereq-phase test
14744 @param test: Test name
14745 @param arg: Test argument (depends on test)
14749 errcls = errors.OpPrereqError
14751 errcls = errors.OpExecError
14753 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14757 def CheckArguments(self):
14758 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14759 self.expandnames_calls = 0
14761 def ExpandNames(self):
14762 checkargs_calls = getattr(self, "checkargs_calls", 0)
14763 if checkargs_calls < 1:
14764 raise errors.ProgrammerError("CheckArguments was not called")
14766 self.expandnames_calls += 1
14768 if self.op.notify_waitlock:
14769 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14771 self.LogInfo("Expanding names")
14773 # Get lock on master node (just to get a lock, not for a particular reason)
14774 self.needed_locks = {
14775 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14778 def Exec(self, feedback_fn):
14779 if self.expandnames_calls < 1:
14780 raise errors.ProgrammerError("ExpandNames was not called")
14782 if self.op.notify_exec:
14783 self._Notify(False, constants.JQT_EXEC, None)
14785 self.LogInfo("Executing")
14787 if self.op.log_messages:
14788 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14789 for idx, msg in enumerate(self.op.log_messages):
14790 self.LogInfo("Sending log message %s", idx + 1)
14791 feedback_fn(constants.JQT_MSGPREFIX + msg)
14792 # Report how many test messages have been sent
14793 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14796 raise errors.OpExecError("Opcode failure was requested")
14801 class IAllocator(object):
14802 """IAllocator framework.
14804 An IAllocator instance has three sets of attributes:
14805 - cfg that is needed to query the cluster
14806 - input data (all members of the _KEYS class attribute are required)
14807 - four buffer attributes (in_data, in_text, out_data, out_text), that represent the
14808 input (to the external script) in text and data structure format,
14809 and the output from it, again in two formats
14810 - the result variables from the script (success, info, nodes) for easy usage
14814 # pylint: disable=R0902
14815 # lots of instance attributes
14817 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14819 self.rpc = rpc_runner
14820 # init buffer variables
14821 self.in_text = self.out_text = self.in_data = self.out_data = None
14822 # init all input fields so that pylint is happy
14824 self.memory = self.disks = self.disk_template = self.spindle_use = None
14825 self.os = self.tags = self.nics = self.vcpus = None
14826 self.hypervisor = None
14827 self.relocate_from = None
14829 self.instances = None
14830 self.evac_mode = None
14831 self.target_groups = []
14833 self.required_nodes = None
14834 # init result fields
14835 self.success = self.info = self.result = None
14838 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14840 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14841 " IAllocator" % self.mode)
14843 keyset = [n for (n, _) in keydata]
14846 if key not in keyset:
14847 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14848 " IAllocator" % key)
14849 setattr(self, key, kwargs[key])
14852 if key not in kwargs:
14853 raise errors.ProgrammerError("Missing input parameter '%s' to"
14854 " IAllocator" % key)
14855 self._BuildInputData(compat.partial(fn, self), keydata)
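# A minimal construction sketch (assumed, not taken from the original code);
# every keyword must appear in the keydata list of the chosen mode, otherwise
# __init__ raises ProgrammerError. All concrete values below are hypothetical:
#   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_ALLOC,
#                    name="instance1.example.com", memory=512, spindle_use=1,
#                    vcpus=1, os="debian-image", tags=[], nics=[{}],
#                    disks=[{constants.IDISK_SIZE: 1024,
#                            constants.IDISK_MODE: constants.DISK_RDWR}],
#                    disk_template=constants.DT_PLAIN,
#                    hypervisor=constants.HT_XEN_PVM)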
14857 def _ComputeClusterData(self):
14858 """Compute the generic allocator input data.
14860 This is the data that is independent of the actual operation.
14864 cluster_info = cfg.GetClusterInfo()
14867 "version": constants.IALLOCATOR_VERSION,
14868 "cluster_name": cfg.GetClusterName(),
14869 "cluster_tags": list(cluster_info.GetTags()),
14870 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14871 "ipolicy": cluster_info.ipolicy,
14873 ninfo = cfg.GetAllNodesInfo()
14874 iinfo = cfg.GetAllInstancesInfo().values()
14875 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14878 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14880 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14881 hypervisor_name = self.hypervisor
14882 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14883 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14885 hypervisor_name = cluster_info.primary_hypervisor
14887 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14890 self.rpc.call_all_instances_info(node_list,
14891 cluster_info.enabled_hypervisors)
14893 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14895 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14896 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14897 i_list, config_ndata)
14898 assert len(data["nodes"]) == len(ninfo), \
14899 "Incomplete node data computed"
14901 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14903 self.in_data = data
14906 def _ComputeNodeGroupData(cfg):
14907 """Compute node groups data.
14910 cluster = cfg.GetClusterInfo()
14911 ng = dict((guuid, {
14912 "name": gdata.name,
14913 "alloc_policy": gdata.alloc_policy,
14914 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14916 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
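# Illustrative shape of the mapping built above (UUID, group name and policy
# contents are hypothetical), keyed by node group UUID:
#   {"f4e06e0f-0000-0000-0000-000000000001":
#      {"name": "default",
#       "alloc_policy": constants.ALLOC_POLICY_PREFERRED,
#       "ipolicy": <filled group instance policy dict>}}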
14921 def _ComputeBasicNodeData(cfg, node_cfg):
14922 """Compute global node data.
14925 @returns: a dict mapping node name to its config-derived attribute dict
14928 # fill in static (config-based) values
14929 node_results = dict((ninfo.name, {
14930 "tags": list(ninfo.GetTags()),
14931 "primary_ip": ninfo.primary_ip,
14932 "secondary_ip": ninfo.secondary_ip,
14933 "offline": ninfo.offline,
14934 "drained": ninfo.drained,
14935 "master_candidate": ninfo.master_candidate,
14936 "group": ninfo.group,
14937 "master_capable": ninfo.master_capable,
14938 "vm_capable": ninfo.vm_capable,
14939 "ndparams": cfg.GetNdParams(ninfo),
14941 for ninfo in node_cfg.values())
14943 return node_results
14946 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14948 """Compute global node data.
14950 @param node_results: the basic node structures as filled from the config
14953 #TODO(dynmem): compute the right data on MAX and MIN memory
14954 # make a copy of the current dict
14955 node_results = dict(node_results)
14956 for nname, nresult in node_data.items():
14957 assert nname in node_results, "Missing basic data for node %s" % nname
14958 ninfo = node_cfg[nname]
14960 if not (ninfo.offline or ninfo.drained):
14961 nresult.Raise("Can't get data for node %s" % nname)
14962 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14964 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14966 for attr in ["memory_total", "memory_free", "memory_dom0",
14967 "vg_size", "vg_free", "cpu_total"]:
14968 if attr not in remote_info:
14969 raise errors.OpExecError("Node '%s' didn't return attribute"
14970 " '%s'" % (nname, attr))
14971 if not isinstance(remote_info[attr], int):
14972 raise errors.OpExecError("Node '%s' returned invalid value"
14974 (nname, attr, remote_info[attr]))
14975 # compute memory used by primary instances
14976 i_p_mem = i_p_up_mem = 0
14977 for iinfo, beinfo in i_list:
14978 if iinfo.primary_node == nname:
14979 i_p_mem += beinfo[constants.BE_MAXMEM]
14980 if iinfo.name not in node_iinfo[nname].payload:
14983 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14984 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14985 remote_info["memory_free"] -= max(0, i_mem_diff)
14987 if iinfo.admin_state == constants.ADMINST_UP:
14988 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14990 # compute memory used by instances
14992 "total_memory": remote_info["memory_total"],
14993 "reserved_memory": remote_info["memory_dom0"],
14994 "free_memory": remote_info["memory_free"],
14995 "total_disk": remote_info["vg_size"],
14996 "free_disk": remote_info["vg_free"],
14997 "total_cpus": remote_info["cpu_total"],
14998 "i_pri_memory": i_p_mem,
14999 "i_pri_up_memory": i_p_up_mem,
15001 pnr_dyn.update(node_results[nname])
15002 node_results[nname] = pnr_dyn
15004 return node_results
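# Descriptive note (added): for nodes that are neither offline nor drained,
# the merged entry carries both the dynamic keys computed here
# ("total_memory", "reserved_memory", "free_memory", "total_disk",
# "free_disk", "total_cpus", "i_pri_memory", "i_pri_up_memory") and the
# static keys from _ComputeBasicNodeData ("tags", "primary_ip",
# "secondary_ip", "offline", "drained", "master_candidate", "group",
# "master_capable", "vm_capable", "ndparams"); other nodes keep only the
# static keys. The result is keyed by node name.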
15007 def _ComputeInstanceData(cluster_info, i_list):
15008 """Compute global instance data.
15012 for iinfo, beinfo in i_list:
15014 for nic in iinfo.nics:
15015 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15019 "mode": filled_params[constants.NIC_MODE],
15020 "link": filled_params[constants.NIC_LINK],
15022 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15023 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15024 nic_data.append(nic_dict)
15026 "tags": list(iinfo.GetTags()),
15027 "admin_state": iinfo.admin_state,
15028 "vcpus": beinfo[constants.BE_VCPUS],
15029 "memory": beinfo[constants.BE_MAXMEM],
15030 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15032 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15034 "disks": [{constants.IDISK_SIZE: dsk.size,
15035 constants.IDISK_MODE: dsk.mode}
15036 for dsk in iinfo.disks],
15037 "disk_template": iinfo.disk_template,
15038 "hypervisor": iinfo.hypervisor,
15040 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15042 instance_data[iinfo.name] = pir
15044 return instance_data
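# Illustrative entry (all names and sizes hypothetical); besides the keys
# shown here, each entry also carries the instance OS and the "nics" list
# built above:
#   instance_data["instance1.example.com"] == {
#       "tags": [], "admin_state": constants.ADMINST_UP, "vcpus": 1,
#       "memory": 512, "spindle_use": 1,
#       "nodes": ["node1.example.com"],
#       "disks": [{constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "rw"}],
#       "disk_template": constants.DT_PLAIN,
#       "hypervisor": constants.HT_XEN_PVM,
#       "disk_space_total": 1024}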
15046 def _AddNewInstance(self):
15047 """Add new instance data to allocator structure.
15049 This in combination with _ComputeClusterData will create the
15050 correct structure needed as input for the allocator.
15052 The checks for the completeness of the opcode must have already been
15056 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15058 if self.disk_template in constants.DTS_INT_MIRROR:
15059 self.required_nodes = 2
15061 self.required_nodes = 1
15065 "disk_template": self.disk_template,
15068 "vcpus": self.vcpus,
15069 "memory": self.memory,
15070 "spindle_use": self.spindle_use,
15071 "disks": self.disks,
15072 "disk_space_total": disk_space,
15074 "required_nodes": self.required_nodes,
15075 "hypervisor": self.hypervisor,
15080 def _AddRelocateInstance(self):
15081 """Add relocate instance data to allocator structure.
15083 This in combination with _ComputeClusterData will create the
15084 correct structure needed as input for the allocator.
15086 The checks for the completeness of the opcode must have already been
15090 instance = self.cfg.GetInstanceInfo(self.name)
15091 if instance is None:
15092 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15093 " IAllocator" % self.name)
15095 if instance.disk_template not in constants.DTS_MIRRORED:
15096 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15097 errors.ECODE_INVAL)
15099 if instance.disk_template in constants.DTS_INT_MIRROR and \
15100 len(instance.secondary_nodes) != 1:
15101 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15102 errors.ECODE_STATE)
15104 self.required_nodes = 1
15105 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15106 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15110 "disk_space_total": disk_space,
15111 "required_nodes": self.required_nodes,
15112 "relocate_from": self.relocate_from,
15116 def _AddNodeEvacuate(self):
15117 """Get data for node-evacuate requests.
15121 "instances": self.instances,
15122 "evac_mode": self.evac_mode,
15125 def _AddChangeGroup(self):
15126 """Get data for node-evacuate requests.
15130 "instances": self.instances,
15131 "target_groups": self.target_groups,
15134 def _BuildInputData(self, fn, keydata):
15135 """Build input data structures.
15138 self._ComputeClusterData()
15141 request["type"] = self.mode
15142 for keyname, keytype in keydata:
15143 if keyname not in request:
15144 raise errors.ProgrammerError("Request parameter %s is missing" %
15146 val = request[keyname]
15147 if not keytype(val):
15148 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15149 " validation, value %s, expected"
15150 " type %s" % (keyname, val, keytype))
15151 self.in_data["request"] = request
15153 self.in_text = serializer.Dump(self.in_data)
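# Sketch of the final document (descriptive, added): self.in_data now holds
# the cluster-wide keys from _ComputeClusterData ("version", "cluster_name",
# "cluster_tags", "enabled_hypervisors", "ipolicy", "nodegroups", "nodes",
# "instances") plus the per-mode "request" dict validated above, whose "type"
# is the iallocator mode; serializer.Dump turns the whole structure into the
# text handed to the external allocator script.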
15155 _STRING_LIST = ht.TListOf(ht.TString)
15156 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15157 # pylint: disable=E1101
15158 # Class '...' has no 'OP_ID' member
15159 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15160 opcodes.OpInstanceMigrate.OP_ID,
15161 opcodes.OpInstanceReplaceDisks.OP_ID])
15165 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15166 ht.TItems([ht.TNonEmptyString,
15167 ht.TNonEmptyString,
15168 ht.TListOf(ht.TNonEmptyString),
15171 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15172 ht.TItems([ht.TNonEmptyString,
15175 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15176 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15179 constants.IALLOCATOR_MODE_ALLOC:
15182 ("name", ht.TString),
15183 ("memory", ht.TInt),
15184 ("spindle_use", ht.TInt),
15185 ("disks", ht.TListOf(ht.TDict)),
15186 ("disk_template", ht.TString),
15187 ("os", ht.TString),
15188 ("tags", _STRING_LIST),
15189 ("nics", ht.TListOf(ht.TDict)),
15190 ("vcpus", ht.TInt),
15191 ("hypervisor", ht.TString),
15193 constants.IALLOCATOR_MODE_RELOC:
15194 (_AddRelocateInstance,
15195 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15197 constants.IALLOCATOR_MODE_NODE_EVAC:
15198 (_AddNodeEvacuate, [
15199 ("instances", _STRING_LIST),
15200 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15202 constants.IALLOCATOR_MODE_CHG_GROUP:
15203 (_AddChangeGroup, [
15204 ("instances", _STRING_LIST),
15205 ("target_groups", _STRING_LIST),
15209 def Run(self, name, validate=True, call_fn=None):
15210 """Run an instance allocator and return the results.
15213 if call_fn is None:
15214 call_fn = self.rpc.call_iallocator_runner
15216 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15217 result.Raise("Failure while running the iallocator script")
15219 self.out_text = result.payload
15221 self._ValidateResult()
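# A hedged usage sketch (the opcode field name below is an assumption, not
# taken from this section): callers typically follow Run() with a success
# check, e.g.
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError("Can't compute solution for request: %s" %
#                                ial.info, errors.ECODE_NORES)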
15223 def _ValidateResult(self):
15224 """Process the allocator results.
15226 This will process the result and, if successful, save it in
15227 self.out_data and the other parameters.
15231 rdict = serializer.Load(self.out_text)
15232 except Exception, err:
15233 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15235 if not isinstance(rdict, dict):
15236 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15238 # TODO: remove backwards compatibility in later versions
15239 if "nodes" in rdict and "result" not in rdict:
15240 rdict["result"] = rdict["nodes"]
15243 for key in "success", "info", "result":
15244 if key not in rdict:
15245 raise errors.OpExecError("Can't parse iallocator results:"
15246 " missing key '%s'" % key)
15247 setattr(self, key, rdict[key])
15249 if not self._result_check(self.result):
15250 raise errors.OpExecError("Iallocator returned invalid result,"
15251 " expected %s, got %s" %
15252 (self._result_check, self.result),
15253 errors.ECODE_INVAL)
15255 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15256 assert self.relocate_from is not None
15257 assert self.required_nodes == 1
15259 node2group = dict((name, ndata["group"])
15260 for (name, ndata) in self.in_data["nodes"].items())
15262 fn = compat.partial(self._NodesToGroups, node2group,
15263 self.in_data["nodegroups"])
15265 instance = self.cfg.GetInstanceInfo(self.name)
15266 request_groups = fn(self.relocate_from + [instance.primary_node])
15267 result_groups = fn(rdict["result"] + [instance.primary_node])
15269 if self.success and not set(result_groups).issubset(request_groups):
15270 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15271 " differ from original groups (%s)" %
15272 (utils.CommaJoin(result_groups),
15273 utils.CommaJoin(request_groups)))
15275 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15276 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15278 self.out_data = rdict
15281 def _NodesToGroups(node2group, groups, nodes):
15282 """Returns a list of unique group names for a list of nodes.
15284 @type node2group: dict
15285 @param node2group: Map from node name to group UUID
15287 @param groups: Group information
15289 @param nodes: Node names
15296 group_uuid = node2group[node]
15298 # Ignore unknown node
15302 group = groups[group_uuid]
15304 # Can't find group, let's use UUID
15305 group_name = group_uuid
15307 group_name = group["name"]
15309 result.add(group_name)
15311 return sorted(result)
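# Illustrative call (node and group names hypothetical):
#   IAllocator._NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
#                             {"uuid-a": {"name": "group1"}},
#                             ["node1", "node2", "node3"])
# returns ["group1", "uuid-b"]: nodes missing from the mapping are ignored
# and groups missing from the group info fall back to their UUID.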
15314 class LUTestAllocator(NoHooksLU):
15315 """Run allocator tests.
15317 This LU runs the allocator tests
15320 def CheckPrereq(self):
15321 """Check prerequisites.
15323 This checks the opcode parameters depending on the direction and mode of the test.
15326 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15327 for attr in ["memory", "disks", "disk_template",
15328 "os", "tags", "nics", "vcpus"]:
15329 if not hasattr(self.op, attr):
15330 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15331 attr, errors.ECODE_INVAL)
15332 iname = self.cfg.ExpandInstanceName(self.op.name)
15333 if iname is not None:
15334 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15335 iname, errors.ECODE_EXISTS)
15336 if not isinstance(self.op.nics, list):
15337 raise errors.OpPrereqError("Invalid parameter 'nics'",
15338 errors.ECODE_INVAL)
15339 if not isinstance(self.op.disks, list):
15340 raise errors.OpPrereqError("Invalid parameter 'disks'",
15341 errors.ECODE_INVAL)
15342 for row in self.op.disks:
15343 if (not isinstance(row, dict) or
15344 constants.IDISK_SIZE not in row or
15345 not isinstance(row[constants.IDISK_SIZE], int) or
15346 constants.IDISK_MODE not in row or
15347 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15348 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15349 " parameter", errors.ECODE_INVAL)
15350 if self.op.hypervisor is None:
15351 self.op.hypervisor = self.cfg.GetHypervisorType()
15352 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15353 fname = _ExpandInstanceName(self.cfg, self.op.name)
15354 self.op.name = fname
15355 self.relocate_from = \
15356 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15357 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15358 constants.IALLOCATOR_MODE_NODE_EVAC):
15359 if not self.op.instances:
15360 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15361 self.op.instances = _GetWantedInstances(self, self.op.instances)
15363 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15364 self.op.mode, errors.ECODE_INVAL)
15366 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15367 if self.op.allocator is None:
15368 raise errors.OpPrereqError("Missing allocator name",
15369 errors.ECODE_INVAL)
15370 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15371 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15372 self.op.direction, errors.ECODE_INVAL)
15374 def Exec(self, feedback_fn):
15375 """Run the allocator test.
15378 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15379 ial = IAllocator(self.cfg, self.rpc,
15382 memory=self.op.memory,
15383 disks=self.op.disks,
15384 disk_template=self.op.disk_template,
15388 vcpus=self.op.vcpus,
15389 hypervisor=self.op.hypervisor,
15391 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15392 ial = IAllocator(self.cfg, self.rpc,
15395 relocate_from=list(self.relocate_from),
15397 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15398 ial = IAllocator(self.cfg, self.rpc,
15400 instances=self.op.instances,
15401 target_groups=self.op.target_groups)
15402 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15403 ial = IAllocator(self.cfg, self.rpc,
15405 instances=self.op.instances,
15406 evac_mode=self.op.evac_mode)
15408 raise errors.ProgrammerError("Uncatched mode %s in"
15409 " LUTestAllocator.Exec", self.op.mode)
15411 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15412 result = ial.in_text
15414 ial.Run(self.op.allocator, validate=False)
15415 result = ial.out_text
15419 #: Query type implementations
15421 constants.QR_CLUSTER: _ClusterQuery,
15422 constants.QR_INSTANCE: _InstanceQuery,
15423 constants.QR_NODE: _NodeQuery,
15424 constants.QR_GROUP: _GroupQuery,
15425 constants.QR_OS: _OsQuery,
15426 constants.QR_EXPORT: _ExportQuery,
15429 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15432 def _GetQueryImplementation(name):
15433 """Returns the implemtnation for a query type.
15435 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15439 return _QUERY_IMPL[name]
15441 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15442 errors.ECODE_INVAL)
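# Hedged usage sketch: query-handling LUs resolve the backing implementation
# by resource name, e.g.
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
# while an unknown name raises OpPrereqError with ECODE_INVAL, as above.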