4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
63 from ganeti import runtime
64 from ganeti import network
66 import ganeti.masterd.instance # pylint: disable=W0611
69 #: Size of DRBD meta block device
73 INSTANCE_DOWN = [constants.ADMINST_DOWN]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
77 #: Instance status in which an instance can be marked as offline/online
78 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
79 constants.ADMINST_OFFLINE,
84 """Data container for LU results with jobs.
86 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
87 by L{mcpu._ProcessResult}. The latter will then submit the jobs
88 contained in the C{jobs} attribute and include the job IDs in the opcode result.
92 def __init__(self, jobs, **kwargs):
93 """Initializes this class.
95 Additional return values can be specified as keyword arguments.
97 @type jobs: list of lists of L{opcodes.OpCode}
98 @param jobs: A list of lists of opcode objects
105 class LogicalUnit(object):
106 """Logical Unit base class.
108 Subclasses must follow these rules:
109 - implement ExpandNames
110 - implement CheckPrereq (except when tasklets are used)
111 - implement Exec (except when tasklets are used)
112 - implement BuildHooksEnv
113 - implement BuildHooksNodes
114 - redefine HPATH and HTYPE
115 - optionally redefine their run requirements:
116 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
118 Note that all commands require root permissions.
120 @ivar dry_run_result: the value (if any) that will be returned to the caller
121 in dry-run mode (signalled by opcode dry_run parameter)
128 def __init__(self, processor, op, context, rpc_runner):
129 """Constructor for LogicalUnit.
131 This needs to be overridden in derived classes in order to check op validity.
135 self.proc = processor
137 self.cfg = context.cfg
138 self.glm = context.glm
140 self.owned_locks = context.glm.list_owned
141 self.context = context
142 self.rpc = rpc_runner
143 # Dicts used to declare locking needs to mcpu
144 self.needed_locks = None
145 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
147 self.remove_locks = {}
148 # Used to force good behavior when calling helper functions
149 self.recalculate_locks = {}
151 self.Log = processor.Log # pylint: disable=C0103
152 self.LogWarning = processor.LogWarning # pylint: disable=C0103
153 self.LogInfo = processor.LogInfo # pylint: disable=C0103
154 self.LogStep = processor.LogStep # pylint: disable=C0103
155 # support for dry-run
156 self.dry_run_result = None
157 # support for generic debug attribute
158 if (not hasattr(self.op, "debug_level") or
159 not isinstance(self.op.debug_level, int)):
160 self.op.debug_level = 0
165 # Validate opcode parameters and set defaults
166 self.op.Validate(True)
168 self.CheckArguments()
170 def CheckArguments(self):
171 """Check syntactic validity for the opcode arguments.
173 This method is for doing a simple syntactic check and ensuring the
174 validity of opcode parameters, without any cluster-related
175 checks. While the same can be accomplished in ExpandNames and/or
176 CheckPrereq, doing these separately is better because:
178 - ExpandNames is left as purely a lock-related function
179 - CheckPrereq is run after we have acquired locks (and possible
182 The function is allowed to change the self.op attribute so that
183 later methods no longer need to worry about missing parameters.
188 def ExpandNames(self):
189 """Expand names for this LU.
191 This method is called before starting to execute the opcode, and it should
192 update all the parameters of the opcode to their canonical form (e.g. a
193 short node name must be fully expanded after this method has successfully
194 completed). This way locking, hooks, logging, etc. can work correctly.
196 LUs which implement this method must also populate the self.needed_locks
197 member, as a dict with lock levels as keys, and a list of needed lock names
200 - use an empty dict if you don't need any lock
201 - if you don't need any lock at a particular level omit that
202 level (note that in this case C{DeclareLocks} won't be called
203 at all for that level)
204 - if you need locks at a level, but you can't calculate it in
205 this function, initialise that level with an empty list and do
206 further processing in L{LogicalUnit.DeclareLocks} (see that
207 function's docstring)
208 - don't put anything for the BGL level
209 - if you want all locks at a level use L{locking.ALL_SET} as a value
211 If you need to share locks (rather than acquire them exclusively) at one
212 level you can modify self.share_locks, setting a true value (usually 1) for
213 that level. By default locks are not shared.
215 This function can also define a list of tasklets, which then will be
216 executed in order instead of the usual LU-level CheckPrereq and Exec
217 functions, if those are not defined by the LU.
221 # Acquire all nodes and one instance
222 self.needed_locks = {
223 locking.LEVEL_NODE: locking.ALL_SET,
224 locking.LEVEL_INSTANCE: ['instance1.example.com'],
226 # Acquire just two nodes
227 self.needed_locks = {
228 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
231 self.needed_locks = {} # No, you can't leave it to the default value None
234 # The implementation of this method is mandatory only if the new LU is
235 # concurrent, so that old LUs don't need to be changed all at the same time.
238 self.needed_locks = {} # Exclusive LUs don't need locks.
240 raise NotImplementedError
242 def DeclareLocks(self, level):
243 """Declare LU locking needs for a level
245 While most LUs can just declare their locking needs at ExpandNames time,
246 sometimes there's the need to calculate some locks after having acquired
247 the ones before. This function is called just before acquiring locks at a
248 particular level, but after acquiring the ones at lower levels, and permits
249 such calculations. It can be used to modify self.needed_locks, and by
250 default it does nothing.
252 This function is only called if you have something already set in
253 self.needed_locks for the level.
255 @param level: Locking level which is going to be locked
256 @type level: member of L{ganeti.locking.LEVELS}
260 def CheckPrereq(self):
261 """Check prerequisites for this LU.
263 This method should check that the prerequisites for the execution
264 of this LU are fulfilled. It can do internode communication, but
265 it should be idempotent - no cluster or system changes are allowed.
268 The method should raise errors.OpPrereqError in case something is
269 not fulfilled. Its return value is ignored.
271 This method should also update all the parameters of the opcode to
272 their canonical form if it hasn't been done by ExpandNames before.
275 if self.tasklets is not None:
276 for (idx, tl) in enumerate(self.tasklets):
277 logging.debug("Checking prerequisites for tasklet %s/%s",
278 idx + 1, len(self.tasklets))
283 def Exec(self, feedback_fn):
286 This method should implement the actual work. It should raise
287 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
291 if self.tasklets is not None:
292 for (idx, tl) in enumerate(self.tasklets):
293 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
296 raise NotImplementedError
298 def BuildHooksEnv(self):
299 """Build hooks environment for this LU.
302 @return: Dictionary containing the environment that will be used for
303 running the hooks for this LU. The keys of the dict must not be prefixed
304 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
305 will extend the environment with additional variables. If no environment
306 should be defined, an empty dictionary should be returned (not C{None}).
307 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311 raise NotImplementedError
313 def BuildHooksNodes(self):
314 """Build list of nodes to run LU's hooks.
316 @rtype: tuple; (list, list)
317 @return: Tuple containing a list of node names on which the hook
318 should run before the execution and a list of node names on which the
319 hook should run after the execution. If there are no nodes, an empty
320 list should be returned (and not None).
321 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
325 raise NotImplementedError
327 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
328 """Notify the LU about the results of its hooks.
330 This method is called every time a hooks phase is executed, and notifies
331 the Logical Unit about the hooks' result. The LU can then use it to alter
332 its result based on the hooks. By default the method does nothing and the
333 previous result is passed back unchanged, but any LU can override it if it
334 wants to use the local cluster hook-scripts somehow.
336 @param phase: one of L{constants.HOOKS_PHASE_POST} or
337 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
338 @param hook_results: the results of the multi-node hooks rpc call
339 @param feedback_fn: function used to send feedback back to the caller
340 @param lu_result: the previous Exec result this LU had, or None
342 @return: the new Exec result, based on the previous result
346 # API must be kept, thus we ignore the unused argument and
347 # could-be-a-function warnings
348 # pylint: disable=W0613,R0201
351 def _ExpandAndLockInstance(self):
352 """Helper function to expand and lock an instance.
354 Many LUs that work on an instance take its name in self.op.instance_name
355 and need to expand it and then declare the expanded name for locking. This
356 function does it, and then updates self.op.instance_name to the expanded
357 name. It also initializes needed_locks as a dict, if this hasn't been done before.
361 if self.needed_locks is None:
362 self.needed_locks = {}
364 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
365 "_ExpandAndLockInstance called with instance-level locks set"
366 self.op.instance_name = _ExpandInstanceName(self.cfg,
367 self.op.instance_name)
368 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
370 def _LockInstancesNodes(self, primary_only=False,
371 level=locking.LEVEL_NODE):
372 """Helper function to declare instances' nodes for locking.
374 This function should be called after locking one or more instances to lock
375 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
376 with all primary or secondary nodes for instances already locked and
377 present in self.needed_locks[locking.LEVEL_INSTANCE].
379 It should be called from DeclareLocks, and for safety only works if
380 self.recalculate_locks[locking.LEVEL_NODE] is set.
382 In the future it may grow parameters to just lock some instance's nodes, or
383 to just lock primaries or secondary nodes, if needed.
385 It should be called in DeclareLocks in a way similar to::
387 if level == locking.LEVEL_NODE:
388 self._LockInstancesNodes()
390 @type primary_only: boolean
391 @param primary_only: only lock primary nodes of locked instances
392 @param level: Which lock level to use for locking nodes
395 assert level in self.recalculate_locks, \
396 "_LockInstancesNodes helper function called with no nodes to recalculate"
398 # TODO: check if we've really been called with the instance locks held
400 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
401 # future we might want to have different behaviors depending on the value
402 # of self.recalculate_locks[locking.LEVEL_NODE]
404 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
405 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
406 wanted_nodes.append(instance.primary_node)
408 wanted_nodes.extend(instance.secondary_nodes)
410 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
411 self.needed_locks[level] = wanted_nodes
412 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
413 self.needed_locks[level].extend(wanted_nodes)
415 raise errors.ProgrammerError("Unknown recalculation mode")
417 del self.recalculate_locks[level]
420 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
421 """Simple LU which runs no hooks.
423 This LU is intended as a parent for other LogicalUnits which will
424 run no hooks, in order to reduce duplicate code.
430 def BuildHooksEnv(self):
431 """Empty BuildHooksEnv for NoHooksLu.
433 This just raises an error.
436 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
438 def BuildHooksNodes(self):
439 """Empty BuildHooksNodes for NoHooksLU.
442 raise AssertionError("BuildHooksNodes called for NoHooksLU")
446 """Tasklet base class.
448 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
449 they can mix legacy code with tasklets. Locking needs to be done in the LU,
450 tasklets know nothing about locks.
452 Subclasses must follow these rules:
453 - Implement CheckPrereq
457 def __init__(self, lu):
464 def CheckPrereq(self):
465 """Check prerequisites for this tasklet.
467 This method should check whether the prerequisites for the execution of
468 this tasklet are fulfilled. It can do internode communication, but it
469 should be idempotent - no cluster or system changes are allowed.
471 The method should raise errors.OpPrereqError in case something is not
472 fulfilled. Its return value is ignored.
474 This method should also update all parameters to their canonical form if it
475 hasn't been done before.
480 def Exec(self, feedback_fn):
481 """Execute the tasklet.
483 This method should implement the actual work. It should raise
484 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
488 raise NotImplementedError
492 """Base for query utility classes.
495 #: Attribute holding field definitions
501 def __init__(self, qfilter, fields, use_locking):
502 """Initializes this class.
505 self.use_locking = use_locking
507 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
508 namefield=self.SORT_FIELD)
509 self.requested_data = self.query.RequestedData()
510 self.names = self.query.RequestedNames()
512 # Sort only if no names were requested
513 self.sort_by_name = not self.names
515 self.do_locking = None
518 def _GetNames(self, lu, all_names, lock_level):
519 """Helper function to determine names asked for in the query.
523 names = lu.owned_locks(lock_level)
527 if self.wanted == locking.ALL_SET:
528 assert not self.names
529 # caller didn't specify names, so ordering is not important
530 return utils.NiceSort(names)
532 # caller specified names and we must keep the same order
534 assert not self.do_locking or lu.glm.is_owned(lock_level)
536 missing = set(self.wanted).difference(names)
538 raise errors.OpExecError("Some items were removed before retrieving"
539 " their data: %s" % missing)
541 # Return expanded names
544 def ExpandNames(self, lu):
545 """Expand names for this query.
547 See L{LogicalUnit.ExpandNames}.
550 raise NotImplementedError()
552 def DeclareLocks(self, lu, level):
553 """Declare locks for this query.
555 See L{LogicalUnit.DeclareLocks}.
558 raise NotImplementedError()
560 def _GetQueryData(self, lu):
561 """Collects all data for this query.
563 @return: Query data object
566 raise NotImplementedError()
568 def NewStyleQuery(self, lu):
569 """Collect data and execute query.
572 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
573 sort_by_name=self.sort_by_name)
575 def OldStyleQuery(self, lu):
576 """Collect data and execute query.
579 return self.query.OldStyleQuery(self._GetQueryData(lu),
580 sort_by_name=self.sort_by_name)
584 """Returns a dict declaring all lock levels shared.
587 return dict.fromkeys(locking.LEVELS, 1)
590 def _MakeLegacyNodeInfo(data):
591 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
593 Converts the data into a single dictionary. This is fine for most use cases,
594 but some require information from more than one volume group or hypervisor.
597 (bootid, (vg_info, ), (hv_info, )) = data
599 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
604 def _AnnotateDiskParams(instance, devs, cfg):
605 """Little helper wrapper to the rpc annotation method.
607 @param instance: The instance object
608 @type devs: List of L{objects.Disk}
609 @param devs: The root devices (not any of its children!)
610 @param cfg: The config object
611 @return: The annotated disk copies
612 @see L{rpc.AnnotateDiskParams}
615 return rpc.AnnotateDiskParams(instance.disk_template, devs,
616 cfg.GetInstanceDiskParams(instance))
619 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
621 """Checks if node groups for locked instances are still correct.
623 @type cfg: L{config.ConfigWriter}
624 @param cfg: Cluster configuration
625 @type instances: dict; string as key, L{objects.Instance} as value
626 @param instances: Dictionary, instance name as key, instance object as value
627 @type owned_groups: iterable of string
628 @param owned_groups: List of owned groups
629 @type owned_nodes: iterable of string
630 @param owned_nodes: List of owned nodes
631 @type cur_group_uuid: string or None
632 @param cur_group_uuid: Optional group UUID to check against instance's groups
635 for (name, inst) in instances.items():
636 assert owned_nodes.issuperset(inst.all_nodes), \
637 "Instance %s's nodes changed while we kept the lock" % name
639 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
641 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
642 "Instance %s has no node in group %s" % (name, cur_group_uuid)
645 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
646 """Checks if the owned node groups are still correct for an instance.
648 @type cfg: L{config.ConfigWriter}
649 @param cfg: The cluster configuration
650 @type instance_name: string
651 @param instance_name: Instance name
652 @type owned_groups: set or frozenset
653 @param owned_groups: List of currently owned node groups
656 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
658 if not owned_groups.issuperset(inst_groups):
659 raise errors.OpPrereqError("Instance %s's node groups changed since"
660 " locks were acquired, current groups are"
661 " '%s', owning groups '%s'; retry the"
664 utils.CommaJoin(inst_groups),
665 utils.CommaJoin(owned_groups)),
671 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
672 """Checks if the instances in a node group are still correct.
674 @type cfg: L{config.ConfigWriter}
675 @param cfg: The cluster configuration
676 @type group_uuid: string
677 @param group_uuid: Node group UUID
678 @type owned_instances: set or frozenset
679 @param owned_instances: List of currently owned instances
682 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
683 if owned_instances != wanted_instances:
684 raise errors.OpPrereqError("Instances in node group '%s' changed since"
685 " locks were acquired, wanted '%s', have '%s';"
686 " retry the operation" %
688 utils.CommaJoin(wanted_instances),
689 utils.CommaJoin(owned_instances)),
692 return wanted_instances
695 def _SupportsOob(cfg, node):
696 """Tells if node supports OOB.
698 @type cfg: L{config.ConfigWriter}
699 @param cfg: The cluster configuration
700 @type node: L{objects.Node}
701 @param node: The node
702 @return: The OOB script if supported or an empty string otherwise
705 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
708 def _GetWantedNodes(lu, nodes):
709 """Returns list of checked and expanded node names.
711 @type lu: L{LogicalUnit}
712 @param lu: the logical unit on whose behalf we execute
714 @param nodes: list of node names or None for all nodes
716 @return: the list of nodes, sorted
717 @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
721 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
723 return utils.NiceSort(lu.cfg.GetNodeList())
726 def _GetWantedInstances(lu, instances):
727 """Returns list of checked and expanded instance names.
729 @type lu: L{LogicalUnit}
730 @param lu: the logical unit on whose behalf we execute
731 @type instances: list
732 @param instances: list of instance names or None for all instances
734 @return: the list of instances, sorted
735 @raise errors.OpPrereqError: if the instances parameter is of the wrong type
736 @raise errors.OpPrereqError: if any of the passed instances is not found
740 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
742 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
746 def _GetUpdatedParams(old_params, update_dict,
747 use_default=True, use_none=False):
748 """Return the new version of a parameter dictionary.
750 @type old_params: dict
751 @param old_params: old parameters
752 @type update_dict: dict
753 @param update_dict: dict containing new parameter values, or
754 constants.VALUE_DEFAULT to reset the parameter to its default
756 @type use_default: boolean
757 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
758 values as 'to be deleted' values
759 @type use_none: boolean
760 @param use_none: whether to recognise C{None} values as 'to be deleted' values
763 @return: the new parameter dictionary
766 params_copy = copy.deepcopy(old_params)
767 for key, val in update_dict.iteritems():
768 if ((use_default and val == constants.VALUE_DEFAULT) or
769 (use_none and val is None)):
775 params_copy[key] = val
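# Illustrative usage sketch (hypothetical values, not from the original code):
# the updates are applied to a copy of the old parameters, and any key set to
# constants.VALUE_DEFAULT is removed so it falls back to its default:
#
#   >>> _GetUpdatedParams({"a": 1, "b": 2},
#   ...                   {"b": constants.VALUE_DEFAULT, "c": 3})
#   {'a': 1, 'c': 3}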
779 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
780 """Return the new version of an instance policy.
782 @param group_policy: whether this policy applies to a group and thus
783 we should support removal of policy entries
786 use_none = use_default = group_policy
787 ipolicy = copy.deepcopy(old_ipolicy)
788 for key, value in new_ipolicy.items():
789 if key not in constants.IPOLICY_ALL_KEYS:
790 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
792 if key in constants.IPOLICY_ISPECS:
793 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
794 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
796 use_default=use_default)
798 if (not value or value == [constants.VALUE_DEFAULT] or
799 value == constants.VALUE_DEFAULT):
803 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
804 " on the cluster" % key,
807 if key in constants.IPOLICY_PARAMETERS:
808 # FIXME: we assume all such values are float
810 ipolicy[key] = float(value)
811 except (TypeError, ValueError), err:
812 raise errors.OpPrereqError("Invalid value for attribute"
813 " '%s': '%s', error: %s" %
814 (key, value, err), errors.ECODE_INVAL)
816 # FIXME: we assume all others are lists; this should be redone
818 ipolicy[key] = list(value)
820 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
821 except errors.ConfigurationError, err:
822 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
827 def _UpdateAndVerifySubDict(base, updates, type_check):
828 """Updates and verifies a dict with sub dicts of the same type.
830 @param base: The dict with the old data
831 @param updates: The dict with the new data
832 @param type_check: Dict suitable to ForceDictType to verify correct types
833 @returns: A new dict with updated and verified values
837 new = _GetUpdatedParams(old, value)
838 utils.ForceDictType(new, type_check)
841 ret = copy.deepcopy(base)
842 ret.update(dict((key, fn(base.get(key, {}), value))
843 for key, value in updates.items()))
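# Illustrative usage sketch (hypothetical keys, assuming a type_check that
# accepts integer values): sub-dicts present in both arguments are merged via
# _GetUpdatedParams, while sub-dicts found only in the base are kept as-is:
#
#   >>> _UpdateAndVerifySubDict({"grp0": {"p": 0}, "grp1": {"p": 1}},
#   ...                         {"grp1": {"p": 2}, "grp2": {"p": 3}},
#   ...                         type_check)
#   {'grp0': {'p': 0}, 'grp1': {'p': 2}, 'grp2': {'p': 3}}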
847 def _MergeAndVerifyHvState(op_input, obj_input):
848 """Combines the hv state from an opcode with that of the object.
850 @param op_input: The input dict from the opcode
851 @param obj_input: The input dict from the objects
852 @return: The verified and updated dict
856 invalid_hvs = set(op_input) - constants.HYPER_TYPES
858 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
859 " %s" % utils.CommaJoin(invalid_hvs),
861 if obj_input is None:
863 type_check = constants.HVSTS_PARAMETER_TYPES
864 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
869 def _MergeAndVerifyDiskState(op_input, obj_input):
870 """Combines the disk state from an opcode with that of the object.
872 @param op_input: The input dict from the opcode
873 @param obj_input: The input dict from the objects
874 @return: The verified and updated dict
877 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
879 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
880 utils.CommaJoin(invalid_dst),
882 type_check = constants.DSS_PARAMETER_TYPES
883 if obj_input is None:
885 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
887 for key, value in op_input.items())
892 def _ReleaseLocks(lu, level, names=None, keep=None):
893 """Releases locks owned by an LU.
895 @type lu: L{LogicalUnit}
896 @param level: Lock level
897 @type names: list or None
898 @param names: Names of locks to release
899 @type keep: list or None
900 @param keep: Names of locks to retain
903 assert not (keep is not None and names is not None), \
904 "Only one of the 'names' and the 'keep' parameters can be given"
906 if names is not None:
907 should_release = names.__contains__
909 should_release = lambda name: name not in keep
911 should_release = None
913 owned = lu.owned_locks(level)
915 # Not owning any lock at this level, do nothing
922 # Determine which locks to release
924 if should_release(name):
929 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
931 # Release just some locks
932 lu.glm.release(level, names=release)
934 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
937 lu.glm.release(level)
939 assert not lu.glm.is_owned(level), "No locks should be owned"
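# Illustrative usage sketch (the "self.instance" attribute is hypothetical):
# after an LU has narrowed down the nodes it really needs, the remaining
# node-level locks can be dropped while the listed ones are retained:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node])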
942 def _MapInstanceDisksToNodes(instances):
943 """Creates a map from (node, volume) to instance name.
945 @type instances: list of L{objects.Instance}
946 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
949 return dict(((node, vol), inst.name)
950 for inst in instances
951 for (node, vols) in inst.MapLVsByNode().items()
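# Illustrative sketch (hypothetical names): for an instance "inst1" whose
# logical volume "xenvg/disk0" lives on node "node1", the resulting dict
# contains the entry ("node1", "xenvg/disk0") -> "inst1", so the owner of a
# (node, volume) pair can be looked up directly.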
955 def _RunPostHook(lu, node_name):
956 """Runs the post-hook for an opcode on a single node.
959 hm = lu.proc.BuildHooksManager(lu)
961 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
963 # pylint: disable=W0702
964 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
967 def _CheckOutputFields(static, dynamic, selected):
968 """Checks whether all selected fields are valid.
970 @type static: L{utils.FieldSet}
971 @param static: static fields set
972 @type dynamic: L{utils.FieldSet}
973 @param dynamic: dynamic fields set
980 delta = f.NonMatching(selected)
982 raise errors.OpPrereqError("Unknown output fields selected: %s"
983 % ",".join(delta), errors.ECODE_INVAL)
986 def _CheckGlobalHvParams(params):
987 """Validates that given hypervisor params are not global ones.
989 This will ensure that instances don't get customised versions of global parameters.
993 used_globals = constants.HVC_GLOBALS.intersection(params)
995 msg = ("The following hypervisor parameters are global and cannot"
996 " be customized at instance level, please modify them at"
997 " cluster level: %s" % utils.CommaJoin(used_globals))
998 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1001 def _CheckNodeOnline(lu, node, msg=None):
1002 """Ensure that a given node is online.
1004 @param lu: the LU on behalf of which we make the check
1005 @param node: the node to check
1006 @param msg: if passed, should be a message to replace the default one
1007 @raise errors.OpPrereqError: if the node is offline
1011 msg = "Can't use offline node"
1012 if lu.cfg.GetNodeInfo(node).offline:
1013 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1016 def _CheckNodeNotDrained(lu, node):
1017 """Ensure that a given node is not drained.
1019 @param lu: the LU on behalf of which we make the check
1020 @param node: the node to check
1021 @raise errors.OpPrereqError: if the node is drained
1024 if lu.cfg.GetNodeInfo(node).drained:
1025 raise errors.OpPrereqError("Can't use drained node %s" % node,
1029 def _CheckNodeVmCapable(lu, node):
1030 """Ensure that a given node is vm capable.
1032 @param lu: the LU on behalf of which we make the check
1033 @param node: the node to check
1034 @raise errors.OpPrereqError: if the node is not vm capable
1037 if not lu.cfg.GetNodeInfo(node).vm_capable:
1038 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1042 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1043 """Ensure that a node supports a given OS.
1045 @param lu: the LU on behalf of which we make the check
1046 @param node: the node to check
1047 @param os_name: the OS to query about
1048 @param force_variant: whether to ignore variant errors
1049 @raise errors.OpPrereqError: if the node is not supporting the OS
1052 result = lu.rpc.call_os_get(node, os_name)
1053 result.Raise("OS '%s' not in supported OS list for node %s" %
1055 prereq=True, ecode=errors.ECODE_INVAL)
1056 if not force_variant:
1057 _CheckOSVariant(result.payload, os_name)
1060 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1061 """Ensure that a node has the given secondary ip.
1063 @type lu: L{LogicalUnit}
1064 @param lu: the LU on behalf of which we make the check
1066 @param node: the node to check
1067 @type secondary_ip: string
1068 @param secondary_ip: the ip to check
1069 @type prereq: boolean
1070 @param prereq: whether to throw a prerequisite or an execute error
1071 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1072 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1075 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1076 result.Raise("Failure checking secondary ip on node %s" % node,
1077 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1078 if not result.payload:
1079 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1080 " please fix and re-run this command" % secondary_ip)
1082 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1084 raise errors.OpExecError(msg)
1087 def _GetClusterDomainSecret():
1088 """Reads the cluster domain secret.
1091 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1095 def _CheckInstanceState(lu, instance, req_states, msg=None):
1096 """Ensure that an instance is in one of the required states.
1098 @param lu: the LU on behalf of which we make the check
1099 @param instance: the instance to check
1100 @param msg: if passed, should be a message to replace the default one
1101 @raise errors.OpPrereqError: if the instance is not in the required state
1105 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1106 if instance.admin_state not in req_states:
1107 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1108 (instance.name, instance.admin_state, msg),
1111 if constants.ADMINST_UP not in req_states:
1112 pnode = instance.primary_node
1113 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1114 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1115 prereq=True, ecode=errors.ECODE_ENVIRON)
1117 if instance.name in ins_l.payload:
1118 raise errors.OpPrereqError("Instance %s is running, %s" %
1119 (instance.name, msg), errors.ECODE_STATE)
1122 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1123 """Computes if value is in the desired range.
1125 @param name: name of the parameter for which we perform the check
1126 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1128 @param ipolicy: dictionary containing min, max and std values
1129 @param value: actual value that we want to use
1130 @return: None or element not meeting the criteria
1134 if value in [None, constants.VALUE_AUTO]:
1136 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1137 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1138 if value > max_v or min_v > value:
1140 fqn = "%s/%s" % (name, qualifier)
1143 return ("%s value %s is not in range [%s, %s]" %
1144 (fqn, value, min_v, max_v))
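# Illustrative sketch (hypothetical ipolicy with a 512..4096 range for
# constants.ISPEC_MEM_SIZE): a value of 1024 lies inside the range and yields
# None, while a value of 128 yields a message of the form
# "... value 128 is not in range [512, 4096]"; None and constants.VALUE_AUTO
# are always accepted.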
1148 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1149 nic_count, disk_sizes, spindle_use,
1150 _compute_fn=_ComputeMinMaxSpec):
1151 """Verifies ipolicy against provided specs.
1154 @param ipolicy: The ipolicy
1156 @param mem_size: The memory size
1157 @type cpu_count: int
1158 @param cpu_count: Used cpu cores
1159 @type disk_count: int
1160 @param disk_count: Number of disks used
1161 @type nic_count: int
1162 @param nic_count: Number of nics used
1163 @type disk_sizes: list of ints
1164 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1165 @type spindle_use: int
1166 @param spindle_use: The number of spindles this instance uses
1167 @param _compute_fn: The compute function (unittest only)
1168 @return: A list of violations, or an empty list if no violations are found
1171 assert disk_count == len(disk_sizes)
1174 (constants.ISPEC_MEM_SIZE, "", mem_size),
1175 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1176 (constants.ISPEC_DISK_COUNT, "", disk_count),
1177 (constants.ISPEC_NIC_COUNT, "", nic_count),
1178 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1179 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1180 for idx, d in enumerate(disk_sizes)]
1183 (_compute_fn(name, qualifier, ipolicy, value)
1184 for (name, qualifier, value) in test_settings))
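# Illustrative call sketch (hypothetical ipolicy and sizes): checking a spec
# with 1024 MB of memory, 2 VCPUs, one 10240 MB disk, one NIC and one spindle
# returns an empty list when every value fits the policy ranges:
#
#   _ComputeIPolicySpecViolation(ipolicy, 1024, 2, 1, 1, [10240], 1)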
1187 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1188 _compute_fn=_ComputeIPolicySpecViolation):
1189 """Compute if instance meets the specs of ipolicy.
1192 @param ipolicy: The ipolicy to verify against
1193 @type instance: L{objects.Instance}
1194 @param instance: The instance to verify
1195 @param _compute_fn: The function to verify ipolicy (unittest only)
1196 @see: L{_ComputeIPolicySpecViolation}
1199 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1200 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1201 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1202 disk_count = len(instance.disks)
1203 disk_sizes = [disk.size for disk in instance.disks]
1204 nic_count = len(instance.nics)
1206 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1207 disk_sizes, spindle_use)
1210 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1211 _compute_fn=_ComputeIPolicySpecViolation):
1212 """Compute if instance specs meet the specs of ipolicy.
1215 @param ipolicy: The ipolicy to verify against
1216 @type instance_spec: dict
1217 @param instance_spec: The instance spec to verify
1218 @param _compute_fn: The function to verify ipolicy (unittest only)
1219 @see: L{_ComputeIPolicySpecViolation}
1222 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1223 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1224 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1225 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1226 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1227 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1229 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1230 disk_sizes, spindle_use)
1233 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235 _compute_fn=_ComputeIPolicyInstanceViolation):
1236 """Compute if instance meets the specs of the new target group.
1238 @param ipolicy: The ipolicy to verify
1239 @param instance: The instance object to verify
1240 @param current_group: The current group of the instance
1241 @param target_group: The new group of the instance
1242 @param _compute_fn: The function to verify ipolicy (unittest only)
1243 @see: L{_ComputeIPolicySpecViolation}
1246 if current_group == target_group:
1249 return _compute_fn(ipolicy, instance)
1252 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1253 _compute_fn=_ComputeIPolicyNodeViolation):
1254 """Checks that the target node is correct in terms of instance policy.
1256 @param ipolicy: The ipolicy to verify
1257 @param instance: The instance object to verify
1258 @param node: The new node to relocate
1259 @param ignore: Ignore violations of the ipolicy
1260 @param _compute_fn: The function to verify ipolicy (unittest only)
1261 @see: L{_ComputeIPolicySpecViolation}
1264 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1265 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1268 msg = ("Instance does not meet target node group's (%s) instance"
1269 " policy: %s") % (node.group, utils.CommaJoin(res))
1273 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1276 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1277 """Computes a set of any instances that would violate the new ipolicy.
1279 @param old_ipolicy: The current (still in-place) ipolicy
1280 @param new_ipolicy: The new (to become) ipolicy
1281 @param instances: List of instances to verify
1282 @return: A list of instances which violate the new ipolicy but did not violate the old one
1286 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1287 _ComputeViolatingInstances(old_ipolicy, instances))
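# Illustrative sketch (hypothetical instances): if "inst1" violates both the
# old and the new policy while "inst2" violates only the new one, the set
# difference above evaluates to frozenset(["inst2"]), i.e. only newly
# introduced violations are reported.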
1290 def _ExpandItemName(fn, name, kind):
1291 """Expand an item name.
1293 @param fn: the function to use for expansion
1294 @param name: requested item name
1295 @param kind: text description ('Node' or 'Instance')
1296 @return: the resolved (full) name
1297 @raise errors.OpPrereqError: if the item is not found
1300 full_name = fn(name)
1301 if full_name is None:
1302 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1307 def _ExpandNodeName(cfg, name):
1308 """Wrapper over L{_ExpandItemName} for nodes."""
1309 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1312 def _ExpandInstanceName(cfg, name):
1313 """Wrapper over L{_ExpandItemName} for instance."""
1314 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
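# Illustrative usage sketch (hypothetical names): a user-supplied short name
# is canonicalized before locking, e.g. _ExpandInstanceName(self.cfg, "inst1")
# might return "inst1.example.com", while an unknown name raises
# errors.OpPrereqError.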
1317 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1318 minmem, maxmem, vcpus, nics, disk_template, disks,
1319 bep, hvp, hypervisor_name, tags):
1320 """Builds instance related env variables for hooks
1322 This builds the hook environment from individual variables.
1325 @param name: the name of the instance
1326 @type primary_node: string
1327 @param primary_node: the name of the instance's primary node
1328 @type secondary_nodes: list
1329 @param secondary_nodes: list of secondary nodes as strings
1330 @type os_type: string
1331 @param os_type: the name of the instance's OS
1332 @type status: string
1333 @param status: the desired status of the instance
1334 @type minmem: string
1335 @param minmem: the minimum memory size of the instance
1336 @type maxmem: string
1337 @param maxmem: the maximum memory size of the instance
1339 @param vcpus: the count of VCPUs the instance has
1341 @param nics: list of tuples (ip, mac, mode, link) representing
1342 the NICs the instance has
1343 @type disk_template: string
1344 @param disk_template: the disk template of the instance
1346 @param disks: the list of (size, mode) pairs
1348 @param bep: the backend parameters for the instance
1350 @param hvp: the hypervisor parameters for the instance
1351 @type hypervisor_name: string
1352 @param hypervisor_name: the hypervisor for the instance
1354 @param tags: list of instance tags as strings
1356 @return: the hook environment for this instance
1361 "INSTANCE_NAME": name,
1362 "INSTANCE_PRIMARY": primary_node,
1363 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1364 "INSTANCE_OS_TYPE": os_type,
1365 "INSTANCE_STATUS": status,
1366 "INSTANCE_MINMEM": minmem,
1367 "INSTANCE_MAXMEM": maxmem,
1368 # TODO(2.7) remove deprecated "memory" value
1369 "INSTANCE_MEMORY": maxmem,
1370 "INSTANCE_VCPUS": vcpus,
1371 "INSTANCE_DISK_TEMPLATE": disk_template,
1372 "INSTANCE_HYPERVISOR": hypervisor_name,
1375 nic_count = len(nics)
1376 for idx, (ip, mac, mode, link) in enumerate(nics):
1379 env["INSTANCE_NIC%d_IP" % idx] = ip
1380 env["INSTANCE_NIC%d_MAC" % idx] = mac
1381 env["INSTANCE_NIC%d_MODE" % idx] = mode
1382 env["INSTANCE_NIC%d_LINK" % idx] = link
1383 if mode == constants.NIC_MODE_BRIDGED:
1384 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1388 env["INSTANCE_NIC_COUNT"] = nic_count
1391 disk_count = len(disks)
1392 for idx, (size, mode) in enumerate(disks):
1393 env["INSTANCE_DISK%d_SIZE" % idx] = size
1394 env["INSTANCE_DISK%d_MODE" % idx] = mode
1398 env["INSTANCE_DISK_COUNT"] = disk_count
1403 env["INSTANCE_TAGS"] = " ".join(tags)
1405 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1406 for key, value in source.items():
1407 env["INSTANCE_%s_%s" % (kind, key)] = value
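# Illustrative sketch of the resulting environment (hypothetical instance with
# one NIC and one disk): keys such as INSTANCE_NAME, INSTANCE_PRIMARY,
# INSTANCE_NIC0_MAC, INSTANCE_NIC_COUNT, INSTANCE_DISK0_SIZE and
# INSTANCE_DISK_COUNT are produced here, plus one INSTANCE_BE_* and
# INSTANCE_HV_* entry per backend/hypervisor parameter; the hooks runner later
# prefixes every key with "GANETI_" before exporting it to the hook scripts.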
1412 def _NICListToTuple(lu, nics):
1413 """Build a list of nic information tuples.
1415 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1416 value in LUInstanceQueryData.
1418 @type lu: L{LogicalUnit}
1419 @param lu: the logical unit on whose behalf we execute
1420 @type nics: list of L{objects.NIC}
1421 @param nics: list of nics to convert to hooks tuples
1425 cluster = lu.cfg.GetClusterInfo()
1429 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1430 mode = filled_params[constants.NIC_MODE]
1431 link = filled_params[constants.NIC_LINK]
1432 hooks_nics.append((ip, mac, mode, link))
1436 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1437 """Builds instance related env variables for hooks from an object.
1439 @type lu: L{LogicalUnit}
1440 @param lu: the logical unit on whose behalf we execute
1441 @type instance: L{objects.Instance}
1442 @param instance: the instance for which we should build the
1444 @type override: dict
1445 @param override: dictionary with key/values that will override
1448 @return: the hook environment dictionary
1451 cluster = lu.cfg.GetClusterInfo()
1452 bep = cluster.FillBE(instance)
1453 hvp = cluster.FillHV(instance)
1455 "name": instance.name,
1456 "primary_node": instance.primary_node,
1457 "secondary_nodes": instance.secondary_nodes,
1458 "os_type": instance.os,
1459 "status": instance.admin_state,
1460 "maxmem": bep[constants.BE_MAXMEM],
1461 "minmem": bep[constants.BE_MINMEM],
1462 "vcpus": bep[constants.BE_VCPUS],
1463 "nics": _NICListToTuple(lu, instance.nics),
1464 "disk_template": instance.disk_template,
1465 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1468 "hypervisor_name": instance.hypervisor,
1469 "tags": instance.tags,
1472 args.update(override)
1473 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1476 def _AdjustCandidatePool(lu, exceptions):
1477 """Adjust the candidate pool after node operations.
1480 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1482 lu.LogInfo("Promoted nodes to master candidate role: %s",
1483 utils.CommaJoin(node.name for node in mod_list))
1484 for name in mod_list:
1485 lu.context.ReaddNode(name)
1486 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1488 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1492 def _DecideSelfPromotion(lu, exceptions=None):
1493 """Decide whether I should promote myself as a master candidate.
1496 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1497 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1498 # the new node will increase mc_max by one, so:
1499 mc_should = min(mc_should + 1, cp_size)
1500 return mc_now < mc_should
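# Illustrative arithmetic sketch (hypothetical numbers): with a candidate pool
# size of 10, three current candidates and a current target of three, the new
# node bumps the target to min(3 + 1, 10) = 4, so 3 < 4 holds and the function
# returns True, i.e. the node should promote itself.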
1503 def _CalculateGroupIPolicy(cluster, group):
1504 """Calculate instance policy for group.
1507 return cluster.SimpleFillIPolicy(group.ipolicy)
1510 def _ComputeViolatingInstances(ipolicy, instances):
1511 """Computes a set of instances that violate the given ipolicy.
1513 @param ipolicy: The ipolicy to verify
1514 @type instances: iterable of L{objects.Instance}
1515 @param instances: List of instances to verify
1516 @return: A frozenset of instance names violating the ipolicy
1519 return frozenset([inst.name for inst in instances
1520 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1523 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1524 """Check that the bridges needed by a list of nics exist.
1527 cluster = lu.cfg.GetClusterInfo()
1528 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1529 brlist = [params[constants.NIC_LINK] for params in paramslist
1530 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1532 result = lu.rpc.call_bridges_exist(target_node, brlist)
1533 result.Raise("Error checking bridges on destination node '%s'" %
1534 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1537 def _CheckInstanceBridgesExist(lu, instance, node=None):
1538 """Check that the bridges needed by an instance exist.
1542 node = instance.primary_node
1543 _CheckNicsBridgesExist(lu, instance.nics, node)
1546 def _CheckOSVariant(os_obj, name):
1547 """Check whether an OS name conforms to the os variants specification.
1549 @type os_obj: L{objects.OS}
1550 @param os_obj: OS object to check
1552 @param name: OS name passed by the user, to check for validity
1555 variant = objects.OS.GetVariant(name)
1556 if not os_obj.supported_variants:
1558 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1559 " passed)" % (os_obj.name, variant),
1563 raise errors.OpPrereqError("OS name must include a variant",
1566 if variant not in os_obj.supported_variants:
1567 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1570 def _GetNodeInstancesInner(cfg, fn):
1571 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1574 def _GetNodeInstances(cfg, node_name):
1575 """Returns a list of all primary and secondary instances on a node.
1579 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1582 def _GetNodePrimaryInstances(cfg, node_name):
1583 """Returns primary instances on a node.
1586 return _GetNodeInstancesInner(cfg,
1587 lambda inst: node_name == inst.primary_node)
1590 def _GetNodeSecondaryInstances(cfg, node_name):
1591 """Returns secondary instances on a node.
1594 return _GetNodeInstancesInner(cfg,
1595 lambda inst: node_name in inst.secondary_nodes)
1598 def _GetStorageTypeArgs(cfg, storage_type):
1599 """Returns the arguments for a storage type.
1602 # Special case for file storage
1603 if storage_type == constants.ST_FILE:
1604 # storage.FileStorage wants a list of storage directories
1605 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1610 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1613 for dev in instance.disks:
1614 cfg.SetDiskID(dev, node_name)
1616 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1618 result.Raise("Failed to get disk status from node %s" % node_name,
1619 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1621 for idx, bdev_status in enumerate(result.payload):
1622 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1628 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1629 """Check the sanity of iallocator and node arguments and use the
1630 cluster-wide iallocator if appropriate.
1632 Check that at most one of (iallocator, node) is specified. If none is
1633 specified, then the LU's opcode's iallocator slot is filled with the
1634 cluster-wide default iallocator.
1636 @type iallocator_slot: string
1637 @param iallocator_slot: the name of the opcode iallocator slot
1638 @type node_slot: string
1639 @param node_slot: the name of the opcode target node slot
1642 node = getattr(lu.op, node_slot, None)
1643 iallocator = getattr(lu.op, iallocator_slot, None)
1645 if node is not None and iallocator is not None:
1646 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1648 elif node is None and iallocator is None:
1649 default_iallocator = lu.cfg.GetDefaultIAllocator()
1650 if default_iallocator:
1651 setattr(lu.op, iallocator_slot, default_iallocator)
1653 raise errors.OpPrereqError("No iallocator or node given and no"
1654 " cluster-wide default iallocator found;"
1655 " please specify either an iallocator or a"
1656 " node, or set a cluster-wide default"
1660 def _GetDefaultIAllocator(cfg, iallocator):
1661 """Decides on which iallocator to use.
1663 @type cfg: L{config.ConfigWriter}
1664 @param cfg: Cluster configuration object
1665 @type iallocator: string or None
1666 @param iallocator: Iallocator specified in opcode
1668 @return: Iallocator name
1672 # Use default iallocator
1673 iallocator = cfg.GetDefaultIAllocator()
1676 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1677 " opcode nor as a cluster-wide default",
1683 class LUClusterPostInit(LogicalUnit):
1684 """Logical unit for running hooks after cluster initialization.
1687 HPATH = "cluster-init"
1688 HTYPE = constants.HTYPE_CLUSTER
1690 def BuildHooksEnv(self):
1695 "OP_TARGET": self.cfg.GetClusterName(),
1698 def BuildHooksNodes(self):
1699 """Build hooks nodes.
1702 return ([], [self.cfg.GetMasterNode()])
1704 def Exec(self, feedback_fn):
1711 class LUClusterDestroy(LogicalUnit):
1712 """Logical unit for destroying the cluster.
1715 HPATH = "cluster-destroy"
1716 HTYPE = constants.HTYPE_CLUSTER
1718 def BuildHooksEnv(self):
1723 "OP_TARGET": self.cfg.GetClusterName(),
1726 def BuildHooksNodes(self):
1727 """Build hooks nodes.
1732 def CheckPrereq(self):
1733 """Check prerequisites.
1735 This checks whether the cluster is empty.
1737 Any errors are signaled by raising errors.OpPrereqError.
1740 master = self.cfg.GetMasterNode()
1742 nodelist = self.cfg.GetNodeList()
1743 if len(nodelist) != 1 or nodelist[0] != master:
1744 raise errors.OpPrereqError("There are still %d node(s) in"
1745 " this cluster." % (len(nodelist) - 1),
1747 instancelist = self.cfg.GetInstanceList()
1749 raise errors.OpPrereqError("There are still %d instance(s) in"
1750 " this cluster." % len(instancelist),
1753 def Exec(self, feedback_fn):
1754 """Destroys the cluster.
1757 master_params = self.cfg.GetMasterNetworkParameters()
1759 # Run post hooks on master node before it's removed
1760 _RunPostHook(self, master_params.name)
1762 ems = self.cfg.GetUseExternalMipScript()
1763 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1766 self.LogWarning("Error disabling the master IP address: %s",
1769 return master_params.name
1772 def _VerifyCertificate(filename):
1773 """Verifies a certificate for L{LUClusterVerifyConfig}.
1775 @type filename: string
1776 @param filename: Path to PEM file
1780 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1781 utils.ReadFile(filename))
1782 except Exception, err: # pylint: disable=W0703
1783 return (LUClusterVerifyConfig.ETYPE_ERROR,
1784 "Failed to load X509 certificate %s: %s" % (filename, err))
1787 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1788 constants.SSL_CERT_EXPIRATION_ERROR)
1791 fnamemsg = "While verifying %s: %s" % (filename, msg)
1796 return (None, fnamemsg)
1797 elif errcode == utils.CERT_WARNING:
1798 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1799 elif errcode == utils.CERT_ERROR:
1800 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1802 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1805 def _GetAllHypervisorParameters(cluster, instances):
1806 """Compute the set of all hypervisor parameters.
1808 @type cluster: L{objects.Cluster}
1809 @param cluster: the cluster object
1810 @type instances: list of L{objects.Instance}
1811 @param instances: additional instances from which to obtain parameters
1812 @rtype: list of (origin, hypervisor, parameters)
1813 @return: a list with all parameters found, indicating the hypervisor they
1814 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1819 for hv_name in cluster.enabled_hypervisors:
1820 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1822 for os_name, os_hvp in cluster.os_hvp.items():
1823 for hv_name, hv_params in os_hvp.items():
1825 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1826 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1828 # TODO: collapse identical parameter values in a single one
1829 for instance in instances:
1830 if instance.hvparams:
1831 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1832 cluster.FillHV(instance)))
1837 class _VerifyErrors(object):
1838 """Mix-in for cluster/group verify LUs.
1840 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1841 self.op and self._feedback_fn to be available.)
1845 ETYPE_FIELD = "code"
1846 ETYPE_ERROR = "ERROR"
1847 ETYPE_WARNING = "WARNING"
1849 def _Error(self, ecode, item, msg, *args, **kwargs):
1850 """Format an error message.
1852 Based on the opcode's error_codes parameter, either format a
1853 parseable error code, or a simpler error string.
1855 This must be called only from Exec and functions called from Exec.
1858 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1859 itype, etxt, _ = ecode
1860 # first complete the msg
1863 # then format the whole message
1864 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1865 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1871 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1872 # and finally report it via the feedback_fn
1873 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
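# Illustrative sketch of the two output formats (error code and item are
# hypothetical): with op.error_codes set the line is machine-parseable,
# roughly "ERROR:ENODEHOOKS:node:node1.example.com:hook script failed",
# whereas the plain format reads more like
# "ERROR: node node1.example.com: hook script failed".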
1875 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1876 """Log an error message if the passed condition is True.
1880 or self.op.debug_simulate_errors) # pylint: disable=E1101
1882 # If the error code is in the list of ignored errors, demote the error to a warning.
1884 (_, etxt, _) = ecode
1885 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1886 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1889 self._Error(ecode, *args, **kwargs)
1891 # do not mark the operation as failed for WARN cases only
1892 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1893 self.bad = self.bad or cond
1896 class LUClusterVerify(NoHooksLU):
1897 """Submits all jobs necessary to verify the cluster.
1902 def ExpandNames(self):
1903 self.needed_locks = {}
1905 def Exec(self, feedback_fn):
1908 if self.op.group_name:
1909 groups = [self.op.group_name]
1910 depends_fn = lambda: None
1912 groups = self.cfg.GetNodeGroupList()
1914 # Verify global configuration
1916 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1919 # Always depend on global verification
1920 depends_fn = lambda: [(-len(jobs), [])]
1922 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1923 ignore_errors=self.op.ignore_errors,
1924 depends=depends_fn())]
1925 for group in groups)
1927 # Fix up all parameters
1928 for op in itertools.chain(*jobs): # pylint: disable=W0142
1929 op.debug_simulate_errors = self.op.debug_simulate_errors
1930 op.verbose = self.op.verbose
1931 op.error_codes = self.op.error_codes
1933 op.skip_checks = self.op.skip_checks
1934 except AttributeError:
1935 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1937 return ResultWithJobs(jobs)
1940 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1941 """Verifies the cluster config.
1946 def _VerifyHVP(self, hvp_data):
1947 """Verifies locally the syntax of the hypervisor parameters.
1950 for item, hv_name, hv_params in hvp_data:
1951 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1954 hv_class = hypervisor.GetHypervisor(hv_name)
1955 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1956 hv_class.CheckParameterSyntax(hv_params)
1957 except errors.GenericError, err:
1958 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1960 def ExpandNames(self):
1961 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1962 self.share_locks = _ShareAll()
1964 def CheckPrereq(self):
1965 """Check prerequisites.
1968 # Retrieve all information
1969 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1970 self.all_node_info = self.cfg.GetAllNodesInfo()
1971 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1973 def Exec(self, feedback_fn):
1974 """Verify integrity of cluster, performing various test on nodes.
1978 self._feedback_fn = feedback_fn
1980 feedback_fn("* Verifying cluster config")
1982 for msg in self.cfg.VerifyConfig():
1983 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1985 feedback_fn("* Verifying cluster certificate files")
1987 for cert_filename in constants.ALL_CERT_FILES:
1988 (errcode, msg) = _VerifyCertificate(cert_filename)
1989 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1991 feedback_fn("* Verifying hypervisor parameters")
1993 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1994 self.all_inst_info.values()))
1996 feedback_fn("* Verifying all nodes belong to an existing group")
1998 # We do this verification here because, should this bogus circumstance
1999 # occur, it would never be caught by VerifyGroup, which only acts on
2000 # nodes/instances reachable from existing node groups.
2002 dangling_nodes = set(node.name for node in self.all_node_info.values()
2003 if node.group not in self.all_group_info)
2005 dangling_instances = {}
2006 no_node_instances = []
2008 for inst in self.all_inst_info.values():
2009 if inst.primary_node in dangling_nodes:
2010 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2011 elif inst.primary_node not in self.all_node_info:
2012 no_node_instances.append(inst.name)
2017 utils.CommaJoin(dangling_instances.get(node.name,
2019 for node in dangling_nodes]
2021 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2023 "the following nodes (and their instances) belong to a non"
2024 " existing group: %s", utils.CommaJoin(pretty_dangling))
2026 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2028 "the following instances have a non-existing primary-node:"
2029 " %s", utils.CommaJoin(no_node_instances))
2034 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2035 """Verifies the status of a node group.
2038 HPATH = "cluster-verify"
2039 HTYPE = constants.HTYPE_CLUSTER
2042 _HOOKS_INDENT_RE = re.compile("^", re.M)
2044 class NodeImage(object):
2045 """A class representing the logical and physical status of a node.
2048 @ivar name: the node name to which this object refers
2049 @ivar volumes: a structure as returned from
2050 L{ganeti.backend.GetVolumeList} (runtime)
2051 @ivar instances: a list of running instances (runtime)
2052 @ivar pinst: list of configured primary instances (config)
2053 @ivar sinst: list of configured secondary instances (config)
2054 @ivar sbp: dictionary of {primary-node: list of instances} for all
2055 instances for which this node is secondary (config)
2056 @ivar mfree: free memory, as reported by hypervisor (runtime)
2057 @ivar dfree: free disk, as reported by the node (runtime)
2058 @ivar offline: the offline status (config)
2059 @type rpc_fail: boolean
2060 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2061 not whether the individual keys were correct) (runtime)
2062 @type lvm_fail: boolean
2063 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2064 @type hyp_fail: boolean
2065 @ivar hyp_fail: whether the RPC call didn't return the instance list
2066 @type ghost: boolean
2067 @ivar ghost: whether this is a known node or not (config)
2068 @type os_fail: boolean
2069 @ivar os_fail: whether the RPC call didn't return valid OS data
2071 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2072 @type vm_capable: boolean
2073 @ivar vm_capable: whether the node can host instances
2076 def __init__(self, offline=False, name=None, vm_capable=True):
2085 self.offline = offline
2086 self.vm_capable = vm_capable
2087 self.rpc_fail = False
2088 self.lvm_fail = False
2089 self.hyp_fail = False
2091 self.os_fail = False
2094 def ExpandNames(self):
2095 # This raises errors.OpPrereqError on its own:
2096 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2098 # Get instances in node group; this is unsafe and needs verification later
2100 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2102 self.needed_locks = {
2103 locking.LEVEL_INSTANCE: inst_names,
2104 locking.LEVEL_NODEGROUP: [self.group_uuid],
2105 locking.LEVEL_NODE: [],
2108 self.share_locks = _ShareAll()
2110 def DeclareLocks(self, level):
2111 if level == locking.LEVEL_NODE:
2112 # Get members of node group; this is unsafe and needs verification later
2113 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2115 all_inst_info = self.cfg.GetAllInstancesInfo()
2117 # In Exec(), we warn about mirrored instances that have primary and
2118 # secondary living in separate node groups. To fully verify that
2119 # volumes for these instances are healthy, we will need to do an
2120 # extra call to their secondaries. We ensure here those nodes will be locked.
2122 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2123 # Important: access only the instances whose lock is owned
2124 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2125 nodes.update(all_inst_info[inst].secondary_nodes)
2127 self.needed_locks[locking.LEVEL_NODE] = nodes
2129 def CheckPrereq(self):
2130 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2131 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2133 group_nodes = set(self.group_info.members)
2135 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2138 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2140 unlocked_instances = \
2141 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2144 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2145 utils.CommaJoin(unlocked_nodes),
2148 if unlocked_instances:
2149 raise errors.OpPrereqError("Missing lock for instances: %s" %
2150 utils.CommaJoin(unlocked_instances),
2153 self.all_node_info = self.cfg.GetAllNodesInfo()
2154 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2156 self.my_node_names = utils.NiceSort(group_nodes)
2157 self.my_inst_names = utils.NiceSort(group_instances)
2159 self.my_node_info = dict((name, self.all_node_info[name])
2160 for name in self.my_node_names)
2162 self.my_inst_info = dict((name, self.all_inst_info[name])
2163 for name in self.my_inst_names)
2165 # We detect here the nodes that will need the extra RPC calls for verifying
2166 # split LV volumes; they should be locked.
2167 extra_lv_nodes = set()
2169 for inst in self.my_inst_info.values():
2170 if inst.disk_template in constants.DTS_INT_MIRROR:
2171 for nname in inst.all_nodes:
2172 if self.all_node_info[nname].group != self.group_uuid:
2173 extra_lv_nodes.add(nname)
2175 unlocked_lv_nodes = \
2176 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2178 if unlocked_lv_nodes:
2179 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2180 utils.CommaJoin(unlocked_lv_nodes),
2182 self.extra_lv_nodes = list(extra_lv_nodes)
2184 def _VerifyNode(self, ninfo, nresult):
2185 """Perform some basic validation on data returned from a node.
2187 - check the result data structure is well formed and has all the expected fields
2189 - check ganeti version
2191 @type ninfo: L{objects.Node}
2192 @param ninfo: the node to check
2193 @param nresult: the results from the node
2195 @return: whether overall this call was successful (and we can expect
2196 reasonable values in the response)
2200 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2202 # main result, nresult should be a non-empty dict
2203 test = not nresult or not isinstance(nresult, dict)
2204 _ErrorIf(test, constants.CV_ENODERPC, node,
2205 "unable to verify node: no data returned")
2209 # compares ganeti version
2210 local_version = constants.PROTOCOL_VERSION
2211 remote_version = nresult.get("version", None)
2212 test = not (remote_version and
2213 isinstance(remote_version, (list, tuple)) and
2214 len(remote_version) == 2)
2215 _ErrorIf(test, constants.CV_ENODERPC, node,
2216 "connection to node returned invalid data")
2220 test = local_version != remote_version[0]
2221 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2222 "incompatible protocol versions: master %s,"
2223 " node %s", local_version, remote_version[0])
2227 # node seems compatible, we can actually try to look into its results
2229 # full package version
2230 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2231 constants.CV_ENODEVERSION, node,
2232 "software version mismatch: master %s, node %s",
2233 constants.RELEASE_VERSION, remote_version[1],
2234 code=self.ETYPE_WARNING)
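# Note: a protocol version mismatch above is a hard error, while a
# differing release (software) version is only reported as a warning.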
2236 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2237 if ninfo.vm_capable and isinstance(hyp_result, dict):
2238 for hv_name, hv_result in hyp_result.iteritems():
2239 test = hv_result is not None
2240 _ErrorIf(test, constants.CV_ENODEHV, node,
2241 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2243 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2244 if ninfo.vm_capable and isinstance(hvp_result, list):
2245 for item, hv_name, hv_result in hvp_result:
2246 _ErrorIf(True, constants.CV_ENODEHV, node,
2247 "hypervisor %s parameter verify failure (source %s): %s",
2248 hv_name, item, hv_result)
2250 test = nresult.get(constants.NV_NODESETUP,
2251 ["Missing NODESETUP results"])
2252 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2257 def _VerifyNodeTime(self, ninfo, nresult,
2258 nvinfo_starttime, nvinfo_endtime):
2259 """Check the node time.
2261 @type ninfo: L{objects.Node}
2262 @param ninfo: the node to check
2263 @param nresult: the remote results for the node
2264 @param nvinfo_starttime: the start time of the RPC call
2265 @param nvinfo_endtime: the end time of the RPC call
2269 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2271 ntime = nresult.get(constants.NV_TIME, None)
2273 ntime_merged = utils.MergeTime(ntime)
2274 except (ValueError, TypeError):
2275 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2278 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2279 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2280 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2281 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2285 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2286 "Node time diverges by at least %s from master node time",
2289 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2290 """Check the node LVM results.
2292 @type ninfo: L{objects.Node}
2293 @param ninfo: the node to check
2294 @param nresult: the remote results for the node
2295 @param vg_name: the configured VG name
2302 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2304 # checks vg existence and size > 20G
2305 vglist = nresult.get(constants.NV_VGLIST, None)
2307 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2309 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2310 constants.MIN_VG_SIZE)
2311 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2314 pvlist = nresult.get(constants.NV_PVLIST, None)
2315 test = pvlist is None
2316 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2318 # check that ':' is not present in PV names, since it's a
2319 # special character for lvcreate (denotes the range of PEs to allocate on)
2321 for _, pvname, owner_vg in pvlist:
2322 test = ":" in pvname
2323 _ErrorIf(test, constants.CV_ENODELVM, node,
2324 "Invalid character ':' in PV '%s' of VG '%s'",
2327 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2328 """Check the node bridges.
2330 @type ninfo: L{objects.Node}
2331 @param ninfo: the node to check
2332 @param nresult: the remote results for the node
2333 @param bridges: the expected list of bridges
2340 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2342 missing = nresult.get(constants.NV_BRIDGES, None)
2343 test = not isinstance(missing, list)
2344 _ErrorIf(test, constants.CV_ENODENET, node,
2345 "did not return valid bridge information")
2347 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2348 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2350 def _VerifyNodeUserScripts(self, ninfo, nresult):
2351 """Check the results of user scripts presence and executability on the node
2353 @type ninfo: L{objects.Node}
2354 @param ninfo: the node to check
2355 @param nresult: the remote results for the node
2360 test = not constants.NV_USERSCRIPTS in nresult
2361 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2362 "did not return user scripts information")
2364 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2366 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2367 "user scripts not present or not executable: %s" %
2368 utils.CommaJoin(sorted(broken_scripts)))
2370 def _VerifyNodeNetwork(self, ninfo, nresult):
2371 """Check the node network connectivity results.
2373 @type ninfo: L{objects.Node}
2374 @param ninfo: the node to check
2375 @param nresult: the remote results for the node
2379 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2381 test = constants.NV_NODELIST not in nresult
2382 _ErrorIf(test, constants.CV_ENODESSH, node,
2383 "node hasn't returned node ssh connectivity data")
2385 if nresult[constants.NV_NODELIST]:
2386 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2387 _ErrorIf(True, constants.CV_ENODESSH, node,
2388 "ssh communication with node '%s': %s", a_node, a_msg)
2390 test = constants.NV_NODENETTEST not in nresult
2391 _ErrorIf(test, constants.CV_ENODENET, node,
2392 "node hasn't returned node tcp connectivity data")
2394 if nresult[constants.NV_NODENETTEST]:
2395 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2397 _ErrorIf(True, constants.CV_ENODENET, node,
2398 "tcp communication with node '%s': %s",
2399 anode, nresult[constants.NV_NODENETTEST][anode])
2401 test = constants.NV_MASTERIP not in nresult
2402 _ErrorIf(test, constants.CV_ENODENET, node,
2403 "node hasn't returned node master IP reachability data")
2405 if not nresult[constants.NV_MASTERIP]:
2406 if node == self.master_node:
2407 msg = "the master node cannot reach the master IP (not configured?)"
2409 msg = "cannot reach the master IP"
2410 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2412 def _VerifyInstance(self, instance, instanceconfig, node_image,
2414 """Verify an instance.
2416 This function checks to see if the required block devices are
2417 available on the instance's node.
2420 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2421 node_current = instanceconfig.primary_node
2423 node_vol_should = {}
2424 instanceconfig.MapLVsByNode(node_vol_should)
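# node_vol_should maps each node name to the list of logical volumes this
# instance is expected to have there; it is checked against the node
# images collected at runtime.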
2426 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2427 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2428 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2430 for node in node_vol_should:
2431 n_img = node_image[node]
2432 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2433 # ignore missing volumes on offline or broken nodes
2435 for volume in node_vol_should[node]:
2436 test = volume not in n_img.volumes
2437 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2438 "volume %s missing on node %s", volume, node)
2440 if instanceconfig.admin_state == constants.ADMINST_UP:
2441 pri_img = node_image[node_current]
2442 test = instance not in pri_img.instances and not pri_img.offline
2443 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2444 "instance not running on its primary node %s",
2447 diskdata = [(nname, success, status, idx)
2448 for (nname, disks) in diskstatus.items()
2449 for idx, (success, status) in enumerate(disks)]
2451 for nname, success, bdev_status, idx in diskdata:
2452 # the 'ghost node' construction in Exec() ensures that node_image has an entry here
2454 snode = node_image[nname]
2455 bad_snode = snode.ghost or snode.offline
2456 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2457 not success and not bad_snode,
2458 constants.CV_EINSTANCEFAULTYDISK, instance,
2459 "couldn't retrieve status for disk/%s on %s: %s",
2460 idx, nname, bdev_status)
2461 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2462 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2463 constants.CV_EINSTANCEFAULTYDISK, instance,
2464 "disk/%s on %s is faulty", idx, nname)
2466 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2467 """Verify if there are any unknown volumes in the cluster.
2469 The .os, .swap and backup volumes are ignored. All other volumes are
2470 reported as unknown.
2472 @type reserved: L{ganeti.utils.FieldSet}
2473 @param reserved: a FieldSet of reserved volume names
2476 for node, n_img in node_image.items():
2477 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2478 self.all_node_info[node].group != self.group_uuid):
2479 # skip non-healthy nodes
2481 for volume in n_img.volumes:
2482 test = ((node not in node_vol_should or
2483 volume not in node_vol_should[node]) and
2484 not reserved.Matches(volume))
2485 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2486 "volume %s is unknown", volume)
2488 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2489 """Verify N+1 Memory Resilience.
2491 Check that if one single node dies we can still start all the
2492 instances it was primary for.
2495 cluster_info = self.cfg.GetClusterInfo()
2496 for node, n_img in node_image.items():
2497 # This code checks that every node which is now listed as
2498 # secondary has enough memory to host all instances it is
2499 # supposed to host, should a single other node in the cluster fail.
2500 # FIXME: not ready for failover to an arbitrary node
2501 # FIXME: does not support file-backed instances
2502 # WARNING: we currently take into account down instances as well
2503 # as up ones, considering that even if they're down someone
2504 # might want to start them even in the event of a node failure.
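# Example: if this node is secondary for two auto-balanced instances whose
# primary is node A, it must have enough free memory to start both of them
# should node A fail.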
2505 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2506 # we're skipping nodes marked offline and nodes in other groups from
2507 # the N+1 warning, since most likely we don't have good memory
2508 # information from them; we already list instances living on such
2509 # nodes, and that's enough warning
2511 #TODO(dynmem): also consider ballooning out other instances
2512 for prinode, instances in n_img.sbp.items():
2514 for instance in instances:
2515 bep = cluster_info.FillBE(instance_cfg[instance])
2516 if bep[constants.BE_AUTO_BALANCE]:
2517 needed_mem += bep[constants.BE_MINMEM]
2518 test = n_img.mfree < needed_mem
2519 self._ErrorIf(test, constants.CV_ENODEN1, node,
2520 "not enough memory to accomodate instance failovers"
2521 " should node %s fail (%dMiB needed, %dMiB available)",
2522 prinode, needed_mem, n_img.mfree)
2525 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2526 (files_all, files_opt, files_mc, files_vm)):
2527 """Verifies file checksums collected from all nodes.
2529 @param errorif: Callback for reporting errors
2530 @param nodeinfo: List of L{objects.Node} objects
2531 @param master_node: Name of master node
2532 @param all_nvinfo: RPC results
2535 # Define functions determining which nodes to consider for a file
2538 (files_mc, lambda node: (node.master_candidate or
2539 node.name == master_node)),
2540 (files_vm, lambda node: node.vm_capable),
2543 # Build mapping from filename to list of nodes which should have the file
2545 for (files, fn) in files2nodefn:
2547 filenodes = nodeinfo
2549 filenodes = filter(fn, nodeinfo)
2550 nodefiles.update((filename,
2551 frozenset(map(operator.attrgetter("name"), filenodes)))
2552 for filename in files)
2554 assert set(nodefiles) == (files_all | files_mc | files_vm)
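# For every file we then check three things: that it is present on all
# nodes that should have it (for optional files, on all or none of them),
# that it does not show up on nodes that should not have it, and that all
# copies share the same checksum.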
2556 fileinfo = dict((filename, {}) for filename in nodefiles)
2557 ignore_nodes = set()
2559 for node in nodeinfo:
2561 ignore_nodes.add(node.name)
2564 nresult = all_nvinfo[node.name]
2566 if nresult.fail_msg or not nresult.payload:
2569 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2571 test = not (node_files and isinstance(node_files, dict))
2572 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2573 "Node did not return file checksum data")
2575 ignore_nodes.add(node.name)
2578 # Build per-checksum mapping from filename to nodes having it
2579 for (filename, checksum) in node_files.items():
2580 assert filename in nodefiles
2581 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2583 for (filename, checksums) in fileinfo.items():
2584 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2586 # Nodes having the file
2587 with_file = frozenset(node_name
2588 for nodes in fileinfo[filename].values()
2589 for node_name in nodes) - ignore_nodes
2591 expected_nodes = nodefiles[filename] - ignore_nodes
2593 # Nodes missing file
2594 missing_file = expected_nodes - with_file
2596 if filename in files_opt:
2598 errorif(missing_file and missing_file != expected_nodes,
2599 constants.CV_ECLUSTERFILECHECK, None,
2600 "File %s is optional, but it must exist on all or no"
2601 " nodes (not found on %s)",
2602 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2604 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2605 "File %s is missing from node(s) %s", filename,
2606 utils.CommaJoin(utils.NiceSort(missing_file)))
2608 # Warn if a node has a file it shouldn't
2609 unexpected = with_file - expected_nodes
2611 constants.CV_ECLUSTERFILECHECK, None,
2612 "File %s should not exist on node(s) %s",
2613 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2615 # See if there are multiple versions of the file
2616 test = len(checksums) > 1
2618 variants = ["variant %s on %s" %
2619 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2620 for (idx, (checksum, nodes)) in
2621 enumerate(sorted(checksums.items()))]
2625 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2626 "File %s found with %s different checksums (%s)",
2627 filename, len(checksums), "; ".join(variants))
2629 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2631 """Verifies and the node DRBD status.
2633 @type ninfo: L{objects.Node}
2634 @param ninfo: the node to check
2635 @param nresult: the remote results for the node
2636 @param instanceinfo: the dict of instances
2637 @param drbd_helper: the configured DRBD usermode helper
2638 @param drbd_map: the DRBD map as returned by
2639 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2643 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2646 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2647 test = (helper_result is None)
2648 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2649 "no drbd usermode helper returned")
2651 status, payload = helper_result
2653 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2654 "drbd usermode helper check unsuccessful: %s", payload)
2655 test = status and (payload != drbd_helper)
2656 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2657 "wrong drbd usermode helper: %s", payload)
2659 # compute the DRBD minors
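# (drbd_map has the form {node: {minor: instance_name}}; node_drbd built
# below maps each minor to (instance_name, should_be_active))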
2661 for minor, instance in drbd_map[node].items():
2662 test = instance not in instanceinfo
2663 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2664 "ghost instance '%s' in temporary DRBD map", instance)
2665 # ghost instance should not be running, but otherwise we
2666 # don't give double warnings (both ghost instance and
2667 # unallocated minor in use)
2669 node_drbd[minor] = (instance, False)
2671 instance = instanceinfo[instance]
2672 node_drbd[minor] = (instance.name,
2673 instance.admin_state == constants.ADMINST_UP)
2675 # and now check them
2676 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2677 test = not isinstance(used_minors, (tuple, list))
2678 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2679 "cannot parse drbd status file: %s", str(used_minors))
2681 # we cannot check drbd status
2684 for minor, (iname, must_exist) in node_drbd.items():
2685 test = minor not in used_minors and must_exist
2686 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2687 "drbd minor %d of instance %s is not active", minor, iname)
2688 for minor in used_minors:
2689 test = minor not in node_drbd
2690 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2691 "unallocated drbd minor %d is in use", minor)
2693 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2694 """Builds the node OS structures.
2696 @type ninfo: L{objects.Node}
2697 @param ninfo: the node to check
2698 @param nresult: the remote results for the node
2699 @param nimg: the node image object
2703 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2705 remote_os = nresult.get(constants.NV_OSLIST, None)
2706 test = (not isinstance(remote_os, list) or
2707 not compat.all(isinstance(v, list) and len(v) == 7
2708 for v in remote_os))
2710 _ErrorIf(test, constants.CV_ENODEOS, node,
2711 "node hasn't returned valid OS data")
2720 for (name, os_path, status, diagnose,
2721 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2723 if name not in os_dict:
2726 # parameters is a list of lists instead of list of tuples due to
2727 # JSON lacking a real tuple type, fix it:
2728 parameters = [tuple(v) for v in parameters]
2729 os_dict[name].append((os_path, status, diagnose,
2730 set(variants), set(parameters), set(api_ver)))
2732 nimg.oslist = os_dict
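# nimg.oslist now maps OS name -> list of
# (path, status, diagnose, variants, parameters, api_versions) tuples,
# one entry per location the OS was found in on the node.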
2734 def _VerifyNodeOS(self, ninfo, nimg, base):
2735 """Verifies the node OS list.
2737 @type ninfo: L{objects.Node}
2738 @param ninfo: the node to check
2739 @param nimg: the node image object
2740 @param base: the 'template' node we match against (e.g. from the master)
2744 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2746 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2748 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2749 for os_name, os_data in nimg.oslist.items():
2750 assert os_data, "Empty OS status for OS %s?!" % os_name
2751 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2752 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2753 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2754 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2755 "OS '%s' has multiple entries (first one shadows the rest): %s",
2756 os_name, utils.CommaJoin([v[0] for v in os_data]))
2757 # comparisons with the 'base' image
2758 test = os_name not in base.oslist
2759 _ErrorIf(test, constants.CV_ENODEOS, node,
2760 "Extra OS %s not present on reference node (%s)",
2764 assert base.oslist[os_name], "Base node has empty OS status?"
2765 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2767 # base OS is invalid, skipping
2769 for kind, a, b in [("API version", f_api, b_api),
2770 ("variants list", f_var, b_var),
2771 ("parameters", beautify_params(f_param),
2772 beautify_params(b_param))]:
2773 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2774 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2775 kind, os_name, base.name,
2776 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2778 # check any missing OSes
2779 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2780 _ErrorIf(missing, constants.CV_ENODEOS, node,
2781 "OSes present on reference node %s but missing on this node: %s",
2782 base.name, utils.CommaJoin(missing))
2784 def _VerifyOob(self, ninfo, nresult):
2785 """Verifies out of band functionality of a node.
2787 @type ninfo: L{objects.Node}
2788 @param ninfo: the node to check
2789 @param nresult: the remote results for the node
2793 # We just have to verify the paths on master and/or master candidates
2794 # as the oob helper is invoked on the master
2795 if ((ninfo.master_candidate or ninfo.master_capable) and
2796 constants.NV_OOB_PATHS in nresult):
2797 for path_result in nresult[constants.NV_OOB_PATHS]:
2798 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2800 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2801 """Verifies and updates the node volume data.
2803 This function will update a L{NodeImage}'s internal structures
2804 with data from the remote call.
2806 @type ninfo: L{objects.Node}
2807 @param ninfo: the node to check
2808 @param nresult: the remote results for the node
2809 @param nimg: the node image object
2810 @param vg_name: the configured VG name
2814 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2816 nimg.lvm_fail = True
2817 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2820 elif isinstance(lvdata, basestring):
2821 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2822 utils.SafeEncode(lvdata))
2823 elif not isinstance(lvdata, dict):
2824 _ErrorIf(True, constants.CV_ENODELVM, node,
2825 "rpc call to node failed (lvlist)")
2827 nimg.volumes = lvdata
2828 nimg.lvm_fail = False
2830 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2831 """Verifies and updates the node instance list.
2833 If the listing was successful, then updates this node's instance
2834 list. Otherwise, it marks the RPC call as failed for the instance list.
2837 @type ninfo: L{objects.Node}
2838 @param ninfo: the node to check
2839 @param nresult: the remote results for the node
2840 @param nimg: the node image object
2843 idata = nresult.get(constants.NV_INSTANCELIST, None)
2844 test = not isinstance(idata, list)
2845 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2846 "rpc call to node failed (instancelist): %s",
2847 utils.SafeEncode(str(idata)))
2849 nimg.hyp_fail = True
2851 nimg.instances = idata
2853 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2854 """Verifies and computes a node information map
2856 @type ninfo: L{objects.Node}
2857 @param ninfo: the node to check
2858 @param nresult: the remote results for the node
2859 @param nimg: the node image object
2860 @param vg_name: the configured VG name
2864 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2866 # try to read free memory (from the hypervisor)
2867 hv_info = nresult.get(constants.NV_HVINFO, None)
2868 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2869 _ErrorIf(test, constants.CV_ENODEHV, node,
2870 "rpc call to node failed (hvinfo)")
2873 nimg.mfree = int(hv_info["memory_free"])
2874 except (ValueError, TypeError):
2875 _ErrorIf(True, constants.CV_ENODERPC, node,
2876 "node returned invalid nodeinfo, check hypervisor")
2878 # FIXME: devise a free space model for file based instances as well
2879 if vg_name is not None:
2880 test = (constants.NV_VGLIST not in nresult or
2881 vg_name not in nresult[constants.NV_VGLIST])
2882 _ErrorIf(test, constants.CV_ENODELVM, node,
2883 "node didn't return data for the volume group '%s'"
2884 " - it is either missing or broken", vg_name)
2887 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2888 except (ValueError, TypeError):
2889 _ErrorIf(True, constants.CV_ENODERPC, node,
2890 "node returned invalid LVM info, check LVM status")
2892 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2893 """Gets per-disk status information for all instances.
2895 @type nodelist: list of strings
2896 @param nodelist: Node names
2897 @type node_image: dict of (name, L{objects.Node})
2898 @param node_image: Node objects
2899 @type instanceinfo: dict of (name, L{objects.Instance})
2900 @param instanceinfo: Instance objects
2901 @rtype: {instance: {node: [(success, payload)]}}
2902 @return: a dictionary of per-instance dictionaries with nodes as
2903 keys and disk information as values; the disk information is a
2904 list of tuples (success, payload)
2907 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2910 node_disks_devonly = {}
2911 diskless_instances = set()
2912 diskless = constants.DT_DISKLESS
2914 for nname in nodelist:
2915 node_instances = list(itertools.chain(node_image[nname].pinst,
2916 node_image[nname].sinst))
2917 diskless_instances.update(inst for inst in node_instances
2918 if instanceinfo[inst].disk_template == diskless)
2919 disks = [(inst, disk)
2920 for inst in node_instances
2921 for disk in instanceinfo[inst].disks]
2924 # No need to collect data
2927 node_disks[nname] = disks
2929 # _AnnotateDiskParams makes already copies of the disks
2931 for (inst, dev) in disks:
2932 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2933 self.cfg.SetDiskID(anno_disk, nname)
2934 devonly.append(anno_disk)
2936 node_disks_devonly[nname] = devonly
2938 assert len(node_disks) == len(node_disks_devonly)
2940 # Collect data from all nodes with disks
2941 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2944 assert len(result) == len(node_disks)
2948 for (nname, nres) in result.items():
2949 disks = node_disks[nname]
2952 # No data from this node
2953 data = len(disks) * [(False, "node offline")]
2956 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2957 "while getting disk information: %s", msg)
2959 # No data from this node
2960 data = len(disks) * [(False, msg)]
2963 for idx, i in enumerate(nres.payload):
2964 if isinstance(i, (tuple, list)) and len(i) == 2:
2967 logging.warning("Invalid result from node %s, entry %d: %s",
2969 data.append((False, "Invalid result from the remote node"))
2971 for ((inst, _), status) in zip(disks, data):
2972 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2974 # Add empty entries for diskless instances.
2975 for inst in diskless_instances:
2976 assert inst not in instdisk
2979 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2980 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2981 compat.all(isinstance(s, (tuple, list)) and
2982 len(s) == 2 for s in statuses)
2983 for inst, nnames in instdisk.items()
2984 for nname, statuses in nnames.items())
2985 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2990 def _SshNodeSelector(group_uuid, all_nodes):
2991 """Create endless iterators for all potential SSH check hosts.
2994 nodes = [node for node in all_nodes
2995 if (node.group != group_uuid and
2997 keyfunc = operator.attrgetter("group")
2999 return map(itertools.cycle,
3000 [sorted(map(operator.attrgetter("name"), names))
3001 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3005 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3006 """Choose which nodes should talk to which other nodes.
3008 We will make nodes contact all nodes in their group, and one node from every other group.
3011 @warning: This algorithm has a known issue if one node group is much
3012 smaller than others (e.g. just one node). In such a case all other
3013 nodes will talk to the single node.
3016 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3017 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3019 return (online_nodes,
3020 dict((name, sorted([i.next() for i in sel]))
3021 for name in online_nodes))
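# Illustrative example (hypothetical node names): the first element lists
# the online nodes of this group; the dict then assigns every one of them
# one node from each *other* group, chosen round-robin, e.g.
#   {"g1-node1": ["g2-node1", "g3-node4"], "g1-node2": ["g2-node2", ...]}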
3023 def BuildHooksEnv(self):
3026 Cluster-Verify hooks run only in the post phase; if they fail, their
3027 output is logged in the verify output and the verification fails.
3031 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3034 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3035 for node in self.my_node_info.values())
3039 def BuildHooksNodes(self):
3040 """Build hooks nodes.
3043 return ([], self.my_node_names)
3045 def Exec(self, feedback_fn):
3046 """Verify integrity of the node group, performing various test on nodes.
3049 # This method has too many local variables. pylint: disable=R0914
3050 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3052 if not self.my_node_names:
3054 feedback_fn("* Empty node group, skipping verification")
3058 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3059 verbose = self.op.verbose
3060 self._feedback_fn = feedback_fn
3062 vg_name = self.cfg.GetVGName()
3063 drbd_helper = self.cfg.GetDRBDHelper()
3064 cluster = self.cfg.GetClusterInfo()
3065 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3066 hypervisors = cluster.enabled_hypervisors
3067 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3069 i_non_redundant = [] # Non redundant instances
3070 i_non_a_balanced = [] # Non auto-balanced instances
3071 i_offline = 0 # Count of offline instances
3072 n_offline = 0 # Count of offline nodes
3073 n_drained = 0 # Count of nodes being drained
3074 node_vol_should = {}
3076 # FIXME: verify OS list
3079 filemap = _ComputeAncillaryFiles(cluster, False)
3081 # do local checksums
3082 master_node = self.master_node = self.cfg.GetMasterNode()
3083 master_ip = self.cfg.GetMasterIP()
3085 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3088 if self.cfg.GetUseExternalMipScript():
3089 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3091 node_verify_param = {
3092 constants.NV_FILELIST:
3093 utils.UniqueSequence(filename
3094 for files in filemap
3095 for filename in files),
3096 constants.NV_NODELIST:
3097 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3098 self.all_node_info.values()),
3099 constants.NV_HYPERVISOR: hypervisors,
3100 constants.NV_HVPARAMS:
3101 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3102 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3103 for node in node_data_list
3104 if not node.offline],
3105 constants.NV_INSTANCELIST: hypervisors,
3106 constants.NV_VERSION: None,
3107 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3108 constants.NV_NODESETUP: None,
3109 constants.NV_TIME: None,
3110 constants.NV_MASTERIP: (master_node, master_ip),
3111 constants.NV_OSLIST: None,
3112 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3113 constants.NV_USERSCRIPTS: user_scripts,
3116 if vg_name is not None:
3117 node_verify_param[constants.NV_VGLIST] = None
3118 node_verify_param[constants.NV_LVLIST] = vg_name
3119 node_verify_param[constants.NV_PVLIST] = [vg_name]
3120 node_verify_param[constants.NV_DRBDLIST] = None
3123 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3126 # FIXME: this needs to be changed per node-group, not cluster-wide
3128 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3129 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3130 bridges.add(default_nicpp[constants.NIC_LINK])
3131 for instance in self.my_inst_info.values():
3132 for nic in instance.nics:
3133 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3134 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3135 bridges.add(full_nic[constants.NIC_LINK])
3138 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3140 # Build our expected cluster state
3141 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3143 vm_capable=node.vm_capable))
3144 for node in node_data_list)
3148 for node in self.all_node_info.values():
3149 path = _SupportsOob(self.cfg, node)
3150 if path and path not in oob_paths:
3151 oob_paths.append(path)
3154 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3156 for instance in self.my_inst_names:
3157 inst_config = self.my_inst_info[instance]
3158 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3161 for nname in inst_config.all_nodes:
3162 if nname not in node_image:
3163 gnode = self.NodeImage(name=nname)
3164 gnode.ghost = (nname not in self.all_node_info)
3165 node_image[nname] = gnode
3167 inst_config.MapLVsByNode(node_vol_should)
3169 pnode = inst_config.primary_node
3170 node_image[pnode].pinst.append(instance)
3172 for snode in inst_config.secondary_nodes:
3173 nimg = node_image[snode]
3174 nimg.sinst.append(instance)
3175 if pnode not in nimg.sbp:
3176 nimg.sbp[pnode] = []
3177 nimg.sbp[pnode].append(instance)
3179 # At this point, we have the in-memory data structures complete,
3180 # except for the runtime information, which we'll gather next
3182 # Due to the way our RPC system works, exact response times cannot be
3183 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3184 # time before and after executing the request, we can at least have a time window.
3186 nvinfo_starttime = time.time()
3187 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3189 self.cfg.GetClusterName())
3190 nvinfo_endtime = time.time()
3192 if self.extra_lv_nodes and vg_name is not None:
3194 self.rpc.call_node_verify(self.extra_lv_nodes,
3195 {constants.NV_LVLIST: vg_name},
3196 self.cfg.GetClusterName())
3198 extra_lv_nvinfo = {}
3200 all_drbd_map = self.cfg.ComputeDRBDMap()
3202 feedback_fn("* Gathering disk information (%s nodes)" %
3203 len(self.my_node_names))
3204 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3207 feedback_fn("* Verifying configuration file consistency")
3209 # If not all nodes are being checked, we need to make sure the master node
3210 # and a non-checked vm_capable node are in the list.
3211 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3213 vf_nvinfo = all_nvinfo.copy()
3214 vf_node_info = list(self.my_node_info.values())
3215 additional_nodes = []
3216 if master_node not in self.my_node_info:
3217 additional_nodes.append(master_node)
3218 vf_node_info.append(self.all_node_info[master_node])
3219 # Add the first vm_capable node we find which is not included,
3220 # excluding the master node (which we already have)
3221 for node in absent_nodes:
3222 nodeinfo = self.all_node_info[node]
3223 if (nodeinfo.vm_capable and not nodeinfo.offline and
3224 node != master_node):
3225 additional_nodes.append(node)
3226 vf_node_info.append(self.all_node_info[node])
3228 key = constants.NV_FILELIST
3229 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3230 {key: node_verify_param[key]},
3231 self.cfg.GetClusterName()))
3233 vf_nvinfo = all_nvinfo
3234 vf_node_info = self.my_node_info.values()
3236 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3238 feedback_fn("* Verifying node status")
3242 for node_i in node_data_list:
3244 nimg = node_image[node]
3248 feedback_fn("* Skipping offline node %s" % (node,))
3252 if node == master_node:
3254 elif node_i.master_candidate:
3255 ntype = "master candidate"
3256 elif node_i.drained:
3262 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3264 msg = all_nvinfo[node].fail_msg
3265 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3268 nimg.rpc_fail = True
3271 nresult = all_nvinfo[node].payload
3273 nimg.call_ok = self._VerifyNode(node_i, nresult)
3274 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3275 self._VerifyNodeNetwork(node_i, nresult)
3276 self._VerifyNodeUserScripts(node_i, nresult)
3277 self._VerifyOob(node_i, nresult)
3280 self._VerifyNodeLVM(node_i, nresult, vg_name)
3281 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3284 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3285 self._UpdateNodeInstances(node_i, nresult, nimg)
3286 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3287 self._UpdateNodeOS(node_i, nresult, nimg)
3289 if not nimg.os_fail:
3290 if refos_img is None:
3292 self._VerifyNodeOS(node_i, nimg, refos_img)
3293 self._VerifyNodeBridges(node_i, nresult, bridges)
3295 # Check whether all running instances are primary for the node. (This
3296 # can no longer be done from _VerifyInstance below, since some of the
3297 # wrong instances could be from other node groups.)
3298 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3300 for inst in non_primary_inst:
3301 test = inst in self.all_inst_info
3302 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3303 "instance should not run on node %s", node_i.name)
3304 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3305 "node is running unknown instance %s", inst)
3307 for node, result in extra_lv_nvinfo.items():
3308 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3309 node_image[node], vg_name)
3311 feedback_fn("* Verifying instance status")
3312 for instance in self.my_inst_names:
3314 feedback_fn("* Verifying instance %s" % instance)
3315 inst_config = self.my_inst_info[instance]
3316 self._VerifyInstance(instance, inst_config, node_image,
3318 inst_nodes_offline = []
3320 pnode = inst_config.primary_node
3321 pnode_img = node_image[pnode]
3322 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3323 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3324 " primary node failed", instance)
3326 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3328 constants.CV_EINSTANCEBADNODE, instance,
3329 "instance is marked as running and lives on offline node %s",
3330 inst_config.primary_node)
3332 # If the instance is non-redundant we cannot survive losing its primary
3333 # node, so we are not N+1 compliant. On the other hand we have no disk
3334 # templates with more than one secondary so that situation is not well handled.
3336 # FIXME: does not support file-backed instances
3337 if not inst_config.secondary_nodes:
3338 i_non_redundant.append(instance)
3340 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3341 constants.CV_EINSTANCELAYOUT,
3342 instance, "instance has multiple secondary nodes: %s",
3343 utils.CommaJoin(inst_config.secondary_nodes),
3344 code=self.ETYPE_WARNING)
3346 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3347 pnode = inst_config.primary_node
3348 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3349 instance_groups = {}
3351 for node in instance_nodes:
3352 instance_groups.setdefault(self.all_node_info[node].group,
3356 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3357 # Sort so that we always list the primary node first.
3358 for group, nodes in sorted(instance_groups.items(),
3359 key=lambda (_, nodes): pnode in nodes,
3362 self._ErrorIf(len(instance_groups) > 1,
3363 constants.CV_EINSTANCESPLITGROUPS,
3364 instance, "instance has primary and secondary nodes in"
3365 " different groups: %s", utils.CommaJoin(pretty_list),
3366 code=self.ETYPE_WARNING)
3368 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3369 i_non_a_balanced.append(instance)
3371 for snode in inst_config.secondary_nodes:
3372 s_img = node_image[snode]
3373 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3374 snode, "instance %s, connection to secondary node failed",
3378 inst_nodes_offline.append(snode)
3380 # warn that the instance lives on offline nodes
3381 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3382 "instance has offline secondary node(s) %s",
3383 utils.CommaJoin(inst_nodes_offline))
3384 # ... or ghost/non-vm_capable nodes
3385 for node in inst_config.all_nodes:
3386 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3387 instance, "instance lives on ghost node %s", node)
3388 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3389 instance, "instance lives on non-vm_capable node %s", node)
3391 feedback_fn("* Verifying orphan volumes")
3392 reserved = utils.FieldSet(*cluster.reserved_lvs)
3394 # We will get spurious "unknown volume" warnings if any node of this group
3395 # is secondary for an instance whose primary is in another group. To avoid
3396 # them, we find these instances and add their volumes to node_vol_should.
3397 for inst in self.all_inst_info.values():
3398 for secondary in inst.secondary_nodes:
3399 if (secondary in self.my_node_info
3400 and inst.name not in self.my_inst_info):
3401 inst.MapLVsByNode(node_vol_should)
3404 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3406 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3407 feedback_fn("* Verifying N+1 Memory redundancy")
3408 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3410 feedback_fn("* Other Notes")
3412 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3413 % len(i_non_redundant))
3415 if i_non_a_balanced:
3416 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3417 % len(i_non_a_balanced))
3420 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3423 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3426 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3430 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3431 """Analyze the post-hooks' result
3433 This method analyses the hook result, handles it, and sends some
3434 nicely-formatted feedback back to the user.
3436 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3437 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3438 @param hooks_results: the results of the multi-node hooks rpc call
3439 @param feedback_fn: function used to send feedback back to the caller
3440 @param lu_result: previous Exec result
3441 @return: the new Exec result, based on the previous result
3445 # We only really run POST phase hooks, only for non-empty groups,
3446 # and are only interested in their results
3447 if not self.my_node_names:
3450 elif phase == constants.HOOKS_PHASE_POST:
3451 # Used to change hooks' output to proper indentation
3452 feedback_fn("* Hooks Results")
3453 assert hooks_results, "invalid result from hooks"
3455 for node_name in hooks_results:
3456 res = hooks_results[node_name]
3458 test = msg and not res.offline
3459 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3460 "Communication failure in hooks execution: %s", msg)
3461 if res.offline or msg:
3462 # No need to investigate payload if node is offline or gave an error.
3465 for script, hkr, output in res.payload:
3466 test = hkr == constants.HKR_FAIL
3467 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3468 "Script %s failed, output:", script)
3470 output = self._HOOKS_INDENT_RE.sub(" ", output)
3471 feedback_fn("%s" % output)
3477 class LUClusterVerifyDisks(NoHooksLU):
3478 """Verifies the cluster disks status.
3483 def ExpandNames(self):
3484 self.share_locks = _ShareAll()
3485 self.needed_locks = {
3486 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3489 def Exec(self, feedback_fn):
3490 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3492 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3493 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3494 for group in group_names])
3497 class LUGroupVerifyDisks(NoHooksLU):
3498 """Verifies the status of all disks in a node group.
3503 def ExpandNames(self):
3504 # Raises errors.OpPrereqError on its own if group can't be found
3505 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3507 self.share_locks = _ShareAll()
3508 self.needed_locks = {
3509 locking.LEVEL_INSTANCE: [],
3510 locking.LEVEL_NODEGROUP: [],
3511 locking.LEVEL_NODE: [],
3514 def DeclareLocks(self, level):
3515 if level == locking.LEVEL_INSTANCE:
3516 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3518 # Lock instances optimistically, needs verification once node and group
3519 # locks have been acquired
3520 self.needed_locks[locking.LEVEL_INSTANCE] = \
3521 self.cfg.GetNodeGroupInstances(self.group_uuid)
3523 elif level == locking.LEVEL_NODEGROUP:
3524 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3526 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3527 set([self.group_uuid] +
3528 # Lock all groups used by instances optimistically; this requires
3529 # going via the node before it's locked, requiring verification
3532 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3533 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3535 elif level == locking.LEVEL_NODE:
3536 # This will only lock the nodes in the group to be verified which contain actual instances
3538 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3539 self._LockInstancesNodes()
3541 # Lock all nodes in group to be verified
3542 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3543 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3544 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3546 def CheckPrereq(self):
3547 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3548 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3549 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3551 assert self.group_uuid in owned_groups
3553 # Check if locked instances are still correct
3554 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3556 # Get instance information
3557 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3559 # Check if node groups for locked instances are still correct
3560 _CheckInstancesNodeGroups(self.cfg, self.instances,
3561 owned_groups, owned_nodes, self.group_uuid)
3563 def Exec(self, feedback_fn):
3564 """Verify integrity of cluster disks.
3566 @rtype: tuple of three items
3567 @return: a tuple of (dict of node-to-node_error, list of instances
3568 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3573 res_instances = set()
3576 nv_dict = _MapInstanceDisksToNodes([inst
3577 for inst in self.instances.values()
3578 if inst.admin_state == constants.ADMINST_UP])
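# nv_dict maps (node_name, lv_name) -> instance object; entries are popped
# below as their LVs are reported, so whatever is left afterwards is a
# missing volume.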
3581 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3582 set(self.cfg.GetVmCapableNodeList()))
3584 node_lvs = self.rpc.call_lv_list(nodes, [])
3586 for (node, node_res) in node_lvs.items():
3587 if node_res.offline:
3590 msg = node_res.fail_msg
3592 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3593 res_nodes[node] = msg
3596 for lv_name, (_, _, lv_online) in node_res.payload.items():
3597 inst = nv_dict.pop((node, lv_name), None)
3598 if not (lv_online or inst is None):
3599 res_instances.add(inst)
3601 # any leftover items in nv_dict are missing LVs; arrange the data for the return value
3603 for key, inst in nv_dict.iteritems():
3604 res_missing.setdefault(inst, []).append(list(key))
3606 return (res_nodes, list(res_instances), res_missing)
3609 class LUClusterRepairDiskSizes(NoHooksLU):
3610 """Verifies the cluster disks sizes.
3615 def ExpandNames(self):
3616 if self.op.instances:
3617 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3618 self.needed_locks = {
3619 locking.LEVEL_NODE_RES: [],
3620 locking.LEVEL_INSTANCE: self.wanted_names,
3622 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3624 self.wanted_names = None
3625 self.needed_locks = {
3626 locking.LEVEL_NODE_RES: locking.ALL_SET,
3627 locking.LEVEL_INSTANCE: locking.ALL_SET,
3629 self.share_locks = {
3630 locking.LEVEL_NODE_RES: 1,
3631 locking.LEVEL_INSTANCE: 0,
3634 def DeclareLocks(self, level):
3635 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3636 self._LockInstancesNodes(primary_only=True, level=level)
3638 def CheckPrereq(self):
3639 """Check prerequisites.
3641 This only checks the optional instance list against the existing names.
3644 if self.wanted_names is None:
3645 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3647 self.wanted_instances = \
3648 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3650 def _EnsureChildSizes(self, disk):
3651 """Ensure children of the disk have the needed disk size.
3653 This is valid mainly for DRBD8 and fixes an issue where the
3654 children have smaller disk size.
3656 @param disk: an L{ganeti.objects.Disk} object
3659 if disk.dev_type == constants.LD_DRBD8:
3660 assert disk.children, "Empty children for DRBD8?"
3661 fchild = disk.children[0]
3662 mismatch = fchild.size < disk.size
3664 self.LogInfo("Child disk has size %d, parent %d, fixing",
3665 fchild.size, disk.size)
3666 fchild.size = disk.size
3668 # and we recurse on this child only, not on the metadev
3669 return self._EnsureChildSizes(fchild) or mismatch
3673 def Exec(self, feedback_fn):
3674 """Verify the size of cluster disks.
3677 # TODO: check child disks too
3678 # TODO: check differences in size between primary/secondary nodes
3680 for instance in self.wanted_instances:
3681 pnode = instance.primary_node
3682 if pnode not in per_node_disks:
3683 per_node_disks[pnode] = []
3684 for idx, disk in enumerate(instance.disks):
3685 per_node_disks[pnode].append((instance, idx, disk))
3687 assert not (frozenset(per_node_disks.keys()) -
3688 self.owned_locks(locking.LEVEL_NODE_RES)), \
3689 "Not owning correct locks"
3690 assert not self.owned_locks(locking.LEVEL_NODE)
3693 for node, dskl in per_node_disks.items():
3694 newl = [v[2].Copy() for v in dskl]
3696 self.cfg.SetDiskID(dsk, node)
3697 result = self.rpc.call_blockdev_getsize(node, newl)
3699 self.LogWarning("Failure in blockdev_getsize call to node"
3700 " %s, ignoring", node)
3702 if len(result.payload) != len(dskl):
3703 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3704 " result.payload=%s", node, len(dskl), result.payload)
3705 self.LogWarning("Invalid result from node %s, ignoring node results",
3708 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3710 self.LogWarning("Disk %d of instance %s did not return size"
3711 " information, ignoring", idx, instance.name)
3713 if not isinstance(size, (int, long)):
3714 self.LogWarning("Disk %d of instance %s did not return valid"
3715 " size information, ignoring", idx, instance.name)
3718 if size != disk.size:
3719 self.LogInfo("Disk %d of instance %s has mismatched size,"
3720 " correcting: recorded %d, actual %d", idx,
3721 instance.name, disk.size, size)
3723 self.cfg.Update(instance, feedback_fn)
3724 changed.append((instance.name, idx, size))
3725 if self._EnsureChildSizes(disk):
3726 self.cfg.Update(instance, feedback_fn)
3727 changed.append((instance.name, idx, disk.size))
3731 class LUClusterRename(LogicalUnit):
3732 """Rename the cluster.
3735 HPATH = "cluster-rename"
3736 HTYPE = constants.HTYPE_CLUSTER
3738 def BuildHooksEnv(self):
3743 "OP_TARGET": self.cfg.GetClusterName(),
3744 "NEW_NAME": self.op.name,
3747 def BuildHooksNodes(self):
3748 """Build hooks nodes.
3751 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3753 def CheckPrereq(self):
3754 """Verify that the passed name is a valid one.
3757 hostname = netutils.GetHostname(name=self.op.name,
3758 family=self.cfg.GetPrimaryIPFamily())
3760 new_name = hostname.name
3761 self.ip = new_ip = hostname.ip
3762 old_name = self.cfg.GetClusterName()
3763 old_ip = self.cfg.GetMasterIP()
3764 if new_name == old_name and new_ip == old_ip:
3765 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3766 " cluster has changed",
3768 if new_ip != old_ip:
3769 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3770 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3771 " reachable on the network" %
3772 new_ip, errors.ECODE_NOTUNIQUE)
3774 self.op.name = new_name
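# Exec (below) performs the rename in four steps: deactivate the master IP,
# write the new name/IP into the cluster configuration, regenerate and
# redistribute the SSH known_hosts file, and finally try to bring the master
# IP back up, only warning (not failing) if that last step does not succeed.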
3776 def Exec(self, feedback_fn):
3777 """Rename the cluster.
3780 clustername = self.op.name
3783 # shutdown the master IP
3784 master_params = self.cfg.GetMasterNetworkParameters()
3785 ems = self.cfg.GetUseExternalMipScript()
3786 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3788 result.Raise("Could not disable the master role")
3791 cluster = self.cfg.GetClusterInfo()
3792 cluster.cluster_name = clustername
3793 cluster.master_ip = new_ip
3794 self.cfg.Update(cluster, feedback_fn)
3796 # update the known hosts file
3797 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3798 node_list = self.cfg.GetOnlineNodeList()
3800 node_list.remove(master_params.name)
3803 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3805 master_params.ip = new_ip
3806 result = self.rpc.call_node_activate_master_ip(master_params.name,
3808 msg = result.fail_msg
3810 self.LogWarning("Could not re-enable the master role on"
3811 " the master, please restart manually: %s", msg)
3816 def _ValidateNetmask(cfg, netmask):
3817 """Checks if a netmask is valid.
3819 @type cfg: L{config.ConfigWriter}
3820 @param cfg: The cluster configuration
3822 @param netmask: the netmask to be verified
3823 @raise errors.OpPrereqError: if the validation fails
3826 ip_family = cfg.GetPrimaryIPFamily()
3828 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3829 except errors.ProgrammerError:
3830 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3832 if not ipcls.ValidateNetmask(netmask):
3833 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
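# Note: the netmask is interpreted as a CIDR prefix length for the cluster's
# primary IP family; _ValidateNetmask is used by LUClusterSetParams below
# before accepting a new master_netmask value.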
3837 class LUClusterSetParams(LogicalUnit):
3838 """Change the parameters of the cluster.
3841 HPATH = "cluster-modify"
3842 HTYPE = constants.HTYPE_CLUSTER
3845 def CheckArguments(self):
3849 if self.op.uid_pool:
3850 uidpool.CheckUidPool(self.op.uid_pool)
3852 if self.op.add_uids:
3853 uidpool.CheckUidPool(self.op.add_uids)
3855 if self.op.remove_uids:
3856 uidpool.CheckUidPool(self.op.remove_uids)
3858 if self.op.master_netmask is not None:
3859 _ValidateNetmask(self.cfg, self.op.master_netmask)
3861 if self.op.diskparams:
3862 for dt_params in self.op.diskparams.values():
3863 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3865 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3866 except errors.OpPrereqError, err:
3867 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3870 def ExpandNames(self):
3871 # FIXME: in the future maybe other cluster params won't require checking on
3872 # all nodes to be modified.
3873 self.needed_locks = {
3874 locking.LEVEL_NODE: locking.ALL_SET,
3875 locking.LEVEL_INSTANCE: locking.ALL_SET,
3876 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3878 self.share_locks = {
3879 locking.LEVEL_NODE: 1,
3880 locking.LEVEL_INSTANCE: 1,
3881 locking.LEVEL_NODEGROUP: 1,
3884 def BuildHooksEnv(self):
3889 "OP_TARGET": self.cfg.GetClusterName(),
3890 "NEW_VG_NAME": self.op.vg_name,
3893 def BuildHooksNodes(self):
3894 """Build hooks nodes.
3897 mn = self.cfg.GetMasterNode()
3900 def CheckPrereq(self):
3901 """Check prerequisites.
3903 This checks whether the given params don't conflict and
3904 if the given volume group is valid.
3907 if self.op.vg_name is not None and not self.op.vg_name:
3908 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3909 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3910 " instances exist", errors.ECODE_INVAL)
3912 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3913 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3914 raise errors.OpPrereqError("Cannot disable drbd helper while"
3915 " drbd-based instances exist",
3918 node_list = self.owned_locks(locking.LEVEL_NODE)
3920 # if vg_name not None, checks given volume group on all nodes
3922 vglist = self.rpc.call_vg_list(node_list)
3923 for node in node_list:
3924 msg = vglist[node].fail_msg
3926 # ignoring down node
3927 self.LogWarning("Error while gathering data on node %s"
3928 " (ignoring node): %s", node, msg)
3930 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3932 constants.MIN_VG_SIZE)
3934 raise errors.OpPrereqError("Error on node '%s': %s" %
3935 (node, vgstatus), errors.ECODE_ENVIRON)
3937 if self.op.drbd_helper:
3938 # checks given drbd helper on all nodes
3939 helpers = self.rpc.call_drbd_helper(node_list)
3940 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3942 self.LogInfo("Not checking drbd helper on offline node %s", node)
3944 msg = helpers[node].fail_msg
3946 raise errors.OpPrereqError("Error checking drbd helper on node"
3947 " '%s': %s" % (node, msg),
3948 errors.ECODE_ENVIRON)
3949 node_helper = helpers[node].payload
3950 if node_helper != self.op.drbd_helper:
3951 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3952 (node, node_helper), errors.ECODE_ENVIRON)
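# The remainder of CheckPrereq only builds and validates candidate "new_*"
# parameter dictionaries (beparams, ndparams, hv/disk state, ipolicy,
# nicparams, hvparams, diskparams, OS parameters); nothing is written to the
# configuration until Exec runs.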
3954 self.cluster = cluster = self.cfg.GetClusterInfo()
3955 # validate params changes
3956 if self.op.beparams:
3957 objects.UpgradeBeParams(self.op.beparams)
3958 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3959 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3961 if self.op.ndparams:
3962 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3963 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3965 # TODO: we need a more general way to handle resetting
3966 # cluster-level parameters to default values
3967 if self.new_ndparams["oob_program"] == "":
3968 self.new_ndparams["oob_program"] = \
3969 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3971 if self.op.hv_state:
3972 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3973 self.cluster.hv_state_static)
3974 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3975 for hv, values in new_hv_state.items())
3977 if self.op.disk_state:
3978 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3979 self.cluster.disk_state_static)
3980 self.new_disk_state = \
3981 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3982 for name, values in svalues.items()))
3983 for storage, svalues in new_disk_state.items())
3986 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3989 all_instances = self.cfg.GetAllInstancesInfo().values()
3991 for group in self.cfg.GetAllNodeGroupsInfo().values():
3992 instances = frozenset([inst for inst in all_instances
3993 if compat.any(node in group.members
3994 for node in inst.all_nodes)])
3995 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3996 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3998 new_ipolicy, instances)
4000 violations.update(new)
4003 self.LogWarning("After the ipolicy change the following instances"
4004 " violate them: %s",
4005 utils.CommaJoin(utils.NiceSort(violations)))
4007 if self.op.nicparams:
4008 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4009 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4010 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4013 # check all instances for consistency
4014 for instance in self.cfg.GetAllInstancesInfo().values():
4015 for nic_idx, nic in enumerate(instance.nics):
4016 params_copy = copy.deepcopy(nic.nicparams)
4017 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4019 # check parameter syntax
4021 objects.NIC.CheckParameterSyntax(params_filled)
4022 except errors.ConfigurationError, err:
4023 nic_errors.append("Instance %s, nic/%d: %s" %
4024 (instance.name, nic_idx, err))
4026 # if we're moving instances to routed, check that they have an ip
4027 target_mode = params_filled[constants.NIC_MODE]
4028 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4029 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4030 " address" % (instance.name, nic_idx))
4032 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4033 "\n".join(nic_errors))
4035 # hypervisor list/parameters
4036 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4037 if self.op.hvparams:
4038 for hv_name, hv_dict in self.op.hvparams.items():
4039 if hv_name not in self.new_hvparams:
4040 self.new_hvparams[hv_name] = hv_dict
4042 self.new_hvparams[hv_name].update(hv_dict)
4044 # disk template parameters
4045 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4046 if self.op.diskparams:
4047 for dt_name, dt_params in self.op.diskparams.items():
4048 if dt_name not in self.new_diskparams:
4049 self.new_diskparams[dt_name] = dt_params
4051 self.new_diskparams[dt_name].update(dt_params)
4053 # os hypervisor parameters
4054 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4056 for os_name, hvs in self.op.os_hvp.items():
4057 if os_name not in self.new_os_hvp:
4058 self.new_os_hvp[os_name] = hvs
4060 for hv_name, hv_dict in hvs.items():
4061 if hv_name not in self.new_os_hvp[os_name]:
4062 self.new_os_hvp[os_name][hv_name] = hv_dict
4064 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4067 self.new_osp = objects.FillDict(cluster.osparams, {})
4068 if self.op.osparams:
4069 for os_name, osp in self.op.osparams.items():
4070 if os_name not in self.new_osp:
4071 self.new_osp[os_name] = {}
4073 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4076 if not self.new_osp[os_name]:
4077 # we removed all parameters
4078 del self.new_osp[os_name]
4080 # check the parameter validity (remote check)
4081 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4082 os_name, self.new_osp[os_name])
4084 # changes to the hypervisor list
4085 if self.op.enabled_hypervisors is not None:
4086 self.hv_list = self.op.enabled_hypervisors
4087 for hv in self.hv_list:
4088 # if the hypervisor doesn't already exist in the cluster
4089 # hvparams, we initialize it to empty, and then (in both
4090 # cases) we make sure to fill the defaults, as we might not
4091 # have a complete defaults list if the hypervisor wasn't
4093 if hv not in new_hvp:
4095 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4096 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4098 self.hv_list = cluster.enabled_hypervisors
4100 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4101 # either the enabled list has changed, or the parameters have, validate
4102 for hv_name, hv_params in self.new_hvparams.items():
4103 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4104 (self.op.enabled_hypervisors and
4105 hv_name in self.op.enabled_hypervisors)):
4106 # either this is a new hypervisor, or its parameters have changed
4107 hv_class = hypervisor.GetHypervisor(hv_name)
4108 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4109 hv_class.CheckParameterSyntax(hv_params)
4110 _CheckHVParams(self, node_list, hv_name, hv_params)
4113 # no need to check any newly-enabled hypervisors, since the
4114 # defaults have already been checked in the above code-block
4115 for os_name, os_hvp in self.new_os_hvp.items():
4116 for hv_name, hv_params in os_hvp.items():
4117 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4118 # we need to fill in the new os_hvp on top of the actual hv_p
4119 cluster_defaults = self.new_hvparams.get(hv_name, {})
4120 new_osp = objects.FillDict(cluster_defaults, hv_params)
4121 hv_class = hypervisor.GetHypervisor(hv_name)
4122 hv_class.CheckParameterSyntax(new_osp)
4123 _CheckHVParams(self, node_list, hv_name, new_osp)
4125 if self.op.default_iallocator:
4126 alloc_script = utils.FindFile(self.op.default_iallocator,
4127 constants.IALLOCATOR_SEARCH_PATH,
4129 if alloc_script is None:
4130 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4131 " specified" % self.op.default_iallocator,
4134 def Exec(self, feedback_fn):
4135 """Change the parameters of the cluster.
4138 if self.op.vg_name is not None:
4139 new_volume = self.op.vg_name
4142 if new_volume != self.cfg.GetVGName():
4143 self.cfg.SetVGName(new_volume)
4145 feedback_fn("Cluster LVM configuration already in desired"
4146 " state, not changing")
4147 if self.op.drbd_helper is not None:
4148 new_helper = self.op.drbd_helper
4151 if new_helper != self.cfg.GetDRBDHelper():
4152 self.cfg.SetDRBDHelper(new_helper)
4154 feedback_fn("Cluster DRBD helper already in desired state,"
4156 if self.op.hvparams:
4157 self.cluster.hvparams = self.new_hvparams
4159 self.cluster.os_hvp = self.new_os_hvp
4160 if self.op.enabled_hypervisors is not None:
4161 self.cluster.hvparams = self.new_hvparams
4162 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4163 if self.op.beparams:
4164 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4165 if self.op.nicparams:
4166 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4168 self.cluster.ipolicy = self.new_ipolicy
4169 if self.op.osparams:
4170 self.cluster.osparams = self.new_osp
4171 if self.op.ndparams:
4172 self.cluster.ndparams = self.new_ndparams
4173 if self.op.diskparams:
4174 self.cluster.diskparams = self.new_diskparams
4175 if self.op.hv_state:
4176 self.cluster.hv_state_static = self.new_hv_state
4177 if self.op.disk_state:
4178 self.cluster.disk_state_static = self.new_disk_state
4180 if self.op.candidate_pool_size is not None:
4181 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4182 # we need to update the pool size here, otherwise the save will fail
4183 _AdjustCandidatePool(self, [])
4185 if self.op.maintain_node_health is not None:
4186 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4187 feedback_fn("Note: CONFD was disabled at build time, node health"
4188 " maintenance is not useful (still enabling it)")
4189 self.cluster.maintain_node_health = self.op.maintain_node_health
4191 if self.op.prealloc_wipe_disks is not None:
4192 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4194 if self.op.add_uids is not None:
4195 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4197 if self.op.remove_uids is not None:
4198 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4200 if self.op.uid_pool is not None:
4201 self.cluster.uid_pool = self.op.uid_pool
4203 if self.op.default_iallocator is not None:
4204 self.cluster.default_iallocator = self.op.default_iallocator
4206 if self.op.reserved_lvs is not None:
4207 self.cluster.reserved_lvs = self.op.reserved_lvs
4209 if self.op.use_external_mip_script is not None:
4210 self.cluster.use_external_mip_script = self.op.use_external_mip_script
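# helper_os applies DDM_ADD/DDM_REMOVE modifications to the cluster's
# hidden_os and blacklisted_os lists, warning (instead of failing) when an OS
# to add is already present or an OS to remove is missing.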
4212 def helper_os(aname, mods, desc):
4214 lst = getattr(self.cluster, aname)
4215 for key, val in mods:
4216 if key == constants.DDM_ADD:
4218 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4221 elif key == constants.DDM_REMOVE:
4225 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4227 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4229 if self.op.hidden_os:
4230 helper_os("hidden_os", self.op.hidden_os, "hidden")
4232 if self.op.blacklisted_os:
4233 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4235 if self.op.master_netdev:
4236 master_params = self.cfg.GetMasterNetworkParameters()
4237 ems = self.cfg.GetUseExternalMipScript()
4238 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4239 self.cluster.master_netdev)
4240 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4242 result.Raise("Could not disable the master ip")
4243 feedback_fn("Changing master_netdev from %s to %s" %
4244 (master_params.netdev, self.op.master_netdev))
4245 self.cluster.master_netdev = self.op.master_netdev
4247 if self.op.master_netmask:
4248 master_params = self.cfg.GetMasterNetworkParameters()
4249 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4250 result = self.rpc.call_node_change_master_netmask(master_params.name,
4251 master_params.netmask,
4252 self.op.master_netmask,
4254 master_params.netdev)
4256 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4259 self.cluster.master_netmask = self.op.master_netmask
4261 self.cfg.Update(self.cluster, feedback_fn)
4263 if self.op.master_netdev:
4264 master_params = self.cfg.GetMasterNetworkParameters()
4265 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4266 self.op.master_netdev)
4267 ems = self.cfg.GetUseExternalMipScript()
4268 result = self.rpc.call_node_activate_master_ip(master_params.name,
4271 self.LogWarning("Could not re-enable the master ip on"
4272 " the master, please restart manually: %s",
4276 def _UploadHelper(lu, nodes, fname):
4277 """Helper for uploading a file and showing warnings.
4280 if os.path.exists(fname):
4281 result = lu.rpc.call_upload_file(nodes, fname)
4282 for to_node, to_result in result.items():
4283 msg = to_result.fail_msg
4285 msg = ("Copy of file %s to node %s failed: %s" %
4286 (fname, to_node, msg))
4287 lu.proc.LogWarning(msg)
4290 def _ComputeAncillaryFiles(cluster, redist):
4291 """Compute files external to Ganeti which need to be consistent.
4293 @type redist: boolean
4294 @param redist: Whether to include files which need to be redistributed
4297 # Compute files for all nodes
4299 constants.SSH_KNOWN_HOSTS_FILE,
4300 constants.CONFD_HMAC_KEY,
4301 constants.CLUSTER_DOMAIN_SECRET_FILE,
4302 constants.SPICE_CERT_FILE,
4303 constants.SPICE_CACERT_FILE,
4304 constants.RAPI_USERS_FILE,
4308 files_all.update(constants.ALL_CERT_FILES)
4309 files_all.update(ssconf.SimpleStore().GetFileList())
4311 # we need to ship at least the RAPI certificate
4312 files_all.add(constants.RAPI_CERT_FILE)
4314 if cluster.modify_etc_hosts:
4315 files_all.add(constants.ETC_HOSTS)
4317 if cluster.use_external_mip_script:
4318 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4320 # Files which are optional; these must:
4321 # - be present in one other category as well
4322 # - either exist or not exist on all nodes of that category (mc, vm all)
4324 constants.RAPI_USERS_FILE,
4327 # Files which should only be on master candidates
4331 files_mc.add(constants.CLUSTER_CONF_FILE)
4333 # Files which should only be on VM-capable nodes
4334 files_vm = set(filename
4335 for hv_name in cluster.enabled_hypervisors
4336 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4338 files_opt |= set(filename
4339 for hv_name in cluster.enabled_hypervisors
4340 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4342 # Filenames in each category must be unique
4343 all_files_set = files_all | files_mc | files_vm
4344 assert (len(all_files_set) ==
4345 sum(map(len, [files_all, files_mc, files_vm]))), \
4346 "Found file listed in more than one file list"
4348 # Optional files must be present in one other category
4349 assert all_files_set.issuperset(files_opt), \
4350 "Optional file not in a different required list"
4352 return (files_all, files_opt, files_mc, files_vm)
4355 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4356 """Distribute additional files which are part of the cluster configuration.
4358 ConfigWriter takes care of distributing the config and ssconf files, but
4359 there are more files which should be distributed to all nodes. This function
4360 makes sure those are copied.
4362 @param lu: calling logical unit
4363 @param additional_nodes: list of nodes not in the config to distribute to
4364 @type additional_vm: boolean
4365 @param additional_vm: whether the additional nodes are vm-capable or not
4368 # Gather target nodes
4369 cluster = lu.cfg.GetClusterInfo()
4370 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4372 online_nodes = lu.cfg.GetOnlineNodeList()
4373 online_set = frozenset(online_nodes)
4374 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4376 if additional_nodes is not None:
4377 online_nodes.extend(additional_nodes)
4379 vm_nodes.extend(additional_nodes)
4381 # Never distribute to master node
4382 for nodelist in [online_nodes, vm_nodes]:
4383 if master_info.name in nodelist:
4384 nodelist.remove(master_info.name)
4387 (files_all, _, files_mc, files_vm) = \
4388 _ComputeAncillaryFiles(cluster, True)
4390 # Never re-distribute configuration file from here
4391 assert not (constants.CLUSTER_CONF_FILE in files_all or
4392 constants.CLUSTER_CONF_FILE in files_vm)
4393 assert not files_mc, "Master candidates not handled in this function"
4396 (online_nodes, files_all),
4397 (vm_nodes, files_vm),
4401 for (node_list, files) in filemap:
4403 _UploadHelper(lu, node_list, fname)
4406 class LUClusterRedistConf(NoHooksLU):
4407 """Force the redistribution of cluster configuration.
4409 This is a very simple LU.
4414 def ExpandNames(self):
4415 self.needed_locks = {
4416 locking.LEVEL_NODE: locking.ALL_SET,
4418 self.share_locks[locking.LEVEL_NODE] = 1
4420 def Exec(self, feedback_fn):
4421 """Redistribute the configuration.
4424 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4425 _RedistributeAncillaryFiles(self)
4428 class LUClusterActivateMasterIp(NoHooksLU):
4429 """Activate the master IP on the master node.
4432 def Exec(self, feedback_fn):
4433 """Activate the master IP.
4436 master_params = self.cfg.GetMasterNetworkParameters()
4437 ems = self.cfg.GetUseExternalMipScript()
4438 result = self.rpc.call_node_activate_master_ip(master_params.name,
4440 result.Raise("Could not activate the master IP")
4443 class LUClusterDeactivateMasterIp(NoHooksLU):
4444 """Deactivate the master IP on the master node.
4447 def Exec(self, feedback_fn):
4448 """Deactivate the master IP.
4451 master_params = self.cfg.GetMasterNetworkParameters()
4452 ems = self.cfg.GetUseExternalMipScript()
4453 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4455 result.Raise("Could not deactivate the master IP")
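# _WaitForSync polls call_blockdev_getmirrorstatus on the instance's primary
# node, logging per-device sync progress; once syncing looks finished but a
# device still reports itself degraded, it retries a few more times so that a
# transient degraded state is not mistaken for a stable one.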
4458 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4459 """Sleep and poll for an instance's disk to sync.
4462 if not instance.disks or disks is not None and not disks:
4465 disks = _ExpandCheckDisks(instance, disks)
4468 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4470 node = instance.primary_node
4473 lu.cfg.SetDiskID(dev, node)
4475 # TODO: Convert to utils.Retry
4478 degr_retries = 10 # in seconds, as we sleep 1 second each time
4482 cumul_degraded = False
4483 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4484 msg = rstats.fail_msg
4486 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4489 raise errors.RemoteError("Can't contact node %s for mirror data,"
4490 " aborting." % node)
4493 rstats = rstats.payload
4495 for i, mstat in enumerate(rstats):
4497 lu.LogWarning("Can't compute data for node %s/%s",
4498 node, disks[i].iv_name)
4501 cumul_degraded = (cumul_degraded or
4502 (mstat.is_degraded and mstat.sync_percent is None))
4503 if mstat.sync_percent is not None:
4505 if mstat.estimated_time is not None:
4506 rem_time = ("%s remaining (estimated)" %
4507 utils.FormatSeconds(mstat.estimated_time))
4508 max_time = mstat.estimated_time
4510 rem_time = "no time estimate"
4511 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4512 (disks[i].iv_name, mstat.sync_percent, rem_time))
4514 # if we're done but degraded, let's do a few small retries, to
4515 # make sure we see a stable and not transient situation; therefore
4516 # we force restart of the loop
4517 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4518 logging.info("Degraded disks found, %d retries left", degr_retries)
4526 time.sleep(min(60, max_time))
4529 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4530 return not cumul_degraded
4533 def _BlockdevFind(lu, node, dev, instance):
4534 """Wrapper around call_blockdev_find to annotate diskparams.
4536 @param lu: A reference to the lu object
4537 @param node: The node to call out
4538 @param dev: The device to find
4539 @param instance: The instance object the device belongs to
4540 @returns The result of the rpc call
4543 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4544 return lu.rpc.call_blockdev_find(node, disk)
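# Both _BlockdevFind and _CheckDiskConsistency first run the disk object
# through _AnnotateDiskParams, so the node-side code receives the disk
# parameters resolved for this instance before the RPC is issued.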
4547 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4548 """Wrapper around L{_CheckDiskConsistencyInner}.
4551 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4552 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4556 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4558 """Check that mirrors are not degraded.
4560 @attention: The device has to be annotated already.
4562 The ldisk parameter, if True, will change the test from the
4563 is_degraded attribute (which represents overall non-ok status for
4564 the device(s)) to the ldisk (representing the local storage status).
4567 lu.cfg.SetDiskID(dev, node)
4571 if on_primary or dev.AssembleOnSecondary():
4572 rstats = lu.rpc.call_blockdev_find(node, dev)
4573 msg = rstats.fail_msg
4575 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4577 elif not rstats.payload:
4578 lu.LogWarning("Can't find disk on node %s", node)
4582 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4584 result = result and not rstats.payload.is_degraded
4587 for child in dev.children:
4588 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4594 class LUOobCommand(NoHooksLU):
4595 """Logical unit for OOB handling.
4599 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4601 def ExpandNames(self):
4602 """Gather locks we need.
4605 if self.op.node_names:
4606 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4607 lock_names = self.op.node_names
4609 lock_names = locking.ALL_SET
4611 self.needed_locks = {
4612 locking.LEVEL_NODE: lock_names,
4615 def CheckPrereq(self):
4616 """Check prerequisites.
4619 - the node exists in the configuration
4622 Any errors are signaled by raising errors.OpPrereqError.
4626 self.master_node = self.cfg.GetMasterNode()
4628 assert self.op.power_delay >= 0.0
4630 if self.op.node_names:
4631 if (self.op.command in self._SKIP_MASTER and
4632 self.master_node in self.op.node_names):
4633 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4634 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4636 if master_oob_handler:
4637 additional_text = ("run '%s %s %s' if you want to operate on the"
4638 " master regardless") % (master_oob_handler,
4642 additional_text = "it does not support out-of-band operations"
4644 raise errors.OpPrereqError(("Operating on the master node %s is not"
4645 " allowed for %s; %s") %
4646 (self.master_node, self.op.command,
4647 additional_text), errors.ECODE_INVAL)
4649 self.op.node_names = self.cfg.GetNodeList()
4650 if self.op.command in self._SKIP_MASTER:
4651 self.op.node_names.remove(self.master_node)
4653 if self.op.command in self._SKIP_MASTER:
4654 assert self.master_node not in self.op.node_names
4656 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4658 raise errors.OpPrereqError("Node %s not found" % node_name,
4661 self.nodes.append(node)
4663 if (not self.op.ignore_status and
4664 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4665 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4666 " not marked offline") % node_name,
4669 def Exec(self, feedback_fn):
4670 """Execute OOB and return result if we expect any.
4673 master_node = self.master_node
4676 for idx, node in enumerate(utils.NiceSort(self.nodes,
4677 key=lambda node: node.name)):
4678 node_entry = [(constants.RS_NORMAL, node.name)]
4679 ret.append(node_entry)
4681 oob_program = _SupportsOob(self.cfg, node)
4684 node_entry.append((constants.RS_UNAVAIL, None))
4687 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4688 self.op.command, oob_program, node.name)
4689 result = self.rpc.call_run_oob(master_node, oob_program,
4690 self.op.command, node.name,
4694 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4695 node.name, result.fail_msg)
4696 node_entry.append((constants.RS_NODATA, None))
4699 self._CheckPayload(result)
4700 except errors.OpExecError, err:
4701 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4703 node_entry.append((constants.RS_NODATA, None))
4705 if self.op.command == constants.OOB_HEALTH:
4706 # For health we should log important events
4707 for item, status in result.payload:
4708 if status in [constants.OOB_STATUS_WARNING,
4709 constants.OOB_STATUS_CRITICAL]:
4710 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4711 item, node.name, status)
4713 if self.op.command == constants.OOB_POWER_ON:
4715 elif self.op.command == constants.OOB_POWER_OFF:
4716 node.powered = False
4717 elif self.op.command == constants.OOB_POWER_STATUS:
4718 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4719 if powered != node.powered:
4720 logging.warning(("Recorded power state (%s) of node '%s' does not"
4721 " match actual power state (%s)"), node.powered,
4724 # For configuration changing commands we should update the node
4725 if self.op.command in (constants.OOB_POWER_ON,
4726 constants.OOB_POWER_OFF):
4727 self.cfg.Update(node, feedback_fn)
4729 node_entry.append((constants.RS_NORMAL, result.payload))
4731 if (self.op.command == constants.OOB_POWER_ON and
4732 idx < len(self.nodes) - 1):
4733 time.sleep(self.op.power_delay)
4737 def _CheckPayload(self, result):
4738 """Checks if the payload is valid.
4740 @param result: RPC result
4741 @raises errors.OpExecError: If payload is not valid
4745 if self.op.command == constants.OOB_HEALTH:
4746 if not isinstance(result.payload, list):
4747 errs.append("command 'health' is expected to return a list but got %s" %
4748 type(result.payload))
4750 for item, status in result.payload:
4751 if status not in constants.OOB_STATUSES:
4752 errs.append("health item '%s' has invalid status '%s'" %
4755 if self.op.command == constants.OOB_POWER_STATUS:
4756 if not isinstance(result.payload, dict):
4757 errs.append("power-status is expected to return a dict but got %s" %
4758 type(result.payload))
4760 if self.op.command in [
4761 constants.OOB_POWER_ON,
4762 constants.OOB_POWER_OFF,
4763 constants.OOB_POWER_CYCLE,
4765 if result.payload is not None:
4766 errs.append("%s is expected to not return payload but got '%s'" %
4767 (self.op.command, result.payload))
4770 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4771 utils.CommaJoin(errs))
4774 class _OsQuery(_QueryBase):
4775 FIELDS = query.OS_FIELDS
4777 def ExpandNames(self, lu):
4778 # Lock all nodes in shared mode
4779 # Temporary removal of locks, should be reverted later
4780 # TODO: reintroduce locks when they are lighter-weight
4781 lu.needed_locks = {}
4782 #self.share_locks[locking.LEVEL_NODE] = 1
4783 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4785 # The following variables interact with _QueryBase._GetNames
4787 self.wanted = self.names
4789 self.wanted = locking.ALL_SET
4791 self.do_locking = self.use_locking
4793 def DeclareLocks(self, lu, level):
4797 def _DiagnoseByOS(rlist):
4798 """Remaps a per-node return list into an a per-os per-node dictionary
4800 @param rlist: a map with node names as keys and OS objects as values
4803 @return: a dictionary with osnames as keys and as value another
4804 map, with nodes as keys and tuples of (path, status, diagnose,
4805 variants, parameters, api_versions) as values, eg::
4807 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4808 (/srv/..., False, "invalid api")],
4809 "node2": [(/srv/..., True, "", [], [])]}
4814 # we build here the list of nodes that didn't fail the RPC (at RPC
4815 # level), so that nodes with a non-responding node daemon don't
4816 # make all OSes invalid
4817 good_nodes = [node_name for node_name in rlist
4818 if not rlist[node_name].fail_msg]
4819 for node_name, nr in rlist.items():
4820 if nr.fail_msg or not nr.payload:
4822 for (name, path, status, diagnose, variants,
4823 params, api_versions) in nr.payload:
4824 if name not in all_os:
4825 # build a list of nodes for this os containing empty lists
4826 # for each node in node_list
4828 for nname in good_nodes:
4829 all_os[name][nname] = []
4830 # convert params from [name, help] to (name, help)
4831 params = [tuple(v) for v in params]
4832 all_os[name][node_name].append((path, status, diagnose,
4833 variants, params, api_versions))
4836 def _GetQueryData(self, lu):
4837 """Computes the list of nodes and their attributes.
4840 # Locking is not used
4841 assert not (compat.any(lu.glm.is_owned(level)
4842 for level in locking.LEVELS
4843 if level != locking.LEVEL_CLUSTER) or
4844 self.do_locking or self.use_locking)
4846 valid_nodes = [node.name
4847 for node in lu.cfg.GetAllNodesInfo().values()
4848 if not node.offline and node.vm_capable]
4849 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4850 cluster = lu.cfg.GetClusterInfo()
4854 for (os_name, os_data) in pol.items():
4855 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4856 hidden=(os_name in cluster.hidden_os),
4857 blacklisted=(os_name in cluster.blacklisted_os))
4861 api_versions = set()
4863 for idx, osl in enumerate(os_data.values()):
4864 info.valid = bool(info.valid and osl and osl[0][1])
4868 (node_variants, node_params, node_api) = osl[0][3:6]
4871 variants.update(node_variants)
4872 parameters.update(node_params)
4873 api_versions.update(node_api)
4875 # Filter out inconsistent values
4876 variants.intersection_update(node_variants)
4877 parameters.intersection_update(node_params)
4878 api_versions.intersection_update(node_api)
4880 info.variants = list(variants)
4881 info.parameters = list(parameters)
4882 info.api_versions = list(api_versions)
4884 data[os_name] = info
4886 # Prepare data in requested order
4887 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4891 class LUOsDiagnose(NoHooksLU):
4892 """Logical unit for OS diagnose/query.
4898 def _BuildFilter(fields, names):
4899 """Builds a filter for querying OSes.
4902 name_filter = qlang.MakeSimpleFilter("name", names)
4904 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4905 # respective field is not requested
4906 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4907 for fname in ["hidden", "blacklisted"]
4908 if fname not in fields]
4909 if "valid" not in fields:
4910 status_filter.append([qlang.OP_TRUE, "valid"])
4913 status_filter.insert(0, qlang.OP_AND)
4915 status_filter = None
4917 if name_filter and status_filter:
4918 return [qlang.OP_AND, name_filter, status_filter]
4922 return status_filter
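# With none of "hidden", "blacklisted" or "valid" among the requested fields,
# the status part of the filter becomes
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# so hidden, blacklisted and invalid OSes are filtered out unless those
# fields are requested explicitly.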
4924 def CheckArguments(self):
4925 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4926 self.op.output_fields, False)
4928 def ExpandNames(self):
4929 self.oq.ExpandNames(self)
4931 def Exec(self, feedback_fn):
4932 return self.oq.OldStyleQuery(self)
4935 class LUNodeRemove(LogicalUnit):
4936 """Logical unit for removing a node.
4939 HPATH = "node-remove"
4940 HTYPE = constants.HTYPE_NODE
4942 def BuildHooksEnv(self):
4947 "OP_TARGET": self.op.node_name,
4948 "NODE_NAME": self.op.node_name,
4951 def BuildHooksNodes(self):
4952 """Build hooks nodes.
4954 This doesn't run on the target node in the pre phase as a failed
4955 node would then be impossible to remove.
4958 all_nodes = self.cfg.GetNodeList()
4960 all_nodes.remove(self.op.node_name)
4963 return (all_nodes, all_nodes)
4965 def CheckPrereq(self):
4966 """Check prerequisites.
4969 - the node exists in the configuration
4970 - it does not have primary or secondary instances
4971 - it's not the master
4973 Any errors are signaled by raising errors.OpPrereqError.
4976 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4977 node = self.cfg.GetNodeInfo(self.op.node_name)
4978 assert node is not None
4980 masternode = self.cfg.GetMasterNode()
4981 if node.name == masternode:
4982 raise errors.OpPrereqError("Node is the master node; failover to another"
4983 " node is required", errors.ECODE_INVAL)
4985 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4986 if node.name in instance.all_nodes:
4987 raise errors.OpPrereqError("Instance %s is still running on the node,"
4988 " please remove first" % instance_name,
4990 self.op.node_name = node.name
4993 def Exec(self, feedback_fn):
4994 """Removes the node from the cluster.
4998 logging.info("Stopping the node daemon and removing configs from node %s",
5001 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5003 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5006 # Promote nodes to master candidate as needed
5007 _AdjustCandidatePool(self, exceptions=[node.name])
5008 self.context.RemoveNode(node.name)
5010 # Run post hooks on the node before it's removed
5011 _RunPostHook(self, node.name)
5013 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5014 msg = result.fail_msg
5016 self.LogWarning("Errors encountered on the remote node while leaving"
5017 " the cluster: %s", msg)
5019 # Remove node from our /etc/hosts
5020 if self.cfg.GetClusterInfo().modify_etc_hosts:
5021 master_node = self.cfg.GetMasterNode()
5022 result = self.rpc.call_etc_hosts_modify(master_node,
5023 constants.ETC_HOSTS_REMOVE,
5025 result.Raise("Can't update hosts file with new host data")
5026 _RedistributeAncillaryFiles(self)
5029 class _NodeQuery(_QueryBase):
5030 FIELDS = query.NODE_FIELDS
5032 def ExpandNames(self, lu):
5033 lu.needed_locks = {}
5034 lu.share_locks = _ShareAll()
5037 self.wanted = _GetWantedNodes(lu, self.names)
5039 self.wanted = locking.ALL_SET
5041 self.do_locking = (self.use_locking and
5042 query.NQ_LIVE in self.requested_data)
5045 # If any non-static field is requested we need to lock the nodes
5046 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5048 def DeclareLocks(self, lu, level):
5051 def _GetQueryData(self, lu):
5052 """Computes the list of nodes and their attributes.
5055 all_info = lu.cfg.GetAllNodesInfo()
5057 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5059 # Gather data as requested
5060 if query.NQ_LIVE in self.requested_data:
5061 # filter out non-vm_capable nodes
5062 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5064 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5065 [lu.cfg.GetHypervisorType()])
5066 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5067 for (name, nresult) in node_data.items()
5068 if not nresult.fail_msg and nresult.payload)
5072 if query.NQ_INST in self.requested_data:
5073 node_to_primary = dict([(name, set()) for name in nodenames])
5074 node_to_secondary = dict([(name, set()) for name in nodenames])
5076 inst_data = lu.cfg.GetAllInstancesInfo()
5078 for inst in inst_data.values():
5079 if inst.primary_node in node_to_primary:
5080 node_to_primary[inst.primary_node].add(inst.name)
5081 for secnode in inst.secondary_nodes:
5082 if secnode in node_to_secondary:
5083 node_to_secondary[secnode].add(inst.name)
5085 node_to_primary = None
5086 node_to_secondary = None
5088 if query.NQ_OOB in self.requested_data:
5089 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5090 for name, node in all_info.iteritems())
5094 if query.NQ_GROUP in self.requested_data:
5095 groups = lu.cfg.GetAllNodeGroupsInfo()
5099 return query.NodeQueryData([all_info[name] for name in nodenames],
5100 live_data, lu.cfg.GetMasterNode(),
5101 node_to_primary, node_to_secondary, groups,
5102 oob_support, lu.cfg.GetClusterInfo())
5105 class LUNodeQuery(NoHooksLU):
5106 """Logical unit for querying nodes.
5109 # pylint: disable=W0142
5112 def CheckArguments(self):
5113 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5114 self.op.output_fields, self.op.use_locking)
5116 def ExpandNames(self):
5117 self.nq.ExpandNames(self)
5119 def DeclareLocks(self, level):
5120 self.nq.DeclareLocks(self, level)
5122 def Exec(self, feedback_fn):
5123 return self.nq.OldStyleQuery(self)
5126 class LUNodeQueryvols(NoHooksLU):
5127 """Logical unit for getting volumes on node(s).
5131 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5132 _FIELDS_STATIC = utils.FieldSet("node")
5134 def CheckArguments(self):
5135 _CheckOutputFields(static=self._FIELDS_STATIC,
5136 dynamic=self._FIELDS_DYNAMIC,
5137 selected=self.op.output_fields)
5139 def ExpandNames(self):
5140 self.share_locks = _ShareAll()
5141 self.needed_locks = {}
5143 if not self.op.nodes:
5144 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5146 self.needed_locks[locking.LEVEL_NODE] = \
5147 _GetWantedNodes(self, self.op.nodes)
5149 def Exec(self, feedback_fn):
5150 """Computes the list of nodes and their attributes.
5153 nodenames = self.owned_locks(locking.LEVEL_NODE)
5154 volumes = self.rpc.call_node_volumes(nodenames)
5156 ilist = self.cfg.GetAllInstancesInfo()
5157 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5160 for node in nodenames:
5161 nresult = volumes[node]
5164 msg = nresult.fail_msg
5166 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5169 node_vols = sorted(nresult.payload,
5170 key=operator.itemgetter("dev"))
5172 for vol in node_vols:
5174 for field in self.op.output_fields:
5177 elif field == "phys":
5181 elif field == "name":
5183 elif field == "size":
5184 val = int(float(vol["size"]))
5185 elif field == "instance":
5186 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5188 raise errors.ParameterError(field)
5189 node_output.append(str(val))
5191 output.append(node_output)
5196 class LUNodeQueryStorage(NoHooksLU):
5197 """Logical unit for getting information on storage units on node(s).
5200 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5203 def CheckArguments(self):
5204 _CheckOutputFields(static=self._FIELDS_STATIC,
5205 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5206 selected=self.op.output_fields)
5208 def ExpandNames(self):
5209 self.share_locks = _ShareAll()
5210 self.needed_locks = {}
5213 self.needed_locks[locking.LEVEL_NODE] = \
5214 _GetWantedNodes(self, self.op.nodes)
5216 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5218 def Exec(self, feedback_fn):
5219 """Computes the list of nodes and their attributes.
5222 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5224 # Always get name to sort by
5225 if constants.SF_NAME in self.op.output_fields:
5226 fields = self.op.output_fields[:]
5228 fields = [constants.SF_NAME] + self.op.output_fields
5230 # Never ask for node or type as it's only known to the LU
5231 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5232 while extra in fields:
5233 fields.remove(extra)
5235 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5236 name_idx = field_idx[constants.SF_NAME]
5238 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5239 data = self.rpc.call_storage_list(self.nodes,
5240 self.op.storage_type, st_args,
5241 self.op.name, fields)
5245 for node in utils.NiceSort(self.nodes):
5246 nresult = data[node]
5250 msg = nresult.fail_msg
5252 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5255 rows = dict([(row[name_idx], row) for row in nresult.payload])
5257 for name in utils.NiceSort(rows.keys()):
5262 for field in self.op.output_fields:
5263 if field == constants.SF_NODE:
5265 elif field == constants.SF_TYPE:
5266 val = self.op.storage_type
5267 elif field in field_idx:
5268 val = row[field_idx[field]]
5270 raise errors.ParameterError(field)
5279 class _InstanceQuery(_QueryBase):
5280 FIELDS = query.INSTANCE_FIELDS
5282 def ExpandNames(self, lu):
5283 lu.needed_locks = {}
5284 lu.share_locks = _ShareAll()
5287 self.wanted = _GetWantedInstances(lu, self.names)
5289 self.wanted = locking.ALL_SET
5291 self.do_locking = (self.use_locking and
5292 query.IQ_LIVE in self.requested_data)
5294 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5295 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5296 lu.needed_locks[locking.LEVEL_NODE] = []
5297 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5299 self.do_grouplocks = (self.do_locking and
5300 query.IQ_NODES in self.requested_data)
5302 def DeclareLocks(self, lu, level):
5304 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5305 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5307 # Lock all groups used by instances optimistically; this requires going
5308 # via the node before it's locked, requiring verification later on
5309 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5311 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5312 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5313 elif level == locking.LEVEL_NODE:
5314 lu._LockInstancesNodes() # pylint: disable=W0212
5317 def _CheckGroupLocks(lu):
5318 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5319 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5321 # Check if node groups for locked instances are still correct
5322 for instance_name in owned_instances:
5323 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5325 def _GetQueryData(self, lu):
5326 """Computes the list of instances and their attributes.
5329 if self.do_grouplocks:
5330 self._CheckGroupLocks(lu)
5332 cluster = lu.cfg.GetClusterInfo()
5333 all_info = lu.cfg.GetAllInstancesInfo()
5335 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5337 instance_list = [all_info[name] for name in instance_names]
5338 nodes = frozenset(itertools.chain(*(inst.all_nodes
5339 for inst in instance_list)))
5340 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5343 wrongnode_inst = set()
5345 # Gather data as requested
5346 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5348 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5350 result = node_data[name]
5352 # offline nodes will be in both lists
5353 assert result.fail_msg
5354 offline_nodes.append(name)
5356 bad_nodes.append(name)
5357 elif result.payload:
5358 for inst in result.payload:
5359 if inst in all_info:
5360 if all_info[inst].primary_node == name:
5361 live_data.update(result.payload)
5363 wrongnode_inst.add(inst)
5365 # orphan instance; we don't list it here as we don't
5366 # handle this case yet in the output of instance listing
5367 logging.warning("Orphan instance '%s' found on node %s",
5369 # else no instance is alive
5373 if query.IQ_DISKUSAGE in self.requested_data:
5374 disk_usage = dict((inst.name,
5375 _ComputeDiskSize(inst.disk_template,
5376 [{constants.IDISK_SIZE: disk.size}
5377 for disk in inst.disks]))
5378 for inst in instance_list)
5382 if query.IQ_CONSOLE in self.requested_data:
5384 for inst in instance_list:
5385 if inst.name in live_data:
5386 # Instance is running
5387 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5389 consinfo[inst.name] = None
5390 assert set(consinfo.keys()) == set(instance_names)
5394 if query.IQ_NODES in self.requested_data:
5395 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5397 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5398 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5399 for uuid in set(map(operator.attrgetter("group"),
5405 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5406 disk_usage, offline_nodes, bad_nodes,
5407 live_data, wrongnode_inst, consinfo,
5411 class LUQuery(NoHooksLU):
5412 """Query for resources/items of a certain kind.
5415 # pylint: disable=W0142
5418 def CheckArguments(self):
5419 qcls = _GetQueryImplementation(self.op.what)
5421 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5423 def ExpandNames(self):
5424 self.impl.ExpandNames(self)
5426 def DeclareLocks(self, level):
5427 self.impl.DeclareLocks(self, level)
5429 def Exec(self, feedback_fn):
5430 return self.impl.NewStyleQuery(self)
5433 class LUQueryFields(NoHooksLU):
5434 """Query for resources/items of a certain kind.
5437 # pylint: disable=W0142
5440 def CheckArguments(self):
5441 self.qcls = _GetQueryImplementation(self.op.what)
5443 def ExpandNames(self):
5444 self.needed_locks = {}
5446 def Exec(self, feedback_fn):
5447 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5450 class LUNodeModifyStorage(NoHooksLU):
5451 """Logical unit for modifying a storage volume on a node.
5456 def CheckArguments(self):
5457 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5459 storage_type = self.op.storage_type
5462 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5464 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5465 " modified" % storage_type,
5468 diff = set(self.op.changes.keys()) - modifiable
5470 raise errors.OpPrereqError("The following fields can not be modified for"
5471 " storage units of type '%s': %r" %
5472 (storage_type, list(diff)),
5475 def ExpandNames(self):
5476 self.needed_locks = {
5477 locking.LEVEL_NODE: self.op.node_name,
5480 def Exec(self, feedback_fn):
5481 """Computes the list of nodes and their attributes.
5484 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5485 result = self.rpc.call_storage_modify(self.op.node_name,
5486 self.op.storage_type, st_args,
5487 self.op.name, self.op.changes)
5488 result.Raise("Failed to modify storage unit '%s' on %s" %
5489 (self.op.name, self.op.node_name))
5492 class LUNodeAdd(LogicalUnit):
5493 """Logical unit for adding node to the cluster.
5497 HTYPE = constants.HTYPE_NODE
5498 _NFLAGS = ["master_capable", "vm_capable"]
5500 def CheckArguments(self):
5501 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5502 # validate/normalize the node name
5503 self.hostname = netutils.GetHostname(name=self.op.node_name,
5504 family=self.primary_ip_family)
5505 self.op.node_name = self.hostname.name
5507 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5508 raise errors.OpPrereqError("Cannot readd the master node",
5511 if self.op.readd and self.op.group:
5512 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5513 " being readded", errors.ECODE_INVAL)
5515 def BuildHooksEnv(self):
5518 This will run on all nodes before, and on all nodes + the new node after.
5522 "OP_TARGET": self.op.node_name,
5523 "NODE_NAME": self.op.node_name,
5524 "NODE_PIP": self.op.primary_ip,
5525 "NODE_SIP": self.op.secondary_ip,
5526 "MASTER_CAPABLE": str(self.op.master_capable),
5527 "VM_CAPABLE": str(self.op.vm_capable),
5530 def BuildHooksNodes(self):
5531 """Build hooks nodes.
5534 # Exclude added node
5535 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5536 post_nodes = pre_nodes + [self.op.node_name, ]
5538 return (pre_nodes, post_nodes)
5540 def CheckPrereq(self):
5541 """Check prerequisites.
5544 - the new node is not already in the config
5546 - its parameters (single/dual homed) matches the cluster
5548 Any errors are signaled by raising errors.OpPrereqError.
5552 hostname = self.hostname
5553 node = hostname.name
5554 primary_ip = self.op.primary_ip = hostname.ip
5555 if self.op.secondary_ip is None:
5556 if self.primary_ip_family == netutils.IP6Address.family:
5557 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5558 " IPv4 address must be given as secondary",
5560 self.op.secondary_ip = primary_ip
5562 secondary_ip = self.op.secondary_ip
5563 if not netutils.IP4Address.IsValid(secondary_ip):
5564 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5565 " address" % secondary_ip, errors.ECODE_INVAL)
5567 node_list = cfg.GetNodeList()
5568 if not self.op.readd and node in node_list:
5569 raise errors.OpPrereqError("Node %s is already in the configuration" %
5570 node, errors.ECODE_EXISTS)
5571 elif self.op.readd and node not in node_list:
5572 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5575 self.changed_primary_ip = False
5577 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5578 if self.op.readd and node == existing_node_name:
5579 if existing_node.secondary_ip != secondary_ip:
5580 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5581 " address configuration as before",
5583 if existing_node.primary_ip != primary_ip:
5584 self.changed_primary_ip = True
5588 if (existing_node.primary_ip == primary_ip or
5589 existing_node.secondary_ip == primary_ip or
5590 existing_node.primary_ip == secondary_ip or
5591 existing_node.secondary_ip == secondary_ip):
5592 raise errors.OpPrereqError("New node ip address(es) conflict with"
5593 " existing node %s" % existing_node.name,
5594 errors.ECODE_NOTUNIQUE)
5596 # After this 'if' block, None is no longer a valid value for the
5597 # _capable op attributes
5599 old_node = self.cfg.GetNodeInfo(node)
5600 assert old_node is not None, "Can't retrieve locked node %s" % node
5601 for attr in self._NFLAGS:
5602 if getattr(self.op, attr) is None:
5603 setattr(self.op, attr, getattr(old_node, attr))
5605 for attr in self._NFLAGS:
5606 if getattr(self.op, attr) is None:
5607 setattr(self.op, attr, True)
5609 if self.op.readd and not self.op.vm_capable:
5610 pri, sec = cfg.GetNodeInstances(node)
5612 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5613 " flag set to false, but it already holds"
5614 " instances" % node,
5617 # check that the type of the node (single versus dual homed) is the
5618 # same as for the master
5619 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5620 master_singlehomed = myself.secondary_ip == myself.primary_ip
5621 newbie_singlehomed = secondary_ip == primary_ip
5622 if master_singlehomed != newbie_singlehomed:
5623 if master_singlehomed:
5624 raise errors.OpPrereqError("The master has no secondary ip but the"
5625 " new node has one",
5628 raise errors.OpPrereqError("The master has a secondary ip but the"
5629 " new node doesn't have one",
5632 # checks reachability
5633 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5634 raise errors.OpPrereqError("Node not reachable by ping",
5635 errors.ECODE_ENVIRON)
5637 if not newbie_singlehomed:
5638 # check reachability from my secondary ip to newbie's secondary ip
5639 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5640 source=myself.secondary_ip):
5641 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5642 " based ping to node daemon port",
5643 errors.ECODE_ENVIRON)
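# Only master-capable nodes are considered for promotion via
# _DecideSelfPromotion; for the others the master_candidate flag is forced to
# False below.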
5650 if self.op.master_capable:
5651 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5653 self.master_candidate = False
5656 self.new_node = old_node
5658 node_group = cfg.LookupNodeGroup(self.op.group)
5659 self.new_node = objects.Node(name=node,
5660 primary_ip=primary_ip,
5661 secondary_ip=secondary_ip,
5662 master_candidate=self.master_candidate,
5663 offline=False, drained=False,
5666 if self.op.ndparams:
5667 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5669 if self.op.hv_state:
5670 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5672 if self.op.disk_state:
5673 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5675 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5676 # it a property on the base class.
5677 result = rpc.DnsOnlyRunner().call_version([node])[node]
5678 result.Raise("Can't get version information from node %s" % node)
5679 if constants.PROTOCOL_VERSION == result.payload:
5680 logging.info("Communication to node %s fine, sw version %s match",
5681 node, result.payload)
5683 raise errors.OpPrereqError("Version mismatch master version %s,"
5684 " node version %s" %
5685 (constants.PROTOCOL_VERSION, result.payload),
5686 errors.ECODE_ENVIRON)
5688 def Exec(self, feedback_fn):
5689 """Adds the new node to the cluster.
5692 new_node = self.new_node
5693 node = new_node.name
5695 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5698 # We are adding a new node, so we assume it's powered
5699 new_node.powered = True
5701 # for re-adds, reset the offline/drained/master-candidate flags;
5702 # we need to reset here, otherwise offline would prevent RPC calls
5703 # later in the procedure; this also means that if the re-add
5704 # fails, we are left with a non-offlined, broken node
5706 new_node.drained = new_node.offline = False # pylint: disable=W0201
5707 self.LogInfo("Readding a node, the offline/drained flags were reset")
5708 # if we demote the node, we do cleanup later in the procedure
5709 new_node.master_candidate = self.master_candidate
5710 if self.changed_primary_ip:
5711 new_node.primary_ip = self.op.primary_ip
5713 # copy the master/vm_capable flags
5714 for attr in self._NFLAGS:
5715 setattr(new_node, attr, getattr(self.op, attr))
5717 # notify the user about any possible mc promotion
5718 if new_node.master_candidate:
5719 self.LogInfo("Node will be a master candidate")
5721 if self.op.ndparams:
5722 new_node.ndparams = self.op.ndparams
5724 new_node.ndparams = {}
5726 if self.op.hv_state:
5727 new_node.hv_state_static = self.new_hv_state
5729 if self.op.disk_state:
5730 new_node.disk_state_static = self.new_disk_state
5732 # Add node to our /etc/hosts, and add key to known_hosts
5733 if self.cfg.GetClusterInfo().modify_etc_hosts:
5734 master_node = self.cfg.GetMasterNode()
5735 result = self.rpc.call_etc_hosts_modify(master_node,
5736 constants.ETC_HOSTS_ADD,
5739 result.Raise("Can't update hosts file with new host data")
5741 if new_node.secondary_ip != new_node.primary_ip:
5742 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5745 node_verify_list = [self.cfg.GetMasterNode()]
5746 node_verify_param = {
5747 constants.NV_NODELIST: ([node], {}),
5748 # TODO: do a node-net-test as well?
5751 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5752 self.cfg.GetClusterName())
5753 for verifier in node_verify_list:
5754 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5755 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5757 for failed in nl_payload:
5758 feedback_fn("ssh/hostname verification failed"
5759 " (checking from %s): %s" %
5760 (verifier, nl_payload[failed]))
5761 raise errors.OpExecError("ssh/hostname verification failed")
5764 _RedistributeAncillaryFiles(self)
5765 self.context.ReaddNode(new_node)
5766 # make sure we redistribute the config
5767 self.cfg.Update(new_node, feedback_fn)
5768 # and make sure the new node will not have old files around
5769 if not new_node.master_candidate:
5770 result = self.rpc.call_node_demote_from_mc(new_node.name)
5771 msg = result.fail_msg
5773 self.LogWarning("Node failed to demote itself from master"
5774 " candidate status: %s" % msg)
5776 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5777 additional_vm=self.op.vm_capable)
5778 self.context.AddNode(new_node, self.proc.GetECId())
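# Illustrative sketch (not part of the original module): this LU is normally
# reached by submitting an OpNodeAdd opcode, e.g. on behalf of
# "gnt-node add". A minimal, hypothetical invocation could look like:
#
#   op = opcodes.OpNodeAdd(node_name="node4.example.com", readd=False)
#   # ... submitted to the master daemon, which dispatches it to this LU
#
# The complete set of accepted parameters is defined in ganeti.opcodes.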
5781 class LUNodeSetParams(LogicalUnit):
5782 """Modifies the parameters of a node.
5784 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5785 to the node role (as _ROLE_*)
5786 @cvar _R2F: a dictionary from node role to tuples of flags
5787 @cvar _FLAGS: a list of attribute names corresponding to the flags
5790 HPATH = "node-modify"
5791 HTYPE = constants.HTYPE_NODE
5793 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5795 (True, False, False): _ROLE_CANDIDATE,
5796 (False, True, False): _ROLE_DRAINED,
5797 (False, False, True): _ROLE_OFFLINE,
5798 (False, False, False): _ROLE_REGULAR,
5800 _R2F = dict((v, k) for k, v in _F2R.items())
5801 _FLAGS = ["master_candidate", "drained", "offline"]
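# For example, a node with flags (master_candidate=True, drained=False,
# offline=False) maps to _ROLE_CANDIDATE via _F2R; _R2F is the inverse
# mapping and _FLAGS lists the node attributes in the same order.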
5803 def CheckArguments(self):
5804 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5805 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5806 self.op.master_capable, self.op.vm_capable,
5807 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5809 if all_mods.count(None) == len(all_mods):
5810 raise errors.OpPrereqError("Please pass at least one modification",
5812 if all_mods.count(True) > 1:
5813 raise errors.OpPrereqError("Can't set the node into more than one"
5814 " state at the same time",
5817 # Boolean value that tells us whether we might be demoting from MC
5818 self.might_demote = (self.op.master_candidate == False or
5819 self.op.offline == True or
5820 self.op.drained == True or
5821 self.op.master_capable == False)
5823 if self.op.secondary_ip:
5824 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5825 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5826 " address" % self.op.secondary_ip,
5829 self.lock_all = self.op.auto_promote and self.might_demote
5830 self.lock_instances = self.op.secondary_ip is not None
5832 def _InstanceFilter(self, instance):
5833 """Filter for getting affected instances.
5836 return (instance.disk_template in constants.DTS_INT_MIRROR and
5837 self.op.node_name in instance.all_nodes)
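# Instances with an internally mirrored disk template (e.g. DRBD) that keep
# data on this node are the ones affected by a secondary IP change;
# ExpandNames and CheckPrereq below use this filter to lock and verify them.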
5839 def ExpandNames(self):
5841 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5843 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5845 # Since modifying a node can have severe effects on currently running
5846 # operations the resource lock is at least acquired in shared mode
5847 self.needed_locks[locking.LEVEL_NODE_RES] = \
5848 self.needed_locks[locking.LEVEL_NODE]
5850 # Get node resource and instance locks in shared mode; they are not used
5851 # for anything but read-only access
5852 self.share_locks[locking.LEVEL_NODE_RES] = 1
5853 self.share_locks[locking.LEVEL_INSTANCE] = 1
5855 if self.lock_instances:
5856 self.needed_locks[locking.LEVEL_INSTANCE] = \
5857 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5859 def BuildHooksEnv(self):
5862 This runs on the master node.
5866 "OP_TARGET": self.op.node_name,
5867 "MASTER_CANDIDATE": str(self.op.master_candidate),
5868 "OFFLINE": str(self.op.offline),
5869 "DRAINED": str(self.op.drained),
5870 "MASTER_CAPABLE": str(self.op.master_capable),
5871 "VM_CAPABLE": str(self.op.vm_capable),
5874 def BuildHooksNodes(self):
5875 """Build hooks nodes.
5878 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5881 def CheckPrereq(self):
5882 """Check prerequisites.
5884 This only checks the instance list against the existing names.
5887 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5889 if self.lock_instances:
5890 affected_instances = \
5891 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5893 # Verify instance locks
5894 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5895 wanted_instances = frozenset(affected_instances.keys())
5896 if wanted_instances - owned_instances:
5897 raise errors.OpPrereqError("Instances affected by changing node %s's"
5898 " secondary IP address have changed since"
5899 " locks were acquired, wanted '%s', have"
5900 " '%s'; retry the operation" %
5902 utils.CommaJoin(wanted_instances),
5903 utils.CommaJoin(owned_instances)),
5906 affected_instances = None
5908 if (self.op.master_candidate is not None or
5909 self.op.drained is not None or
5910 self.op.offline is not None):
5911 # we can't change the master's node flags
5912 if self.op.node_name == self.cfg.GetMasterNode():
5913 raise errors.OpPrereqError("The master role can be changed"
5914 " only via master-failover",
5917 if self.op.master_candidate and not node.master_capable:
5918 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5919 " it a master candidate" % node.name,
5922 if self.op.vm_capable == False:
5923 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5925 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5926 " the vm_capable flag" % node.name,
5929 if node.master_candidate and self.might_demote and not self.lock_all:
5930 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5931 # check if after removing the current node, we're missing master
5933 (mc_remaining, mc_should, _) = \
5934 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5935 if mc_remaining < mc_should:
5936 raise errors.OpPrereqError("Not enough master candidates, please"
5937 " pass auto promote option to allow"
5938 " promotion (--auto-promote or RAPI"
5939 " auto_promote=True)", errors.ECODE_STATE)
5941 self.old_flags = old_flags = (node.master_candidate,
5942 node.drained, node.offline)
5943 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5944 self.old_role = old_role = self._F2R[old_flags]
5946 # Check for ineffective changes
5947 for attr in self._FLAGS:
5948 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5949 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5950 setattr(self.op, attr, None)
5952 # Past this point, any flag change to False means a transition
5953 # away from the respective state, as only real changes are kept
5955 # TODO: We might query the real power state if it supports OOB
5956 if _SupportsOob(self.cfg, node):
5957 if self.op.offline is False and not (node.powered or
5958 self.op.powered == True):
5959 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5960 " offline status can be reset") %
5962 elif self.op.powered is not None:
5963 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5964 " as it does not support out-of-band"
5965 " handling") % self.op.node_name)
5967 # If we're being deofflined/drained, we'll MC ourself if needed
5968 if (self.op.drained == False or self.op.offline == False or
5969 (self.op.master_capable and not node.master_capable)):
5970 if _DecideSelfPromotion(self):
5971 self.op.master_candidate = True
5972 self.LogInfo("Auto-promoting node to master candidate")
5974 # If we're no longer master capable, we'll demote ourselves from MC
5975 if self.op.master_capable == False and node.master_candidate:
5976 self.LogInfo("Demoting from master candidate")
5977 self.op.master_candidate = False
5980 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5981 if self.op.master_candidate:
5982 new_role = self._ROLE_CANDIDATE
5983 elif self.op.drained:
5984 new_role = self._ROLE_DRAINED
5985 elif self.op.offline:
5986 new_role = self._ROLE_OFFLINE
5987 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5988 # False is still in new flags, which means we're un-setting (the
5990 new_role = self._ROLE_REGULAR
5991 else: # no new flags, nothing, keep old role
5994 self.new_role = new_role
5996 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5997 # Trying to transition out of offline status
5998 result = self.rpc.call_version([node.name])[node.name]
6000 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6001 " to report its version: %s" %
6002 (node.name, result.fail_msg),
6005 self.LogWarning("Transitioning node from offline to online state"
6006 " without using re-add. Please make sure the node"
6009 if self.op.secondary_ip:
6010 # Ok even without locking, because this can't be changed by any LU
6011 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6012 master_singlehomed = master.secondary_ip == master.primary_ip
6013 if master_singlehomed and self.op.secondary_ip:
6014 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6015 " homed cluster", errors.ECODE_INVAL)
6017 assert not (frozenset(affected_instances) -
6018 self.owned_locks(locking.LEVEL_INSTANCE))
6021 if affected_instances:
6022 raise errors.OpPrereqError("Cannot change secondary IP address:"
6023 " offline node has instances (%s)"
6024 " configured to use it" %
6025 utils.CommaJoin(affected_instances.keys()))
6027 # On online nodes, check that no instances are running, and that
6028 # the node has the new ip and we can reach it.
6029 for instance in affected_instances.values():
6030 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6031 msg="cannot change secondary ip")
6033 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6034 if master.name != node.name:
6035 # check reachability from master secondary ip to new secondary ip
6036 if not netutils.TcpPing(self.op.secondary_ip,
6037 constants.DEFAULT_NODED_PORT,
6038 source=master.secondary_ip):
6039 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6040 " based ping to node daemon port",
6041 errors.ECODE_ENVIRON)
6043 if self.op.ndparams:
6044 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6045 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6046 self.new_ndparams = new_ndparams
6048 if self.op.hv_state:
6049 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6050 self.node.hv_state_static)
6052 if self.op.disk_state:
6053 self.new_disk_state = \
6054 _MergeAndVerifyDiskState(self.op.disk_state,
6055 self.node.disk_state_static)
6057 def Exec(self, feedback_fn):
6062 old_role = self.old_role
6063 new_role = self.new_role
6067 if self.op.ndparams:
6068 node.ndparams = self.new_ndparams
6070 if self.op.powered is not None:
6071 node.powered = self.op.powered
6073 if self.op.hv_state:
6074 node.hv_state_static = self.new_hv_state
6076 if self.op.disk_state:
6077 node.disk_state_static = self.new_disk_state
6079 for attr in ["master_capable", "vm_capable"]:
6080 val = getattr(self.op, attr)
6082 setattr(node, attr, val)
6083 result.append((attr, str(val)))
6085 if new_role != old_role:
6086 # Tell the node to demote itself, if no longer MC and not offline
6087 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6088 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6090 self.LogWarning("Node failed to demote itself: %s", msg)
6092 new_flags = self._R2F[new_role]
6093 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6095 result.append((desc, str(nf)))
6096 (node.master_candidate, node.drained, node.offline) = new_flags
6098 # we locked all nodes, we adjust the CP before updating this node
6100 _AdjustCandidatePool(self, [node.name])
6102 if self.op.secondary_ip:
6103 node.secondary_ip = self.op.secondary_ip
6104 result.append(("secondary_ip", self.op.secondary_ip))
6106 # this will trigger configuration file update, if needed
6107 self.cfg.Update(node, feedback_fn)
6109 # this will trigger job queue propagation or cleanup if the mc
6111 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6112 self.context.ReaddNode(node)
6117 class LUNodePowercycle(NoHooksLU):
6118 """Powercycles a node.
6123 def CheckArguments(self):
6124 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6125 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6126 raise errors.OpPrereqError("The node is the master and the force"
6127 " parameter was not set",
6130 def ExpandNames(self):
6131 """Locking for PowercycleNode.
6133 This is a last-resort option and shouldn't block on other
6134 jobs. Therefore, we grab no locks.
6137 self.needed_locks = {}
6139 def Exec(self, feedback_fn):
6143 result = self.rpc.call_node_powercycle(self.op.node_name,
6144 self.cfg.GetHypervisorType())
6145 result.Raise("Failed to schedule the reboot")
6146 return result.payload
6149 class LUClusterQuery(NoHooksLU):
6150 """Query cluster configuration.
6155 def ExpandNames(self):
6156 self.needed_locks = {}
6158 def Exec(self, feedback_fn):
6159 """Return cluster config.
6162 cluster = self.cfg.GetClusterInfo()
6165 # Filter just for enabled hypervisors
6166 for os_name, hv_dict in cluster.os_hvp.items():
6167 os_hvp[os_name] = {}
6168 for hv_name, hv_params in hv_dict.items():
6169 if hv_name in cluster.enabled_hypervisors:
6170 os_hvp[os_name][hv_name] = hv_params
6172 # Convert ip_family to ip_version
6173 primary_ip_version = constants.IP4_VERSION
6174 if cluster.primary_ip_family == netutils.IP6Address.family:
6175 primary_ip_version = constants.IP6_VERSION
6178 "software_version": constants.RELEASE_VERSION,
6179 "protocol_version": constants.PROTOCOL_VERSION,
6180 "config_version": constants.CONFIG_VERSION,
6181 "os_api_version": max(constants.OS_API_VERSIONS),
6182 "export_version": constants.EXPORT_VERSION,
6183 "architecture": runtime.GetArchInfo(),
6184 "name": cluster.cluster_name,
6185 "master": cluster.master_node,
6186 "default_hypervisor": cluster.primary_hypervisor,
6187 "enabled_hypervisors": cluster.enabled_hypervisors,
6188 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6189 for hypervisor_name in cluster.enabled_hypervisors]),
6191 "beparams": cluster.beparams,
6192 "osparams": cluster.osparams,
6193 "ipolicy": cluster.ipolicy,
6194 "nicparams": cluster.nicparams,
6195 "ndparams": cluster.ndparams,
6196 "diskparams": cluster.diskparams,
6197 "candidate_pool_size": cluster.candidate_pool_size,
6198 "master_netdev": cluster.master_netdev,
6199 "master_netmask": cluster.master_netmask,
6200 "use_external_mip_script": cluster.use_external_mip_script,
6201 "volume_group_name": cluster.volume_group_name,
6202 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6203 "file_storage_dir": cluster.file_storage_dir,
6204 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6205 "maintain_node_health": cluster.maintain_node_health,
6206 "ctime": cluster.ctime,
6207 "mtime": cluster.mtime,
6208 "uuid": cluster.uuid,
6209 "tags": list(cluster.GetTags()),
6210 "uid_pool": cluster.uid_pool,
6211 "default_iallocator": cluster.default_iallocator,
6212 "reserved_lvs": cluster.reserved_lvs,
6213 "primary_ip_version": primary_ip_version,
6214 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6215 "hidden_os": cluster.hidden_os,
6216 "blacklisted_os": cluster.blacklisted_os,
6222 class LUClusterConfigQuery(NoHooksLU):
6223 """Return configuration values.
6228 def CheckArguments(self):
6229 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6231 def ExpandNames(self):
6232 self.cq.ExpandNames(self)
6234 def DeclareLocks(self, level):
6235 self.cq.DeclareLocks(self, level)
6237 def Exec(self, feedback_fn):
6238 result = self.cq.OldStyleQuery(self)
6240 assert len(result) == 1
6245 class _ClusterQuery(_QueryBase):
6246 FIELDS = query.CLUSTER_FIELDS
6248 #: Do not sort (there is only one item)
6251 def ExpandNames(self, lu):
6252 lu.needed_locks = {}
6254 # The following variables interact with _QueryBase._GetNames
6255 self.wanted = locking.ALL_SET
6256 self.do_locking = self.use_locking
6259 raise errors.OpPrereqError("Can not use locking for cluster queries",
6262 def DeclareLocks(self, lu, level):
6265 def _GetQueryData(self, lu):
6266 """Computes the list of nodes and their attributes.
6269 # Locking is not used
6270 assert not (compat.any(lu.glm.is_owned(level)
6271 for level in locking.LEVELS
6272 if level != locking.LEVEL_CLUSTER) or
6273 self.do_locking or self.use_locking)
6275 if query.CQ_CONFIG in self.requested_data:
6276 cluster = lu.cfg.GetClusterInfo()
6278 cluster = NotImplemented
6280 if query.CQ_QUEUE_DRAINED in self.requested_data:
6281 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6283 drain_flag = NotImplemented
6285 if query.CQ_WATCHER_PAUSE in self.requested_data:
6286 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6288 watcher_pause = NotImplemented
6290 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
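# NotImplemented is used above as a sentinel for "data not requested", as
# opposed to None, which could be a legitimate value.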
6293 class LUInstanceActivateDisks(NoHooksLU):
6294 """Bring up an instance's disks.
6299 def ExpandNames(self):
6300 self._ExpandAndLockInstance()
6301 self.needed_locks[locking.LEVEL_NODE] = []
6302 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6304 def DeclareLocks(self, level):
6305 if level == locking.LEVEL_NODE:
6306 self._LockInstancesNodes()
6308 def CheckPrereq(self):
6309 """Check prerequisites.
6311 This checks that the instance is in the cluster.
6314 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6315 assert self.instance is not None, \
6316 "Cannot retrieve locked instance %s" % self.op.instance_name
6317 _CheckNodeOnline(self, self.instance.primary_node)
6319 def Exec(self, feedback_fn):
6320 """Activate the disks.
6323 disks_ok, disks_info = \
6324 _AssembleInstanceDisks(self, self.instance,
6325 ignore_size=self.op.ignore_size)
6327 raise errors.OpExecError("Cannot activate block devices")
6332 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6334 """Prepare the block devices for an instance.
6336 This sets up the block devices on all nodes.
6338 @type lu: L{LogicalUnit}
6339 @param lu: the logical unit on whose behalf we execute
6340 @type instance: L{objects.Instance}
6341 @param instance: the instance whose disks we assemble
6342 @type disks: list of L{objects.Disk} or None
6343 @param disks: which disks to assemble (or all, if None)
6344 @type ignore_secondaries: boolean
6345 @param ignore_secondaries: if true, errors on secondary nodes
6346 won't result in an error return from the function
6347 @type ignore_size: boolean
6348 @param ignore_size: if true, the current known size of the disk
6349 will not be used during the disk activation, useful for cases
6350 when the size is wrong
6351 @return: a pair (disks_ok, device_info), where device_info is a list of
6352 (host, instance_visible_name, node_visible_name) tuples
6353 with the mapping from node devices to instance devices
6358 iname = instance.name
6359 disks = _ExpandCheckDisks(instance, disks)
6361 # With the two-pass mechanism we try to reduce the window of
6362 # opportunity for the race condition of switching DRBD to primary
6363 # before the handshake has occurred, but we do not eliminate it
6365 # The proper fix would be to wait (with some limits) until the
6366 # connection has been made and drbd transitions from WFConnection
6367 # into any other network-connected state (Connected, SyncTarget,
6370 # 1st pass, assemble on all nodes in secondary mode
6371 for idx, inst_disk in enumerate(disks):
6372 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6374 node_disk = node_disk.Copy()
6375 node_disk.UnsetSize()
6376 lu.cfg.SetDiskID(node_disk, node)
6377 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6379 msg = result.fail_msg
6381 is_offline_secondary = (node in instance.secondary_nodes and
6383 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6384 " (is_primary=False, pass=1): %s",
6385 inst_disk.iv_name, node, msg)
6386 if not (ignore_secondaries or is_offline_secondary):
6389 # FIXME: race condition on drbd migration to primary
6391 # 2nd pass, do only the primary node
6392 for idx, inst_disk in enumerate(disks):
6395 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6396 if node != instance.primary_node:
6399 node_disk = node_disk.Copy()
6400 node_disk.UnsetSize()
6401 lu.cfg.SetDiskID(node_disk, node)
6402 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6404 msg = result.fail_msg
6406 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6407 " (is_primary=True, pass=2): %s",
6408 inst_disk.iv_name, node, msg)
6411 dev_path = result.payload
6413 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6415 # leave the disks configured for the primary node
6416 # this is a workaround that would be fixed better by
6417 # improving the logical/physical id handling
6419 lu.cfg.SetDiskID(disk, instance.primary_node)
6421 return disks_ok, device_info
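# Illustrative sketch (not part of the original code): callers such as
# LUInstanceActivateDisks.Exec above consume the return value like this:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node_name, iv_name, dev_path in disks_info:
#     lu.LogInfo("Disk %s on %s is visible as %s", iv_name, node_name, dev_path)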
6424 def _StartInstanceDisks(lu, instance, force):
6425 """Start the disks of an instance.
6428 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6429 ignore_secondaries=force)
6431 _ShutdownInstanceDisks(lu, instance)
6432 if force is not None and not force:
6433 lu.proc.LogWarning("", hint="If the message above refers to a"
6435 " you can retry the operation using '--force'.")
6436 raise errors.OpExecError("Disk consistency error")
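# Usage note: LUs that need an instance's disks online before an operation
# (startup, reinstall, rename) call _StartInstanceDisks and pair it with
# _ShutdownInstanceDisks on their error paths, as the Exec methods further
# below show.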
6439 class LUInstanceDeactivateDisks(NoHooksLU):
6440 """Shutdown an instance's disks.
6445 def ExpandNames(self):
6446 self._ExpandAndLockInstance()
6447 self.needed_locks[locking.LEVEL_NODE] = []
6448 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6450 def DeclareLocks(self, level):
6451 if level == locking.LEVEL_NODE:
6452 self._LockInstancesNodes()
6454 def CheckPrereq(self):
6455 """Check prerequisites.
6457 This checks that the instance is in the cluster.
6460 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6461 assert self.instance is not None, \
6462 "Cannot retrieve locked instance %s" % self.op.instance_name
6464 def Exec(self, feedback_fn):
6465 """Deactivate the disks
6468 instance = self.instance
6470 _ShutdownInstanceDisks(self, instance)
6472 _SafeShutdownInstanceDisks(self, instance)
6475 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6476 """Shutdown block devices of an instance.
6478 This function checks that the instance is not running before calling
6479 _ShutdownInstanceDisks.
6482 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6483 _ShutdownInstanceDisks(lu, instance, disks=disks)
6486 def _ExpandCheckDisks(instance, disks):
6487 """Return the instance disks selected by the disks list
6489 @type disks: list of L{objects.Disk} or None
6490 @param disks: selected disks
6491 @rtype: list of L{objects.Disk}
6492 @return: selected instance disks to act on
6496 return instance.disks
6498 if not set(disks).issubset(instance.disks):
6499 raise errors.ProgrammerError("Can only act on disks belonging to the"
6504 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6505 """Shutdown block devices of an instance.
6507 This does the shutdown on all nodes of the instance.
6509 If the ignore_primary is false, errors on the primary node are
6514 disks = _ExpandCheckDisks(instance, disks)
6517 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6518 lu.cfg.SetDiskID(top_disk, node)
6519 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6520 msg = result.fail_msg
6522 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6523 disk.iv_name, node, msg)
6524 if ((node == instance.primary_node and not ignore_primary) or
6525 (node != instance.primary_node and not result.offline)):
6530 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6531 """Checks if a node has enough free memory.
6533 This function checks if a given node has the needed amount of free
6534 memory. In case the node has less memory or we cannot get the
6535 information from the node, this function raises an OpPrereqError
6538 @type lu: C{LogicalUnit}
6539 @param lu: a logical unit from which we get configuration data
6541 @param node: the node to check
6542 @type reason: C{str}
6543 @param reason: string to use in the error message
6544 @type requested: C{int}
6545 @param requested: the amount of memory in MiB to check for
6546 @type hypervisor_name: C{str}
6547 @param hypervisor_name: the hypervisor to ask for memory stats
6549 @return: node current free memory
6550 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6551 we cannot check the node
6554 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6555 nodeinfo[node].Raise("Can't get data from node %s" % node,
6556 prereq=True, ecode=errors.ECODE_ENVIRON)
6557 (_, _, (hv_info, )) = nodeinfo[node].payload
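# The node_info payload is a 3-tuple whose second and third elements hold
# per-VG and per-hypervisor data for the requested names; only one
# hypervisor was requested above, hence the single-element unpacking.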
6559 free_mem = hv_info.get("memory_free", None)
6560 if not isinstance(free_mem, int):
6561 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6562 " was '%s'" % (node, free_mem),
6563 errors.ECODE_ENVIRON)
6564 if requested > free_mem:
6565 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6566 " needed %s MiB, available %s MiB" %
6567 (node, reason, requested, free_mem),
6572 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6573 """Checks if nodes have enough free disk space in the all VGs.
6575 This function check if all given nodes have the needed amount of
6576 free disk. In case any node has less disk or we cannot get the
6577 information from the node, this function raise an OpPrereqError
6580 @type lu: C{LogicalUnit}
6581 @param lu: a logical unit from which we get configuration data
6582 @type nodenames: C{list}
6583 @param nodenames: the list of node names to check
6584 @type req_sizes: C{dict}
6585 @param req_sizes: the hash of vg and corresponding amount of disk in
6587 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6588 or we cannot check the node
6591 for vg, req_size in req_sizes.items():
6592 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
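# Example (illustrative): req_sizes maps volume group names to the space
# needed in MiB, e.g. {"xenvg": 20480} requires 20 GiB of free space in
# volume group "xenvg" on every node in nodenames.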
6595 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6596 """Checks if nodes have enough free disk space in the specified VG.
6598 This function checks if all given nodes have the needed amount of
6599 free disk. In case any node has less disk or we cannot get the
6600 information from the node, this function raises an OpPrereqError
6603 @type lu: C{LogicalUnit}
6604 @param lu: a logical unit from which we get configuration data
6605 @type nodenames: C{list}
6606 @param nodenames: the list of node names to check
6608 @param vg: the volume group to check
6609 @type requested: C{int}
6610 @param requested: the amount of disk in MiB to check for
6611 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6612 or we cannot check the node
6615 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6616 for node in nodenames:
6617 info = nodeinfo[node]
6618 info.Raise("Cannot get current information from node %s" % node,
6619 prereq=True, ecode=errors.ECODE_ENVIRON)
6620 (_, (vg_info, ), _) = info.payload
6621 vg_free = vg_info.get("vg_free", None)
6622 if not isinstance(vg_free, int):
6623 raise errors.OpPrereqError("Can't compute free disk space on node"
6624 " %s for vg %s, result was '%s'" %
6625 (node, vg, vg_free), errors.ECODE_ENVIRON)
6626 if requested > vg_free:
6627 raise errors.OpPrereqError("Not enough disk space on target node %s"
6628 " vg %s: required %d MiB, available %d MiB" %
6629 (node, vg, requested, vg_free),
6633 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6634 """Checks if nodes have enough physical CPUs
6636 This function checks if all given nodes have the needed number of
6637 physical CPUs. In case any node has fewer CPUs or we cannot get the
6638 information from the node, this function raises an OpPrereqError
6641 @type lu: C{LogicalUnit}
6642 @param lu: a logical unit from which we get configuration data
6643 @type nodenames: C{list}
6644 @param nodenames: the list of node names to check
6645 @type requested: C{int}
6646 @param requested: the minimum acceptable number of physical CPUs
6647 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6648 or we cannot check the node
6651 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6652 for node in nodenames:
6653 info = nodeinfo[node]
6654 info.Raise("Cannot get current information from node %s" % node,
6655 prereq=True, ecode=errors.ECODE_ENVIRON)
6656 (_, _, (hv_info, )) = info.payload
6657 num_cpus = hv_info.get("cpu_total", None)
6658 if not isinstance(num_cpus, int):
6659 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6660 " on node %s, result was '%s'" %
6661 (node, num_cpus), errors.ECODE_ENVIRON)
6662 if requested > num_cpus:
6663 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6664 "required" % (node, num_cpus, requested),
6668 class LUInstanceStartup(LogicalUnit):
6669 """Starts an instance.
6672 HPATH = "instance-start"
6673 HTYPE = constants.HTYPE_INSTANCE
6676 def CheckArguments(self):
6678 if self.op.beparams:
6679 # fill the beparams dict
6680 objects.UpgradeBeParams(self.op.beparams)
6681 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6683 def ExpandNames(self):
6684 self._ExpandAndLockInstance()
6685 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6687 def DeclareLocks(self, level):
6688 if level == locking.LEVEL_NODE_RES:
6689 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6691 def BuildHooksEnv(self):
6694 This runs on master, primary and secondary nodes of the instance.
6698 "FORCE": self.op.force,
6701 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6705 def BuildHooksNodes(self):
6706 """Build hooks nodes.
6709 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6712 def CheckPrereq(self):
6713 """Check prerequisites.
6715 This checks that the instance is in the cluster.
6718 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6719 assert self.instance is not None, \
6720 "Cannot retrieve locked instance %s" % self.op.instance_name
6723 if self.op.hvparams:
6724 # check hypervisor parameter syntax (locally)
6725 cluster = self.cfg.GetClusterInfo()
6726 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6727 filled_hvp = cluster.FillHV(instance)
6728 filled_hvp.update(self.op.hvparams)
6729 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6730 hv_type.CheckParameterSyntax(filled_hvp)
6731 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6733 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6735 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6737 if self.primary_offline and self.op.ignore_offline_nodes:
6738 self.proc.LogWarning("Ignoring offline primary node")
6740 if self.op.hvparams or self.op.beparams:
6741 self.proc.LogWarning("Overridden parameters are ignored")
6743 _CheckNodeOnline(self, instance.primary_node)
6745 bep = self.cfg.GetClusterInfo().FillBE(instance)
6746 bep.update(self.op.beparams)
6748 # check bridges existence
6749 _CheckInstanceBridgesExist(self, instance)
6751 remote_info = self.rpc.call_instance_info(instance.primary_node,
6753 instance.hypervisor)
6754 remote_info.Raise("Error checking node %s" % instance.primary_node,
6755 prereq=True, ecode=errors.ECODE_ENVIRON)
6756 if not remote_info.payload: # not running already
6757 _CheckNodeFreeMemory(self, instance.primary_node,
6758 "starting instance %s" % instance.name,
6759 bep[constants.BE_MINMEM], instance.hypervisor)
6761 def Exec(self, feedback_fn):
6762 """Start the instance.
6765 instance = self.instance
6766 force = self.op.force
6768 if not self.op.no_remember:
6769 self.cfg.MarkInstanceUp(instance.name)
6771 if self.primary_offline:
6772 assert self.op.ignore_offline_nodes
6773 self.proc.LogInfo("Primary node offline, marked instance as started")
6775 node_current = instance.primary_node
6777 _StartInstanceDisks(self, instance, force)
6780 self.rpc.call_instance_start(node_current,
6781 (instance, self.op.hvparams,
6783 self.op.startup_paused)
6784 msg = result.fail_msg
6786 _ShutdownInstanceDisks(self, instance)
6787 raise errors.OpExecError("Could not start instance: %s" % msg)
6790 class LUInstanceReboot(LogicalUnit):
6791 """Reboot an instance.
6794 HPATH = "instance-reboot"
6795 HTYPE = constants.HTYPE_INSTANCE
6798 def ExpandNames(self):
6799 self._ExpandAndLockInstance()
6801 def BuildHooksEnv(self):
6804 This runs on master, primary and secondary nodes of the instance.
6808 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6809 "REBOOT_TYPE": self.op.reboot_type,
6810 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6813 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6817 def BuildHooksNodes(self):
6818 """Build hooks nodes.
6821 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6824 def CheckPrereq(self):
6825 """Check prerequisites.
6827 This checks that the instance is in the cluster.
6830 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6831 assert self.instance is not None, \
6832 "Cannot retrieve locked instance %s" % self.op.instance_name
6833 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6834 _CheckNodeOnline(self, instance.primary_node)
6836 # check bridges existence
6837 _CheckInstanceBridgesExist(self, instance)
6839 def Exec(self, feedback_fn):
6840 """Reboot the instance.
6843 instance = self.instance
6844 ignore_secondaries = self.op.ignore_secondaries
6845 reboot_type = self.op.reboot_type
6847 remote_info = self.rpc.call_instance_info(instance.primary_node,
6849 instance.hypervisor)
6850 remote_info.Raise("Error checking node %s" % instance.primary_node)
6851 instance_running = bool(remote_info.payload)
6853 node_current = instance.primary_node
6855 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6856 constants.INSTANCE_REBOOT_HARD]:
6857 for disk in instance.disks:
6858 self.cfg.SetDiskID(disk, node_current)
6859 result = self.rpc.call_instance_reboot(node_current, instance,
6861 self.op.shutdown_timeout)
6862 result.Raise("Could not reboot instance")
6864 if instance_running:
6865 result = self.rpc.call_instance_shutdown(node_current, instance,
6866 self.op.shutdown_timeout)
6867 result.Raise("Could not shutdown instance for full reboot")
6868 _ShutdownInstanceDisks(self, instance)
6870 self.LogInfo("Instance %s was already stopped, starting now",
6872 _StartInstanceDisks(self, instance, ignore_secondaries)
6873 result = self.rpc.call_instance_start(node_current,
6874 (instance, None, None), False)
6875 msg = result.fail_msg
6877 _ShutdownInstanceDisks(self, instance)
6878 raise errors.OpExecError("Could not start instance for"
6879 " full reboot: %s" % msg)
6881 self.cfg.MarkInstanceUp(instance.name)
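# Note: soft and hard reboots of a running instance are delegated to the
# hypervisor via call_instance_reboot; any other case (a full reboot, or an
# instance that was not running) is handled above as an explicit stop/start
# cycle of the disks and the instance.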
6884 class LUInstanceShutdown(LogicalUnit):
6885 """Shutdown an instance.
6888 HPATH = "instance-stop"
6889 HTYPE = constants.HTYPE_INSTANCE
6892 def ExpandNames(self):
6893 self._ExpandAndLockInstance()
6895 def BuildHooksEnv(self):
6898 This runs on master, primary and secondary nodes of the instance.
6901 env = _BuildInstanceHookEnvByObject(self, self.instance)
6902 env["TIMEOUT"] = self.op.timeout
6905 def BuildHooksNodes(self):
6906 """Build hooks nodes.
6909 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6912 def CheckPrereq(self):
6913 """Check prerequisites.
6915 This checks that the instance is in the cluster.
6918 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6919 assert self.instance is not None, \
6920 "Cannot retrieve locked instance %s" % self.op.instance_name
6922 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6924 self.primary_offline = \
6925 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6927 if self.primary_offline and self.op.ignore_offline_nodes:
6928 self.proc.LogWarning("Ignoring offline primary node")
6930 _CheckNodeOnline(self, self.instance.primary_node)
6932 def Exec(self, feedback_fn):
6933 """Shutdown the instance.
6936 instance = self.instance
6937 node_current = instance.primary_node
6938 timeout = self.op.timeout
6940 if not self.op.no_remember:
6941 self.cfg.MarkInstanceDown(instance.name)
6943 if self.primary_offline:
6944 assert self.op.ignore_offline_nodes
6945 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6947 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6948 msg = result.fail_msg
6950 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6952 _ShutdownInstanceDisks(self, instance)
6955 class LUInstanceReinstall(LogicalUnit):
6956 """Reinstall an instance.
6959 HPATH = "instance-reinstall"
6960 HTYPE = constants.HTYPE_INSTANCE
6963 def ExpandNames(self):
6964 self._ExpandAndLockInstance()
6966 def BuildHooksEnv(self):
6969 This runs on master, primary and secondary nodes of the instance.
6972 return _BuildInstanceHookEnvByObject(self, self.instance)
6974 def BuildHooksNodes(self):
6975 """Build hooks nodes.
6978 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6981 def CheckPrereq(self):
6982 """Check prerequisites.
6984 This checks that the instance is in the cluster and is not running.
6987 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6988 assert instance is not None, \
6989 "Cannot retrieve locked instance %s" % self.op.instance_name
6990 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6991 " offline, cannot reinstall")
6993 if instance.disk_template == constants.DT_DISKLESS:
6994 raise errors.OpPrereqError("Instance '%s' has no disks" %
6995 self.op.instance_name,
6997 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6999 if self.op.os_type is not None:
7001 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7002 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7003 instance_os = self.op.os_type
7005 instance_os = instance.os
7007 nodelist = list(instance.all_nodes)
7009 if self.op.osparams:
7010 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7011 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7012 self.os_inst = i_osdict # the new dict (without defaults)
7016 self.instance = instance
7018 def Exec(self, feedback_fn):
7019 """Reinstall the instance.
7022 inst = self.instance
7024 if self.op.os_type is not None:
7025 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7026 inst.os = self.op.os_type
7027 # Write to configuration
7028 self.cfg.Update(inst, feedback_fn)
7030 _StartInstanceDisks(self, inst, None)
7032 feedback_fn("Running the instance OS create scripts...")
7033 # FIXME: pass debug option from opcode to backend
7034 result = self.rpc.call_instance_os_add(inst.primary_node,
7035 (inst, self.os_inst), True,
7036 self.op.debug_level)
7037 result.Raise("Could not install OS for instance %s on node %s" %
7038 (inst.name, inst.primary_node))
7040 _ShutdownInstanceDisks(self, inst)
7043 class LUInstanceRecreateDisks(LogicalUnit):
7044 """Recreate an instance's missing disks.
7047 HPATH = "instance-recreate-disks"
7048 HTYPE = constants.HTYPE_INSTANCE
7051 _MODIFYABLE = frozenset([
7052 constants.IDISK_SIZE,
7053 constants.IDISK_MODE,
7056 # New or changed disk parameters may have different semantics
7057 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7058 constants.IDISK_ADOPT,
7060 # TODO: Implement support changing VG while recreating
7062 constants.IDISK_METAVG,
7065 def CheckArguments(self):
7066 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7067 # Normalize and convert deprecated list of disk indices
7068 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
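# For example, a deprecated request of disks=[2, 0] is normalized to
# [(0, {}), (2, {})], i.e. (index, parameter overrides) pairs with empty
# overrides.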
7070 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7072 raise errors.OpPrereqError("Some disks have been specified more than"
7073 " once: %s" % utils.CommaJoin(duplicates),
7076 for (idx, params) in self.op.disks:
7077 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7078 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7080 raise errors.OpPrereqError("Parameters for disk %s try to change"
7081 " unmodifyable parameter(s): %s" %
7082 (idx, utils.CommaJoin(unsupported)),
7085 def ExpandNames(self):
7086 self._ExpandAndLockInstance()
7087 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7089 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7090 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7092 self.needed_locks[locking.LEVEL_NODE] = []
7093 self.needed_locks[locking.LEVEL_NODE_RES] = []
7095 def DeclareLocks(self, level):
7096 if level == locking.LEVEL_NODE:
7097 # if we replace the nodes, we only need to lock the old primary,
7098 # otherwise we need to lock all nodes for disk re-creation
7099 primary_only = bool(self.op.nodes)
7100 self._LockInstancesNodes(primary_only=primary_only)
7101 elif level == locking.LEVEL_NODE_RES:
7103 self.needed_locks[locking.LEVEL_NODE_RES] = \
7104 self.needed_locks[locking.LEVEL_NODE][:]
7106 def BuildHooksEnv(self):
7109 This runs on master, primary and secondary nodes of the instance.
7112 return _BuildInstanceHookEnvByObject(self, self.instance)
7114 def BuildHooksNodes(self):
7115 """Build hooks nodes.
7118 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7121 def CheckPrereq(self):
7122 """Check prerequisites.
7124 This checks that the instance is in the cluster and is not running.
7127 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7128 assert instance is not None, \
7129 "Cannot retrieve locked instance %s" % self.op.instance_name
7131 if len(self.op.nodes) != len(instance.all_nodes):
7132 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7133 " %d replacement nodes were specified" %
7134 (instance.name, len(instance.all_nodes),
7135 len(self.op.nodes)),
7137 assert instance.disk_template != constants.DT_DRBD8 or \
7138 len(self.op.nodes) == 2
7139 assert instance.disk_template != constants.DT_PLAIN or \
7140 len(self.op.nodes) == 1
7141 primary_node = self.op.nodes[0]
7143 primary_node = instance.primary_node
7144 _CheckNodeOnline(self, primary_node)
7146 if instance.disk_template == constants.DT_DISKLESS:
7147 raise errors.OpPrereqError("Instance '%s' has no disks" %
7148 self.op.instance_name, errors.ECODE_INVAL)
7150 # if we replace nodes *and* the old primary is offline, we don't
7152 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7153 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7154 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7155 if not (self.op.nodes and old_pnode.offline):
7156 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7157 msg="cannot recreate disks")
7160 self.disks = dict(self.op.disks)
7162 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7164 maxidx = max(self.disks.keys())
7165 if maxidx >= len(instance.disks):
7166 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7169 if (self.op.nodes and
7170 sorted(self.disks.keys()) != range(len(instance.disks))):
7171 raise errors.OpPrereqError("Can't recreate disks partially and"
7172 " change the nodes at the same time",
7175 self.instance = instance
7177 def Exec(self, feedback_fn):
7178 """Recreate the disks.
7181 instance = self.instance
7183 assert (self.owned_locks(locking.LEVEL_NODE) ==
7184 self.owned_locks(locking.LEVEL_NODE_RES))
7187 mods = [] # keeps track of needed changes
7189 for idx, disk in enumerate(instance.disks):
7191 changes = self.disks[idx]
7193 # Disk should not be recreated
7197 # update secondaries for disks, if needed
7198 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7199 # need to update the nodes and minors
7200 assert len(self.op.nodes) == 2
7201 assert len(disk.logical_id) == 6 # otherwise disk internals
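# The DRBD8 logical_id is a 6-tuple of
# (node_a, node_b, port, minor_a, minor_b, secret); only the port and the
# shared secret are reused, nodes and minors are re-allocated below.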
7203 (_, _, old_port, _, _, old_secret) = disk.logical_id
7204 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7205 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7206 new_minors[0], new_minors[1], old_secret)
7207 assert len(disk.logical_id) == len(new_id)
7211 mods.append((idx, new_id, changes))
7213 # now that we have passed all asserts above, we can apply the mods
7214 # in a single run (to avoid partial changes)
7215 for idx, new_id, changes in mods:
7216 disk = instance.disks[idx]
7217 if new_id is not None:
7218 assert disk.dev_type == constants.LD_DRBD8
7219 disk.logical_id = new_id
7221 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7222 mode=changes.get(constants.IDISK_MODE, None))
7224 # change primary node, if needed
7226 instance.primary_node = self.op.nodes[0]
7227 self.LogWarning("Changing the instance's nodes, you will have to"
7228 " remove any disks left on the older nodes manually")
7231 self.cfg.Update(instance, feedback_fn)
7233 _CreateDisks(self, instance, to_skip=to_skip)
7236 class LUInstanceRename(LogicalUnit):
7237 """Rename an instance.
7240 HPATH = "instance-rename"
7241 HTYPE = constants.HTYPE_INSTANCE
7243 def CheckArguments(self):
7247 if self.op.ip_check and not self.op.name_check:
7248 # TODO: make the ip check more flexible and not depend on the name check
7249 raise errors.OpPrereqError("IP address check requires a name check",
7252 def BuildHooksEnv(self):
7255 This runs on master, primary and secondary nodes of the instance.
7258 env = _BuildInstanceHookEnvByObject(self, self.instance)
7259 env["INSTANCE_NEW_NAME"] = self.op.new_name
7262 def BuildHooksNodes(self):
7263 """Build hooks nodes.
7266 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7269 def CheckPrereq(self):
7270 """Check prerequisites.
7272 This checks that the instance is in the cluster and is not running.
7275 self.op.instance_name = _ExpandInstanceName(self.cfg,
7276 self.op.instance_name)
7277 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7278 assert instance is not None
7279 _CheckNodeOnline(self, instance.primary_node)
7280 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7281 msg="cannot rename")
7282 self.instance = instance
7284 new_name = self.op.new_name
7285 if self.op.name_check:
7286 hostname = netutils.GetHostname(name=new_name)
7287 if hostname.name != new_name:
7288 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7290 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7291 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7292 " same as given hostname '%s'") %
7293 (hostname.name, self.op.new_name),
7295 new_name = self.op.new_name = hostname.name
7296 if (self.op.ip_check and
7297 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7298 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7299 (hostname.ip, new_name),
7300 errors.ECODE_NOTUNIQUE)
7302 instance_list = self.cfg.GetInstanceList()
7303 if new_name in instance_list and new_name != instance.name:
7304 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7305 new_name, errors.ECODE_EXISTS)
7307 def Exec(self, feedback_fn):
7308 """Rename the instance.
7311 inst = self.instance
7312 old_name = inst.name
7314 rename_file_storage = False
7315 if (inst.disk_template in constants.DTS_FILEBASED and
7316 self.op.new_name != inst.name):
7317 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7318 rename_file_storage = True
7320 self.cfg.RenameInstance(inst.name, self.op.new_name)
7321 # Change the instance lock. This is definitely safe while we hold the BGL.
7322 # Otherwise the new lock would have to be added in acquired mode.
7324 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7325 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7327 # re-read the instance from the configuration after rename
7328 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7330 if rename_file_storage:
7331 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7332 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7333 old_file_storage_dir,
7334 new_file_storage_dir)
7335 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7336 " (but the instance has been renamed in Ganeti)" %
7337 (inst.primary_node, old_file_storage_dir,
7338 new_file_storage_dir))
7340 _StartInstanceDisks(self, inst, None)
7342 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7343 old_name, self.op.debug_level)
7344 msg = result.fail_msg
7346 msg = ("Could not run OS rename script for instance %s on node %s"
7347 " (but the instance has been renamed in Ganeti): %s" %
7348 (inst.name, inst.primary_node, msg))
7349 self.proc.LogWarning(msg)
7351 _ShutdownInstanceDisks(self, inst)
7356 class LUInstanceRemove(LogicalUnit):
7357 """Remove an instance.
7360 HPATH = "instance-remove"
7361 HTYPE = constants.HTYPE_INSTANCE
7364 def ExpandNames(self):
7365 self._ExpandAndLockInstance()
7366 self.needed_locks[locking.LEVEL_NODE] = []
7367 self.needed_locks[locking.LEVEL_NODE_RES] = []
7368 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7370 def DeclareLocks(self, level):
7371 if level == locking.LEVEL_NODE:
7372 self._LockInstancesNodes()
7373 elif level == locking.LEVEL_NODE_RES:
7375 self.needed_locks[locking.LEVEL_NODE_RES] = \
7376 self.needed_locks[locking.LEVEL_NODE][:]
7378 def BuildHooksEnv(self):
7381 This runs on master, primary and secondary nodes of the instance.
7384 env = _BuildInstanceHookEnvByObject(self, self.instance)
7385 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7388 def BuildHooksNodes(self):
7389 """Build hooks nodes.
7392 nl = [self.cfg.GetMasterNode()]
7393 nl_post = list(self.instance.all_nodes) + nl
7394 return (nl, nl_post)
7396 def CheckPrereq(self):
7397 """Check prerequisites.
7399 This checks that the instance is in the cluster.
7402 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7403 assert self.instance is not None, \
7404 "Cannot retrieve locked instance %s" % self.op.instance_name
7406 def Exec(self, feedback_fn):
7407 """Remove the instance.
7410 instance = self.instance
7411 logging.info("Shutting down instance %s on node %s",
7412 instance.name, instance.primary_node)
7414 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7415 self.op.shutdown_timeout)
7416 msg = result.fail_msg
7418 if self.op.ignore_failures:
7419 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7421 raise errors.OpExecError("Could not shutdown instance %s on"
7423 (instance.name, instance.primary_node, msg))
7425 assert (self.owned_locks(locking.LEVEL_NODE) ==
7426 self.owned_locks(locking.LEVEL_NODE_RES))
7427 assert not (set(instance.all_nodes) -
7428 self.owned_locks(locking.LEVEL_NODE)), \
7429 "Not owning correct locks"
7431 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7434 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7435 """Utility function to remove an instance.
7438 logging.info("Removing block devices for instance %s", instance.name)
7440 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7441 if not ignore_failures:
7442 raise errors.OpExecError("Can't remove instance's disks")
7443 feedback_fn("Warning: can't remove instance's disks")
7445 logging.info("Removing instance %s out of cluster config", instance.name)
7447 lu.cfg.RemoveInstance(instance.name)
7449 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7450 "Instance lock removal conflict"
7452 # Remove lock for the instance
7453 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
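# The instance lock is not released here directly; listing it in
# remove_locks lets the LU processor drop it once the operation has
# completed.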
7456 class LUInstanceQuery(NoHooksLU):
7457 """Logical unit for querying instances.
7460 # pylint: disable=W0142
7463 def CheckArguments(self):
7464 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7465 self.op.output_fields, self.op.use_locking)
7467 def ExpandNames(self):
7468 self.iq.ExpandNames(self)
7470 def DeclareLocks(self, level):
7471 self.iq.DeclareLocks(self, level)
7473 def Exec(self, feedback_fn):
7474 return self.iq.OldStyleQuery(self)
7477 class LUInstanceFailover(LogicalUnit):
7478 """Failover an instance.
7481 HPATH = "instance-failover"
7482 HTYPE = constants.HTYPE_INSTANCE
7485 def CheckArguments(self):
7486 """Check the arguments.
7489 self.iallocator = getattr(self.op, "iallocator", None)
7490 self.target_node = getattr(self.op, "target_node", None)
7492 def ExpandNames(self):
7493 self._ExpandAndLockInstance()
7495 if self.op.target_node is not None:
7496 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7498 self.needed_locks[locking.LEVEL_NODE] = []
7499 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7501 self.needed_locks[locking.LEVEL_NODE_RES] = []
7502 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7504 ignore_consistency = self.op.ignore_consistency
7505 shutdown_timeout = self.op.shutdown_timeout
7506 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7509 ignore_consistency=ignore_consistency,
7510 shutdown_timeout=shutdown_timeout,
7511 ignore_ipolicy=self.op.ignore_ipolicy)
7512 self.tasklets = [self._migrater]
7514 def DeclareLocks(self, level):
7515 if level == locking.LEVEL_NODE:
7516 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7517 if instance.disk_template in constants.DTS_EXT_MIRROR:
7518 if self.op.target_node is None:
7519 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7521 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7522 self.op.target_node]
7523 del self.recalculate_locks[locking.LEVEL_NODE]
7525 self._LockInstancesNodes()
7526 elif level == locking.LEVEL_NODE_RES:
7528 self.needed_locks[locking.LEVEL_NODE_RES] = \
7529 self.needed_locks[locking.LEVEL_NODE][:]
7531 def BuildHooksEnv(self):
7534 This runs on master, primary and secondary nodes of the instance.
7537 instance = self._migrater.instance
7538 source_node = instance.primary_node
7539 target_node = self.op.target_node
7541 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7542 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7543 "OLD_PRIMARY": source_node,
7544 "NEW_PRIMARY": target_node,
7547 if instance.disk_template in constants.DTS_INT_MIRROR:
7548 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7549 env["NEW_SECONDARY"] = source_node
7551 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7553 env.update(_BuildInstanceHookEnvByObject(self, instance))
7557 def BuildHooksNodes(self):
7558 """Build hooks nodes.
7561 instance = self._migrater.instance
7562 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7563 return (nl, nl + [instance.primary_node])
7566 class LUInstanceMigrate(LogicalUnit):
7567 """Migrate an instance.
7569 This is migration without shutting down, compared to the failover,
7570 which is done with shutdown.
7573 HPATH = "instance-migrate"
7574 HTYPE = constants.HTYPE_INSTANCE
7577 def ExpandNames(self):
7578 self._ExpandAndLockInstance()
7580 if self.op.target_node is not None:
7581 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7583 self.needed_locks[locking.LEVEL_NODE] = []
7584 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7586 self.needed_locks[locking.LEVEL_NODE] = []
7587 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7590 TLMigrateInstance(self, self.op.instance_name,
7591 cleanup=self.op.cleanup,
7593 fallback=self.op.allow_failover,
7594 allow_runtime_changes=self.op.allow_runtime_changes,
7595 ignore_ipolicy=self.op.ignore_ipolicy)
7596 self.tasklets = [self._migrater]
7598 def DeclareLocks(self, level):
7599 if level == locking.LEVEL_NODE:
7600 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7601 if instance.disk_template in constants.DTS_EXT_MIRROR:
7602 if self.op.target_node is None:
7603 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7605 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7606 self.op.target_node]
7607 del self.recalculate_locks[locking.LEVEL_NODE]
7609 self._LockInstancesNodes()
7610 elif level == locking.LEVEL_NODE_RES:
7612 self.needed_locks[locking.LEVEL_NODE_RES] = \
7613 self.needed_locks[locking.LEVEL_NODE][:]
7615 def BuildHooksEnv(self):
7618 This runs on master, primary and secondary nodes of the instance.
7621 instance = self._migrater.instance
7622 source_node = instance.primary_node
7623 target_node = self.op.target_node
7624 env = _BuildInstanceHookEnvByObject(self, instance)
7626 "MIGRATE_LIVE": self._migrater.live,
7627 "MIGRATE_CLEANUP": self.op.cleanup,
7628 "OLD_PRIMARY": source_node,
7629 "NEW_PRIMARY": target_node,
7630 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7633 if instance.disk_template in constants.DTS_INT_MIRROR:
7634 env["OLD_SECONDARY"] = target_node
7635 env["NEW_SECONDARY"] = source_node
7637 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7641 def BuildHooksNodes(self):
7642 """Build hooks nodes.
7645 instance = self._migrater.instance
7646 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7647 return (nl, nl + [instance.primary_node])
7650 class LUInstanceMove(LogicalUnit):
7651 """Move an instance by data-copying.
7654 HPATH = "instance-move"
7655 HTYPE = constants.HTYPE_INSTANCE
7658 def ExpandNames(self):
7659 self._ExpandAndLockInstance()
7660 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7661 self.op.target_node = target_node
7662 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7663 self.needed_locks[locking.LEVEL_NODE_RES] = []
7664 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7666 def DeclareLocks(self, level):
7667 if level == locking.LEVEL_NODE:
7668 self._LockInstancesNodes(primary_only=True)
7669 elif level == locking.LEVEL_NODE_RES:
7671 self.needed_locks[locking.LEVEL_NODE_RES] = \
7672 self.needed_locks[locking.LEVEL_NODE][:]
7674 def BuildHooksEnv(self):
7677 This runs on master, primary and secondary nodes of the instance.
7681 "TARGET_NODE": self.op.target_node,
7682 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7684 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7687 def BuildHooksNodes(self):
7688 """Build hooks nodes.
7692 self.cfg.GetMasterNode(),
7693 self.instance.primary_node,
7694 self.op.target_node,
7698 def CheckPrereq(self):
7699 """Check prerequisites.
7701 This checks that the instance is in the cluster.
7704 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7705 assert self.instance is not None, \
7706 "Cannot retrieve locked instance %s" % self.op.instance_name
7708 node = self.cfg.GetNodeInfo(self.op.target_node)
7709 assert node is not None, \
7710 "Cannot retrieve locked node %s" % self.op.target_node
7712 self.target_node = target_node = node.name
7714 if target_node == instance.primary_node:
7715 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7716 (instance.name, target_node),
7719 bep = self.cfg.GetClusterInfo().FillBE(instance)
7721 for idx, dsk in enumerate(instance.disks):
7722 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7723 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7724 " cannot copy" % idx, errors.ECODE_STATE)
7726 _CheckNodeOnline(self, target_node)
7727 _CheckNodeNotDrained(self, target_node)
7728 _CheckNodeVmCapable(self, target_node)
7729 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7730 self.cfg.GetNodeGroup(node.group))
7731 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7732 ignore=self.op.ignore_ipolicy)
7734 if instance.admin_state == constants.ADMINST_UP:
7735 # check memory requirements on the secondary node
7736 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7737 instance.name, bep[constants.BE_MAXMEM],
7738 instance.hypervisor)
7740 self.LogInfo("Not checking memory on the secondary node as"
7741 " instance will not be started")
7743 # check bridge existence
7744 _CheckInstanceBridgesExist(self, instance, node=target_node)
7746 def Exec(self, feedback_fn):
7747 """Move an instance.
7749 The move is done by shutting it down on its present node, copying
7750 the data over (slow) and starting it on the new node.
7753 instance = self.instance
7755 source_node = instance.primary_node
7756 target_node = self.target_node
7758 self.LogInfo("Shutting down instance %s on source node %s",
7759 instance.name, source_node)
7761 assert (self.owned_locks(locking.LEVEL_NODE) ==
7762 self.owned_locks(locking.LEVEL_NODE_RES))
7764 result = self.rpc.call_instance_shutdown(source_node, instance,
7765 self.op.shutdown_timeout)
7766 msg = result.fail_msg
7767 if msg:
7768 if self.op.ignore_consistency:
7769 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7770 " Proceeding anyway. Please make sure node"
7771 " %s is down. Error details: %s",
7772 instance.name, source_node, source_node, msg)
7773 else:
7774 raise errors.OpExecError("Could not shutdown instance %s on"
7775 " node %s: %s" %
7776 (instance.name, source_node, msg))
7778 # create the target disks
7780 _CreateDisks(self, instance, target_node=target_node)
7781 except errors.OpExecError:
7782 self.LogWarning("Device creation failed, reverting...")
7784 _RemoveDisks(self, instance, target_node=target_node)
7786 self.cfg.ReleaseDRBDMinors(instance.name)
7789 cluster_name = self.cfg.GetClusterInfo().cluster_name
7792 # activate, get path, copy the data over
7793 for idx, disk in enumerate(instance.disks):
7794 self.LogInfo("Copying data for disk %d", idx)
7795 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7796 instance.name, True, idx)
7798 self.LogWarning("Can't assemble newly created disk %d: %s",
7799 idx, result.fail_msg)
7800 errs.append(result.fail_msg)
7802 dev_path = result.payload
7803 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7804 target_node, dev_path,
7807 self.LogWarning("Can't copy data over for disk %d: %s",
7808 idx, result.fail_msg)
7809 errs.append(result.fail_msg)
7813 self.LogWarning("Some disks failed to copy, aborting")
7815 _RemoveDisks(self, instance, target_node=target_node)
7817 self.cfg.ReleaseDRBDMinors(instance.name)
7818 raise errors.OpExecError("Errors during disk copy: %s" %
7821 instance.primary_node = target_node
7822 self.cfg.Update(instance, feedback_fn)
7824 self.LogInfo("Removing the disks on the original node")
7825 _RemoveDisks(self, instance, target_node=source_node)
7827 # Only start the instance if it's marked as up
7828 if instance.admin_state == constants.ADMINST_UP:
7829 self.LogInfo("Starting instance %s on node %s",
7830 instance.name, target_node)
7832 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7833 ignore_secondaries=True)
7835 _ShutdownInstanceDisks(self, instance)
7836 raise errors.OpExecError("Can't activate the instance's disks")
7838 result = self.rpc.call_instance_start(target_node,
7839 (instance, None, None), False)
7840 msg = result.fail_msg
7842 _ShutdownInstanceDisks(self, instance)
7843 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7844 (instance.name, target_node, msg))
7847 class LUNodeMigrate(LogicalUnit):
7848 """Migrate all instances from a node.
7851 HPATH = "node-migrate"
7852 HTYPE = constants.HTYPE_NODE
7855 def CheckArguments(self):
7858 def ExpandNames(self):
7859 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7861 self.share_locks = _ShareAll()
7862 self.needed_locks = {
7863 locking.LEVEL_NODE: [self.op.node_name],
7866 def BuildHooksEnv(self):
7869 This runs on the master, the primary and all the secondaries.
7873 "NODE_NAME": self.op.node_name,
7874 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7877 def BuildHooksNodes(self):
7878 """Build hooks nodes.
7881 nl = [self.cfg.GetMasterNode()]
7884 def CheckPrereq(self):
7887 def Exec(self, feedback_fn):
7888 # Prepare jobs for migration instances
7889 allow_runtime_changes = self.op.allow_runtime_changes
7891 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7894 iallocator=self.op.iallocator,
7895 target_node=self.op.target_node,
7896 allow_runtime_changes=allow_runtime_changes,
7897 ignore_ipolicy=self.op.ignore_ipolicy)]
7898 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7901 # TODO: Run iallocator in this opcode and pass correct placement options to
7902 # OpInstanceMigrate. Since other jobs can modify the cluster between
7903 # running the iallocator and the actual migration, a good consistency model
7904 # will have to be found.
7906 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7907 frozenset([self.op.node_name]))
7909 return ResultWithJobs(jobs)
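# Illustrative shape of the submitted jobs (hypothetical instance names): for
# a node whose primary instances are "inst1" and "inst2", "jobs" is a list of
# single-opcode jobs such as
#   [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]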
7912 class TLMigrateInstance(Tasklet):
7913 """Tasklet class for instance migration.
7916 @ivar live: whether the migration will be done live or non-live;
7917 this variable is initialized only after CheckPrereq has run
7918 @type cleanup: boolean
7919 @ivar cleanup: Whether we clean up from a failed migration
7920 @type iallocator: string
7921 @ivar iallocator: The iallocator used to determine target_node
7922 @type target_node: string
7923 @ivar target_node: If given, the target_node to reallocate the instance to
7924 @type failover: boolean
7925 @ivar failover: Whether operation results in failover or migration
7926 @type fallback: boolean
7927 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7929 @type ignore_consistency: boolean
7930 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
7932 @type shutdown_timeout: int
7933 @ivar shutdown_timeout: in case of failover, the timeout to use for the shutdown
7934 @type ignore_ipolicy: bool
7935 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7940 _MIGRATION_POLL_INTERVAL = 1 # seconds
7941 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7943 def __init__(self, lu, instance_name, cleanup=False,
7944 failover=False, fallback=False,
7945 ignore_consistency=False,
7946 allow_runtime_changes=True,
7947 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7948 ignore_ipolicy=False):
7949 """Initializes this class.
7952 Tasklet.__init__(self, lu)
7955 self.instance_name = instance_name
7956 self.cleanup = cleanup
7957 self.live = False # will be overridden later
7958 self.failover = failover
7959 self.fallback = fallback
7960 self.ignore_consistency = ignore_consistency
7961 self.shutdown_timeout = shutdown_timeout
7962 self.ignore_ipolicy = ignore_ipolicy
7963 self.allow_runtime_changes = allow_runtime_changes
7965 def CheckPrereq(self):
7966 """Check prerequisites.
7968 This checks that the instance is in the cluster.
7971 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7972 instance = self.cfg.GetInstanceInfo(instance_name)
7973 assert instance is not None
7974 self.instance = instance
7975 cluster = self.cfg.GetClusterInfo()
7977 if (not self.cleanup and
7978 not instance.admin_state == constants.ADMINST_UP and
7979 not self.failover and self.fallback):
7980 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7981 " switching to failover")
7982 self.failover = True
7984 if instance.disk_template not in constants.DTS_MIRRORED:
7985 if self.failover:
7986 text = "failover"
7987 else:
7988 text = "migration"
7989 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7990 " %s" % (instance.disk_template, text),
7993 if instance.disk_template in constants.DTS_EXT_MIRROR:
7994 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7996 if self.lu.op.iallocator:
7997 self._RunAllocator()
7998 else:
7999 # We set self.target_node as it is required by
8001 self.target_node = self.lu.op.target_node
8003 # Check that the target node is correct in terms of instance policy
8004 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8005 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8006 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8007 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8008 ignore=self.ignore_ipolicy)
8010 # self.target_node is already populated, either directly or by the
8012 target_node = self.target_node
8013 if self.target_node == instance.primary_node:
8014 raise errors.OpPrereqError("Cannot migrate instance %s"
8015 " to its primary (%s)" %
8016 (instance.name, instance.primary_node))
8018 if len(self.lu.tasklets) == 1:
8019 # It is safe to release locks only when we're the only tasklet
8021 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8022 keep=[instance.primary_node, self.target_node])
8025 secondary_nodes = instance.secondary_nodes
8026 if not secondary_nodes:
8027 raise errors.ConfigurationError("No secondary node but using"
8028 " %s disk template" %
8029 instance.disk_template)
8030 target_node = secondary_nodes[0]
8031 if self.lu.op.iallocator or (self.lu.op.target_node and
8032 self.lu.op.target_node != target_node):
8033 if self.failover:
8034 text = "failed over"
8035 else:
8036 text = "migrated"
8037 raise errors.OpPrereqError("Instances with disk template %s cannot"
8038 " be %s to arbitrary nodes"
8039 " (neither an iallocator nor a target"
8040 " node can be passed)" %
8041 (instance.disk_template, text),
8043 nodeinfo = self.cfg.GetNodeInfo(target_node)
8044 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8045 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8046 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8047 ignore=self.ignore_ipolicy)
8049 i_be = cluster.FillBE(instance)
8051 # check memory requirements on the secondary node
8052 if (not self.cleanup and
8053 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8054 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8055 "migrating instance %s" %
8057 i_be[constants.BE_MINMEM],
8058 instance.hypervisor)
8060 self.lu.LogInfo("Not checking memory on the secondary node as"
8061 " instance will not be started")
8063 # check if failover must be forced instead of migration
8064 if (not self.cleanup and not self.failover and
8065 i_be[constants.BE_ALWAYS_FAILOVER]):
8067 self.lu.LogInfo("Instance configured to always failover; fallback"
8069 self.failover = True
8071 raise errors.OpPrereqError("This instance has been configured to"
8072 " always failover, please allow failover",
8075 # check bridge existence
8076 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8078 if not self.cleanup:
8079 _CheckNodeNotDrained(self.lu, target_node)
8080 if not self.failover:
8081 result = self.rpc.call_instance_migratable(instance.primary_node,
8083 if result.fail_msg and self.fallback:
8084 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8086 self.failover = True
8088 result.Raise("Can't migrate, please use failover",
8089 prereq=True, ecode=errors.ECODE_STATE)
8091 assert not (self.failover and self.cleanup)
8093 if not self.failover:
8094 if self.lu.op.live is not None and self.lu.op.mode is not None:
8095 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8096 " parameters are accepted",
8098 if self.lu.op.live is not None:
8099 if self.lu.op.live:
8100 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8101 else:
8102 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8103 # reset the 'live' parameter to None so that repeated
8104 # invocations of CheckPrereq do not raise an exception
8105 self.lu.op.live = None
8106 elif self.lu.op.mode is None:
8107 # read the default value from the hypervisor
8108 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8109 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8111 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8112 else:
8113 # Failover is never live
8114 self.live = False
8116 if not (self.failover or self.cleanup):
8117 remote_info = self.rpc.call_instance_info(instance.primary_node,
8119 instance.hypervisor)
8120 remote_info.Raise("Error checking instance on node %s" %
8121 instance.primary_node)
8122 instance_running = bool(remote_info.payload)
8123 if instance_running:
8124 self.current_mem = int(remote_info.payload["memory"])
8126 def _RunAllocator(self):
8127 """Run the allocator based on input opcode.
8130 # FIXME: add a self.ignore_ipolicy option
8131 ial = IAllocator(self.cfg, self.rpc,
8132 mode=constants.IALLOCATOR_MODE_RELOC,
8133 name=self.instance_name,
8134 relocate_from=[self.instance.primary_node],
8137 ial.Run(self.lu.op.iallocator)
8140 raise errors.OpPrereqError("Can't compute nodes using"
8141 " iallocator '%s': %s" %
8142 (self.lu.op.iallocator, ial.info),
8144 if len(ial.result) != ial.required_nodes:
8145 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8146 " of nodes (%s), required %s" %
8147 (self.lu.op.iallocator, len(ial.result),
8148 ial.required_nodes), errors.ECODE_FAULT)
8149 self.target_node = ial.result[0]
8150 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8151 self.instance_name, self.lu.op.iallocator,
8152 utils.CommaJoin(ial.result))
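# In IALLOCATOR_MODE_RELOC the allocator is asked to move the instance away
# from its current primary node; the first entry of the returned node list is
# used as the migration/failover target above.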
8154 def _WaitUntilSync(self):
8155 """Poll with custom rpc for disk sync.
8157 This uses our own step-based rpc call.
8160 self.feedback_fn("* wait until resync is done")
8164 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8166 (self.instance.disks,
8169 for node, nres in result.items():
8170 nres.Raise("Cannot resync disks on node %s" % node)
8171 node_done, node_percent = nres.payload
8172 all_done = all_done and node_done
8173 if node_percent is not None:
8174 min_percent = min(min_percent, node_percent)
8176 if min_percent < 100:
8177 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8180 def _EnsureSecondary(self, node):
8181 """Demote a node to secondary.
8184 self.feedback_fn("* switching node %s to secondary mode" % node)
8186 for dev in self.instance.disks:
8187 self.cfg.SetDiskID(dev, node)
8189 result = self.rpc.call_blockdev_close(node, self.instance.name,
8190 self.instance.disks)
8191 result.Raise("Cannot change disk to secondary on node %s" % node)
8193 def _GoStandalone(self):
8194 """Disconnect from the network.
8197 self.feedback_fn("* changing into standalone mode")
8198 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8199 self.instance.disks)
8200 for node, nres in result.items():
8201 nres.Raise("Cannot disconnect disks on node %s" % node)
8203 def _GoReconnect(self, multimaster):
8204 """Reconnect to the network.
8207 if multimaster:
8208 msg = "dual-master"
8209 else:
8210 msg = "single-master"
8211 self.feedback_fn("* changing disks into %s mode" % msg)
8212 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8213 (self.instance.disks, self.instance),
8214 self.instance.name, multimaster)
8215 for node, nres in result.items():
8216 nres.Raise("Cannot change disks config on node %s" % node)
8218 def _ExecCleanup(self):
8219 """Try to cleanup after a failed migration.
8221 The cleanup is done by:
8222 - check that the instance is running only on one node
8223 (and update the config if needed)
8224 - change disks on its secondary node to secondary
8225 - wait until disks are fully synchronized
8226 - disconnect from the network
8227 - change disks into single-master mode
8228 - wait again until disks are fully synchronized
8231 instance = self.instance
8232 target_node = self.target_node
8233 source_node = self.source_node
8235 # check running on only one node
8236 self.feedback_fn("* checking where the instance actually runs"
8237 " (if this hangs, the hypervisor might be in"
8239 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8240 for node, result in ins_l.items():
8241 result.Raise("Can't contact node %s" % node)
8243 runningon_source = instance.name in ins_l[source_node].payload
8244 runningon_target = instance.name in ins_l[target_node].payload
8246 if runningon_source and runningon_target:
8247 raise errors.OpExecError("Instance seems to be running on two nodes,"
8248 " or the hypervisor is confused; you will have"
8249 " to ensure manually that it runs only on one"
8250 " and restart this operation")
8252 if not (runningon_source or runningon_target):
8253 raise errors.OpExecError("Instance does not seem to be running at all;"
8254 " in this case it's safer to repair by"
8255 " running 'gnt-instance stop' to ensure disk"
8256 " shutdown, and then restarting it")
8258 if runningon_target:
8259 # the migration has actually succeeded, we need to update the config
8260 self.feedback_fn("* instance running on secondary node (%s),"
8261 " updating config" % target_node)
8262 instance.primary_node = target_node
8263 self.cfg.Update(instance, self.feedback_fn)
8264 demoted_node = source_node
8266 self.feedback_fn("* instance confirmed to be running on its"
8267 " primary node (%s)" % source_node)
8268 demoted_node = target_node
8270 if instance.disk_template in constants.DTS_INT_MIRROR:
8271 self._EnsureSecondary(demoted_node)
8273 self._WaitUntilSync()
8274 except errors.OpExecError:
8275 # we ignore here errors, since if the device is standalone, it
8276 # won't be able to sync
8278 self._GoStandalone()
8279 self._GoReconnect(False)
8280 self._WaitUntilSync()
8282 self.feedback_fn("* done")
8284 def _RevertDiskStatus(self):
8285 """Try to revert the disk status after a failed migration.
8288 target_node = self.target_node
8289 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8293 self._EnsureSecondary(target_node)
8294 self._GoStandalone()
8295 self._GoReconnect(False)
8296 self._WaitUntilSync()
8297 except errors.OpExecError, err:
8298 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8299 " please try to recover the instance manually;"
8300 " error '%s'" % str(err))
8302 def _AbortMigration(self):
8303 """Call the hypervisor code to abort a started migration.
8306 instance = self.instance
8307 target_node = self.target_node
8308 source_node = self.source_node
8309 migration_info = self.migration_info
8311 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8315 abort_msg = abort_result.fail_msg
8317 logging.error("Aborting migration failed on target node %s: %s",
8318 target_node, abort_msg)
8319 # Don't raise an exception here, as we still have to try to revert the
8320 # disk status, even if this step failed.
8322 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8323 instance, False, self.live)
8324 abort_msg = abort_result.fail_msg
8326 logging.error("Aborting migration failed on source node %s: %s",
8327 source_node, abort_msg)
8329 def _ExecMigration(self):
8330 """Migrate an instance.
8332 The migrate is done by:
8333 - change the disks into dual-master mode
8334 - wait until disks are fully synchronized again
8335 - migrate the instance
8336 - change disks on the new secondary node (the old primary) to secondary
8337 - wait until disks are fully synchronized
8338 - change disks into single-master mode
8341 instance = self.instance
8342 target_node = self.target_node
8343 source_node = self.source_node
8345 # Check for hypervisor version mismatch and warn the user.
8346 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8347 None, [self.instance.hypervisor])
8348 for ninfo in nodeinfo.values():
8349 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8350 ninfo.node)
8351 (_, _, (src_info, )) = nodeinfo[source_node].payload
8352 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8354 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8355 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8356 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8357 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8358 if src_version != dst_version:
8359 self.feedback_fn("* warning: hypervisor version mismatch between"
8360 " source (%s) and target (%s) node" %
8361 (src_version, dst_version))
8363 self.feedback_fn("* checking disk consistency between source and target")
8364 for (idx, dev) in enumerate(instance.disks):
8365 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8366 raise errors.OpExecError("Disk %s is degraded or not fully"
8367 " synchronized on target node,"
8368 " aborting migration" % idx)
8370 if self.current_mem > self.tgt_free_mem:
8371 if not self.allow_runtime_changes:
8372 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8373 " free memory to fit instance %s on target"
8374 " node %s (have %dMB, need %dMB)" %
8375 (instance.name, target_node,
8376 self.tgt_free_mem, self.current_mem))
8377 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8378 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8381 rpcres.Raise("Cannot modify instance runtime memory")
8383 # First get the migration information from the remote node
8384 result = self.rpc.call_migration_info(source_node, instance)
8385 msg = result.fail_msg
8387 log_err = ("Failed fetching source migration information from %s: %s" %
8388 (source_node, msg))
8389 logging.error(log_err)
8390 raise errors.OpExecError(log_err)
8392 self.migration_info = migration_info = result.payload
8394 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8395 # Then switch the disks to master/master mode
8396 self._EnsureSecondary(target_node)
8397 self._GoStandalone()
8398 self._GoReconnect(True)
8399 self._WaitUntilSync()
8401 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8402 result = self.rpc.call_accept_instance(target_node,
8405 self.nodes_ip[target_node])
8407 msg = result.fail_msg
8409 logging.error("Instance pre-migration failed, trying to revert"
8410 " disk status: %s", msg)
8411 self.feedback_fn("Pre-migration failed, aborting")
8412 self._AbortMigration()
8413 self._RevertDiskStatus()
8414 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8415 (instance.name, msg))
8417 self.feedback_fn("* migrating instance to %s" % target_node)
8418 result = self.rpc.call_instance_migrate(source_node, instance,
8419 self.nodes_ip[target_node],
8421 msg = result.fail_msg
8423 logging.error("Instance migration failed, trying to revert"
8424 " disk status: %s", msg)
8425 self.feedback_fn("Migration failed, aborting")
8426 self._AbortMigration()
8427 self._RevertDiskStatus()
8428 raise errors.OpExecError("Could not migrate instance %s: %s" %
8429 (instance.name, msg))
8431 self.feedback_fn("* starting memory transfer")
8432 last_feedback = time.time()
8434 result = self.rpc.call_instance_get_migration_status(source_node,
8436 msg = result.fail_msg
8437 ms = result.payload # MigrationStatus instance
8438 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8439 logging.error("Instance migration failed, trying to revert"
8440 " disk status: %s", msg)
8441 self.feedback_fn("Migration failed, aborting")
8442 self._AbortMigration()
8443 self._RevertDiskStatus()
8444 raise errors.OpExecError("Could not migrate instance %s: %s" %
8445 (instance.name, msg))
8447 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8448 self.feedback_fn("* memory transfer complete")
8451 if (utils.TimeoutExpired(last_feedback,
8452 self._MIGRATION_FEEDBACK_INTERVAL) and
8453 ms.transferred_ram is not None):
8454 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8455 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8456 last_feedback = time.time()
8458 time.sleep(self._MIGRATION_POLL_INTERVAL)
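# The polling loop above queries the source node every
# _MIGRATION_POLL_INTERVAL second and emits a progress line at most every
# _MIGRATION_FEEDBACK_INTERVAL seconds, until the hypervisor stops reporting
# the migration as active (or reports a failed status).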
8460 result = self.rpc.call_instance_finalize_migration_src(source_node,
8464 msg = result.fail_msg
8466 logging.error("Instance migration succeeded, but finalization failed"
8467 " on the source node: %s", msg)
8468 raise errors.OpExecError("Could not finalize instance migration: %s" %
8471 instance.primary_node = target_node
8473 # distribute new instance config to the other nodes
8474 self.cfg.Update(instance, self.feedback_fn)
8476 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8480 msg = result.fail_msg
8482 logging.error("Instance migration succeeded, but finalization failed"
8483 " on the target node: %s", msg)
8484 raise errors.OpExecError("Could not finalize instance migration: %s" %
8487 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8488 self._EnsureSecondary(source_node)
8489 self._WaitUntilSync()
8490 self._GoStandalone()
8491 self._GoReconnect(False)
8492 self._WaitUntilSync()
8494 # If the instance's disk template is `rbd' and there was a successful
8495 # migration, unmap the device from the source node.
8496 if self.instance.disk_template == constants.DT_RBD:
8497 disks = _ExpandCheckDisks(instance, instance.disks)
8498 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8499 for disk in disks:
8500 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8501 msg = result.fail_msg
8503 logging.error("Migration was successful, but couldn't unmap the"
8504 " block device %s on source node %s: %s",
8505 disk.iv_name, source_node, msg)
8506 logging.error("You need to unmap the device %s manually on %s",
8507 disk.iv_name, source_node)
8509 self.feedback_fn("* done")
8511 def _ExecFailover(self):
8512 """Failover an instance.
8514 The failover is done by shutting it down on its present node and
8515 starting it on the secondary.
8518 instance = self.instance
8519 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8521 source_node = instance.primary_node
8522 target_node = self.target_node
8524 if instance.admin_state == constants.ADMINST_UP:
8525 self.feedback_fn("* checking disk consistency between source and target")
8526 for (idx, dev) in enumerate(instance.disks):
8527 # for drbd, these are drbd over lvm
8528 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8530 if primary_node.offline:
8531 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8533 (primary_node.name, idx, target_node))
8534 elif not self.ignore_consistency:
8535 raise errors.OpExecError("Disk %s is degraded on target node,"
8536 " aborting failover" % idx)
8538 self.feedback_fn("* not checking disk consistency as instance is not"
8541 self.feedback_fn("* shutting down instance on source node")
8542 logging.info("Shutting down instance %s on node %s",
8543 instance.name, source_node)
8545 result = self.rpc.call_instance_shutdown(source_node, instance,
8546 self.shutdown_timeout)
8547 msg = result.fail_msg
8549 if self.ignore_consistency or primary_node.offline:
8550 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8551 " proceeding anyway; please make sure node"
8552 " %s is down; error details: %s",
8553 instance.name, source_node, source_node, msg)
8555 raise errors.OpExecError("Could not shutdown instance %s on"
8557 (instance.name, source_node, msg))
8559 self.feedback_fn("* deactivating the instance's disks on source node")
8560 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8561 raise errors.OpExecError("Can't shut down the instance's disks")
8563 instance.primary_node = target_node
8564 # distribute new instance config to the other nodes
8565 self.cfg.Update(instance, self.feedback_fn)
8567 # Only start the instance if it's marked as up
8568 if instance.admin_state == constants.ADMINST_UP:
8569 self.feedback_fn("* activating the instance's disks on target node %s" %
8571 logging.info("Starting instance %s on node %s",
8572 instance.name, target_node)
8574 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8575 ignore_secondaries=True)
8577 _ShutdownInstanceDisks(self.lu, instance)
8578 raise errors.OpExecError("Can't activate the instance's disks")
8580 self.feedback_fn("* starting the instance on the target node %s" %
8582 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8584 msg = result.fail_msg
8586 _ShutdownInstanceDisks(self.lu, instance)
8587 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8588 (instance.name, target_node, msg))
8590 def Exec(self, feedback_fn):
8591 """Perform the migration.
8594 self.feedback_fn = feedback_fn
8595 self.source_node = self.instance.primary_node
8597 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8598 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8599 self.target_node = self.instance.secondary_nodes[0]
8600 # Otherwise self.target_node has been populated either
8601 # directly, or through an iallocator.
8603 self.all_nodes = [self.source_node, self.target_node]
8604 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8605 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8608 feedback_fn("Failover instance %s" % self.instance.name)
8609 self._ExecFailover()
8611 feedback_fn("Migrating instance %s" % self.instance.name)
8614 return self._ExecCleanup()
8616 return self._ExecMigration()
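# Exec therefore ends up in exactly one of three code paths: _ExecFailover
# (failover requested or forced during CheckPrereq), _ExecCleanup (recovering
# from a previously failed migration) or _ExecMigration (the regular live or
# non-live migration).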
8619 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8621 """Wrapper around L{_CreateBlockDevInner}.
8623 This method annotates the root device first.
8626 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8627 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8631 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8633 """Create a tree of block devices on a given node.
8635 If this device type has to be created on secondaries, create it and
8638 If not, just recurse to children keeping the same 'force' value.
8640 @attention: The device has to be annotated already.
8642 @param lu: the lu on whose behalf we execute
8643 @param node: the node on which to create the device
8644 @type instance: L{objects.Instance}
8645 @param instance: the instance which owns the device
8646 @type device: L{objects.Disk}
8647 @param device: the device to create
8648 @type force_create: boolean
8649 @param force_create: whether to force creation of this device; this
8650 will be changed to True whenever we find a device which has
8651 CreateOnSecondary() attribute
8652 @param info: the extra 'metadata' we should attach to the device
8653 (this will be represented as a LVM tag)
8654 @type force_open: boolean
8655 @param force_open: this parameter will be passed to the
8656 L{backend.BlockdevCreate} function where it specifies
8657 whether we run on primary or not, and it affects both
8658 the child assembly and the device's own Open() execution
8661 if device.CreateOnSecondary():
8665 for child in device.children:
8666 _CreateBlockDevInner(lu, node, instance, child, force_create,
8669 if not force_create:
8672 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8675 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8676 """Create a single block device on a given node.
8678 This will not recurse over children of the device, so they must be
8681 @param lu: the lu on whose behalf we execute
8682 @param node: the node on which to create the device
8683 @type instance: L{objects.Instance}
8684 @param instance: the instance which owns the device
8685 @type device: L{objects.Disk}
8686 @param device: the device to create
8687 @param info: the extra 'metadata' we should attach to the device
8688 (this will be represented as a LVM tag)
8689 @type force_open: boolean
8690 @param force_open: this parameter will be passed to the
8691 L{backend.BlockdevCreate} function where it specifies
8692 whether we run on primary or not, and it affects both
8693 the child assembly and the device's own Open() execution
8696 lu.cfg.SetDiskID(device, node)
8697 result = lu.rpc.call_blockdev_create(node, device, device.size,
8698 instance.name, force_open, info)
8699 result.Raise("Can't create block device %s on"
8700 " node %s for instance %s" % (device, node, instance.name))
8701 if device.physical_id is None:
8702 device.physical_id = result.payload
8705 def _GenerateUniqueNames(lu, exts):
8706 """Generate a suitable LV name.
8708 This will generate a logical volume name for the given instance.
8710 """
8711 results = []
8712 for val in exts:
8713 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8714 results.append("%s%s" % (new_id, val))
8715 return results
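# Illustrative example (hypothetical unique IDs): _GenerateUniqueNames(lu,
# [".disk0", ".disk1"]) returns names of the form ["<uuid>.disk0",
# "<uuid>.disk1"], where the unique prefix is handed out by the cluster
# configuration.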
8718 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8719 iv_name, p_minor, s_minor):
8720 """Generate a drbd8 device complete with its children.
8723 assert len(vgnames) == len(names) == 2
8724 port = lu.cfg.AllocatePort()
8725 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8727 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8728 logical_id=(vgnames[0], names[0]),
8730 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8731 logical_id=(vgnames[1], names[1]),
8733 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8734 logical_id=(primary, secondary, port,
8735 p_minor, s_minor,
8736 shared_secret),
8737 children=[dev_data, dev_meta],
8738 iv_name=iv_name, params={})
8739 return drbd_dev
8742 _DISK_TEMPLATE_NAME_PREFIX = {
8743 constants.DT_PLAIN: "",
8744 constants.DT_RBD: ".rbd",
8748 _DISK_TEMPLATE_DEVICE_TYPE = {
8749 constants.DT_PLAIN: constants.LD_LV,
8750 constants.DT_FILE: constants.LD_FILE,
8751 constants.DT_SHARED_FILE: constants.LD_FILE,
8752 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8753 constants.DT_RBD: constants.LD_RBD,
8757 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8758 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8759 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8760 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8761 """Generate the entire disk layout for a given template type.
8764 #TODO: compute space requirements
8766 vgname = lu.cfg.GetVGName()
8767 disk_count = len(disk_info)
8770 if template_name == constants.DT_DISKLESS:
8772 elif template_name == constants.DT_DRBD8:
8773 if len(secondary_nodes) != 1:
8774 raise errors.ProgrammerError("Wrong template configuration")
8775 remote_node = secondary_nodes[0]
8776 minors = lu.cfg.AllocateDRBDMinor(
8777 [primary_node, remote_node] * len(disk_info), instance_name)
8779 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8781 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8784 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8785 for i in range(disk_count)]):
8786 names.append(lv_prefix + "_data")
8787 names.append(lv_prefix + "_meta")
8788 for idx, disk in enumerate(disk_info):
8789 disk_index = idx + base_index
8790 data_vg = disk.get(constants.IDISK_VG, vgname)
8791 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8792 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8793 disk[constants.IDISK_SIZE],
8795 names[idx * 2:idx * 2 + 2],
8796 "disk/%d" % disk_index,
8797 minors[idx * 2], minors[idx * 2 + 1])
8798 disk_dev.mode = disk[constants.IDISK_MODE]
8799 disks.append(disk_dev)
8802 raise errors.ProgrammerError("Wrong template configuration")
8804 if template_name == constants.DT_FILE:
8806 elif template_name == constants.DT_SHARED_FILE:
8807 _req_shr_file_storage()
8809 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8810 if name_prefix is None:
8813 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8814 (name_prefix, base_index + i)
8815 for i in range(disk_count)])
8817 if template_name == constants.DT_PLAIN:
8818 def logical_id_fn(idx, _, disk):
8819 vg = disk.get(constants.IDISK_VG, vgname)
8820 return (vg, names[idx])
8821 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8823 lambda _, disk_index, disk: (file_driver,
8824 "%s/disk%d" % (file_storage_dir,
8826 elif template_name == constants.DT_BLOCK:
8828 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8829 disk[constants.IDISK_ADOPT])
8830 elif template_name == constants.DT_RBD:
8831 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8833 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8835 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8837 for idx, disk in enumerate(disk_info):
8838 disk_index = idx + base_index
8839 size = disk[constants.IDISK_SIZE]
8840 feedback_fn("* disk %s, size %s" %
8841 (disk_index, utils.FormatUnit(size, "h")))
8842 disks.append(objects.Disk(dev_type=dev_type, size=size,
8843 logical_id=logical_id_fn(idx, disk_index, disk),
8844 iv_name="disk/%d" % disk_index,
8845 mode=disk[constants.IDISK_MODE],
8851 def _GetInstanceInfoText(instance):
8852 """Compute that text that should be added to the disk's metadata.
8855 return "originstname+%s" % instance.name
8858 def _CalcEta(time_taken, written, total_size):
8859 """Calculates the ETA based on size written and total size.
8861 @param time_taken: The time taken so far
8862 @param written: amount written so far
8863 @param total_size: The total size of data to be written
8864 @return: The remaining time in seconds
8867 avg_time = time_taken / float(written)
8868 return (total_size - written) * avg_time
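# Worked example: if 2048 MiB out of 8192 MiB were written in 60 seconds,
# avg_time = 60 / 2048.0 s/MiB, so the ETA is (8192 - 2048) * avg_time = 180 s.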
8871 def _WipeDisks(lu, instance):
8872 """Wipes instance disks.
8874 @type lu: L{LogicalUnit}
8875 @param lu: the logical unit on whose behalf we execute
8876 @type instance: L{objects.Instance}
8877 @param instance: the instance whose disks we should create
8878 @return: the success of the wipe
8881 node = instance.primary_node
8883 for device in instance.disks:
8884 lu.cfg.SetDiskID(device, node)
8886 logging.info("Pause sync of instance %s disks", instance.name)
8887 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8888 (instance.disks, instance),
8890 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8892 for idx, success in enumerate(result.payload):
8893 if not success:
8894 logging.warn("pause-sync of instance %s for disk %d failed",
8895 instance.name, idx)
8898 for idx, device in enumerate(instance.disks):
8899 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8900 # MAX_WIPE_CHUNK at max
8901 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8902 constants.MIN_WIPE_CHUNK_PERCENT)
8903 # we _must_ make this an int, otherwise rounding errors will occur
8905 wipe_chunk_size = int(wipe_chunk_size)
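# Illustrative numbers (assuming the usual constants.MAX_WIPE_CHUNK = 1024 MiB
# and constants.MIN_WIPE_CHUNK_PERCENT = 10): a 4096 MiB disk is wiped in
# chunks of min(1024, 4096 / 100.0 * 10) = 409 MiB, a 100 GiB disk in chunks
# of 1024 MiB.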
8907 lu.LogInfo("* Wiping disk %d", idx)
8908 logging.info("Wiping disk %d for instance %s, node %s using"
8909 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8911 offset = 0
8912 size = device.size
8913 last_output = 0
8914 start_time = time.time()
8916 while offset < size:
8917 wipe_size = min(wipe_chunk_size, size - offset)
8918 logging.debug("Wiping disk %d, offset %s, chunk %s",
8919 idx, offset, wipe_size)
8920 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8922 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8923 (idx, offset, wipe_size))
8924 offset += wipe_size
8925 now = time.time()
8926 if now - last_output >= 60:
8927 eta = _CalcEta(now - start_time, offset, size)
8928 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8929 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8930 last_output = now
8932 logging.info("Resume sync of instance %s disks", instance.name)
8934 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8935 (instance.disks, instance),
8939 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8940 " please have a look at the status and troubleshoot"
8941 " the issue: %s", node, result.fail_msg)
8943 for idx, success in enumerate(result.payload):
8944 if not success:
8945 lu.LogWarning("Resume sync of disk %d failed, please have a"
8946 " look at the status and troubleshoot the issue", idx)
8947 logging.warn("resume-sync of instance %s for disk %d failed",
8948 instance.name, idx)
8951 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8952 """Create all disks for an instance.
8954 This abstracts away some work from AddInstance.
8956 @type lu: L{LogicalUnit}
8957 @param lu: the logical unit on whose behalf we execute
8958 @type instance: L{objects.Instance}
8959 @param instance: the instance whose disks we should create
8961 @param to_skip: list of indices to skip
8962 @type target_node: string
8963 @param target_node: if passed, overrides the target node for creation
8965 @return: the success of the creation
8968 info = _GetInstanceInfoText(instance)
8969 if target_node is None:
8970 pnode = instance.primary_node
8971 all_nodes = instance.all_nodes
8976 if instance.disk_template in constants.DTS_FILEBASED:
8977 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8978 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8980 result.Raise("Failed to create directory '%s' on"
8981 " node %s" % (file_storage_dir, pnode))
8983 # Note: this needs to be kept in sync with adding of disks in
8984 # LUInstanceSetParams
8985 for idx, device in enumerate(instance.disks):
8986 if to_skip and idx in to_skip:
8988 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8990 for node in all_nodes:
8991 f_create = node == pnode
8992 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
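# Both force_create and force_open are passed as "node == pnode", i.e. the
# full device tree is only forced into existence and opened on the (possibly
# overridden) primary node; on secondaries only devices whose
# CreateOnSecondary() is true are created.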
8995 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8996 """Remove all disks for an instance.
8998 This abstracts away some work from `AddInstance()` and
8999 `RemoveInstance()`. Note that in case some of the devices couldn't
9000 be removed, the removal will continue with the other ones (compare
9001 with `_CreateDisks()`).
9003 @type lu: L{LogicalUnit}
9004 @param lu: the logical unit on whose behalf we execute
9005 @type instance: L{objects.Instance}
9006 @param instance: the instance whose disks we should remove
9007 @type target_node: string
9008 @param target_node: used to override the node on which to remove the disks
9010 @return: the success of the removal
9013 logging.info("Removing block devices for instance %s", instance.name)
9016 ports_to_release = set()
9017 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9018 for (idx, device) in enumerate(anno_disks):
9020 edata = [(target_node, device)]
9022 edata = device.ComputeNodeTree(instance.primary_node)
9023 for node, disk in edata:
9024 lu.cfg.SetDiskID(disk, node)
9025 result = lu.rpc.call_blockdev_remove(node, disk)
9027 lu.LogWarning("Could not remove disk %s on node %s,"
9028 " continuing anyway: %s", idx, node, result.fail_msg)
9029 if not (result.offline and node != instance.primary_node):
9032 # if this is a DRBD disk, return its port to the pool
9033 if device.dev_type in constants.LDS_DRBD:
9034 ports_to_release.add(device.logical_id[2])
9036 if all_result or ignore_failures:
9037 for port in ports_to_release:
9038 lu.cfg.AddTcpUdpPort(port)
9040 if instance.disk_template == constants.DT_FILE:
9041 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9045 tgt = instance.primary_node
9046 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9048 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9049 file_storage_dir, instance.primary_node, result.fail_msg)
9055 def _ComputeDiskSizePerVG(disk_template, disks):
9056 """Compute disk size requirements in the volume group
9059 def _compute(disks, payload):
9060 """Universal algorithm.
9063 vgs = {}
9064 for disk in disks:
9065 vgs[disk[constants.IDISK_VG]] = \
9066 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9067 return vgs
9070 # Required free disk space as a function of disk and swap space
9071 req_size_dict = {
9072 constants.DT_DISKLESS: {},
9073 constants.DT_PLAIN: _compute(disks, 0),
9074 # 128 MB are added for drbd metadata for each disk
9075 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9076 constants.DT_FILE: {},
9077 constants.DT_SHARED_FILE: {},
9080 if disk_template not in req_size_dict:
9081 raise errors.ProgrammerError("Disk template '%s' size requirement"
9082 " is unknown" % disk_template)
9084 return req_size_dict[disk_template]
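# Illustrative example (hypothetical volume group name): two DRBD8 disks of
# 1024 MiB each, both placed in volume group "xenvg", require
# {"xenvg": 2 * (1024 + 128)} = {"xenvg": 2304} MiB of free space there.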
9087 def _ComputeDiskSize(disk_template, disks):
9088 """Compute disk size requirements according to disk template
9091 # Required free disk space as a function of disk and swap space
9092 req_size_dict = {
9093 constants.DT_DISKLESS: None,
9094 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9095 # 128 MB are added for drbd metadata for each disk
9097 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9098 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9099 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9100 constants.DT_BLOCK: 0,
9101 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9104 if disk_template not in req_size_dict:
9105 raise errors.ProgrammerError("Disk template '%s' size requirement"
9106 " is unknown" % disk_template)
9108 return req_size_dict[disk_template]
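# Illustrative example: for two disks of 1024 MiB and 2048 MiB, DT_PLAIN needs
# 3072 MiB, DT_DRBD8 needs 3072 + 2 * 128 = 3328 MiB, and DT_DISKLESS /
# DT_BLOCK need no extra space (None and 0 respectively).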
9111 def _FilterVmNodes(lu, nodenames):
9112 """Filters out non-vm_capable nodes from a list.
9114 @type lu: L{LogicalUnit}
9115 @param lu: the logical unit for which we check
9116 @type nodenames: list
9117 @param nodenames: the list of nodes on which we should check
9119 @return: the list of vm-capable nodes
9122 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9123 return [name for name in nodenames if name not in vm_nodes]
9126 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9127 """Hypervisor parameter validation.
9129 This function abstract the hypervisor parameter validation to be
9130 used in both instance create and instance modify.
9132 @type lu: L{LogicalUnit}
9133 @param lu: the logical unit for which we check
9134 @type nodenames: list
9135 @param nodenames: the list of nodes on which we should check
9136 @type hvname: string
9137 @param hvname: the name of the hypervisor we should use
9138 @type hvparams: dict
9139 @param hvparams: the parameters which we need to check
9140 @raise errors.OpPrereqError: if the parameters are not valid
9143 nodenames = _FilterVmNodes(lu, nodenames)
9145 cluster = lu.cfg.GetClusterInfo()
9146 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9148 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9149 for node in nodenames:
9153 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9156 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9157 """OS parameters validation.
9159 @type lu: L{LogicalUnit}
9160 @param lu: the logical unit for which we check
9161 @type required: boolean
9162 @param required: whether the validation should fail if the OS is not
9164 @type nodenames: list
9165 @param nodenames: the list of nodes on which we should check
9166 @type osname: string
9167 @param osname: the name of the OS we should use
9168 @type osparams: dict
9169 @param osparams: the parameters which we need to check
9170 @raise errors.OpPrereqError: if the parameters are not valid
9173 nodenames = _FilterVmNodes(lu, nodenames)
9174 result = lu.rpc.call_os_validate(nodenames, required, osname,
9175 [constants.OS_VALIDATE_PARAMETERS],
9177 for node, nres in result.items():
9178 # we don't check for offline cases since this should be run only
9179 # against the master node and/or an instance's nodes
9180 nres.Raise("OS Parameters validation failed on node %s" % node)
9181 if not nres.payload:
9182 lu.LogInfo("OS %s not found on node %s, validation skipped",
9186 class LUInstanceCreate(LogicalUnit):
9187 """Create an instance.
9190 HPATH = "instance-add"
9191 HTYPE = constants.HTYPE_INSTANCE
9194 def CheckArguments(self):
9198 # do not require name_check to ease forward/backward compatibility
9200 if self.op.no_install and self.op.start:
9201 self.LogInfo("No-installation mode selected, disabling startup")
9202 self.op.start = False
9203 # validate/normalize the instance name
9204 self.op.instance_name = \
9205 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9207 if self.op.ip_check and not self.op.name_check:
9208 # TODO: make the ip check more flexible and not depend on the name check
9209 raise errors.OpPrereqError("Cannot do IP address check without a name"
9210 " check", errors.ECODE_INVAL)
9212 # check nics' parameter names
9213 for nic in self.op.nics:
9214 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9216 # check disks. parameter names and consistent adopt/no-adopt strategy
9217 has_adopt = has_no_adopt = False
9218 for disk in self.op.disks:
9219 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9220 if constants.IDISK_ADOPT in disk:
9224 if has_adopt and has_no_adopt:
9225 raise errors.OpPrereqError("Either all disks are adopted or none is",
9228 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9229 raise errors.OpPrereqError("Disk adoption is not supported for the"
9230 " '%s' disk template" %
9231 self.op.disk_template,
9233 if self.op.iallocator is not None:
9234 raise errors.OpPrereqError("Disk adoption not allowed with an"
9235 " iallocator script", errors.ECODE_INVAL)
9236 if self.op.mode == constants.INSTANCE_IMPORT:
9237 raise errors.OpPrereqError("Disk adoption not allowed for"
9238 " instance import", errors.ECODE_INVAL)
9240 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9241 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9242 " but no 'adopt' parameter given" %
9243 self.op.disk_template,
9246 self.adopt_disks = has_adopt
9248 # instance name verification
9249 if self.op.name_check:
9250 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9251 self.op.instance_name = self.hostname1.name
9252 # used in CheckPrereq for ip ping check
9253 self.check_ip = self.hostname1.ip
9255 self.check_ip = None
9257 # file storage checks
9258 if (self.op.file_driver and
9259 not self.op.file_driver in constants.FILE_DRIVER):
9260 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9261 self.op.file_driver, errors.ECODE_INVAL)
9263 if self.op.disk_template == constants.DT_FILE:
9264 opcodes.RequireFileStorage()
9265 elif self.op.disk_template == constants.DT_SHARED_FILE:
9266 opcodes.RequireSharedFileStorage()
9268 ### Node/iallocator related checks
9269 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9271 if self.op.pnode is not None:
9272 if self.op.disk_template in constants.DTS_INT_MIRROR:
9273 if self.op.snode is None:
9274 raise errors.OpPrereqError("The networked disk templates need"
9275 " a mirror node", errors.ECODE_INVAL)
9277 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9279 self.op.snode = None
9281 self._cds = _GetClusterDomainSecret()
9283 if self.op.mode == constants.INSTANCE_IMPORT:
9284 # On import force_variant must be True, because if we forced it at
9285 # initial install, our only chance when importing it back is that it
9287 self.op.force_variant = True
9289 if self.op.no_install:
9290 self.LogInfo("No-installation mode has no effect during import")
9292 elif self.op.mode == constants.INSTANCE_CREATE:
9293 if self.op.os_type is None:
9294 raise errors.OpPrereqError("No guest OS specified",
9296 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9297 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9298 " installation" % self.op.os_type,
9300 if self.op.disk_template is None:
9301 raise errors.OpPrereqError("No disk template specified",
9304 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9305 # Check handshake to ensure both clusters have the same domain secret
9306 src_handshake = self.op.source_handshake
9307 if not src_handshake:
9308 raise errors.OpPrereqError("Missing source handshake",
9311 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9314 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9317 # Load and check source CA
9318 self.source_x509_ca_pem = self.op.source_x509_ca
9319 if not self.source_x509_ca_pem:
9320 raise errors.OpPrereqError("Missing source X509 CA",
9324 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9326 except OpenSSL.crypto.Error, err:
9327 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9328 (err, ), errors.ECODE_INVAL)
9330 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9331 if errcode is not None:
9332 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9335 self.source_x509_ca = cert
9337 src_instance_name = self.op.source_instance_name
9338 if not src_instance_name:
9339 raise errors.OpPrereqError("Missing source instance name",
9342 self.source_instance_name = \
9343 netutils.GetHostname(name=src_instance_name).name
else:
9346 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9347 self.op.mode, errors.ECODE_INVAL)
9349 def ExpandNames(self):
9350 """ExpandNames for CreateInstance.
9352 Figure out the right locks for instance creation.
9355 self.needed_locks = {}
9357 instance_name = self.op.instance_name
9358 # this is just a preventive check, but someone might still add this
9359 # instance in the meantime, and creation will fail at lock-add time
9360 if instance_name in self.cfg.GetInstanceList():
9361 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9362 instance_name, errors.ECODE_EXISTS)
9364 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9366 if self.op.iallocator:
9367 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9368 # specifying a group on instance creation and then selecting nodes from
9370 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9371 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9373 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9374 nodelist = [self.op.pnode]
9375 if self.op.snode is not None:
9376 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9377 nodelist.append(self.op.snode)
9378 self.needed_locks[locking.LEVEL_NODE] = nodelist
9379 # Lock resources of instance's primary and secondary nodes (copy to
9380 # prevent accidential modification)
9381 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9383 # in case of import lock the source node too
9384 if self.op.mode == constants.INSTANCE_IMPORT:
9385 src_node = self.op.src_node
9386 src_path = self.op.src_path
9388 if src_path is None:
9389 self.op.src_path = src_path = self.op.instance_name
9391 if src_node is None:
9392 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9393 self.op.src_node = None
9394 if os.path.isabs(src_path):
9395 raise errors.OpPrereqError("Importing an instance from a path"
9396 " requires a source node option",
errors.ECODE_INVAL)
else:
9399 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9400 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9401 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9402 if not os.path.isabs(src_path):
9403 self.op.src_path = src_path = \
9404 utils.PathJoin(constants.EXPORT_DIR, src_path)
9406 def _RunAllocator(self):
9407 """Run the allocator based on input opcode.
9410 nics = [n.ToDict() for n in self.nics]
9411 ial = IAllocator(self.cfg, self.rpc,
9412 mode=constants.IALLOCATOR_MODE_ALLOC,
9413 name=self.op.instance_name,
9414 disk_template=self.op.disk_template,
9417 vcpus=self.be_full[constants.BE_VCPUS],
9418 memory=self.be_full[constants.BE_MAXMEM],
9419 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9422 hypervisor=self.op.hypervisor,
9425 ial.Run(self.op.iallocator)

if not ial.success:
9428 raise errors.OpPrereqError("Can't compute nodes using"
9429 " iallocator '%s': %s" %
9430 (self.op.iallocator, ial.info),
errors.ECODE_NORES)
9432 if len(ial.result) != ial.required_nodes:
9433 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9434 " of nodes (%s), required %s" %
9435 (self.op.iallocator, len(ial.result),
9436 ial.required_nodes), errors.ECODE_FAULT)
9437 self.op.pnode = ial.result[0]
9438 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9439 self.op.instance_name, self.op.iallocator,
9440 utils.CommaJoin(ial.result))
9441 if ial.required_nodes == 2:
9442 self.op.snode = ial.result[1]
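# Editorial note (not in the original source): for a mirrored disk template
# the allocator typically returns two node names, e.g.
#   ial.result == ["node1.example.com", "node2.example.com"]
# in which case the first entry becomes the primary and the second the
# secondary; single-node templates need (and get) only one entry.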
9444 def BuildHooksEnv(self):
9447 This runs on master, primary and secondary nodes of the instance.

"""
env = {
9451 "ADD_MODE": self.op.mode,
}
9453 if self.op.mode == constants.INSTANCE_IMPORT:
9454 env["SRC_NODE"] = self.op.src_node
9455 env["SRC_PATH"] = self.op.src_path
9456 env["SRC_IMAGES"] = self.src_images
9458 env.update(_BuildInstanceHookEnv(
9459 name=self.op.instance_name,
9460 primary_node=self.op.pnode,
9461 secondary_nodes=self.secondaries,
9462 status=self.op.start,
9463 os_type=self.op.os_type,
9464 minmem=self.be_full[constants.BE_MINMEM],
9465 maxmem=self.be_full[constants.BE_MAXMEM],
9466 vcpus=self.be_full[constants.BE_VCPUS],
9467 nics=_NICListToTuple(self, self.nics),
9468 disk_template=self.op.disk_template,
9469 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9470 for d in self.disks],
9473 hypervisor_name=self.op.hypervisor,
))

return env
9479 def BuildHooksNodes(self):
9480 """Build hooks nodes.

"""
9483 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
return nl, nl
9486 def _ReadExportInfo(self):
9487 """Reads the export information from disk.
9489 It will override the opcode source node and path with the actual
9490 information, if these two were not specified before.
9492 @return: the export information
9495 assert self.op.mode == constants.INSTANCE_IMPORT
9497 src_node = self.op.src_node
9498 src_path = self.op.src_path
9500 if src_node is None:
9501 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9502 exp_list = self.rpc.call_export_list(locked_nodes)
found = False
9504 for node in exp_list:
9505 if exp_list[node].fail_msg:
continue
9507 if src_path in exp_list[node].payload:
found = True
9509 self.op.src_node = src_node = node
9510 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
src_path)
break
if not found:
9514 raise errors.OpPrereqError("No export found for relative path %s" %
9515 src_path, errors.ECODE_INVAL)
9517 _CheckNodeOnline(self, src_node)
9518 result = self.rpc.call_export_info(src_node, src_path)
9519 result.Raise("No export or invalid export found in dir %s" % src_path)
9521 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9522 if not export_info.has_section(constants.INISECT_EXP):
9523 raise errors.ProgrammerError("Corrupted export config",
9524 errors.ECODE_ENVIRON)
9526 ei_version = export_info.get(constants.INISECT_EXP, "version")
9527 if (int(ei_version) != constants.EXPORT_VERSION):
9528 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9529 (ei_version, constants.EXPORT_VERSION),
9530 errors.ECODE_ENVIRON)

return export_info
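# Editorial sketch of the export file this method parses; section and option
# names follow constants.INISECT_* and the exact layout may vary by version:
#   [export]
#   version = 0
#   os = debian-installer
#   [instance]
#   name = inst1.example.com
#   disk0_size = 10240
#   nic0_mac = aa:00:00:11:22:33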
9533 def _ReadExportParams(self, einfo):
9534 """Use export parameters as defaults.
9536 In case the opcode doesn't specify (as in override) some instance
9537 parameters, then try to use them from the export information, if
9541 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9543 if self.op.disk_template is None:
9544 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9545 self.op.disk_template = einfo.get(constants.INISECT_INS,
"disk_template")
9547 if self.op.disk_template not in constants.DISK_TEMPLATES:
9548 raise errors.OpPrereqError("Disk template specified in configuration"
9549 " file is not one of the allowed values:"
9550 " %s" % " ".join(constants.DISK_TEMPLATES))
else:
9552 raise errors.OpPrereqError("No disk template specified and the export"
9553 " is missing the disk_template information",
9556 if not self.op.disks:
disks = []
9558 # TODO: import the disk iv_name too
9559 for idx in range(constants.MAX_DISKS):
9560 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9561 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9562 disks.append({constants.IDISK_SIZE: disk_sz})
9563 self.op.disks = disks
9564 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9565 raise errors.OpPrereqError("No disk info specified and the export"
9566 " is missing the disk information",
9569 if not self.op.nics:
9571 for idx in range(constants.MAX_NICS):
9572 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9574 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9575 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9582 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9583 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9585 if (self.op.hypervisor is None and
9586 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9587 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9589 if einfo.has_section(constants.INISECT_HYP):
9590 # use the export parameters but do not override the ones
9591 # specified by the user
9592 for name, value in einfo.items(constants.INISECT_HYP):
9593 if name not in self.op.hvparams:
9594 self.op.hvparams[name] = value
9596 if einfo.has_section(constants.INISECT_BEP):
9597 # use the parameters, without overriding
9598 for name, value in einfo.items(constants.INISECT_BEP):
9599 if name not in self.op.beparams:
9600 self.op.beparams[name] = value
9601 # Compatibility for the old "memory" be param
9602 if name == constants.BE_MEMORY:
9603 if constants.BE_MAXMEM not in self.op.beparams:
9604 self.op.beparams[constants.BE_MAXMEM] = value
9605 if constants.BE_MINMEM not in self.op.beparams:
9606 self.op.beparams[constants.BE_MINMEM] = value
9608 # try to read the parameters old style, from the main section
9609 for name in constants.BES_PARAMETERS:
9610 if (name not in self.op.beparams and
9611 einfo.has_option(constants.INISECT_INS, name)):
9612 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9614 if einfo.has_section(constants.INISECT_OSP):
9615 # use the parameters, without overriding
9616 for name, value in einfo.items(constants.INISECT_OSP):
9617 if name not in self.op.osparams:
9618 self.op.osparams[name] = value
9620 def _RevertToDefaults(self, cluster):
9621 """Revert the instance parameters to the default values.
9625 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9626 for name in self.op.hvparams.keys():
9627 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9628 del self.op.hvparams[name]
9630 be_defs = cluster.SimpleFillBE({})
9631 for name in self.op.beparams.keys():
9632 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9633 del self.op.beparams[name]
9635 nic_defs = cluster.SimpleFillNIC({})
9636 for nic in self.op.nics:
9637 for name in constants.NICS_PARAMETERS:
9638 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9641 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9642 for name in self.op.osparams.keys():
9643 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9644 del self.op.osparams[name]
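# Editorial example of the effect: if the opcode carries a beparam that is
# identical to the cluster default (say maxmem=128 while the cluster default
# is also 128), the entry is dropped here so the instance keeps following
# future changes of the cluster default instead of pinning the value.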
9646 def _CalculateFileStorageDir(self):
9647 """Calculate final instance file storage dir.
9650 # file storage dir calculation/check
9651 self.instance_file_storage_dir = None
9652 if self.op.disk_template in constants.DTS_FILEBASED:
9653 # build the full file storage dir path
joinargs = []
9656 if self.op.disk_template == constants.DT_SHARED_FILE:
9657 get_fsd_fn = self.cfg.GetSharedFileStorageDir
else:
9659 get_fsd_fn = self.cfg.GetFileStorageDir
9661 cfg_storagedir = get_fsd_fn()
9662 if not cfg_storagedir:
9663 raise errors.OpPrereqError("Cluster file storage dir not defined")
9664 joinargs.append(cfg_storagedir)
9666 if self.op.file_storage_dir is not None:
9667 joinargs.append(self.op.file_storage_dir)
9669 joinargs.append(self.op.instance_name)
9671 # pylint: disable=W0142
9672 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
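# Editorial example (assuming a cluster file storage dir of
# /srv/ganeti/file-storage and no explicit file_storage_dir in the opcode):
#   self.instance_file_storage_dir ==
#       "/srv/ganeti/file-storage/inst1.example.com"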
9674 def CheckPrereq(self): # pylint: disable=R0914
9675 """Check prerequisites.
9678 self._CalculateFileStorageDir()
9680 if self.op.mode == constants.INSTANCE_IMPORT:
9681 export_info = self._ReadExportInfo()
9682 self._ReadExportParams(export_info)
9683 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
else:
9685 self._old_instance_name = None
9687 if (not self.cfg.GetVGName() and
9688 self.op.disk_template not in constants.DTS_NOT_LVM):
9689 raise errors.OpPrereqError("Cluster does not support lvm-based"
9690 " instances", errors.ECODE_STATE)
9692 if (self.op.hypervisor is None or
9693 self.op.hypervisor == constants.VALUE_AUTO):
9694 self.op.hypervisor = self.cfg.GetHypervisorType()
9696 cluster = self.cfg.GetClusterInfo()
9697 enabled_hvs = cluster.enabled_hypervisors
9698 if self.op.hypervisor not in enabled_hvs:
9699 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9700 " cluster (%s)" % (self.op.hypervisor,
9701 ",".join(enabled_hvs)),
9704 # Check tag validity
9705 for tag in self.op.tags:
9706 objects.TaggableObject.ValidateTag(tag)
9708 # check hypervisor parameter syntax (locally)
9709 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9710 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
self.op.hvparams)
9712 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9713 hv_type.CheckParameterSyntax(filled_hvp)
9714 self.hv_full = filled_hvp
9715 # check that we don't specify global parameters on an instance
9716 _CheckGlobalHvParams(self.op.hvparams)
9718 # fill and remember the beparams dict
9719 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9720 for param, value in self.op.beparams.iteritems():
9721 if value == constants.VALUE_AUTO:
9722 self.op.beparams[param] = default_beparams[param]
9723 objects.UpgradeBeParams(self.op.beparams)
9724 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9725 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9727 # build os parameters
9728 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9730 # now that hvp/bep are in final format, let's reset to defaults,
9732 if self.op.identify_defaults:
9733 self._RevertToDefaults(cluster)

# NIC buildup
self.nics = []
9737 for idx, nic in enumerate(self.op.nics):
9738 nic_mode_req = nic.get(constants.INIC_MODE, None)
9739 nic_mode = nic_mode_req
9740 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9741 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9743 # in routed mode, for the first nic, the default ip is 'auto'
9744 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9745 default_ip_mode = constants.VALUE_AUTO
9747 default_ip_mode = constants.VALUE_NONE
9749 # ip validity checks
9750 ip = nic.get(constants.INIC_IP, default_ip_mode)
9751 if ip is None or ip.lower() == constants.VALUE_NONE:
nic_ip = None
9753 elif ip.lower() == constants.VALUE_AUTO:
9754 if not self.op.name_check:
9755 raise errors.OpPrereqError("IP address set to auto but name checks"
9756 " have been skipped",
9758 nic_ip = self.hostname1.ip
9760 if not netutils.IPAddress.IsValid(ip):
9761 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
errors.ECODE_INVAL)
nic_ip = ip
9765 # TODO: check the ip address for uniqueness
9766 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9767 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9770 # MAC address verification
9771 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9772 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9773 mac = utils.NormalizeAndValidateMac(mac)
try:
9776 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9777 except errors.ReservationError:
9778 raise errors.OpPrereqError("MAC address %s already in use"
9779 " in cluster" % mac,
9780 errors.ECODE_NOTUNIQUE)
9782 # Build nic parameters
9783 link = nic.get(constants.INIC_LINK, None)
9784 if link == constants.VALUE_AUTO:
9785 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
nicparams = {}
if nic_mode_req:
9788 nicparams[constants.NIC_MODE] = nic_mode
if link:
9790 nicparams[constants.NIC_LINK] = link
9792 check_params = cluster.SimpleFillNIC(nicparams)
9793 objects.NIC.CheckParameterSyntax(check_params)
9794 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
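# Editorial illustration of one entry appended above (values are made up):
#   objects.NIC(mac="aa:00:00:11:22:33", ip=None,
#               nicparams={constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
#                          constants.NIC_LINK: "xen-br0"})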
9796 # disk checks/pre-build
9797 default_vg = self.cfg.GetVGName()
self.disks = []
9799 for disk in self.op.disks:
9800 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9801 if mode not in constants.DISK_ACCESS_SET:
9802 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9803 mode, errors.ECODE_INVAL)
9804 size = disk.get(constants.IDISK_SIZE, None)
if size is None:
9806 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
try:
size = int(size)
9809 except (TypeError, ValueError):
9810 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
errors.ECODE_INVAL)
9813 data_vg = disk.get(constants.IDISK_VG, default_vg)
new_disk = {
9815 constants.IDISK_SIZE: size,
9816 constants.IDISK_MODE: mode,
9817 constants.IDISK_VG: data_vg,
}
9819 if constants.IDISK_METAVG in disk:
9820 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9821 if constants.IDISK_ADOPT in disk:
9822 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9823 self.disks.append(new_disk)
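# Editorial illustration of one entry appended above (sizes are in MiB and
# the values are made up):
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}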
9825 if self.op.mode == constants.INSTANCE_IMPORT:
disk_images = []
9827 for idx in range(len(self.disks)):
9828 option = "disk%d_dump" % idx
9829 if export_info.has_option(constants.INISECT_INS, option):
9830 # FIXME: are the old os-es, disk sizes, etc. useful?
9831 export_name = export_info.get(constants.INISECT_INS, option)
9832 image = utils.PathJoin(self.op.src_path, export_name)
9833 disk_images.append(image)
else:
9835 disk_images.append(False)
9837 self.src_images = disk_images
9839 if self.op.instance_name == self._old_instance_name:
9840 for idx, nic in enumerate(self.nics):
9841 if nic.mac == constants.VALUE_AUTO:
9842 nic_mac_ini = "nic%d_mac" % idx
9843 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9845 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9847 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9848 if self.op.ip_check:
9849 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9850 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9851 (self.check_ip, self.op.instance_name),
9852 errors.ECODE_NOTUNIQUE)
9854 #### mac address generation
9855 # By generating here the mac address both the allocator and the hooks get
9856 # the real final mac address rather than the 'auto' or 'generate' value.
9857 # There is a race condition between the generation and the instance object
9858 # creation, which means that we know the mac is valid now, but we're not
9859 # sure it will be when we actually add the instance. If things go bad
9860 # adding the instance will abort because of a duplicate mac, and the
9861 # creation job will fail.
9862 for nic in self.nics:
9863 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9864 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9868 if self.op.iallocator is not None:
9869 self._RunAllocator()
9871 # Release all unneeded node locks
9872 _ReleaseLocks(self, locking.LEVEL_NODE,
9873 keep=filter(None, [self.op.pnode, self.op.snode,
9875 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9876 keep=filter(None, [self.op.pnode, self.op.snode,
9879 #### node related checks
9881 # check primary node
9882 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9883 assert self.pnode is not None, \
9884 "Cannot retrieve locked node %s" % self.op.pnode
if pnode.offline:
9886 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9887 pnode.name, errors.ECODE_STATE)
if pnode.drained:
9889 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9890 pnode.name, errors.ECODE_STATE)
9891 if not pnode.vm_capable:
9892 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9893 " '%s'" % pnode.name, errors.ECODE_STATE)
9895 self.secondaries = []
9897 # mirror node verification
9898 if self.op.disk_template in constants.DTS_INT_MIRROR:
9899 if self.op.snode == pnode.name:
9900 raise errors.OpPrereqError("The secondary node cannot be the"
9901 " primary node", errors.ECODE_INVAL)
9902 _CheckNodeOnline(self, self.op.snode)
9903 _CheckNodeNotDrained(self, self.op.snode)
9904 _CheckNodeVmCapable(self, self.op.snode)
9905 self.secondaries.append(self.op.snode)
9907 snode = self.cfg.GetNodeInfo(self.op.snode)
9908 if pnode.group != snode.group:
9909 self.LogWarning("The primary and secondary nodes are in two"
9910 " different node groups; the disk parameters"
9911 " from the first disk's node group will be"
9914 nodenames = [pnode.name] + self.secondaries
9916 # Verify instance specs
9917 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
ispec = {
9919 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9920 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9921 constants.ISPEC_DISK_COUNT: len(self.disks),
9922 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9923 constants.ISPEC_NIC_COUNT: len(self.nics),
9924 constants.ISPEC_SPINDLE_USE: spindle_use,
}
9927 group_info = self.cfg.GetNodeGroup(pnode.group)
9928 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9929 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9930 if not self.op.ignore_ipolicy and res:
9931 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9932 " policy: %s") % (pnode.group,
9933 utils.CommaJoin(res)),
9936 if not self.adopt_disks:
9937 if self.op.disk_template == constants.DT_RBD:
9938 # _CheckRADOSFreeSpace() is just a placeholder.
9939 # Any function that checks prerequisites can be placed here.
9940 # Check if there is enough space on the RADOS cluster.
9941 _CheckRADOSFreeSpace()
9943 # Check lv size requirements, if not adopting
9944 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9945 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9947 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9948 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9949 disk[constants.IDISK_ADOPT])
9950 for disk in self.disks])
9951 if len(all_lvs) != len(self.disks):
9952 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9954 for lv_name in all_lvs:
9956 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9957 # to ReserveLV uses the same syntax
9958 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9959 except errors.ReservationError:
9960 raise errors.OpPrereqError("LV named %s used by another instance" %
9961 lv_name, errors.ECODE_NOTUNIQUE)
9963 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9964 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9966 node_lvs = self.rpc.call_lv_list([pnode.name],
9967 vg_names.payload.keys())[pnode.name]
9968 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9969 node_lvs = node_lvs.payload
9971 delta = all_lvs.difference(node_lvs.keys())
9973 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9974 utils.CommaJoin(delta),
9976 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9978 raise errors.OpPrereqError("Online logical volumes found, cannot"
9979 " adopt: %s" % utils.CommaJoin(online_lvs),
9981 # update the size of disk based on what is found
9982 for dsk in self.disks:
9983 dsk[constants.IDISK_SIZE] = \
9984 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9985 dsk[constants.IDISK_ADOPT])][0]))
9987 elif self.op.disk_template == constants.DT_BLOCK:
9988 # Normalize and de-duplicate device paths
9989 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9990 for disk in self.disks])
9991 if len(all_disks) != len(self.disks):
9992 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9994 baddisks = [d for d in all_disks
9995 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9997 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9998 " cannot be adopted" %
9999 (", ".join(baddisks),
10000 constants.ADOPTABLE_BLOCKDEV_ROOT),
10001 errors.ECODE_INVAL)
10003 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10004 list(all_disks))[pnode.name]
10005 node_disks.Raise("Cannot get block device information from node %s" %
10007 node_disks = node_disks.payload
10008 delta = all_disks.difference(node_disks.keys())
10010 raise errors.OpPrereqError("Missing block device(s): %s" %
10011 utils.CommaJoin(delta),
10012 errors.ECODE_INVAL)
10013 for dsk in self.disks:
10014 dsk[constants.IDISK_SIZE] = \
10015 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10017 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10019 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10020 # check OS parameters (remotely)
10021 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10023 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10025 # memory check on primary node
10026 #TODO(dynmem): use MINMEM for checking
10028 _CheckNodeFreeMemory(self, self.pnode.name,
10029 "creating instance %s" % self.op.instance_name,
10030 self.be_full[constants.BE_MAXMEM],
10031 self.op.hypervisor)
10033 self.dry_run_result = list(nodenames)
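# Editorial note: when the opcode runs with dry_run set, processing stops
# after CheckPrereq and this node list (e.g. ["node1.example.com",
# "node2.example.com"]) is returned to the caller instead of the Exec result.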
10035 def Exec(self, feedback_fn):
10036 """Create and add the instance to the cluster.
10039 instance = self.op.instance_name
10040 pnode_name = self.pnode.name
10042 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10043 self.owned_locks(locking.LEVEL_NODE)), \
10044 "Node locks differ from node resource locks"
10046 ht_kind = self.op.hypervisor
10047 if ht_kind in constants.HTS_REQ_PORT:
10048 network_port = self.cfg.AllocatePort()
10050 network_port = None
10052 # This is ugly but we got a chicken-egg problem here
10053 # We can only take the group disk parameters, as the instance
10054 # has no disks yet (we are generating them right here).
10055 node = self.cfg.GetNodeInfo(pnode_name)
10056 nodegroup = self.cfg.GetNodeGroup(node.group)
10057 disks = _GenerateDiskTemplate(self,
10058 self.op.disk_template,
10059 instance, pnode_name,
10062 self.instance_file_storage_dir,
10063 self.op.file_driver,
10066 self.cfg.GetGroupDiskParams(nodegroup))
10068 iobj = objects.Instance(name=instance, os=self.op.os_type,
10069 primary_node=pnode_name,
10070 nics=self.nics, disks=disks,
10071 disk_template=self.op.disk_template,
10072 admin_state=constants.ADMINST_DOWN,
10073 network_port=network_port,
10074 beparams=self.op.beparams,
10075 hvparams=self.op.hvparams,
10076 hypervisor=self.op.hypervisor,
10077 osparams=self.op.osparams,
10081 for tag in self.op.tags:
10084 if self.adopt_disks:
10085 if self.op.disk_template == constants.DT_PLAIN:
10086 # rename LVs to the newly-generated names; we need to construct
10087 # 'fake' LV disks with the old data, plus the new unique_id
10088 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
rename_to = []
10090 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10091 rename_to.append(t_dsk.logical_id)
10092 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10093 self.cfg.SetDiskID(t_dsk, pnode_name)
10094 result = self.rpc.call_blockdev_rename(pnode_name,
10095 zip(tmp_disks, rename_to))
10096 result.Raise("Failed to rename adopted LVs")
else:
10098 feedback_fn("* creating instance disks...")
try:
10100 _CreateDisks(self, iobj)
10101 except errors.OpExecError:
10102 self.LogWarning("Device creation failed, reverting...")
try:
10104 _RemoveDisks(self, iobj)
finally:
10106 self.cfg.ReleaseDRBDMinors(instance)
raise

10109 feedback_fn("adding instance %s to cluster config" % instance)
10111 self.cfg.AddInstance(iobj, self.proc.GetECId())
10113 # Declare that we don't want to remove the instance lock anymore, as we've
10114 # added the instance to the config
10115 del self.remove_locks[locking.LEVEL_INSTANCE]
10117 if self.op.mode == constants.INSTANCE_IMPORT:
10118 # Release unused nodes
10119 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
else:
10121 # Release all nodes
10122 _ReleaseLocks(self, locking.LEVEL_NODE)

disk_abort = False
10125 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10126 feedback_fn("* wiping instance disks...")
10128 _WipeDisks(self, iobj)
10129 except errors.OpExecError, err:
10130 logging.exception("Wiping disks failed")
10131 self.LogWarning("Wiping instance disks failed (%s)", err)
10135 # Something is already wrong with the disks, don't do anything else
10137 elif self.op.wait_for_sync:
10138 disk_abort = not _WaitForSync(self, iobj)
10139 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10140 # make sure the disks are not degraded (still sync-ing is ok)
10141 feedback_fn("* checking mirrors status")
10142 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10147 _RemoveDisks(self, iobj)
10148 self.cfg.RemoveInstance(iobj.name)
10149 # Make sure the instance lock gets removed
10150 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10151 raise errors.OpExecError("There are some degraded disks for"
10154 # Release all node resource locks
10155 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10157 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10158 # we need to set the disks ID to the primary node, since the
10159 # preceding code might or might not have done it, depending on
10160 # disk template and other options
10161 for disk in iobj.disks:
10162 self.cfg.SetDiskID(disk, pnode_name)
10163 if self.op.mode == constants.INSTANCE_CREATE:
10164 if not self.op.no_install:
10165 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10166 not self.op.wait_for_sync)
10168 feedback_fn("* pausing disk sync to install instance OS")
10169 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10172 for idx, success in enumerate(result.payload):
10174 logging.warn("pause-sync of instance %s for disk %d failed",
10177 feedback_fn("* running the instance OS create scripts...")
10178 # FIXME: pass debug option from opcode to backend
10180 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10181 self.op.debug_level)
10183 feedback_fn("* resuming disk sync")
10184 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10187 for idx, success in enumerate(result.payload):
10189 logging.warn("resume-sync of instance %s for disk %d failed",
10192 os_add_result.Raise("Could not add os for instance %s"
10193 " on node %s" % (instance, pnode_name))
10196 if self.op.mode == constants.INSTANCE_IMPORT:
10197 feedback_fn("* running the instance OS import scripts...")

transfers = []
10201 for idx, image in enumerate(self.src_images):
10205 # FIXME: pass debug option from opcode to backend
10206 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10207 constants.IEIO_FILE, (image, ),
10208 constants.IEIO_SCRIPT,
10209 (iobj.disks[idx], idx),
10211 transfers.append(dt)
10214 masterd.instance.TransferInstanceData(self, feedback_fn,
10215 self.op.src_node, pnode_name,
10216 self.pnode.secondary_ip,
10218 if not compat.all(import_result):
10219 self.LogWarning("Some disks for instance %s on node %s were not"
10220 " imported successfully" % (instance, pnode_name))
10222 rename_from = self._old_instance_name
10224 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10225 feedback_fn("* preparing remote import...")
10226 # The source cluster will stop the instance before attempting to make
10227 # a connection. In some cases stopping an instance can take a long
10228 # time, hence the shutdown timeout is added to the connection
10230 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10231 self.op.source_shutdown_timeout)
10232 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10234 assert iobj.primary_node == self.pnode.name
10236 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10237 self.source_x509_ca,
10238 self._cds, timeouts)
10239 if not compat.all(disk_results):
10240 # TODO: Should the instance still be started, even if some disks
10241 # failed to import (valid for local imports, too)?
10242 self.LogWarning("Some disks for instance %s on node %s were not"
10243 " imported successfully" % (instance, pnode_name))
10245 rename_from = self.source_instance_name
10248 # also checked in the prereq part
10249 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10252 # Run rename script on newly imported instance
10253 assert iobj.name == instance
10254 feedback_fn("Running rename script for %s" % instance)
10255 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10257 self.op.debug_level)
10258 if result.fail_msg:
10259 self.LogWarning("Failed to run rename script for %s on node"
10260 " %s: %s" % (instance, pnode_name, result.fail_msg))
10262 assert not self.owned_locks(locking.LEVEL_NODE_RES)
if self.op.start:
10265 iobj.admin_state = constants.ADMINST_UP
10266 self.cfg.Update(iobj, feedback_fn)
10267 logging.info("Starting instance %s on node %s", instance, pnode_name)
10268 feedback_fn("* starting instance...")
10269 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10271 result.Raise("Could not start instance")
10273 return list(iobj.all_nodes)
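# Editorial usage sketch of a command that ends up in this LU; the flag
# spelling is the usual gnt-instance one and may differ between versions:
#   gnt-instance add -t drbd -n node1.example.com:node2.example.com \
#     -o debian-image -s 10G inst1.example.com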
10276 def _CheckRADOSFreeSpace():
10277 """Compute disk size requirements inside the RADOS cluster.

"""
10280 # For the RADOS cluster we assume there is always enough space.
pass
10284 class LUInstanceConsole(NoHooksLU):
10285 """Connect to an instance's console.
10287 This is somewhat special in that it returns the command line that
10288 you need to run on the master node in order to connect to the
10294 def ExpandNames(self):
10295 self.share_locks = _ShareAll()
10296 self._ExpandAndLockInstance()
10298 def CheckPrereq(self):
10299 """Check prerequisites.
10301 This checks that the instance is in the cluster.
10304 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10305 assert self.instance is not None, \
10306 "Cannot retrieve locked instance %s" % self.op.instance_name
10307 _CheckNodeOnline(self, self.instance.primary_node)
10309 def Exec(self, feedback_fn):
10310 """Connect to the console of an instance
10313 instance = self.instance
10314 node = instance.primary_node
10316 node_insts = self.rpc.call_instance_list([node],
10317 [instance.hypervisor])[node]
10318 node_insts.Raise("Can't get node information from %s" % node)
10320 if instance.name not in node_insts.payload:
10321 if instance.admin_state == constants.ADMINST_UP:
10322 state = constants.INSTST_ERRORDOWN
10323 elif instance.admin_state == constants.ADMINST_DOWN:
10324 state = constants.INSTST_ADMINDOWN
else:
10326 state = constants.INSTST_ADMINOFFLINE
10327 raise errors.OpExecError("Instance %s is not running (state %s)" %
10328 (instance.name, state))
10330 logging.debug("Connecting to console of %s on %s", instance.name, node)
10332 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10335 def _GetInstanceConsole(cluster, instance):
10336 """Returns console information for an instance.
10338 @type cluster: L{objects.Cluster}
10339 @type instance: L{objects.Instance}
10343 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10344 # beparams and hvparams are passed separately, to avoid editing the
10345 # instance and then saving the defaults in the instance itself.
10346 hvparams = cluster.FillHV(instance)
10347 beparams = cluster.FillBE(instance)
10348 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10350 assert console.instance == instance.name
10351 assert console.Validate()
10353 return console.ToDict()
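# Editorial illustration of the returned dictionary; the exact fields depend
# on the hypervisor and console kind, so treat this as an assumption:
#   {"instance": "inst1.example.com", "kind": "ssh",
#    "host": "node1.example.com", "user": "root",
#    "command": ["xm", "console", "inst1.example.com"]}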
10356 class LUInstanceReplaceDisks(LogicalUnit):
10357 """Replace the disks of an instance.
10360 HPATH = "mirrors-replace"
10361 HTYPE = constants.HTYPE_INSTANCE
10364 def CheckArguments(self):
10365 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10366 self.op.iallocator)
10368 def ExpandNames(self):
10369 self._ExpandAndLockInstance()
10371 assert locking.LEVEL_NODE not in self.needed_locks
10372 assert locking.LEVEL_NODE_RES not in self.needed_locks
10373 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10375 assert self.op.iallocator is None or self.op.remote_node is None, \
10376 "Conflicting options"
10378 if self.op.remote_node is not None:
10379 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10381 # Warning: do not remove the locking of the new secondary here
10382 # unless DRBD8.AddChildren is changed to work in parallel;
10383 # currently it doesn't since parallel invocations of
10384 # FindUnusedMinor will conflict
10385 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10386 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
else:
10388 self.needed_locks[locking.LEVEL_NODE] = []
10389 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10391 if self.op.iallocator is not None:
10392 # iallocator will select a new node in the same group
10393 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10395 self.needed_locks[locking.LEVEL_NODE_RES] = []
10397 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10398 self.op.iallocator, self.op.remote_node,
10399 self.op.disks, False, self.op.early_release,
10400 self.op.ignore_ipolicy)
10402 self.tasklets = [self.replacer]
10404 def DeclareLocks(self, level):
10405 if level == locking.LEVEL_NODEGROUP:
10406 assert self.op.remote_node is None
10407 assert self.op.iallocator is not None
10408 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10410 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10411 # Lock all groups used by instance optimistically; this requires going
10412 # via the node before it's locked, requiring verification later on
10413 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10414 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10416 elif level == locking.LEVEL_NODE:
10417 if self.op.iallocator is not None:
10418 assert self.op.remote_node is None
10419 assert not self.needed_locks[locking.LEVEL_NODE]
10421 # Lock member nodes of all locked groups
10422 self.needed_locks[locking.LEVEL_NODE] = [node_name
10423 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10424 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
else:
10426 self._LockInstancesNodes()
10427 elif level == locking.LEVEL_NODE_RES:
10429 self.needed_locks[locking.LEVEL_NODE_RES] = \
10430 self.needed_locks[locking.LEVEL_NODE]
10432 def BuildHooksEnv(self):
10433 """Build hooks env.
10435 This runs on the master, the primary and all the secondaries.
10438 instance = self.replacer.instance
env = {
10440 "MODE": self.op.mode,
10441 "NEW_SECONDARY": self.op.remote_node,
10442 "OLD_SECONDARY": instance.secondary_nodes[0],
}
10444 env.update(_BuildInstanceHookEnvByObject(self, instance))

return env
10447 def BuildHooksNodes(self):
10448 """Build hooks nodes.
10451 instance = self.replacer.instance
nl = [
10453 self.cfg.GetMasterNode(),
10454 instance.primary_node,
]
10456 if self.op.remote_node is not None:
10457 nl.append(self.op.remote_node)

return nl, nl
10460 def CheckPrereq(self):
10461 """Check prerequisites.
10464 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10465 self.op.iallocator is None)
10467 # Verify if node group locks are still correct
10468 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
if owned_groups:
10470 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10472 return LogicalUnit.CheckPrereq(self)
10475 class TLReplaceDisks(Tasklet):
10476 """Replaces disks for an instance.
10478 Note: Locking is not within the scope of this class.
10481 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10482 disks, delay_iallocator, early_release, ignore_ipolicy):
10483 """Initializes this class.
10486 Tasklet.__init__(self, lu)
10489 self.instance_name = instance_name
self.mode = mode
10491 self.iallocator_name = iallocator_name
10492 self.remote_node = remote_node
self.disks = disks
10494 self.delay_iallocator = delay_iallocator
10495 self.early_release = early_release
10496 self.ignore_ipolicy = ignore_ipolicy
10499 self.instance = None
10500 self.new_node = None
10501 self.target_node = None
10502 self.other_node = None
10503 self.remote_node_info = None
10504 self.node_secondary_ip = None
@staticmethod
10507 def CheckArguments(mode, remote_node, iallocator):
10508 """Helper function for users of this class.
10511 # check for valid parameter combination
10512 if mode == constants.REPLACE_DISK_CHG:
10513 if remote_node is None and iallocator is None:
10514 raise errors.OpPrereqError("When changing the secondary either an"
10515 " iallocator script must be used or the"
10516 " new node given", errors.ECODE_INVAL)
10518 if remote_node is not None and iallocator is not None:
10519 raise errors.OpPrereqError("Give either the iallocator or the new"
10520 " secondary, not both", errors.ECODE_INVAL)
10522 elif remote_node is not None or iallocator is not None:
10523 # Not replacing the secondary
10524 raise errors.OpPrereqError("The iallocator and new node options can"
10525 " only be used when changing the"
10526 " secondary node", errors.ECODE_INVAL)
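# Editorial usage sketch (the CLI flags are the usual gnt-instance
# replace-disks ones and may vary by version):
#   gnt-instance replace-disks -p inst1            # rebuild on the primary
#   gnt-instance replace-disks -n node3 inst1      # move to a new secondary
# Passing both a new secondary and an iallocator is rejected above.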
@staticmethod
10529 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10530 """Compute a new secondary node using an IAllocator.
10533 ial = IAllocator(lu.cfg, lu.rpc,
10534 mode=constants.IALLOCATOR_MODE_RELOC,
10535 name=instance_name,
10536 relocate_from=list(relocate_from))
10538 ial.Run(iallocator_name)
10540 if not ial.success:
10541 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10542 " %s" % (iallocator_name, ial.info),
10543 errors.ECODE_NORES)
10545 if len(ial.result) != ial.required_nodes:
10546 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10547 " of nodes (%s), required %s" %
(iallocator_name,
10549 len(ial.result), ial.required_nodes),
10550 errors.ECODE_FAULT)
10552 remote_node_name = ial.result[0]
10554 lu.LogInfo("Selected new secondary for instance '%s': %s",
10555 instance_name, remote_node_name)
10557 return remote_node_name
10559 def _FindFaultyDisks(self, node_name):
10560 """Wrapper for L{_FindFaultyInstanceDisks}.
10563 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10566 def _CheckDisksActivated(self, instance):
10567 """Checks if the instance disks are activated.
10569 @param instance: The instance to check disks
10570 @return: True if they are activated, False otherwise
10573 nodes = instance.all_nodes
10575 for idx, dev in enumerate(instance.disks):
for node in nodes:
10577 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10578 self.cfg.SetDiskID(dev, node)
10580 result = _BlockdevFind(self, node, dev, instance)
10584 elif result.fail_msg or not result.payload:
10589 def CheckPrereq(self):
10590 """Check prerequisites.
10592 This checks that the instance is in the cluster.
10595 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10596 assert instance is not None, \
10597 "Cannot retrieve locked instance %s" % self.instance_name
10599 if instance.disk_template != constants.DT_DRBD8:
10600 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10601 " instances", errors.ECODE_INVAL)
10603 if len(instance.secondary_nodes) != 1:
10604 raise errors.OpPrereqError("The instance has a strange layout,"
10605 " expected one secondary but found %d" %
10606 len(instance.secondary_nodes),
10607 errors.ECODE_FAULT)
10609 if not self.delay_iallocator:
10610 self._CheckPrereq2()
10612 def _CheckPrereq2(self):
10613 """Check prerequisites, second part.
10615 This function should always be part of CheckPrereq. It was separated and is
10616 now called from Exec because during node evacuation iallocator was only
10617 called with an unmodified cluster model, not taking planned changes into
10621 instance = self.instance
10622 secondary_node = instance.secondary_nodes[0]
10624 if self.iallocator_name is None:
10625 remote_node = self.remote_node
else:
10627 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10628 instance.name, instance.secondary_nodes)
10630 if remote_node is None:
10631 self.remote_node_info = None
else:
10633 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10634 "Remote node '%s' is not locked" % remote_node
10636 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10637 assert self.remote_node_info is not None, \
10638 "Cannot retrieve locked node %s" % remote_node
10640 if remote_node == self.instance.primary_node:
10641 raise errors.OpPrereqError("The specified node is the primary node of"
10642 " the instance", errors.ECODE_INVAL)
10644 if remote_node == secondary_node:
10645 raise errors.OpPrereqError("The specified node is already the"
10646 " secondary node of the instance",
10647 errors.ECODE_INVAL)
10649 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10650 constants.REPLACE_DISK_CHG):
10651 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10652 errors.ECODE_INVAL)
10654 if self.mode == constants.REPLACE_DISK_AUTO:
10655 if not self._CheckDisksActivated(instance):
10656 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10657 " first" % self.instance_name,
10658 errors.ECODE_STATE)
10659 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10660 faulty_secondary = self._FindFaultyDisks(secondary_node)
10662 if faulty_primary and faulty_secondary:
10663 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10664 " one node and can not be repaired"
10665 " automatically" % self.instance_name,
10666 errors.ECODE_STATE)
if faulty_primary:
10669 self.disks = faulty_primary
10670 self.target_node = instance.primary_node
10671 self.other_node = secondary_node
10672 check_nodes = [self.target_node, self.other_node]
10673 elif faulty_secondary:
10674 self.disks = faulty_secondary
10675 self.target_node = secondary_node
10676 self.other_node = instance.primary_node
10677 check_nodes = [self.target_node, self.other_node]
10683 # Non-automatic modes
10684 if self.mode == constants.REPLACE_DISK_PRI:
10685 self.target_node = instance.primary_node
10686 self.other_node = secondary_node
10687 check_nodes = [self.target_node, self.other_node]
10689 elif self.mode == constants.REPLACE_DISK_SEC:
10690 self.target_node = secondary_node
10691 self.other_node = instance.primary_node
10692 check_nodes = [self.target_node, self.other_node]
10694 elif self.mode == constants.REPLACE_DISK_CHG:
10695 self.new_node = remote_node
10696 self.other_node = instance.primary_node
10697 self.target_node = secondary_node
10698 check_nodes = [self.new_node, self.other_node]
10700 _CheckNodeNotDrained(self.lu, remote_node)
10701 _CheckNodeVmCapable(self.lu, remote_node)
10703 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10704 assert old_node_info is not None
10705 if old_node_info.offline and not self.early_release:
10706 # doesn't make sense to delay the release
10707 self.early_release = True
10708 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10709 " early-release mode", secondary_node)
10712 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10715 # If not specified all disks should be replaced
10717 self.disks = range(len(self.instance.disks))
10719 # TODO: This is ugly, but right now we can't distinguish between internal
10720 # submitted opcode and external one. We should fix that.
10721 if self.remote_node_info:
10722 # We change the node, lets verify it still meets instance policy
10723 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10724 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10726 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10727 ignore=self.ignore_ipolicy)
10729 for node in check_nodes:
10730 _CheckNodeOnline(self.lu, node)
10732 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10735 if node_name is not None)
10737 # Release unneeded node and node resource locks
10738 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10739 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10741 # Release any owned node group
10742 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10743 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10745 # Check whether disks are valid
10746 for disk_idx in self.disks:
10747 instance.FindDisk(disk_idx)
10749 # Get secondary node IP addresses
10750 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10751 in self.cfg.GetMultiNodeInfo(touched_nodes))
10753 def Exec(self, feedback_fn):
10754 """Execute disk replacement.
10756 This dispatches the disk replacement to the appropriate handler.
10759 if self.delay_iallocator:
10760 self._CheckPrereq2()
10763 # Verify owned locks before starting operation
10764 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10765 assert set(owned_nodes) == set(self.node_secondary_ip), \
10766 ("Incorrect node locks, owning %s, expected %s" %
10767 (owned_nodes, self.node_secondary_ip.keys()))
10768 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10769 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10771 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10772 assert list(owned_instances) == [self.instance_name], \
10773 "Instance '%s' not locked" % self.instance_name
10775 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10776 "Should not own any node group lock at this point"
if not self.disks:
10779 feedback_fn("No disks need replacement")
return
10782 feedback_fn("Replacing disk(s) %s for %s" %
10783 (utils.CommaJoin(self.disks), self.instance.name))
10785 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10787 # Activate the instance disks if we're replacing them on a down instance
10789 _StartInstanceDisks(self.lu, self.instance, True)
10792 # Should we replace the secondary node?
10793 if self.new_node is not None:
10794 fn = self._ExecDrbd8Secondary
else:
10796 fn = self._ExecDrbd8DiskOnly
10798 result = fn(feedback_fn)
10800 # Deactivate the instance disks if we're replacing them on a
10803 _SafeShutdownInstanceDisks(self.lu, self.instance)
10805 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10808 # Verify owned locks
10809 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10810 nodes = frozenset(self.node_secondary_ip)
10811 assert ((self.early_release and not owned_nodes) or
10812 (not self.early_release and not (set(owned_nodes) - nodes))), \
10813 ("Not owning the correct locks, early_release=%s, owned=%r,"
10814 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10818 def _CheckVolumeGroup(self, nodes):
10819 self.lu.LogInfo("Checking volume groups")
10821 vgname = self.cfg.GetVGName()
10823 # Make sure volume group exists on all involved nodes
10824 results = self.rpc.call_vg_list(nodes)
if not results:
10826 raise errors.OpExecError("Can't list volume groups on the nodes")

for node in nodes:
10829 res = results[node]
10830 res.Raise("Error checking node %s" % node)
10831 if vgname not in res.payload:
10832 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10835 def _CheckDisksExistence(self, nodes):
10836 # Check disk existence
10837 for idx, dev in enumerate(self.instance.disks):
10838 if idx not in self.disks:
continue

for node in nodes:
10842 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10843 self.cfg.SetDiskID(dev, node)
10845 result = _BlockdevFind(self, node, dev, self.instance)
10847 msg = result.fail_msg
10848 if msg or not result.payload:
10850 msg = "disk not found"
10851 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10854 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10855 for idx, dev in enumerate(self.instance.disks):
10856 if idx not in self.disks:
10859 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10862 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10863 on_primary, ldisk=ldisk):
10864 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10865 " replace disks for instance %s" %
10866 (node_name, self.instance.name))
10868 def _CreateNewStorage(self, node_name):
10869 """Create new storage on the primary or secondary node.
10871 This is only used for same-node replaces, not for changing the
10872 secondary node, hence we don't want to modify the existing disk.

"""
iv_names = {}
10877 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10878 for idx, dev in enumerate(disks):
10879 if idx not in self.disks:
10882 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10884 self.cfg.SetDiskID(dev, node_name)
10886 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10887 names = _GenerateUniqueNames(self.lu, lv_names)
10889 (data_disk, meta_disk) = dev.children
10890 vg_data = data_disk.logical_id[0]
10891 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10892 logical_id=(vg_data, names[0]),
10893 params=data_disk.params)
10894 vg_meta = meta_disk.logical_id[0]
10895 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10896 logical_id=(vg_meta, names[1]),
10897 params=meta_disk.params)
10899 new_lvs = [lv_data, lv_meta]
10900 old_lvs = [child.Copy() for child in dev.children]
10901 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10903 # we pass force_create=True to force the LVM creation
10904 for new_lv in new_lvs:
10905 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10906 _GetInstanceInfoText(self.instance), False)

return iv_names
10910 def _CheckDevices(self, node_name, iv_names):
10911 for name, (dev, _, _) in iv_names.iteritems():
10912 self.cfg.SetDiskID(dev, node_name)
10914 result = _BlockdevFind(self, node_name, dev, self.instance)
10916 msg = result.fail_msg
10917 if msg or not result.payload:
10919 msg = "disk not found"
10920 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10923 if result.payload.is_degraded:
10924 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10926 def _RemoveOldStorage(self, node_name, iv_names):
10927 for name, (_, old_lvs, _) in iv_names.iteritems():
10928 self.lu.LogInfo("Remove logical volumes for %s" % name)

for lv in old_lvs:
10931 self.cfg.SetDiskID(lv, node_name)
10933 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
if msg:
10935 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10936 hint="remove unused LVs manually")
10938 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10939 """Replace a disk on the primary or secondary for DRBD 8.
10941 The algorithm for replace is quite complicated:
10943 1. for each disk to be replaced:
10945 1. create new LVs on the target node with unique names
10946 1. detach old LVs from the drbd device
10947 1. rename old LVs to name_replaced.<time_t>
10948 1. rename new LVs to old LVs
10949 1. attach the new LVs (with the old names now) to the drbd device
10951 1. wait for sync across all devices
10953 1. for each modified disk:
10955 1. remove old LVs (which have the name name_replaces.<time_t>)
10957 Failures are not very well handled.

"""
steps_total = 6
10962 # Step: check device activation
10963 self.lu.LogStep(1, steps_total, "Check device existence")
10964 self._CheckDisksExistence([self.other_node, self.target_node])
10965 self._CheckVolumeGroup([self.target_node, self.other_node])
10967 # Step: check other node consistency
10968 self.lu.LogStep(2, steps_total, "Check peer consistency")
10969 self._CheckDisksConsistency(self.other_node,
10970 self.other_node == self.instance.primary_node,
10973 # Step: create new storage
10974 self.lu.LogStep(3, steps_total, "Allocate new storage")
10975 iv_names = self._CreateNewStorage(self.target_node)
10977 # Step: for each lv, detach+rename*2+attach
10978 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10979 for dev, old_lvs, new_lvs in iv_names.itervalues():
10980 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10982 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10984 result.Raise("Can't detach drbd from local storage on node"
10985 " %s for device %s" % (self.target_node, dev.iv_name))
10987 #cfg.Update(instance)
10989 # ok, we created the new LVs, so now we know we have the needed
10990 # storage; as such, we proceed on the target node to rename
10991 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10992 # using the assumption that logical_id == physical_id (which in
10993 # turn is the unique_id on that node)
10995 # FIXME(iustin): use a better name for the replaced LVs
10996 temp_suffix = int(time.time())
10997 ren_fn = lambda d, suff: (d.physical_id[0],
10998 d.physical_id[1] + "_replaced-%s" % suff)
11000 # Build the rename list based on what LVs exist on the node
11001 rename_old_to_new = []
11002 for to_ren in old_lvs:
11003 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11004 if not result.fail_msg and result.payload:
11006 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11008 self.lu.LogInfo("Renaming the old LVs on the target node")
11009 result = self.rpc.call_blockdev_rename(self.target_node,
rename_old_to_new)
11011 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11013 # Now we rename the new LVs to the old LVs
11014 self.lu.LogInfo("Renaming the new LVs on the target node")
11015 rename_new_to_old = [(new, old.physical_id)
11016 for old, new in zip(old_lvs, new_lvs)]
11017 result = self.rpc.call_blockdev_rename(self.target_node, rename_new_to_old)
11019 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11021 # Intermediate steps of in memory modifications
11022 for old, new in zip(old_lvs, new_lvs):
11023 new.logical_id = old.logical_id
11024 self.cfg.SetDiskID(new, self.target_node)
11026 # We need to modify old_lvs so that removal later removes the
11027 # right LVs, not the newly added ones; note that old_lvs is a copy here
11029 for disk in old_lvs:
11030 disk.logical_id = ren_fn(disk, temp_suffix)
11031 self.cfg.SetDiskID(disk, self.target_node)
11033 # Now that the new lvs have the old name, we can add them to the device
11034 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11035 result = self.rpc.call_blockdev_addchildren(self.target_node,
11036 (dev, self.instance), new_lvs)
11037 msg = result.fail_msg
11038 if msg:
11039 for new_lv in new_lvs:
11040 msg2 = self.rpc.call_blockdev_remove(self.target_node, new_lv).fail_msg
11042 if msg2:
11043 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11044 hint=("cleanup manually the unused logical"
11045 " volumes"))
11046 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11048 cstep = itertools.count(5)
11050 if self.early_release:
11051 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11052 self._RemoveOldStorage(self.target_node, iv_names)
11053 # TODO: Check if releasing locks early still makes sense
11054 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11056 # Release all resource locks except those used by the instance
11057 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11058 keep=self.node_secondary_ip.keys())
11060 # Release all node locks while waiting for sync
11061 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11063 # TODO: Can the instance lock be downgraded here? Take the optional disk
11064 # shutdown in the caller into consideration.
11067 # This can fail as the old devices are degraded and _WaitForSync
11068 # does a combined result over all disks, so we don't check its return value
11069 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11070 _WaitForSync(self.lu, self.instance)
11072 # Check all devices manually
11073 self._CheckDevices(self.instance.primary_node, iv_names)
11075 # Step: remove old storage
11076 if not self.early_release:
11077 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11078 self._RemoveOldStorage(self.target_node, iv_names)
11080 def _ExecDrbd8Secondary(self, feedback_fn):
11081 """Replace the secondary node for DRBD 8.
11083 The algorithm for replace is quite complicated:
11084 - for all disks of the instance:
11085 - create new LVs on the new node with same names
11086 - shutdown the drbd device on the old secondary
11087 - disconnect the drbd network on the primary
11088 - create the drbd device on the new secondary
11089 - network attach the drbd on the primary, using an artifice:
11090 the drbd code for Attach() will connect to the network if it
11091 finds a device which is connected to the good local disks but
11092 not network enabled
11093 - wait for sync across all devices
11094 - remove all disks from the old secondary
11096 Failures are not very well handled.
11101 pnode = self.instance.primary_node
11103 # Step: check device activation
11104 self.lu.LogStep(1, steps_total, "Check device existence")
11105 self._CheckDisksExistence([self.instance.primary_node])
11106 self._CheckVolumeGroup([self.instance.primary_node])
11108 # Step: check other node consistency
11109 self.lu.LogStep(2, steps_total, "Check peer consistency")
11110 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11112 # Step: create new storage
11113 self.lu.LogStep(3, steps_total, "Allocate new storage")
11114 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11115 for idx, dev in enumerate(disks):
11116 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11117 (self.new_node, idx))
11118 # we pass force_create=True to force LVM creation
11119 for new_lv in dev.children:
11120 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11121 True, _GetInstanceInfoText(self.instance), False)
11123 # Step 4: drbd minors and drbd setup changes
11124 # after this, we must manually remove the drbd minors on both the
11125 # error and the success paths
11126 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11127 minors = self.cfg.AllocateDRBDMinor([self.new_node
11128 for dev in self.instance.disks],
11129 self.instance.name)
11130 logging.debug("Allocated minors %r", minors)
11133 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11134 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11135 (self.new_node, idx))
11136 # create new devices on new_node; note that we create two IDs:
11137 # one without port, so the drbd will be activated without
11138 # networking information on the new node at this stage, and one
11139 # with network, for the latter activation in step 4
11140 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11141 if self.instance.primary_node == o_node1:
11142 p_minor = o_minor1
11143 else:
11144 assert self.instance.primary_node == o_node2, "Three-node instance?"
11145 p_minor = o_minor2
11147 new_alone_id = (self.instance.primary_node, self.new_node, None,
11148 p_minor, new_minor, o_secret)
11149 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11150 p_minor, new_minor, o_secret)
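# Illustrative sketch (made-up values): a DRBD8 logical_id is the 6-tuple
# (nodeA, nodeB, port, minorA, minorB, secret).  For primary "node1", new
# secondary "node3", port 11000, primary minor 0 and newly allocated minor 2:
#   new_alone_id = ("node1", "node3", None,  0, 2, "secret")  # no networking yet
#   new_net_id   = ("node1", "node3", 11000, 0, 2, "secret")  # used for the later attach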
11152 iv_names[idx] = (dev, dev.children, new_net_id)
11153 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor, new_net_id)
11155 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11156 logical_id=new_alone_id,
11157 children=dev.children,
11158 size=dev.size,
11159 params={})
11160 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11161 self.cfg)
11162 try:
11163 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11164 anno_new_drbd,
11165 _GetInstanceInfoText(self.instance), False)
11166 except errors.GenericError:
11167 self.cfg.ReleaseDRBDMinors(self.instance.name)
11168 raise
11170 # We have new devices, shutdown the drbd on the old secondary
11171 for idx, dev in enumerate(self.instance.disks):
11172 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11173 self.cfg.SetDiskID(dev, self.target_node)
11174 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11175 (dev, self.instance)).fail_msg
11176 if msg:
11177 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11178 " node: %s" % (idx, msg),
11179 hint=("Please cleanup this device manually as"
11180 " soon as possible"))
11182 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11183 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11184 self.instance.disks)[pnode]
11186 msg = result.fail_msg
11187 if msg:
11188 # detaches didn't succeed (unlikely)
11189 self.cfg.ReleaseDRBDMinors(self.instance.name)
11190 raise errors.OpExecError("Can't detach the disks from the network on"
11191 " old node: %s" % (msg,))
11193 # if we managed to detach at least one, we update all the disks of
11194 # the instance to point to the new secondary
11195 self.lu.LogInfo("Updating instance configuration")
11196 for dev, _, new_logical_id in iv_names.itervalues():
11197 dev.logical_id = new_logical_id
11198 self.cfg.SetDiskID(dev, self.instance.primary_node)
11200 self.cfg.Update(self.instance, feedback_fn)
11202 # Release all node locks (the configuration has been updated)
11203 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11205 # and now perform the drbd attach
11206 self.lu.LogInfo("Attaching primary drbds to new secondary"
11207 " (standalone => connected)")
11208 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11210 self.node_secondary_ip,
11211 (self.instance.disks, self.instance),
11212 self.instance.name,
11214 for to_node, to_result in result.items():
11215 msg = to_result.fail_msg
11217 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11219 hint=("please do a gnt-instance info to see the"
11220 " status of disks"))
11222 cstep = itertools.count(5)
11224 if self.early_release:
11225 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11226 self._RemoveOldStorage(self.target_node, iv_names)
11227 # TODO: Check if releasing locks early still makes sense
11228 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11230 # Release all resource locks except those used by the instance
11231 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11232 keep=self.node_secondary_ip.keys())
11234 # TODO: Can the instance lock be downgraded here? Take the optional disk
11235 # shutdown in the caller into consideration.
11238 # This can fail as the old devices are degraded and _WaitForSync
11239 # does a combined result over all disks, so we don't check its return value
11240 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11241 _WaitForSync(self.lu, self.instance)
11243 # Check all devices manually
11244 self._CheckDevices(self.instance.primary_node, iv_names)
11246 # Step: remove old storage
11247 if not self.early_release:
11248 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11249 self._RemoveOldStorage(self.target_node, iv_names)
11252 class LURepairNodeStorage(NoHooksLU):
11253 """Repairs the volume group on a node.
11258 def CheckArguments(self):
11259 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11261 storage_type = self.op.storage_type
11263 if (constants.SO_FIX_CONSISTENCY not in
11264 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11265 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11266 " repaired" % storage_type,
11267 errors.ECODE_INVAL)
11269 def ExpandNames(self):
11270 self.needed_locks = {
11271 locking.LEVEL_NODE: [self.op.node_name],
11274 def _CheckFaultyDisks(self, instance, node_name):
11275 """Ensure faulty disks abort the opcode or at least warn."""
11276 try:
11277 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11278 node_name, True):
11279 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11280 " node '%s'" % (instance.name, node_name),
11281 errors.ECODE_STATE)
11282 except errors.OpPrereqError, err:
11283 if self.op.ignore_consistency:
11284 self.proc.LogWarning(str(err.args[0]))
11285 else:
11286 raise
11288 def CheckPrereq(self):
11289 """Check prerequisites.
11292 # Check whether any instance on this node has faulty disks
11293 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11294 if inst.admin_state != constants.ADMINST_UP:
11295 continue
11296 check_nodes = set(inst.all_nodes)
11297 check_nodes.discard(self.op.node_name)
11298 for inst_node_name in check_nodes:
11299 self._CheckFaultyDisks(inst, inst_node_name)
11301 def Exec(self, feedback_fn):
11302 feedback_fn("Repairing storage unit '%s' on %s ..." %
11303 (self.op.name, self.op.node_name))
11305 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11306 result = self.rpc.call_storage_execute(self.op.node_name,
11307 self.op.storage_type, st_args,
11309 constants.SO_FIX_CONSISTENCY)
11310 result.Raise("Failed to repair storage unit '%s' on %s" %
11311 (self.op.name, self.op.node_name))
11314 class LUNodeEvacuate(NoHooksLU):
11315 """Evacuates instances off a list of nodes.
11320 _MODE2IALLOCATOR = {
11321 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11322 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11323 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11325 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11326 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11327 constants.IALLOCATOR_NEVAC_MODES)
11329 def CheckArguments(self):
11330 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11332 def ExpandNames(self):
11333 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11335 if self.op.remote_node is not None:
11336 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11337 assert self.op.remote_node
11339 if self.op.remote_node == self.op.node_name:
11340 raise errors.OpPrereqError("Can not use evacuated node as a new"
11341 " secondary node", errors.ECODE_INVAL)
11343 if self.op.mode != constants.NODE_EVAC_SEC:
11344 raise errors.OpPrereqError("Without the use of an iallocator only"
11345 " secondary instances can be evacuated",
11346 errors.ECODE_INVAL)
11349 self.share_locks = _ShareAll()
11350 self.needed_locks = {
11351 locking.LEVEL_INSTANCE: [],
11352 locking.LEVEL_NODEGROUP: [],
11353 locking.LEVEL_NODE: [],
11356 # Determine nodes (via group) optimistically, needs verification once locks
11357 # have been acquired
11358 self.lock_nodes = self._DetermineNodes()
11360 def _DetermineNodes(self):
11361 """Gets the list of nodes to operate on.
11364 if self.op.remote_node is None:
11365 # Iallocator will choose any node(s) in the same group
11366 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11368 group_nodes = frozenset([self.op.remote_node])
11370 # Determine nodes to be locked
11371 return set([self.op.node_name]) | group_nodes
11373 def _DetermineInstances(self):
11374 """Builds list of instances to operate on.
11377 assert self.op.mode in constants.NODE_EVAC_MODES
11379 if self.op.mode == constants.NODE_EVAC_PRI:
11380 # Primary instances only
11381 inst_fn = _GetNodePrimaryInstances
11382 assert self.op.remote_node is None, \
11383 "Evacuating primary instances requires iallocator"
11384 elif self.op.mode == constants.NODE_EVAC_SEC:
11385 # Secondary instances only
11386 inst_fn = _GetNodeSecondaryInstances
11389 assert self.op.mode == constants.NODE_EVAC_ALL
11390 inst_fn = _GetNodeInstances
11391 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11393 raise errors.OpPrereqError("Due to an issue with the iallocator"
11394 " interface it is not possible to evacuate"
11395 " all instances at once; specify explicitly"
11396 " whether to evacuate primary or secondary"
11398 errors.ECODE_INVAL)
11400 return inst_fn(self.cfg, self.op.node_name)
11402 def DeclareLocks(self, level):
11403 if level == locking.LEVEL_INSTANCE:
11404 # Lock instances optimistically, needs verification once node and group
11405 # locks have been acquired
11406 self.needed_locks[locking.LEVEL_INSTANCE] = \
11407 set(i.name for i in self._DetermineInstances())
11409 elif level == locking.LEVEL_NODEGROUP:
11410 # Lock node groups for all potential target nodes optimistically, needs
11411 # verification once nodes have been acquired
11412 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11413 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11415 elif level == locking.LEVEL_NODE:
11416 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11418 def CheckPrereq(self):
11420 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11421 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11422 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11424 need_nodes = self._DetermineNodes()
11426 if not owned_nodes.issuperset(need_nodes):
11427 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11428 " locks were acquired, current nodes are"
11429 " are '%s', used to be '%s'; retry the"
11431 (self.op.node_name,
11432 utils.CommaJoin(need_nodes),
11433 utils.CommaJoin(owned_nodes)),
11434 errors.ECODE_STATE)
11436 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11437 if owned_groups != wanted_groups:
11438 raise errors.OpExecError("Node groups changed since locks were acquired,"
11439 " current groups are '%s', used to be '%s';"
11440 " retry the operation" %
11441 (utils.CommaJoin(wanted_groups),
11442 utils.CommaJoin(owned_groups)))
11444 # Determine affected instances
11445 self.instances = self._DetermineInstances()
11446 self.instance_names = [i.name for i in self.instances]
11448 if set(self.instance_names) != owned_instances:
11449 raise errors.OpExecError("Instances on node '%s' changed since locks"
11450 " were acquired, current instances are '%s',"
11451 " used to be '%s'; retry the operation" %
11452 (self.op.node_name,
11453 utils.CommaJoin(self.instance_names),
11454 utils.CommaJoin(owned_instances)))
11456 if self.instance_names:
11457 self.LogInfo("Evacuating instances from node '%s': %s",
11459 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11461 self.LogInfo("No instances to evacuate from node '%s'",
11464 if self.op.remote_node is not None:
11465 for i in self.instances:
11466 if i.primary_node == self.op.remote_node:
11467 raise errors.OpPrereqError("Node %s is the primary node of"
11468 " instance %s, cannot use it as"
11470 (self.op.remote_node, i.name),
11471 errors.ECODE_INVAL)
11473 def Exec(self, feedback_fn):
11474 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11476 if not self.instance_names:
11477 # No instances to evacuate
11478 jobs = []
11480 elif self.op.iallocator is not None:
11481 # TODO: Implement relocation to other group
11482 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11483 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11484 instances=list(self.instance_names))
11486 ial.Run(self.op.iallocator)
11488 if not ial.success:
11489 raise errors.OpPrereqError("Can't compute node evacuation using"
11490 " iallocator '%s': %s" %
11491 (self.op.iallocator, ial.info),
11492 errors.ECODE_NORES)
11494 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11496 elif self.op.remote_node is not None:
11497 assert self.op.mode == constants.NODE_EVAC_SEC
11498 jobs = [
11499 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11500 remote_node=self.op.remote_node,
11501 disks=[],
11502 mode=constants.REPLACE_DISK_CHG,
11503 early_release=self.op.early_release)]
11504 for instance_name in self.instance_names
11505 ]
11507 else:
11508 raise errors.ProgrammerError("No iallocator or remote node")
11510 return ResultWithJobs(jobs)
11513 def _SetOpEarlyRelease(early_release, op):
11514 """Sets C{early_release} flag on opcodes if available.
11518 op.early_release = early_release
11519 except AttributeError:
11520 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11525 def _NodeEvacDest(use_nodes, group, nodes):
11526 """Returns group or nodes depending on caller's choice.
11530 return utils.CommaJoin(nodes)
11535 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11536 """Unpacks the result of change-group and node-evacuate iallocator requests.
11538 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11539 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11541 @type lu: L{LogicalUnit}
11542 @param lu: Logical unit instance
11543 @type alloc_result: tuple/list
11544 @param alloc_result: Result from iallocator
11545 @type early_release: bool
11546 @param early_release: Whether to release locks early if possible
11547 @type use_nodes: bool
11548 @param use_nodes: Whether to display node names instead of groups
11551 (moved, failed, jobs) = alloc_result
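# Illustrative sketch (made-up values) of the iallocator result unpacked
# above:
#   moved  = [("inst1.example.com", "default", ["node2", "node3"])]
#   failed = [("inst2.example.com", "not enough memory")]
#   jobs   = [[<serialized opcode dict>, <serialized opcode dict>]]
# Each inner list in "jobs" is loaded via OpCode.LoadOpCode and submitted as
# one job below.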
11553 if failed:
11554 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11555 for (name, reason) in failed)
11556 lu.LogWarning("Unable to evacuate instances %s", failreason)
11557 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11560 lu.LogInfo("Instances to be moved: %s",
11561 utils.CommaJoin("%s (to %s)" %
11562 (name, _NodeEvacDest(use_nodes, group, nodes))
11563 for (name, group, nodes) in moved))
11565 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11566 map(opcodes.OpCode.LoadOpCode, ops))
11567 for ops in jobs]
11570 class LUInstanceGrowDisk(LogicalUnit):
11571 """Grow a disk of an instance.
11574 HPATH = "disk-grow"
11575 HTYPE = constants.HTYPE_INSTANCE
11578 def ExpandNames(self):
11579 self._ExpandAndLockInstance()
11580 self.needed_locks[locking.LEVEL_NODE] = []
11581 self.needed_locks[locking.LEVEL_NODE_RES] = []
11582 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11583 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11585 def DeclareLocks(self, level):
11586 if level == locking.LEVEL_NODE:
11587 self._LockInstancesNodes()
11588 elif level == locking.LEVEL_NODE_RES:
11590 self.needed_locks[locking.LEVEL_NODE_RES] = \
11591 self.needed_locks[locking.LEVEL_NODE][:]
11593 def BuildHooksEnv(self):
11594 """Build hooks env.
11596 This runs on the master, the primary and all the secondaries.
11600 "DISK": self.op.disk,
11601 "AMOUNT": self.op.amount,
11602 "ABSOLUTE": self.op.absolute,
11604 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11607 def BuildHooksNodes(self):
11608 """Build hooks nodes.
11611 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11614 def CheckPrereq(self):
11615 """Check prerequisites.
11617 This checks that the instance is in the cluster.
11620 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11621 assert instance is not None, \
11622 "Cannot retrieve locked instance %s" % self.op.instance_name
11623 nodenames = list(instance.all_nodes)
11624 for node in nodenames:
11625 _CheckNodeOnline(self, node)
11627 self.instance = instance
11629 if instance.disk_template not in constants.DTS_GROWABLE:
11630 raise errors.OpPrereqError("Instance's disk layout does not support"
11631 " growing", errors.ECODE_INVAL)
11633 self.disk = instance.FindDisk(self.op.disk)
11635 if self.op.absolute:
11636 self.target = self.op.amount
11637 self.delta = self.target - self.disk.size
11638 if self.delta < 0:
11639 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11640 "current disk size (%s)" %
11641 (utils.FormatUnit(self.target, "h"),
11642 utils.FormatUnit(self.disk.size, "h")),
11643 errors.ECODE_STATE)
11645 self.delta = self.op.amount
11646 self.target = self.disk.size + self.delta
11647 if self.delta < 0:
11648 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11649 utils.FormatUnit(self.delta, "h"),
11650 errors.ECODE_INVAL)
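# Worked example (made-up numbers): for a 10240 MB disk, an absolute amount
# of 12288 gives target=12288 and delta=2048, while a relative amount of
# 2048 gives delta=2048 and target=12288; in both cases a negative delta is
# rejected by the checks above.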
11652 if instance.disk_template not in (constants.DT_FILE,
11653 constants.DT_SHARED_FILE,
11655 # TODO: check the free disk space for file, when that feature will be
11657 _CheckNodesFreeDiskPerVG(self, nodenames,
11658 self.disk.ComputeGrowth(self.delta))
11660 def Exec(self, feedback_fn):
11661 """Execute disk grow.
11664 instance = self.instance
11665 disk = self.disk
11667 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11668 assert (self.owned_locks(locking.LEVEL_NODE) ==
11669 self.owned_locks(locking.LEVEL_NODE_RES))
11671 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11672 if not disks_ok:
11673 raise errors.OpExecError("Cannot activate block device to grow")
11675 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11676 (self.op.disk, instance.name,
11677 utils.FormatUnit(self.delta, "h"),
11678 utils.FormatUnit(self.target, "h")))
11680 # First run all grow ops in dry-run mode
11681 for node in instance.all_nodes:
11682 self.cfg.SetDiskID(disk, node)
11683 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11685 result.Raise("Grow request failed to node %s" % node)
11687 # We know that (as far as we can test) operations across different
11688 # nodes will succeed, time to run it for real
11689 for node in instance.all_nodes:
11690 self.cfg.SetDiskID(disk, node)
11691 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11693 result.Raise("Grow request failed to node %s" % node)
11695 # TODO: Rewrite code to work properly
11696 # DRBD goes into sync mode for a short amount of time after executing the
11697 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11698 # calling "resize" in sync mode fails. Sleeping for a short amount of
11699 # time is a work-around.
11702 disk.RecordGrow(self.delta)
11703 self.cfg.Update(instance, feedback_fn)
11705 # Changes have been recorded, release node lock
11706 _ReleaseLocks(self, locking.LEVEL_NODE)
11708 # Downgrade lock while waiting for sync
11709 self.glm.downgrade(locking.LEVEL_INSTANCE)
11711 if self.op.wait_for_sync:
11712 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11713 if disk_abort:
11714 self.proc.LogWarning("Disk sync-ing has not returned a good"
11715 " status; please check the instance")
11716 if instance.admin_state != constants.ADMINST_UP:
11717 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11718 elif instance.admin_state != constants.ADMINST_UP:
11719 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11720 " not supposed to be running because no wait for"
11721 " sync mode was requested")
11723 assert self.owned_locks(locking.LEVEL_NODE_RES)
11724 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11727 class LUInstanceQueryData(NoHooksLU):
11728 """Query runtime instance data.
11733 def ExpandNames(self):
11734 self.needed_locks = {}
11736 # Use locking if requested or when non-static information is wanted
11737 if not (self.op.static or self.op.use_locking):
11738 self.LogWarning("Non-static data requested, locks need to be acquired")
11739 self.op.use_locking = True
11741 if self.op.instances or not self.op.use_locking:
11742 # Expand instance names right here
11743 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11745 # Will use acquired locks
11746 self.wanted_names = None
11748 if self.op.use_locking:
11749 self.share_locks = _ShareAll()
11751 if self.wanted_names is None:
11752 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11754 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11756 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11757 self.needed_locks[locking.LEVEL_NODE] = []
11758 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11760 def DeclareLocks(self, level):
11761 if self.op.use_locking:
11762 if level == locking.LEVEL_NODEGROUP:
11763 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11765 # Lock all groups used by instances optimistically; this requires going
11766 # via the node before it's locked, requiring verification later on
11767 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11768 frozenset(group_uuid
11769 for instance_name in owned_instances
11771 self.cfg.GetInstanceNodeGroups(instance_name))
11773 elif level == locking.LEVEL_NODE:
11774 self._LockInstancesNodes()
11776 def CheckPrereq(self):
11777 """Check prerequisites.
11779 This only checks the optional instance list against the existing names.
11782 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11783 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11784 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11786 if self.wanted_names is None:
11787 assert self.op.use_locking, "Locking was not used"
11788 self.wanted_names = owned_instances
11790 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11792 if self.op.use_locking:
11793 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11796 assert not (owned_instances or owned_groups or owned_nodes)
11798 self.wanted_instances = instances.values()
11800 def _ComputeBlockdevStatus(self, node, instance, dev):
11801 """Returns the status of a block device
11804 if self.op.static or not node:
11807 self.cfg.SetDiskID(dev, node)
11809 result = self.rpc.call_blockdev_find(node, dev)
11813 result.Raise("Can't compute disk status for %s" % instance.name)
11815 status = result.payload
11819 return (status.dev_path, status.major, status.minor,
11820 status.sync_percent, status.estimated_time,
11821 status.is_degraded, status.ldisk_status)
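# Illustrative sketch (made-up values): for a healthy, fully synced DRBD
# device the tuple returned above might look like
#   ("/dev/drbd0", 147, 0, None, None, False, <ldisk status constant>)
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status).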
11823 def _ComputeDiskStatus(self, instance, snode, dev):
11824 """Compute block device status.
11827 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11829 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11831 def _ComputeDiskStatusInner(self, instance, snode, dev):
11832 """Compute block device status.
11834 @attention: The device has to be annotated already.
11837 if dev.dev_type in constants.LDS_DRBD:
11838 # we change the snode then (otherwise we use the one passed in)
11839 if dev.logical_id[0] == instance.primary_node:
11840 snode = dev.logical_id[1]
11842 snode = dev.logical_id[0]
11844 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11846 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11849 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11856 "iv_name": dev.iv_name,
11857 "dev_type": dev.dev_type,
11858 "logical_id": dev.logical_id,
11859 "physical_id": dev.physical_id,
11860 "pstatus": dev_pstatus,
11861 "sstatus": dev_sstatus,
11862 "children": dev_children,
11867 def Exec(self, feedback_fn):
11868 """Gather and return data"""
11871 cluster = self.cfg.GetClusterInfo()
11873 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11874 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
11876 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
11877 for node in nodes.values()))
11879 group2name_fn = lambda uuid: groups[uuid].name
11881 for instance in self.wanted_instances:
11882 pnode = nodes[instance.primary_node]
11884 if self.op.static or pnode.offline:
11885 remote_state = None
11887 self.LogWarning("Primary node %s is marked offline, returning static"
11888 " information only for instance %s" %
11889 (pnode.name, instance.name))
11891 remote_info = self.rpc.call_instance_info(instance.primary_node,
11893 instance.hypervisor)
11894 remote_info.Raise("Error checking node %s" % instance.primary_node)
11895 remote_info = remote_info.payload
11896 if remote_info and "state" in remote_info:
11897 remote_state = "up"
11899 if instance.admin_state == constants.ADMINST_UP:
11900 remote_state = "down"
11902 remote_state = instance.admin_state
11904 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11907 snodes_group_uuids = [nodes[snode_name].group
11908 for snode_name in instance.secondary_nodes]
11910 result[instance.name] = {
11911 "name": instance.name,
11912 "config_state": instance.admin_state,
11913 "run_state": remote_state,
11914 "pnode": instance.primary_node,
11915 "pnode_group_uuid": pnode.group,
11916 "pnode_group_name": group2name_fn(pnode.group),
11917 "snodes": instance.secondary_nodes,
11918 "snodes_group_uuids": snodes_group_uuids,
11919 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
11921 # this happens to be the same format used for hooks
11922 "nics": _NICListToTuple(self, instance.nics),
11923 "disk_template": instance.disk_template,
11925 "hypervisor": instance.hypervisor,
11926 "network_port": instance.network_port,
11927 "hv_instance": instance.hvparams,
11928 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11929 "be_instance": instance.beparams,
11930 "be_actual": cluster.FillBE(instance),
11931 "os_instance": instance.osparams,
11932 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11933 "serial_no": instance.serial_no,
11934 "mtime": instance.mtime,
11935 "ctime": instance.ctime,
11936 "uuid": instance.uuid,
11942 def PrepareContainerMods(mods, private_fn):
11943 """Prepares a list of container modifications by adding a private data field.
11945 @type mods: list of tuples; (operation, index, parameters)
11946 @param mods: List of modifications
11947 @type private_fn: callable or None
11948 @param private_fn: Callable for constructing a private data field for a
11953 if private_fn is None:
11958 return [(op, idx, params, fn()) for (op, idx, params) in mods]
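# Illustrative sketch (made-up values): with
#   mods = [(constants.DDM_ADD, -1, {"size": 1024})]
# and private_fn=_InstNicModPrivate, this returns
#   [(constants.DDM_ADD, -1, {"size": 1024}, <_InstNicModPrivate instance>)]
# while with private_fn=None the fourth element is simply None.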
11961 #: Type description for changes as returned by L{ApplyContainerMods}'s
11963 _TApplyContModsCbChanges = \
11964 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11965 ht.TNonEmptyString,
11970 def ApplyContainerMods(kind, container, chgdesc, mods,
11971 create_fn, modify_fn, remove_fn):
11972 """Applies descriptions in C{mods} to C{container}.
11975 @param kind: One-word item description
11976 @type container: list
11977 @param container: Container to modify
11978 @type chgdesc: None or list
11979 @param chgdesc: List of applied changes
11981 @param mods: Modifications as returned by L{PrepareContainerMods}
11982 @type create_fn: callable
11983 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11984 receives absolute item index, parameters and private data object as added
11985 by L{PrepareContainerMods}, returns tuple containing new item and changes
11987 @type modify_fn: callable
11988 @param modify_fn: Callback for modifying an existing item
11989 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11990 and private data object as added by L{PrepareContainerMods}, returns
11992 @type remove_fn: callable
11993 @param remove_fn: Callback on removing item; receives absolute item index,
11994 item and private data object as added by L{PrepareContainerMods}
11997 for (op, idx, params, private) in mods:
12000 absidx = len(container) - 1
12002 raise IndexError("Not accepting negative indices other than -1")
12003 elif idx > len(container):
12004 raise IndexError("Got %s index %s, but there are only %s" %
12005 (kind, idx, len(container)))
12011 if op == constants.DDM_ADD:
12012 # Calculate where item will be added
12014 addidx = len(container)
12018 if create_fn is None:
12021 (item, changes) = create_fn(addidx, params, private)
12024 container.append(item)
12027 assert idx <= len(container)
12028 # list.insert does so before the specified index
12029 container.insert(idx, item)
12031 # Retrieve existing item
12033 item = container[absidx]
12035 raise IndexError("Invalid %s index %s" % (kind, idx))
12037 if op == constants.DDM_REMOVE:
12040 if remove_fn is not None:
12041 remove_fn(absidx, item, private)
12043 changes = [("%s/%s" % (kind, absidx), "remove")]
12045 assert container[absidx] == item
12046 del container[absidx]
12047 elif op == constants.DDM_MODIFY:
12048 if modify_fn is not None:
12049 changes = modify_fn(absidx, item, params, private)
12051 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12053 assert _TApplyContModsCbChanges(changes)
12055 if not (chgdesc is None or changes is None):
12056 chgdesc.extend(changes)
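# Illustrative usage sketch (cf. LUInstanceSetParams below): applying
# prepared NIC modifications to a copy of the instance's NIC list while
# collecting a change description:
#   chgdesc = []
#   ApplyContainerMods("NIC", nics, chgdesc, nicmod,
#                      create_fn, modify_fn, None)
# where each entry appended to chgdesc is a ("NIC/<idx>", "<change>") pair,
# as checked by _TApplyContModsCbChanges above.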
12059 def _UpdateIvNames(base_index, disks):
12060 """Updates the C{iv_name} attribute of disks.
12062 @type disks: list of L{objects.Disk}
12065 for (idx, disk) in enumerate(disks):
12066 disk.iv_name = "disk/%s" % (base_index + idx, )
12069 class _InstNicModPrivate:
12070 """Data structure for network interface modifications.
12072 Used by L{LUInstanceSetParams}.
12075 def __init__(self):
12080 class LUInstanceSetParams(LogicalUnit):
12081 """Modifies an instances's parameters.
12084 HPATH = "instance-modify"
12085 HTYPE = constants.HTYPE_INSTANCE
12089 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12090 assert ht.TList(mods)
12091 assert not mods or len(mods[0]) in (2, 3)
12093 if mods and len(mods[0]) == 2:
12097 for op, params in mods:
12098 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12099 result.append((op, -1, params))
12103 raise errors.OpPrereqError("Only one %s add or remove operation is"
12104 " supported at a time" % kind,
12105 errors.ECODE_INVAL)
12107 result.append((constants.DDM_MODIFY, op, params))
12109 assert verify_fn(result)
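# Illustrative sketch (made-up values): the legacy 2-tuple syntax
#   [("add", {"size": 1024}), (0, {"mode": "ro"})]
# is upgraded to the 3-tuple (operation, index, parameters) form
#   [("add", -1, {"size": 1024}), ("modify", 0, {"mode": "ro"})]
# before being verified and processed further.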
12116 def _CheckMods(kind, mods, key_types, item_fn):
12117 """Ensures requested disk/NIC modifications are valid.
12120 for (op, _, params) in mods:
12121 assert ht.TDict(params)
12123 utils.ForceDictType(params, key_types)
12125 if op == constants.DDM_REMOVE:
12127 raise errors.OpPrereqError("No settings should be passed when"
12128 " removing a %s" % kind,
12129 errors.ECODE_INVAL)
12130 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12131 item_fn(op, params)
12133 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12136 def _VerifyDiskModification(op, params):
12137 """Verifies a disk modification.
12140 if op == constants.DDM_ADD:
12141 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12142 if mode not in constants.DISK_ACCESS_SET:
12143 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12144 errors.ECODE_INVAL)
12146 size = params.get(constants.IDISK_SIZE, None)
12147 if size is None:
12148 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12149 constants.IDISK_SIZE, errors.ECODE_INVAL)
12151 try:
12152 size = int(size)
12153 except (TypeError, ValueError), err:
12154 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12155 errors.ECODE_INVAL)
12157 params[constants.IDISK_SIZE] = size
12159 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12160 raise errors.OpPrereqError("Disk size change not possible, use"
12161 " grow-disk", errors.ECODE_INVAL)
12164 def _VerifyNicModification(op, params):
12165 """Verifies a network interface modification.
12168 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12169 ip = params.get(constants.INIC_IP, None)
12170 if ip is None:
12171 pass
12172 elif ip.lower() == constants.VALUE_NONE:
12173 params[constants.INIC_IP] = None
12174 elif not netutils.IPAddress.IsValid(ip):
12175 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12176 errors.ECODE_INVAL)
12178 bridge = params.get("bridge", None)
12179 link = params.get(constants.INIC_LINK, None)
12180 if bridge and link:
12181 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12182 " at the same time", errors.ECODE_INVAL)
12183 elif bridge and bridge.lower() == constants.VALUE_NONE:
12184 params["bridge"] = None
12185 elif link and link.lower() == constants.VALUE_NONE:
12186 params[constants.INIC_LINK] = None
12188 if op == constants.DDM_ADD:
12189 macaddr = params.get(constants.INIC_MAC, None)
12190 if macaddr is None:
12191 params[constants.INIC_MAC] = constants.VALUE_AUTO
12193 if constants.INIC_MAC in params:
12194 macaddr = params[constants.INIC_MAC]
12195 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12196 macaddr = utils.NormalizeAndValidateMac(macaddr)
12198 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12199 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12200 " modifying an existing NIC",
12201 errors.ECODE_INVAL)
12203 def CheckArguments(self):
12204 if not (self.op.nics or self.op.disks or self.op.disk_template or
12205 self.op.hvparams or self.op.beparams or self.op.os_name or
12206 self.op.offline is not None or self.op.runtime_mem):
12207 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12209 if self.op.hvparams:
12210 _CheckGlobalHvParams(self.op.hvparams)
12213 self._UpgradeDiskNicMods("disk", self.op.disks,
12214 opcodes.OpInstanceSetParams.TestDiskModifications)
12216 self._UpgradeDiskNicMods("NIC", self.op.nics,
12217 opcodes.OpInstanceSetParams.TestNicModifications)
12219 # Check disk modifications
12220 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12221 self._VerifyDiskModification)
12223 if self.op.disks and self.op.disk_template is not None:
12224 raise errors.OpPrereqError("Disk template conversion and other disk"
12225 " changes not supported at the same time",
12226 errors.ECODE_INVAL)
12228 if (self.op.disk_template and
12229 self.op.disk_template in constants.DTS_INT_MIRROR and
12230 self.op.remote_node is None):
12231 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12232 " one requires specifying a secondary node",
12233 errors.ECODE_INVAL)
12235 # Check NIC modifications
12236 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12237 self._VerifyNicModification)
12239 def ExpandNames(self):
12240 self._ExpandAndLockInstance()
12241 # Can't even acquire node locks in shared mode as upcoming changes in
12242 # Ganeti 2.6 will start to modify the node object on disk conversion
12243 self.needed_locks[locking.LEVEL_NODE] = []
12244 self.needed_locks[locking.LEVEL_NODE_RES] = []
12245 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12247 def DeclareLocks(self, level):
12248 # TODO: Acquire group lock in shared mode (disk parameters)
12249 if level == locking.LEVEL_NODE:
12250 self._LockInstancesNodes()
12251 if self.op.disk_template and self.op.remote_node:
12252 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12253 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12254 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12256 self.needed_locks[locking.LEVEL_NODE_RES] = \
12257 self.needed_locks[locking.LEVEL_NODE][:]
12259 def BuildHooksEnv(self):
12260 """Build hooks env.
12262 This runs on the master, primary and secondaries.
12266 if constants.BE_MINMEM in self.be_new:
12267 args["minmem"] = self.be_new[constants.BE_MINMEM]
12268 if constants.BE_MAXMEM in self.be_new:
12269 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12270 if constants.BE_VCPUS in self.be_new:
12271 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12272 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12273 # information at all.
12275 if self._new_nics is not None:
12278 for nic in self._new_nics:
12279 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12280 mode = nicparams[constants.NIC_MODE]
12281 link = nicparams[constants.NIC_LINK]
12282 nics.append((nic.ip, nic.mac, mode, link))
12284 args["nics"] = nics
12286 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12287 if self.op.disk_template:
12288 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12289 if self.op.runtime_mem:
12290 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12294 def BuildHooksNodes(self):
12295 """Build hooks nodes.
12298 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12301 def _PrepareNicModification(self, params, private, old_ip, old_params,
12303 update_params_dict = dict([(key, params[key])
12304 for key in constants.NICS_PARAMETERS
12307 if "bridge" in params:
12308 update_params_dict[constants.NIC_LINK] = params["bridge"]
12310 new_params = _GetUpdatedParams(old_params, update_params_dict)
12311 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12313 new_filled_params = cluster.SimpleFillNIC(new_params)
12314 objects.NIC.CheckParameterSyntax(new_filled_params)
12316 new_mode = new_filled_params[constants.NIC_MODE]
12317 if new_mode == constants.NIC_MODE_BRIDGED:
12318 bridge = new_filled_params[constants.NIC_LINK]
12319 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12320 if msg:
12321 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12322 if self.op.force:
12323 self.warn.append(msg)
12324 else:
12325 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12327 elif new_mode == constants.NIC_MODE_ROUTED:
12328 ip = params.get(constants.INIC_IP, old_ip)
12329 if ip is None:
12330 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12331 " on a routed NIC", errors.ECODE_INVAL)
12333 if constants.INIC_MAC in params:
12334 mac = params[constants.INIC_MAC]
12335 if mac is None:
12336 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12337 errors.ECODE_INVAL)
12338 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12339 # otherwise generate the MAC address
12340 params[constants.INIC_MAC] = \
12341 self.cfg.GenerateMAC(self.proc.GetECId())
12343 # or validate/reserve the current one
12345 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12346 except errors.ReservationError:
12347 raise errors.OpPrereqError("MAC address '%s' already in use"
12348 " in cluster" % mac,
12349 errors.ECODE_NOTUNIQUE)
12351 private.params = new_params
12352 private.filled = new_filled_params
12354 def CheckPrereq(self):
12355 """Check prerequisites.
12357 This only checks the instance list against the existing names.
12360 # checking the new params on the primary/secondary nodes
12362 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12363 cluster = self.cluster = self.cfg.GetClusterInfo()
12364 assert self.instance is not None, \
12365 "Cannot retrieve locked instance %s" % self.op.instance_name
12366 pnode = instance.primary_node
12367 nodelist = list(instance.all_nodes)
12368 pnode_info = self.cfg.GetNodeInfo(pnode)
12369 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12371 # Prepare disk/NIC modifications
12372 self.diskmod = PrepareContainerMods(self.op.disks, None)
12373 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12376 if self.op.os_name and not self.op.force:
12377 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12378 self.op.force_variant)
12379 instance_os = self.op.os_name
12381 instance_os = instance.os
12383 assert not (self.op.disk_template and self.op.disks), \
12384 "Can't modify disk template and apply disk changes at the same time"
12386 if self.op.disk_template:
12387 if instance.disk_template == self.op.disk_template:
12388 raise errors.OpPrereqError("Instance already has disk template %s" %
12389 instance.disk_template, errors.ECODE_INVAL)
12391 if (instance.disk_template,
12392 self.op.disk_template) not in self._DISK_CONVERSIONS:
12393 raise errors.OpPrereqError("Unsupported disk template conversion from"
12394 " %s to %s" % (instance.disk_template,
12395 self.op.disk_template),
12396 errors.ECODE_INVAL)
12397 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12398 msg="cannot change disk template")
12399 if self.op.disk_template in constants.DTS_INT_MIRROR:
12400 if self.op.remote_node == pnode:
12401 raise errors.OpPrereqError("Given new secondary node %s is the same"
12402 " as the primary node of the instance" %
12403 self.op.remote_node, errors.ECODE_STATE)
12404 _CheckNodeOnline(self, self.op.remote_node)
12405 _CheckNodeNotDrained(self, self.op.remote_node)
12406 # FIXME: here we assume that the old instance type is DT_PLAIN
12407 assert instance.disk_template == constants.DT_PLAIN
12408 disks = [{constants.IDISK_SIZE: d.size,
12409 constants.IDISK_VG: d.logical_id[0]}
12410 for d in instance.disks]
12411 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12412 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12414 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12415 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12416 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12417 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12418 ignore=self.op.ignore_ipolicy)
12419 if pnode_info.group != snode_info.group:
12420 self.LogWarning("The primary and secondary nodes are in two"
12421 " different node groups; the disk parameters"
12422 " from the first disk's node group will be"
12425 # hvparams processing
12426 if self.op.hvparams:
12427 hv_type = instance.hypervisor
12428 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12429 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12430 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12433 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12434 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12435 self.hv_proposed = self.hv_new = hv_new # the new actual values
12436 self.hv_inst = i_hvdict # the new dict (without defaults)
12438 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12440 self.hv_new = self.hv_inst = {}
12442 # beparams processing
12443 if self.op.beparams:
12444 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12446 objects.UpgradeBeParams(i_bedict)
12447 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12448 be_new = cluster.SimpleFillBE(i_bedict)
12449 self.be_proposed = self.be_new = be_new # the new actual values
12450 self.be_inst = i_bedict # the new dict (without defaults)
12452 self.be_new = self.be_inst = {}
12453 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12454 be_old = cluster.FillBE(instance)
12456 # CPU param validation -- checking every time a parameter is
12457 # changed to cover all cases where either CPU mask or vcpus have been changed
12459 if (constants.BE_VCPUS in self.be_proposed and
12460 constants.HV_CPU_MASK in self.hv_proposed):
12461 cpu_list = \
12462 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12463 # Verify mask is consistent with number of vCPUs. Can skip this
12464 # test if only 1 entry in the CPU mask, which means same mask
12465 # is applied to all vCPUs.
12466 if (len(cpu_list) > 1 and
12467 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12468 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12470 (self.be_proposed[constants.BE_VCPUS],
12471 self.hv_proposed[constants.HV_CPU_MASK]),
12472 errors.ECODE_INVAL)
12474 # Only perform this test if a new CPU mask is given
12475 if constants.HV_CPU_MASK in self.hv_new:
12476 # Calculate the largest CPU number requested
12477 max_requested_cpu = max(map(max, cpu_list))
12478 # Check that all of the instance's nodes have enough physical CPUs to
12479 # satisfy the requested CPU mask
12480 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12481 max_requested_cpu + 1, instance.hypervisor)
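# Worked example (made-up values): with BE_VCPUS=2 and HV_CPU_MASK="1:3",
# ParseMultiCpuMask yields two entries, one per vCPU, so the lengths match;
# the highest CPU number requested is 3, so every node must expose at least
# 4 physical CPUs.  A single-entry mask such as "0-1" skips the length check
# because it applies to all vCPUs.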
12483 # osparams processing
12484 if self.op.osparams:
12485 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12486 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12487 self.os_inst = i_osdict # the new dict (without defaults)
12493 #TODO(dynmem): do the appropriate check involving MINMEM
12494 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12495 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12496 mem_check_list = [pnode]
12497 if be_new[constants.BE_AUTO_BALANCE]:
12498 # either we changed auto_balance to yes or it was from before
12499 mem_check_list.extend(instance.secondary_nodes)
12500 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12501 instance.hypervisor)
12502 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12503 [instance.hypervisor])
12504 pninfo = nodeinfo[pnode]
12505 msg = pninfo.fail_msg
12506 if msg:
12507 # Assume the primary node is unreachable and go ahead
12508 self.warn.append("Can't get info from primary node %s: %s" %
12509 (pnode, msg))
12510 else:
12511 (_, _, (pnhvinfo, )) = pninfo.payload
12512 if not isinstance(pnhvinfo.get("memory_free", None), int):
12513 self.warn.append("Node data from primary node %s doesn't contain"
12514 " free memory information" % pnode)
12515 elif instance_info.fail_msg:
12516 self.warn.append("Can't get instance runtime information: %s" %
12517 instance_info.fail_msg)
12519 if instance_info.payload:
12520 current_mem = int(instance_info.payload["memory"])
12521 else:
12522 # Assume instance not running
12523 # (there is a slight race condition here, but it's not very
12524 # probable, and we have no other way to check)
12525 # TODO: Describe race condition
12526 current_mem = 0
12527 #TODO(dynmem): do the appropriate check involving MINMEM
12528 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12529 pnhvinfo["memory_free"])
12530 if miss_mem > 0:
12531 raise errors.OpPrereqError("This change will prevent the instance"
12532 " from starting, due to %d MB of memory"
12533 " missing on its primary node" %
12534 miss_mem,
12535 errors.ECODE_NORES)
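# Worked example (made-up numbers): raising BE_MAXMEM to 4096 MB for an
# instance currently using 2048 MB on a primary node with 1024 MB free gives
# miss_mem = 4096 - 2048 - 1024 = 1024 > 0, so the change is refused unless
# --force was given (which skips this whole check).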
12537 if be_new[constants.BE_AUTO_BALANCE]:
12538 for node, nres in nodeinfo.items():
12539 if node not in instance.secondary_nodes:
12540 continue
12541 nres.Raise("Can't get info from secondary node %s" % node,
12542 prereq=True, ecode=errors.ECODE_STATE)
12543 (_, _, (nhvinfo, )) = nres.payload
12544 if not isinstance(nhvinfo.get("memory_free", None), int):
12545 raise errors.OpPrereqError("Secondary node %s didn't return free"
12546 " memory information" % node,
12547 errors.ECODE_STATE)
12548 #TODO(dynmem): do the appropriate check involving MINMEM
12549 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12550 raise errors.OpPrereqError("This change will prevent the instance"
12551 " from failover to its secondary node"
12552 " %s, due to not enough memory" % node,
12553 errors.ECODE_STATE)
12555 if self.op.runtime_mem:
12556 remote_info = self.rpc.call_instance_info(instance.primary_node,
12558 instance.hypervisor)
12559 remote_info.Raise("Error checking node %s" % instance.primary_node)
12560 if not remote_info.payload: # not running already
12561 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12562 errors.ECODE_STATE)
12564 current_memory = remote_info.payload["memory"]
12565 if (not self.op.force and
12566 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12567 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12568 raise errors.OpPrereqError("Instance %s must have memory between %d"
12569 " and %d MB of memory unless --force is"
12570 " given" % (instance.name,
12571 self.be_proposed[constants.BE_MINMEM],
12572 self.be_proposed[constants.BE_MAXMEM]),
12573 errors.ECODE_INVAL)
12575 if self.op.runtime_mem > current_memory:
12576 _CheckNodeFreeMemory(self, instance.primary_node,
12577 "ballooning memory for instance %s" %
12579 self.op.runtime_mem - current_memory,
12580 instance.hypervisor)
12582 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12583 raise errors.OpPrereqError("Disk operations not supported for"
12584 " diskless instances",
12585 errors.ECODE_INVAL)
12587 def _PrepareNicCreate(_, params, private):
12588 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
12589 return (None, None)
12591 def _PrepareNicMod(_, nic, params, private):
12592 self._PrepareNicModification(params, private, nic.ip,
12593 nic.nicparams, cluster, pnode)
12596 # Verify NIC changes (operating on copy)
12597 nics = instance.nics[:]
12598 ApplyContainerMods("NIC", nics, None, self.nicmod,
12599 _PrepareNicCreate, _PrepareNicMod, None)
12600 if len(nics) > constants.MAX_NICS:
12601 raise errors.OpPrereqError("Instance has too many network interfaces"
12602 " (%d), cannot add more" % constants.MAX_NICS,
12603 errors.ECODE_STATE)
12605 # Verify disk changes (operating on a copy)
12606 disks = instance.disks[:]
12607 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12608 if len(disks) > constants.MAX_DISKS:
12609 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12610 " more" % constants.MAX_DISKS,
12611 errors.ECODE_STATE)
12613 if self.op.offline is not None:
12614 if self.op.offline:
12615 msg = "can't change to offline"
12617 msg = "can't change to online"
12618 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12620 # Pre-compute NIC changes (necessary to use result in hooks)
12621 self._nic_chgdesc = []
12623 # Operate on copies as this is still in prereq
12624 nics = [nic.Copy() for nic in instance.nics]
12625 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12626 self._CreateNewNic, self._ApplyNicMods, None)
12627 self._new_nics = nics
12629 self._new_nics = None
12631 def _ConvertPlainToDrbd(self, feedback_fn):
12632 """Converts an instance from plain to drbd.
12635 feedback_fn("Converting template to drbd")
12636 instance = self.instance
12637 pnode = instance.primary_node
12638 snode = self.op.remote_node
12640 assert instance.disk_template == constants.DT_PLAIN
12642 # create a fake disk info for _GenerateDiskTemplate
12643 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12644 constants.IDISK_VG: d.logical_id[0]}
12645 for d in instance.disks]
12646 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12647 instance.name, pnode, [snode],
12648 disk_info, None, None, 0, feedback_fn,
12650 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12652 info = _GetInstanceInfoText(instance)
12653 feedback_fn("Creating additional volumes...")
12654 # first, create the missing data and meta devices
12655 for disk in anno_disks:
12656 # unfortunately this is... not too nice
12657 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12659 for child in disk.children:
12660 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12661 # at this stage, all new LVs have been created, we can rename the
12663 feedback_fn("Renaming original volumes...")
12664 rename_list = [(o, n.children[0].logical_id)
12665 for (o, n) in zip(instance.disks, new_disks)]
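# Illustrative sketch (made-up values): each original data LV is renamed to
# the logical_id of the data child of its new DRBD disk, e.g.
#   rename_list = [(orig_data_lv, ("xenvg", "new-uuid.disk0_data"))]
# so the existing data becomes the backing device of the new DRBD8 disk.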
12666 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12667 result.Raise("Failed to rename original LVs")
12669 feedback_fn("Initializing DRBD devices...")
12670 # all child devices are in place, we can now create the DRBD devices
12671 for disk in anno_disks:
12672 for node in [pnode, snode]:
12673 f_create = node == pnode
12674 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12676 # at this point, the instance has been modified
12677 instance.disk_template = constants.DT_DRBD8
12678 instance.disks = new_disks
12679 self.cfg.Update(instance, feedback_fn)
12681 # Release node locks while waiting for sync
12682 _ReleaseLocks(self, locking.LEVEL_NODE)
12684 # disks are created, waiting for sync
12685 disk_abort = not _WaitForSync(self, instance,
12686 oneshot=not self.op.wait_for_sync)
12688 raise errors.OpExecError("There are some degraded disks for"
12689 " this instance, please cleanup manually")
12691 # Node resource locks will be released by caller
12693 def _ConvertDrbdToPlain(self, feedback_fn):
12694 """Converts an instance from drbd to plain.
12697 instance = self.instance
12699 assert len(instance.secondary_nodes) == 1
12700 assert instance.disk_template == constants.DT_DRBD8
12702 pnode = instance.primary_node
12703 snode = instance.secondary_nodes[0]
12704 feedback_fn("Converting template to plain")
12706 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12707 new_disks = [d.children[0] for d in instance.disks]
12709 # copy over size and mode
12710 for parent, child in zip(old_disks, new_disks):
12711 child.size = parent.size
12712 child.mode = parent.mode
12714 # this is a DRBD disk, return its port to the pool
12715 # NOTE: this must be done right before the call to cfg.Update!
12716 for disk in old_disks:
12717 tcp_port = disk.logical_id[2]
12718 self.cfg.AddTcpUdpPort(tcp_port)
12720 # update instance structure
12721 instance.disks = new_disks
12722 instance.disk_template = constants.DT_PLAIN
12723 self.cfg.Update(instance, feedback_fn)
12725 # Release locks in case removing disks takes a while
12726 _ReleaseLocks(self, locking.LEVEL_NODE)
12728 feedback_fn("Removing volumes on the secondary node...")
12729 for disk in old_disks:
12730 self.cfg.SetDiskID(disk, snode)
12731 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12732 if msg:
12733 self.LogWarning("Could not remove block device %s on node %s,"
12734 " continuing anyway: %s", disk.iv_name, snode, msg)
12736 feedback_fn("Removing unneeded volumes on the primary node...")
12737 for idx, disk in enumerate(old_disks):
12738 meta = disk.children[1]
12739 self.cfg.SetDiskID(meta, pnode)
12740 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12741 if msg:
12742 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12743 " continuing anyway: %s", idx, pnode, msg)
12745 def _CreateNewDisk(self, idx, params, _):
12746 """Creates a new disk.
12749 instance = self.instance
12752 if instance.disk_template in constants.DTS_FILEBASED:
12753 (file_driver, file_path) = instance.disks[0].logical_id
12754 file_path = os.path.dirname(file_path)
12755 else:
12756 file_driver = file_path = None
12758 disk = \
12759 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12760 instance.primary_node, instance.secondary_nodes,
12761 [params], file_path, file_driver, idx,
12762 self.Log, self.diskparams)[0]
12764 info = _GetInstanceInfoText(instance)
12766 logging.info("Creating volume %s for instance %s",
12767 disk.iv_name, instance.name)
12768 # Note: this needs to be kept in sync with _CreateDisks
12770 for node in instance.all_nodes:
12771 f_create = (node == instance.primary_node)
12772 try:
12773 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12774 except errors.OpExecError, err:
12775 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12776 disk.iv_name, disk, node, err)
12779 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12783 def _ModifyDisk(idx, disk, params, _):
12784 """Modifies a disk.
12787 disk.mode = params[constants.IDISK_MODE]
12790 ("disk.mode/%d" % idx, disk.mode),
12793 def _RemoveDisk(self, idx, root, _):
12797 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12798 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12799 self.cfg.SetDiskID(disk, node)
12800 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12801 if msg:
12802 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12803 " continuing anyway", idx, node, msg)
12805 # if this is a DRBD disk, return its port to the pool
12806 if root.dev_type in constants.LDS_DRBD:
12807 self.cfg.AddTcpUdpPort(root.logical_id[2])
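# The NIC callbacks below follow the same (idx, params, private) convention;
# "private" carries the updated nicparams (private.params) and the fully
# filled values (private.filled) used to build the "add:..." message.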
12809 @staticmethod
12810 def _CreateNewNic(idx, params, private):
12811 """Creates data structure for a new network interface.
12814 mac = params[constants.INIC_MAC]
12815 ip = params.get(constants.INIC_IP, None)
12816 nicparams = private.params
12818 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12820 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12821 (mac, ip, private.filled[constants.NIC_MODE],
12822 private.filled[constants.NIC_LINK])),
12823 ])
12825 @staticmethod
12826 def _ApplyNicMods(idx, nic, params, private):
12827 """Modifies a network interface.
12830 changes = []
12832 for key in [constants.INIC_MAC, constants.INIC_IP]:
12833 if key in params:
12834 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12835 setattr(nic, key, params[key])
12837 if private.params:
12838 nic.nicparams = private.params
12840 for (key, val) in params.items():
12841 changes.append(("nic.%s/%d" % (key, idx), val))
12843 return changes
12845 def Exec(self, feedback_fn):
12846 """Modifies an instance.
12848 All parameters take effect only at the next restart of the instance.
12851 # Process here the warnings from CheckPrereq, as we don't have a
12852 # feedback_fn there.
12853 # TODO: Replace with self.LogWarning
12854 for warn in self.warn:
12855 feedback_fn("WARNING: %s" % warn)
12857 assert ((self.op.disk_template is None) ^
12858 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12859 "Not owning any node resource locks"
12861 result = []
12862 instance = self.instance
12865 if self.op.runtime_mem:
12866 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12867 instance,
12868 self.op.runtime_mem)
12869 rpcres.Raise("Cannot modify instance runtime memory")
12870 result.append(("runtime_memory", self.op.runtime_mem))
12872 # Apply disk changes
12873 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12874 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12875 _UpdateIvNames(0, instance.disks)
12877 if self.op.disk_template:
12878 if __debug__:
12879 check_nodes = set(instance.all_nodes)
12880 if self.op.remote_node:
12881 check_nodes.add(self.op.remote_node)
12882 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12883 owned = self.owned_locks(level)
12884 assert not (check_nodes - owned), \
12885 ("Not owning the correct locks, owning %r, expected at least %r" %
12886 (owned, check_nodes))
12888 r_shut = _ShutdownInstanceDisks(self, instance)
12889 if not r_shut:
12890 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12891 " proceed with disk template conversion")
12892 mode = (instance.disk_template, self.op.disk_template)
12893 try:
12894 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12895 except:
12896 self.cfg.ReleaseDRBDMinors(instance.name)
12897 raise
12898 result.append(("disk_template", self.op.disk_template))
12900 assert instance.disk_template == self.op.disk_template, \
12901 ("Expected disk template '%s', found '%s'" %
12902 (self.op.disk_template, instance.disk_template))
12904 # Release node and resource locks if there are any (they might already have
12905 # been released during disk conversion)
12906 _ReleaseLocks(self, locking.LEVEL_NODE)
12907 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12909 # Apply NIC changes
12910 if self._new_nics is not None:
12911 instance.nics = self._new_nics
12912 result.extend(self._nic_chgdesc)
12915 if self.op.hvparams:
12916 instance.hvparams = self.hv_inst
12917 for key, val in self.op.hvparams.iteritems():
12918 result.append(("hv/%s" % key, val))
12921 if self.op.beparams:
12922 instance.beparams = self.be_inst
12923 for key, val in self.op.beparams.iteritems():
12924 result.append(("be/%s" % key, val))
12927 if self.op.os_name:
12928 instance.os = self.op.os_name
12931 if self.op.osparams:
12932 instance.osparams = self.os_inst
12933 for key, val in self.op.osparams.iteritems():
12934 result.append(("os/%s" % key, val))
12936 if self.op.offline is None:
12937 # Ignore
12938 pass
12939 elif self.op.offline:
12940 # Mark instance as offline
12941 self.cfg.MarkInstanceOffline(instance.name)
12942 result.append(("admin_state", constants.ADMINST_OFFLINE))
12943 else:
12944 # Mark instance as online, but stopped
12945 self.cfg.MarkInstanceDown(instance.name)
12946 result.append(("admin_state", constants.ADMINST_DOWN))
12948 self.cfg.Update(instance, feedback_fn)
12950 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12951 self.owned_locks(locking.LEVEL_NODE)), \
12952 "All node locks should have been released by now"
12956 _DISK_CONVERSIONS = {
12957 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12958 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12959 }
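# Template conversion is driven by the (current, requested) template pair in
# this dict; e.g. a plain->drbd conversion would typically be requested with
# something like "gnt-instance modify -t drbd -n <new-secondary> <instance>"
# (illustrative command line).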
12962 class LUInstanceChangeGroup(LogicalUnit):
12963 HPATH = "instance-change-group"
12964 HTYPE = constants.HTYPE_INSTANCE
12965 REQ_BGL = False
12967 def ExpandNames(self):
12968 self.share_locks = _ShareAll()
12969 self.needed_locks = {
12970 locking.LEVEL_NODEGROUP: [],
12971 locking.LEVEL_NODE: [],
12972 }
12974 self._ExpandAndLockInstance()
12976 if self.op.target_groups:
12977 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12978 self.op.target_groups)
12979 else:
12980 self.req_target_uuids = None
12982 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12984 def DeclareLocks(self, level):
12985 if level == locking.LEVEL_NODEGROUP:
12986 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12988 if self.req_target_uuids:
12989 lock_groups = set(self.req_target_uuids)
12991 # Lock all groups used by instance optimistically; this requires going
12992 # via the node before it's locked, requiring verification later on
12993 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12994 lock_groups.update(instance_groups)
12995 else:
12996 # No target groups, need to lock all of them
12997 lock_groups = locking.ALL_SET
12999 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13001 elif level == locking.LEVEL_NODE:
13002 if self.req_target_uuids:
13003 # Lock all nodes used by instances
13004 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13005 self._LockInstancesNodes()
13007 # Lock all nodes in all potential target groups
13008 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13009 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13010 member_nodes = [node_name
13011 for group in lock_groups
13012 for node_name in self.cfg.GetNodeGroup(group).members]
13013 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13014 else:
13015 # Lock all nodes as all groups are potential targets
13016 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13018 def CheckPrereq(self):
13019 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13020 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13021 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13023 assert (self.req_target_uuids is None or
13024 owned_groups.issuperset(self.req_target_uuids))
13025 assert owned_instances == set([self.op.instance_name])
13027 # Get instance information
13028 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13030 # Check if node groups for locked instance are still correct
13031 assert owned_nodes.issuperset(self.instance.all_nodes), \
13032 ("Instance %s's nodes changed while we kept the lock" %
13033 self.op.instance_name)
13035 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13036 owned_groups)
13038 if self.req_target_uuids:
13039 # User requested specific target groups
13040 self.target_uuids = frozenset(self.req_target_uuids)
13041 else:
13042 # All groups except those used by the instance are potential targets
13043 self.target_uuids = owned_groups - inst_groups
13045 conflicting_groups = self.target_uuids & inst_groups
13046 if conflicting_groups:
13047 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13048 " used by the instance '%s'" %
13049 (utils.CommaJoin(conflicting_groups),
13050 self.op.instance_name),
13051 errors.ECODE_INVAL)
13053 if not self.target_uuids:
13054 raise errors.OpPrereqError("There are no possible target groups",
13055 errors.ECODE_INVAL)
13057 def BuildHooksEnv(self):
13058 """Build hooks env.
13061 assert self.target_uuids
13064 "TARGET_GROUPS": " ".join(self.target_uuids),
13067 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13071 def BuildHooksNodes(self):
13072 """Build hooks nodes.
13075 mn = self.cfg.GetMasterNode()
13076 return ([mn], [mn])
13078 def Exec(self, feedback_fn):
13079 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13081 assert instances == [self.op.instance_name], "Instance not locked"
13083 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13084 instances=instances, target_groups=list(self.target_uuids))
13086 ial.Run(self.op.iallocator)
13088 if not ial.success:
13089 raise errors.OpPrereqError("Can't compute solution for changing group of"
13090 " instance '%s' using iallocator '%s': %s" %
13091 (self.op.instance_name, self.op.iallocator,
13092 ial.info),
13093 errors.ECODE_NORES)
13095 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13097 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13098 " instance '%s'", len(jobs), self.op.instance_name)
13100 return ResultWithJobs(jobs)
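# Note that this LU does not move the instance itself: the iallocator runs in
# "change group" mode and the jobs it computes (loaded via
# _LoadNodeEvacResult) are returned for submission and perform the actual
# moves.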
13103 class LUBackupQuery(NoHooksLU):
13104 """Query the exports list
13109 def CheckArguments(self):
13110 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13111 ["node", "export"], self.op.use_locking)
13113 def ExpandNames(self):
13114 self.expq.ExpandNames(self)
13116 def DeclareLocks(self, level):
13117 self.expq.DeclareLocks(self, level)
13119 def Exec(self, feedback_fn):
13120 result = {}
13122 for (node, expname) in self.expq.OldStyleQuery(self):
13123 if expname is None:
13124 result[node] = False
13125 else:
13126 result.setdefault(node, []).append(expname)
13128 return result
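# Old-style result shape (illustrative): {"node1.example.com": False,
# "node2.example.com": ["instance1.example.com"]}; False marks a node whose
# export list could not be retrieved.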
13131 class _ExportQuery(_QueryBase):
13132 FIELDS = query.EXPORT_FIELDS
13134 #: The node name is not a unique key for this query
13135 SORT_FIELD = "node"
13137 def ExpandNames(self, lu):
13138 lu.needed_locks = {}
13140 # The following variables interact with _QueryBase._GetNames
13141 if self.names:
13142 self.wanted = _GetWantedNodes(lu, self.names)
13143 else:
13144 self.wanted = locking.ALL_SET
13146 self.do_locking = self.use_locking
13148 if self.do_locking:
13149 lu.share_locks = _ShareAll()
13150 lu.needed_locks = {
13151 locking.LEVEL_NODE: self.wanted,
13152 }
13154 def DeclareLocks(self, lu, level):
13155 pass
13157 def _GetQueryData(self, lu):
13158 """Computes the list of nodes and their attributes.
13161 # Locking is not used
13163 assert not (compat.any(lu.glm.is_owned(level)
13164 for level in locking.LEVELS
13165 if level != locking.LEVEL_CLUSTER) or
13166 self.do_locking or self.use_locking)
13168 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13170 result = []
13172 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13173 if nres.fail_msg:
13174 result.append((node, None))
13175 else:
13176 result.extend((node, expname) for expname in nres.payload)
13178 return result
13181 class LUBackupPrepare(NoHooksLU):
13182 """Prepares an instance for an export and returns useful information.
13187 def ExpandNames(self):
13188 self._ExpandAndLockInstance()
13190 def CheckPrereq(self):
13191 """Check prerequisites.
13194 instance_name = self.op.instance_name
13196 self.instance = self.cfg.GetInstanceInfo(instance_name)
13197 assert self.instance is not None, \
13198 "Cannot retrieve locked instance %s" % self.op.instance_name
13199 _CheckNodeOnline(self, self.instance.primary_node)
13201 self._cds = _GetClusterDomainSecret()
13203 def Exec(self, feedback_fn):
13204 """Prepares an instance for an export.
13207 instance = self.instance
13209 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13210 salt = utils.GenerateSecret(8)
13212 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13213 result = self.rpc.call_x509_cert_create(instance.primary_node,
13214 constants.RIE_CERT_VALIDITY)
13215 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13217 (name, cert_pem) = result.payload
13219 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13223 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13224 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13226 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13232 class LUBackupExport(LogicalUnit):
13233 """Export an instance to an image in the cluster.
13236 HPATH = "instance-export"
13237 HTYPE = constants.HTYPE_INSTANCE
13238 REQ_BGL = False
13240 def CheckArguments(self):
13241 """Check the arguments.
13244 self.x509_key_name = self.op.x509_key_name
13245 self.dest_x509_ca_pem = self.op.destination_x509_ca
13247 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13248 if not self.x509_key_name:
13249 raise errors.OpPrereqError("Missing X509 key name for encryption",
13250 errors.ECODE_INVAL)
13252 if not self.dest_x509_ca_pem:
13253 raise errors.OpPrereqError("Missing destination X509 CA",
13254 errors.ECODE_INVAL)
13256 def ExpandNames(self):
13257 self._ExpandAndLockInstance()
13259 # Lock all nodes for local exports
13260 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13261 # FIXME: lock only instance primary and destination node
13263 # Sad but true, for now we have to lock all nodes, as we don't know where
13264 # the previous export might be, and in this LU we search for it and
13265 # remove it from its current node. In the future we could fix this by:
13266 # - making a tasklet to search (share-lock all), then create the
13267 # new one, then one to remove, after
13268 # - removing the removal operation altogether
13269 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13271 def DeclareLocks(self, level):
13272 """Last minute lock declaration."""
13273 # All nodes are locked anyway, so nothing to do here.
13275 def BuildHooksEnv(self):
13276 """Build hooks env.
13278 This will run on the master, primary node and target node.
13282 "EXPORT_MODE": self.op.mode,
13283 "EXPORT_NODE": self.op.target_node,
13284 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13285 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13286 # TODO: Generic function for boolean env variables
13287 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13288 }
13290 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13292 return env
13294 def BuildHooksNodes(self):
13295 """Build hooks nodes.
13298 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13300 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13301 nl.append(self.op.target_node)
13303 return (nl, nl)
13305 def CheckPrereq(self):
13306 """Check prerequisites.
13308 This checks that the instance and node names are valid.
13311 instance_name = self.op.instance_name
13313 self.instance = self.cfg.GetInstanceInfo(instance_name)
13314 assert self.instance is not None, \
13315 "Cannot retrieve locked instance %s" % self.op.instance_name
13316 _CheckNodeOnline(self, self.instance.primary_node)
13318 if (self.op.remove_instance and
13319 self.instance.admin_state == constants.ADMINST_UP and
13320 not self.op.shutdown):
13321 raise errors.OpPrereqError("Can not remove instance without shutting it"
13324 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13325 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13326 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13327 assert self.dst_node is not None
13329 _CheckNodeOnline(self, self.dst_node.name)
13330 _CheckNodeNotDrained(self, self.dst_node.name)
13332 self._cds = None
13333 self.dest_disk_info = None
13334 self.dest_x509_ca = None
13336 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13337 self.dst_node = None
13339 if len(self.op.target_node) != len(self.instance.disks):
13340 raise errors.OpPrereqError(("Received destination information for %s"
13341 " disks, but instance %s has %s disks") %
13342 (len(self.op.target_node), instance_name,
13343 len(self.instance.disks)),
13344 errors.ECODE_INVAL)
13346 cds = _GetClusterDomainSecret()
13348 # Check X509 key name
13349 try:
13350 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13351 except (TypeError, ValueError), err:
13352 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13354 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13355 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13356 errors.ECODE_INVAL)
13358 # Load and verify CA
13359 try:
13360 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13361 except OpenSSL.crypto.Error, err:
13362 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13363 (err, ), errors.ECODE_INVAL)
13365 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13366 if errcode is not None:
13367 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13368 (msg, ), errors.ECODE_INVAL)
13370 self.dest_x509_ca = cert
13372 # Verify target information
13373 disk_info = []
13374 for idx, disk_data in enumerate(self.op.target_node):
13375 try:
13376 (host, port, magic) = \
13377 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13378 except errors.GenericError, err:
13379 raise errors.OpPrereqError("Target info for disk %s: %s" %
13380 (idx, err), errors.ECODE_INVAL)
13382 disk_info.append((host, port, magic))
13384 assert len(disk_info) == len(self.op.target_node)
13385 self.dest_disk_info = disk_info
13387 else:
13388 raise errors.ProgrammerError("Unhandled export mode %r" %
13389 self.op.mode)
13391 # instance disk type verification
13392 # TODO: Implement export support for file-based disks
13393 for disk in self.instance.disks:
13394 if disk.dev_type == constants.LD_FILE:
13395 raise errors.OpPrereqError("Export not supported for instances with"
13396 " file-based disks", errors.ECODE_INVAL)
13398 def _CleanupExports(self, feedback_fn):
13399 """Removes exports of current instance from all other nodes.
13401 If an instance in a cluster with nodes A..D was exported to node C, its
13402 exports will be removed from the nodes A, B and D.
13405 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13407 nodelist = self.cfg.GetNodeList()
13408 nodelist.remove(self.dst_node.name)
13410 # on one-node clusters nodelist will be empty after the removal
13411 # if we proceed the backup would be removed because OpBackupQuery
13412 # substitutes an empty list with the full cluster node list.
13413 iname = self.instance.name
13415 feedback_fn("Removing old exports for instance %s" % iname)
13416 exportlist = self.rpc.call_export_list(nodelist)
13417 for node in exportlist:
13418 if exportlist[node].fail_msg:
13419 continue
13420 if iname in exportlist[node].payload:
13421 msg = self.rpc.call_export_remove(node, iname).fail_msg
13422 if msg:
13423 self.LogWarning("Could not remove older export for instance %s"
13424 " on node %s: %s", iname, node, msg)
13426 def Exec(self, feedback_fn):
13427 """Export an instance to an image in the cluster.
13430 assert self.op.mode in constants.EXPORT_MODES
13432 instance = self.instance
13433 src_node = instance.primary_node
13435 if self.op.shutdown:
13436 # shutdown the instance, but not the disks
13437 feedback_fn("Shutting down instance %s" % instance.name)
13438 result = self.rpc.call_instance_shutdown(src_node, instance,
13439 self.op.shutdown_timeout)
13440 # TODO: Maybe ignore failures if ignore_remove_failures is set
13441 result.Raise("Could not shutdown instance %s on"
13442 " node %s" % (instance.name, src_node))
13444 # set the disks ID correctly since call_instance_start needs the
13445 # correct drbd minor to create the symlinks
13446 for disk in instance.disks:
13447 self.cfg.SetDiskID(disk, src_node)
13449 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13451 if activate_disks:
13452 # Activate the instance disks if we're exporting a stopped instance
13453 feedback_fn("Activating disks for %s" % instance.name)
13454 _StartInstanceDisks(self, instance, None)
13457 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13458 instance)
13460 helper.CreateSnapshots()
13462 if (self.op.shutdown and
13463 instance.admin_state == constants.ADMINST_UP and
13464 not self.op.remove_instance):
13465 assert not activate_disks
13466 feedback_fn("Starting instance %s" % instance.name)
13467 result = self.rpc.call_instance_start(src_node,
13468 (instance, None, None), False)
13469 msg = result.fail_msg
13470 if msg:
13471 feedback_fn("Failed to start instance: %s" % msg)
13472 _ShutdownInstanceDisks(self, instance)
13473 raise errors.OpExecError("Could not start instance: %s" % msg)
13475 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13476 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13477 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13478 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13479 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13481 (key_name, _, _) = self.x509_key_name
13483 dest_ca_pem = \
13484 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13485 self.dest_x509_ca)
13487 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13488 key_name, dest_ca_pem,
13489 timeouts)
13493 # Check for backwards compatibility
13494 assert len(dresults) == len(instance.disks)
13495 assert compat.all(isinstance(i, bool) for i in dresults), \
13496 "Not all results are boolean: %r" % dresults
13500 feedback_fn("Deactivating disks for %s" % instance.name)
13501 _ShutdownInstanceDisks(self, instance)
13503 if not (compat.all(dresults) and fin_resu):
13504 failures = []
13505 if not fin_resu:
13506 failures.append("export finalization")
13507 if not compat.all(dresults):
13508 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13509 if not dsk)
13510 failures.append("disk export: disk(s) %s" % fdsk)
13512 raise errors.OpExecError("Export failed, errors in %s" %
13513 utils.CommaJoin(failures))
13515 # At this point, the export was successful, we can cleanup/finish
13517 # Remove instance if requested
13518 if self.op.remove_instance:
13519 feedback_fn("Removing instance %s" % instance.name)
13520 _RemoveInstance(self, feedback_fn, instance,
13521 self.op.ignore_remove_failures)
13523 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13524 self._CleanupExports(feedback_fn)
13526 return fin_resu, dresults
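# The return value pairs the overall export finalization status (fin_resu)
# with the per-disk boolean results (dresults) checked above.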
13529 class LUBackupRemove(NoHooksLU):
13530 """Remove exports related to the named instance.
13535 def ExpandNames(self):
13536 self.needed_locks = {}
13537 # We need all nodes to be locked in order for RemoveExport to work, but we
13538 # don't need to lock the instance itself, as nothing will happen to it (and
13539 # we can remove exports also for a removed instance)
13540 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13542 def Exec(self, feedback_fn):
13543 """Remove any export.
13546 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13547 # If the instance was not found we'll try with the name that was passed in.
13548 # This will only work if it was an FQDN, though.
13549 fqdn_warn = False
13550 if not instance_name:
13551 fqdn_warn = True
13552 instance_name = self.op.instance_name
13554 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13555 exportlist = self.rpc.call_export_list(locked_nodes)
13556 found = False
13557 for node in exportlist:
13558 msg = exportlist[node].fail_msg
13559 if msg:
13560 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13561 continue
13562 if instance_name in exportlist[node].payload:
13563 found = True
13564 result = self.rpc.call_export_remove(node, instance_name)
13565 msg = result.fail_msg
13566 if msg:
13567 logging.error("Could not remove export for instance %s"
13568 " on node %s: %s", instance_name, node, msg)
13570 if fqdn_warn and not found:
13571 feedback_fn("Export not found. If trying to remove an export belonging"
13572 " to a deleted instance please use its Fully Qualified"
13576 class LUGroupAdd(LogicalUnit):
13577 """Logical unit for creating node groups.
13580 HPATH = "group-add"
13581 HTYPE = constants.HTYPE_GROUP
13584 def ExpandNames(self):
13585 # We need the new group's UUID here so that we can create and acquire the
13586 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13587 # that it should not check whether the UUID exists in the configuration.
13588 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13589 self.needed_locks = {}
13590 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13592 def CheckPrereq(self):
13593 """Check prerequisites.
13595 This checks that the given group name is not an existing node group
13599 try:
13600 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13601 except errors.OpPrereqError:
13602 pass
13603 else:
13604 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13605 " node group (UUID: %s)" %
13606 (self.op.group_name, existing_uuid),
13607 errors.ECODE_EXISTS)
13609 if self.op.ndparams:
13610 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13612 if self.op.hv_state:
13613 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13615 self.new_hv_state = None
13617 if self.op.disk_state:
13618 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13620 self.new_disk_state = None
13622 if self.op.diskparams:
13623 for templ in constants.DISK_TEMPLATES:
13624 if templ in self.op.diskparams:
13625 utils.ForceDictType(self.op.diskparams[templ],
13626 constants.DISK_DT_TYPES)
13627 self.new_diskparams = self.op.diskparams
13628 try:
13629 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13630 except errors.OpPrereqError, err:
13631 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13632 errors.ECODE_INVAL)
13633 else:
13634 self.new_diskparams = {}
13636 if self.op.ipolicy:
13637 cluster = self.cfg.GetClusterInfo()
13638 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13639 try:
13640 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13641 except errors.ConfigurationError, err:
13642 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13643 errors.ECODE_INVAL)
13645 def BuildHooksEnv(self):
13646 """Build hooks env.
13650 "GROUP_NAME": self.op.group_name,
13653 def BuildHooksNodes(self):
13654 """Build hooks nodes.
13657 mn = self.cfg.GetMasterNode()
13658 return ([mn], [mn])
13660 def Exec(self, feedback_fn):
13661 """Add the node group to the cluster.
13664 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13665 uuid=self.group_uuid,
13666 alloc_policy=self.op.alloc_policy,
13667 ndparams=self.op.ndparams,
13668 diskparams=self.new_diskparams,
13669 ipolicy=self.op.ipolicy,
13670 hv_state_static=self.new_hv_state,
13671 disk_state_static=self.new_disk_state)
13673 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13674 del self.remove_locks[locking.LEVEL_NODEGROUP]
13677 class LUGroupAssignNodes(NoHooksLU):
13678 """Logical unit for assigning nodes to groups.
13683 def ExpandNames(self):
13684 # These raise errors.OpPrereqError on their own:
13685 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13686 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13688 # We want to lock all the affected nodes and groups. We have readily
13689 # available the list of nodes, and the *destination* group. To gather the
13690 # list of "source" groups, we need to fetch node information later on.
13691 self.needed_locks = {
13692 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13693 locking.LEVEL_NODE: self.op.nodes,
13694 }
13696 def DeclareLocks(self, level):
13697 if level == locking.LEVEL_NODEGROUP:
13698 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13700 # Try to get all affected nodes' groups without having the group or node
13701 # lock yet. Needs verification later in the code flow.
13702 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13704 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13706 def CheckPrereq(self):
13707 """Check prerequisites.
13710 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13711 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13712 frozenset(self.op.nodes))
13714 expected_locks = (set([self.group_uuid]) |
13715 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13716 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13717 if actual_locks != expected_locks:
13718 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13719 " current groups are '%s', used to be '%s'" %
13720 (utils.CommaJoin(expected_locks),
13721 utils.CommaJoin(actual_locks)))
13723 self.node_data = self.cfg.GetAllNodesInfo()
13724 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13725 instance_data = self.cfg.GetAllInstancesInfo()
13727 if self.group is None:
13728 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13729 (self.op.group_name, self.group_uuid))
13731 (new_splits, previous_splits) = \
13732 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13733 for node in self.op.nodes],
13734 self.node_data, instance_data)
13736 if new_splits:
13737 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13739 if not self.op.force:
13740 raise errors.OpExecError("The following instances get split by this"
13741 " change and --force was not given: %s" %
13744 self.LogWarning("This operation will split the following instances: %s",
13747 if previous_splits:
13748 self.LogWarning("In addition, these already-split instances continue"
13749 " to be split across groups: %s",
13750 utils.CommaJoin(utils.NiceSort(previous_splits)))
13752 def Exec(self, feedback_fn):
13753 """Assign nodes to a new group.
13756 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13758 self.cfg.AssignGroupNodes(mods)
13760 @staticmethod
13761 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13762 """Check for split instances after a node assignment.
13764 This method considers a series of node assignments as an atomic operation,
13765 and returns information about split instances after applying the set of
13768 In particular, it returns information about newly split instances, and
13769 instances that were already split, and remain so after the change.
13771 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13774 @type changes: list of (node_name, new_group_uuid) pairs.
13775 @param changes: list of node assignments to consider.
13776 @param node_data: a dict with data for all nodes
13777 @param instance_data: a dict with all instances to consider
13778 @rtype: a two-tuple
13779 @return: a list of instances that were previously okay and result split as a
13780 consequence of this change, and a list of instances that were previously
13781 split and this change does not fix.
13784 changed_nodes = dict((node, group) for node, group in changes
13785 if node_data[node].group != group)
13787 all_split_instances = set()
13788 previously_split_instances = set()
13790 def InstanceNodes(instance):
13791 return [instance.primary_node] + list(instance.secondary_nodes)
13793 for inst in instance_data.values():
13794 if inst.disk_template not in constants.DTS_INT_MIRROR:
13795 continue
13797 instance_nodes = InstanceNodes(inst)
13799 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13800 previously_split_instances.add(inst.name)
13802 if len(set(changed_nodes.get(node, node_data[node].group)
13803 for node in instance_nodes)) > 1:
13804 all_split_instances.add(inst.name)
13806 return (list(all_split_instances - previously_split_instances),
13807 list(previously_split_instances & all_split_instances))
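# Example (illustrative): moving only the secondary node of a DRBD instance
# into another group makes its two nodes span different groups, so the
# instance is reported in the first list (newly split); an instance already
# spanning two groups before the change ends up in the second list if the
# change does not reunite it.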
13810 class _GroupQuery(_QueryBase):
13811 FIELDS = query.GROUP_FIELDS
13813 def ExpandNames(self, lu):
13814 lu.needed_locks = {}
13816 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13817 self._cluster = lu.cfg.GetClusterInfo()
13818 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13820 if not self.names:
13821 self.wanted = [name_to_uuid[name]
13822 for name in utils.NiceSort(name_to_uuid.keys())]
13823 else:
13824 # Accept names to be either names or UUIDs.
13825 missing = []
13826 self.wanted = []
13827 all_uuid = frozenset(self._all_groups.keys())
13829 for name in self.names:
13830 if name in all_uuid:
13831 self.wanted.append(name)
13832 elif name in name_to_uuid:
13833 self.wanted.append(name_to_uuid[name])
13834 else:
13835 missing.append(name)
13837 if missing:
13838 raise errors.OpPrereqError("Some groups do not exist: %s" %
13839 utils.CommaJoin(missing),
13840 errors.ECODE_NOENT)
13842 def DeclareLocks(self, lu, level):
13843 pass
13845 def _GetQueryData(self, lu):
13846 """Computes the list of node groups and their attributes.
13849 do_nodes = query.GQ_NODE in self.requested_data
13850 do_instances = query.GQ_INST in self.requested_data
13852 group_to_nodes = None
13853 group_to_instances = None
13855 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13856 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13857 # latter GetAllInstancesInfo() is not enough, for we have to go through
13858 # instance->node. Hence, we will need to process nodes even if we only need
13859 # instance information.
13860 if do_nodes or do_instances:
13861 all_nodes = lu.cfg.GetAllNodesInfo()
13862 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13863 node_to_group = {}
13865 for node in all_nodes.values():
13866 if node.group in group_to_nodes:
13867 group_to_nodes[node.group].append(node.name)
13868 node_to_group[node.name] = node.group
13870 if do_instances:
13871 all_instances = lu.cfg.GetAllInstancesInfo()
13872 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13874 for instance in all_instances.values():
13875 node = instance.primary_node
13876 if node in node_to_group:
13877 group_to_instances[node_to_group[node]].append(instance.name)
13879 if not do_nodes:
13880 # Do not pass on node information if it was not requested.
13881 group_to_nodes = None
13883 return query.GroupQueryData(self._cluster,
13884 [self._all_groups[uuid]
13885 for uuid in self.wanted],
13886 group_to_nodes, group_to_instances,
13887 query.GQ_DISKPARAMS in self.requested_data)
13890 class LUGroupQuery(NoHooksLU):
13891 """Logical unit for querying node groups.
13896 def CheckArguments(self):
13897 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13898 self.op.output_fields, False)
13900 def ExpandNames(self):
13901 self.gq.ExpandNames(self)
13903 def DeclareLocks(self, level):
13904 self.gq.DeclareLocks(self, level)
13906 def Exec(self, feedback_fn):
13907 return self.gq.OldStyleQuery(self)
13910 class LUGroupSetParams(LogicalUnit):
13911 """Modifies the parameters of a node group.
13914 HPATH = "group-modify"
13915 HTYPE = constants.HTYPE_GROUP
13918 def CheckArguments(self):
13919 all_changes = [
13920 self.op.ndparams,
13921 self.op.diskparams,
13922 self.op.alloc_policy,
13923 self.op.hv_state,
13924 self.op.disk_state,
13925 self.op.ipolicy,
13926 ]
13928 if all_changes.count(None) == len(all_changes):
13929 raise errors.OpPrereqError("Please pass at least one modification",
13930 errors.ECODE_INVAL)
13932 def ExpandNames(self):
13933 # This raises errors.OpPrereqError on its own:
13934 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13936 self.needed_locks = {
13937 locking.LEVEL_INSTANCE: [],
13938 locking.LEVEL_NODEGROUP: [self.group_uuid],
13939 }
13941 self.share_locks[locking.LEVEL_INSTANCE] = 1
13943 def DeclareLocks(self, level):
13944 if level == locking.LEVEL_INSTANCE:
13945 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13947 # Lock instances optimistically, needs verification once group lock has
13949 self.needed_locks[locking.LEVEL_INSTANCE] = \
13950 self.cfg.GetNodeGroupInstances(self.group_uuid)
13952 @staticmethod
13953 def _UpdateAndVerifyDiskParams(old, new):
13954 """Updates and verifies disk parameters.
13957 new_params = _GetUpdatedParams(old, new)
13958 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
13959 return new_params
13961 def CheckPrereq(self):
13962 """Check prerequisites.
13965 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13967 # Check if locked instances are still correct
13968 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13970 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13971 cluster = self.cfg.GetClusterInfo()
13973 if self.group is None:
13974 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13975 (self.op.group_name, self.group_uuid))
13977 if self.op.ndparams:
13978 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13979 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
13980 self.new_ndparams = new_ndparams
13982 if self.op.diskparams:
13983 diskparams = self.group.diskparams
13984 uavdp = self._UpdateAndVerifyDiskParams
13985 # For each disktemplate subdict update and verify the values
13986 new_diskparams = dict((dt,
13987 uavdp(diskparams.get(dt, {}),
13988 self.op.diskparams[dt]))
13989 for dt in constants.DISK_TEMPLATES
13990 if dt in self.op.diskparams)
13991 # As we've all subdicts of diskparams ready, lets merge the actual
13992 # dict with all updated subdicts
13993 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
13994 try:
13995 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13996 except errors.OpPrereqError, err:
13997 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13998 errors.ECODE_INVAL)
14000 if self.op.hv_state:
14001 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14002 self.group.hv_state_static)
14004 if self.op.disk_state:
14005 self.new_disk_state = \
14006 _MergeAndVerifyDiskState(self.op.disk_state,
14007 self.group.disk_state_static)
14009 if self.op.ipolicy:
14010 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14011 self.op.ipolicy,
14012 group_policy=True)
14014 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14015 inst_filter = lambda inst: inst.name in owned_instances
14016 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14017 violations = \
14018 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14019 self.group),
14020 new_ipolicy, instances)
14022 if violations:
14023 self.LogWarning("After the ipolicy change the following instances"
14024 " violate them: %s",
14025 utils.CommaJoin(violations))
14027 def BuildHooksEnv(self):
14028 """Build hooks env.
14032 "GROUP_NAME": self.op.group_name,
14033 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14036 def BuildHooksNodes(self):
14037 """Build hooks nodes.
14040 mn = self.cfg.GetMasterNode()
14041 return ([mn], [mn])
14043 def Exec(self, feedback_fn):
14044 """Modifies the node group.
14047 result = []
14049 if self.op.ndparams:
14050 self.group.ndparams = self.new_ndparams
14051 result.append(("ndparams", str(self.group.ndparams)))
14053 if self.op.diskparams:
14054 self.group.diskparams = self.new_diskparams
14055 result.append(("diskparams", str(self.group.diskparams)))
14057 if self.op.alloc_policy:
14058 self.group.alloc_policy = self.op.alloc_policy
14060 if self.op.hv_state:
14061 self.group.hv_state_static = self.new_hv_state
14063 if self.op.disk_state:
14064 self.group.disk_state_static = self.new_disk_state
14066 if self.op.ipolicy:
14067 self.group.ipolicy = self.new_ipolicy
14069 self.cfg.Update(self.group, feedback_fn)
14070 return result
14073 class LUGroupRemove(LogicalUnit):
14074 HPATH = "group-remove"
14075 HTYPE = constants.HTYPE_GROUP
14078 def ExpandNames(self):
14079 # This raises errors.OpPrereqError on its own:
14080 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14081 self.needed_locks = {
14082 locking.LEVEL_NODEGROUP: [self.group_uuid],
14083 }
14085 def CheckPrereq(self):
14086 """Check prerequisites.
14088 This checks that the given group name exists as a node group, that is
14089 empty (i.e., contains no nodes), and that is not the last group of the
14090 cluster.
14093 # Verify that the group is empty.
14094 group_nodes = [node.name
14095 for node in self.cfg.GetAllNodesInfo().values()
14096 if node.group == self.group_uuid]
14099 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14101 (self.op.group_name,
14102 utils.CommaJoin(utils.NiceSort(group_nodes))),
14103 errors.ECODE_STATE)
14105 # Verify the cluster would not be left group-less.
14106 if len(self.cfg.GetNodeGroupList()) == 1:
14107 raise errors.OpPrereqError("Group '%s' is the only group,"
14108 " cannot be removed" %
14109 self.op.group_name,
14110 errors.ECODE_STATE)
14112 def BuildHooksEnv(self):
14113 """Build hooks env.
14117 "GROUP_NAME": self.op.group_name,
14120 def BuildHooksNodes(self):
14121 """Build hooks nodes.
14124 mn = self.cfg.GetMasterNode()
14125 return ([mn], [mn])
14127 def Exec(self, feedback_fn):
14128 """Remove the node group.
14131 try:
14132 self.cfg.RemoveNodeGroup(self.group_uuid)
14133 except errors.ConfigurationError:
14134 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14135 (self.op.group_name, self.group_uuid))
14137 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14140 class LUGroupRename(LogicalUnit):
14141 HPATH = "group-rename"
14142 HTYPE = constants.HTYPE_GROUP
14145 def ExpandNames(self):
14146 # This raises errors.OpPrereqError on its own:
14147 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14149 self.needed_locks = {
14150 locking.LEVEL_NODEGROUP: [self.group_uuid],
14151 }
14153 def CheckPrereq(self):
14154 """Check prerequisites.
14156 Ensures requested new name is not yet used.
14159 try:
14160 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14161 except errors.OpPrereqError:
14162 pass
14163 else:
14164 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14165 " node group (UUID: %s)" %
14166 (self.op.new_name, new_name_uuid),
14167 errors.ECODE_EXISTS)
14169 def BuildHooksEnv(self):
14170 """Build hooks env.
14174 "OLD_NAME": self.op.group_name,
14175 "NEW_NAME": self.op.new_name,
14178 def BuildHooksNodes(self):
14179 """Build hooks nodes.
14182 mn = self.cfg.GetMasterNode()
14184 all_nodes = self.cfg.GetAllNodesInfo()
14185 all_nodes.pop(mn, None)
14187 run_nodes = [mn]
14188 run_nodes.extend(node.name for node in all_nodes.values()
14189 if node.group == self.group_uuid)
14191 return (run_nodes, run_nodes)
14193 def Exec(self, feedback_fn):
14194 """Rename the node group.
14197 group = self.cfg.GetNodeGroup(self.group_uuid)
14200 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14201 (self.op.group_name, self.group_uuid))
14203 group.name = self.op.new_name
14204 self.cfg.Update(group, feedback_fn)
14206 return self.op.new_name
14209 class LUGroupEvacuate(LogicalUnit):
14210 HPATH = "group-evacuate"
14211 HTYPE = constants.HTYPE_GROUP
14214 def ExpandNames(self):
14215 # This raises errors.OpPrereqError on its own:
14216 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14218 if self.op.target_groups:
14219 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14220 self.op.target_groups)
14221 else:
14222 self.req_target_uuids = []
14224 if self.group_uuid in self.req_target_uuids:
14225 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14226 " as a target group (targets are %s)" %
14227 (self.group_uuid,
14228 utils.CommaJoin(self.req_target_uuids)),
14229 errors.ECODE_INVAL)
14231 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14233 self.share_locks = _ShareAll()
14234 self.needed_locks = {
14235 locking.LEVEL_INSTANCE: [],
14236 locking.LEVEL_NODEGROUP: [],
14237 locking.LEVEL_NODE: [],
14238 }
14240 def DeclareLocks(self, level):
14241 if level == locking.LEVEL_INSTANCE:
14242 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14244 # Lock instances optimistically, needs verification once node and group
14245 # locks have been acquired
14246 self.needed_locks[locking.LEVEL_INSTANCE] = \
14247 self.cfg.GetNodeGroupInstances(self.group_uuid)
14249 elif level == locking.LEVEL_NODEGROUP:
14250 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14252 if self.req_target_uuids:
14253 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14255 # Lock all groups used by instances optimistically; this requires going
14256 # via the node before it's locked, requiring verification later on
14257 lock_groups.update(group_uuid
14258 for instance_name in
14259 self.owned_locks(locking.LEVEL_INSTANCE)
14260 for group_uuid in
14261 self.cfg.GetInstanceNodeGroups(instance_name))
14262 else:
14263 # No target groups, need to lock all of them
14264 lock_groups = locking.ALL_SET
14266 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14268 elif level == locking.LEVEL_NODE:
14269 # This will only lock the nodes in the group to be evacuated which
14270 # contain actual instances
14271 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14272 self._LockInstancesNodes()
14274 # Lock all nodes in group to be evacuated and target groups
14275 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14276 assert self.group_uuid in owned_groups
14277 member_nodes = [node_name
14278 for group in owned_groups
14279 for node_name in self.cfg.GetNodeGroup(group).members]
14280 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14282 def CheckPrereq(self):
14283 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14284 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14285 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14287 assert owned_groups.issuperset(self.req_target_uuids)
14288 assert self.group_uuid in owned_groups
14290 # Check if locked instances are still correct
14291 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14293 # Get instance information
14294 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14296 # Check if node groups for locked instances are still correct
14297 _CheckInstancesNodeGroups(self.cfg, self.instances,
14298 owned_groups, owned_nodes, self.group_uuid)
14300 if self.req_target_uuids:
14301 # User requested specific target groups
14302 self.target_uuids = self.req_target_uuids
14303 else:
14304 # All groups except the one to be evacuated are potential targets
14305 self.target_uuids = [group_uuid for group_uuid in owned_groups
14306 if group_uuid != self.group_uuid]
14308 if not self.target_uuids:
14309 raise errors.OpPrereqError("There are no possible target groups",
14310 errors.ECODE_INVAL)
14312 def BuildHooksEnv(self):
14313 """Build hooks env.
14317 "GROUP_NAME": self.op.group_name,
14318 "TARGET_GROUPS": " ".join(self.target_uuids),
14321 def BuildHooksNodes(self):
14322 """Build hooks nodes.
14325 mn = self.cfg.GetMasterNode()
14327 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14329 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14331 return (run_nodes, run_nodes)
14333 def Exec(self, feedback_fn):
14334 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14336 assert self.group_uuid not in self.target_uuids
14338 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14339 instances=instances, target_groups=self.target_uuids)
14341 ial.Run(self.op.iallocator)
14343 if not ial.success:
14344 raise errors.OpPrereqError("Can't compute group evacuation using"
14345 " iallocator '%s': %s" %
14346 (self.op.iallocator, ial.info),
14347 errors.ECODE_NORES)
14349 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14351 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14352 len(jobs), self.op.group_name)
14354 return ResultWithJobs(jobs)
14357 class TagsLU(NoHooksLU): # pylint: disable=W0223
14358 """Generic tags LU.
14360 This is an abstract class which is the parent of all the other tags LUs.
14363 def ExpandNames(self):
14364 self.group_uuid = None
14365 self.needed_locks = {}
14367 if self.op.kind == constants.TAG_NODE:
14368 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14369 lock_level = locking.LEVEL_NODE
14370 lock_name = self.op.name
14371 elif self.op.kind == constants.TAG_INSTANCE:
14372 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14373 lock_level = locking.LEVEL_INSTANCE
14374 lock_name = self.op.name
14375 elif self.op.kind == constants.TAG_NODEGROUP:
14376 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14377 lock_level = locking.LEVEL_NODEGROUP
14378 lock_name = self.group_uuid
14379 else:
14380 lock_level = None
14381 lock_name = None
14383 if lock_level and getattr(self.op, "use_locking", True):
14384 self.needed_locks[lock_level] = lock_name
14386 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14387 # not possible to acquire the BGL based on opcode parameters)
14389 def CheckPrereq(self):
14390 """Check prerequisites.
14393 if self.op.kind == constants.TAG_CLUSTER:
14394 self.target = self.cfg.GetClusterInfo()
14395 elif self.op.kind == constants.TAG_NODE:
14396 self.target = self.cfg.GetNodeInfo(self.op.name)
14397 elif self.op.kind == constants.TAG_INSTANCE:
14398 self.target = self.cfg.GetInstanceInfo(self.op.name)
14399 elif self.op.kind == constants.TAG_NODEGROUP:
14400 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14402 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14403 str(self.op.kind), errors.ECODE_INVAL)
14406 class LUTagsGet(TagsLU):
14407 """Returns the tags of a given object.
14412 def ExpandNames(self):
14413 TagsLU.ExpandNames(self)
14415 # Share locks as this is only a read operation
14416 self.share_locks = _ShareAll()
14418 def Exec(self, feedback_fn):
14419 """Returns the tag list.
14422 return list(self.target.GetTags())
14425 class LUTagsSearch(NoHooksLU):
14426 """Searches the tags for a given pattern.
14431 def ExpandNames(self):
14432 self.needed_locks = {}
14434 def CheckPrereq(self):
14435 """Check prerequisites.
14437 This checks the pattern passed for validity by compiling it.
14440 try:
14441 self.re = re.compile(self.op.pattern)
14442 except re.error, err:
14443 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14444 (self.op.pattern, err), errors.ECODE_INVAL)
14446 def Exec(self, feedback_fn):
14447 """Returns the tag list.
14451 tgts = [("/cluster", cfg.GetClusterInfo())]
14452 ilist = cfg.GetAllInstancesInfo().values()
14453 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14454 nlist = cfg.GetAllNodesInfo().values()
14455 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14456 tgts.extend(("/nodegroup/%s" % n.name, n)
14457 for n in cfg.GetAllNodeGroupsInfo().values())
14458 results = []
14459 for path, target in tgts:
14460 for tag in target.GetTags():
14461 if self.re.search(tag):
14462 results.append((path, tag))
14464 return results
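# Each match is returned as a (path, tag) pair, e.g.
# ("/instances/instance1.example.com", "mytag") (illustrative values).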
14466 class LUTagsSet(TagsLU):
14467 """Sets a tag on a given object.
14472 def CheckPrereq(self):
14473 """Check prerequisites.
14475 This checks the type and length of the tag name and value.
14478 TagsLU.CheckPrereq(self)
14479 for tag in self.op.tags:
14480 objects.TaggableObject.ValidateTag(tag)
14482 def Exec(self, feedback_fn):
14486 try:
14487 for tag in self.op.tags:
14488 self.target.AddTag(tag)
14489 except errors.TagError, err:
14490 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14491 self.cfg.Update(self.target, feedback_fn)
14494 class LUTagsDel(TagsLU):
14495 """Delete a list of tags from a given object.
14500 def CheckPrereq(self):
14501 """Check prerequisites.
14503 This checks that we have the given tag.
14506 TagsLU.CheckPrereq(self)
14507 for tag in self.op.tags:
14508 objects.TaggableObject.ValidateTag(tag)
14509 del_tags = frozenset(self.op.tags)
14510 cur_tags = self.target.GetTags()
14512 diff_tags = del_tags - cur_tags
14513 if diff_tags:
14514 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14515 raise errors.OpPrereqError("Tag(s) %s not found" %
14516 (utils.CommaJoin(diff_names), ),
14517 errors.ECODE_NOENT)
14519 def Exec(self, feedback_fn):
14520 """Remove the tag from the object.
14523 for tag in self.op.tags:
14524 self.target.RemoveTag(tag)
14525 self.cfg.Update(self.target, feedback_fn)
14528 class LUTestDelay(NoHooksLU):
14529 """Sleep for a specified amount of time.
14531 This LU sleeps on the master and/or nodes for a specified amount of
14537 def ExpandNames(self):
14538 """Expand names and set required locks.
14540 This expands the node list, if any.
14543 self.needed_locks = {}
14544 if self.op.on_nodes:
14545 # _GetWantedNodes can be used here, but is not always appropriate to use
14546 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14547 # more information.
14548 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14549 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14551 def _TestDelay(self):
14552 """Do the actual sleep.
14555 if self.op.on_master:
14556 if not utils.TestDelay(self.op.duration):
14557 raise errors.OpExecError("Error during master delay test")
14558 if self.op.on_nodes:
14559 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14560 for node, node_result in result.items():
14561 node_result.Raise("Failure during rpc call to node %s" % node)
14563 def Exec(self, feedback_fn):
14564 """Execute the test delay opcode, with the wanted repetitions.
14567 if self.op.repeat == 0:
14568 self._TestDelay()
14569 else:
14570 top_value = self.op.repeat - 1
14571 for i in range(self.op.repeat):
14572 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14576 class LUTestJqueue(NoHooksLU):
14577 """Utility LU to test some aspects of the job queue.
14582 # Must be lower than default timeout for WaitForJobChange to see whether it
14583 # notices changed jobs
14584 _CLIENT_CONNECT_TIMEOUT = 20.0
14585 _CLIENT_CONFIRM_TIMEOUT = 60.0
14588 def _NotifyUsingSocket(cls, cb, errcls):
14589 """Opens a Unix socket and waits for another program to connect.
14592 @param cb: Callback to send socket name to client
14593 @type errcls: class
14594 @param errcls: Exception class to use for errors
14597 # Using a temporary directory as there's no easy way to create temporary
14598 # sockets without writing a custom loop around tempfile.mktemp and
14600 tmpdir = tempfile.mkdtemp()
14602 tmpsock = utils.PathJoin(tmpdir, "sock")
14604 logging.debug("Creating temporary socket at %s", tmpsock)
14605 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14610 # Send details to client
14613 # Wait for client to connect before continuing
14614 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14616 (conn, _) = sock.accept()
14617 except socket.error, err:
14618 raise errcls("Client didn't connect in time (%s)" % err)
14622 # Remove as soon as client is connected
14623 shutil.rmtree(tmpdir)
14625 # Wait for client to close
14628 # pylint: disable=E1101
14629 # Instance of '_socketobject' has no ... member
14630 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14632 except socket.error, err:
14633 raise errcls("Client failed to confirm notification (%s)" % err)
14637 def _SendNotification(self, test, arg, sockname):
14638 """Sends a notification to the client.
14641 @param test: Test name
14642 @param arg: Test argument (depends on test)
14643 @type sockname: string
14644 @param sockname: Socket path
14647 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14649 def _Notify(self, prereq, test, arg):
14650 """Notifies the client of a test.
14653 @param prereq: Whether this is a prereq-phase test
14655 @param test: Test name
14656 @param arg: Test argument (depends on test)
14660 errcls = errors.OpPrereqError
14662 errcls = errors.OpExecError
14664 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14668 def CheckArguments(self):
14669 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14670 self.expandnames_calls = 0
14672 def ExpandNames(self):
14673 checkargs_calls = getattr(self, "checkargs_calls", 0)
14674 if checkargs_calls < 1:
14675 raise errors.ProgrammerError("CheckArguments was not called")
14677 self.expandnames_calls += 1
14679 if self.op.notify_waitlock:
14680 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14682 self.LogInfo("Expanding names")
14684 # Get lock on master node (just to get a lock, not for a particular reason)
14685 self.needed_locks = {
14686 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14689 def Exec(self, feedback_fn):
14690 if self.expandnames_calls < 1:
14691 raise errors.ProgrammerError("ExpandNames was not called")
14693 if self.op.notify_exec:
14694 self._Notify(False, constants.JQT_EXEC, None)
14696 self.LogInfo("Executing")
14698 if self.op.log_messages:
14699 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14700 for idx, msg in enumerate(self.op.log_messages):
14701 self.LogInfo("Sending log message %s", idx + 1)
14702 feedback_fn(constants.JQT_MSGPREFIX + msg)
14703 # Report how many test messages have been sent
14704 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14707 raise errors.OpExecError("Opcode failure was requested")
14712 class IAllocator(object):
14713 """IAllocator framework.
14715 An IAllocator instance has four sets of attributes:
14716 - cfg that is needed to query the cluster
14717 - input data (all keys required by the selected mode in _MODE_DATA)
14718 - four buffer attributes (in_data/in_text and out_data/out_text) that represent the
14719 input (to the external script) in text and data structure format,
14720 and the output from it, again in two formats
14721 - the result variables from the script (success, info, result) for
14725 # pylint: disable=R0902
14726 # lots of instance attributes
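# Illustrative usage sketch (hedged; see LUTestAllocator.Exec further below
# for the authoritative construction). For an allocation request all keys
# listed for IALLOCATOR_MODE_ALLOC in _MODE_DATA must be passed as keyword
# arguments, e.g.:
#
#   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_ALLOC,
#                    name="inst1.example.com", memory=512, spindle_use=1,
#                    disks=[{constants.IDISK_SIZE: 1024,
#                            constants.IDISK_MODE: constants.DISK_RDWR}],
#                    disk_template=constants.DT_DRBD8, os="debian-image",
#                    tags=[], nics=[{}], vcpus=1, hypervisor=None)
#   ial.Run("hail")  # name of the external allocator script, e.g. "hail"
#   # on success, ial.result holds the node names chosen by the allocator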
14728 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14730 self.rpc = rpc_runner
14731 # init buffer variables
14732 self.in_text = self.out_text = self.in_data = self.out_data = None
14733 # init all input fields so that pylint is happy
14735 self.memory = self.disks = self.disk_template = self.spindle_use = None
14736 self.os = self.tags = self.nics = self.vcpus = None
14737 self.hypervisor = None
14738 self.relocate_from = None
14740 self.instances = None
14741 self.evac_mode = None
14742 self.target_groups = []
14744 self.required_nodes = None
14745 # init result fields
14746 self.success = self.info = self.result = None
14749 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14751 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14752 " IAllocator" % self.mode)
14754 keyset = [n for (n, _) in keydata]
14757 if key not in keyset:
14758 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14759 " IAllocator" % key)
14760 setattr(self, key, kwargs[key])
14763 if key not in kwargs:
14764 raise errors.ProgrammerError("Missing input parameter '%s' to"
14765 " IAllocator" % key)
14766 self._BuildInputData(compat.partial(fn, self), keydata)
14768 def _ComputeClusterData(self):
14769 """Compute the generic allocator input data.
14771 This is the data that is independent of the actual operation.
14775 cluster_info = cfg.GetClusterInfo()
14778 "version": constants.IALLOCATOR_VERSION,
14779 "cluster_name": cfg.GetClusterName(),
14780 "cluster_tags": list(cluster_info.GetTags()),
14781 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14782 "ipolicy": cluster_info.ipolicy,
14784 ninfo = cfg.GetAllNodesInfo()
14785 iinfo = cfg.GetAllInstancesInfo().values()
14786 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14789 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14791 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14792 hypervisor_name = self.hypervisor
14793 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14794 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14796 hypervisor_name = cluster_info.primary_hypervisor
14798 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14801 self.rpc.call_all_instances_info(node_list,
14802 cluster_info.enabled_hypervisors)
14804 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14806 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14807 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14808 i_list, config_ndata)
14809 assert len(data["nodes"]) == len(ninfo), \
14810 "Incomplete node data computed"
14812 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14814 self.in_data = data
14817 def _ComputeNodeGroupData(cfg):
14818 """Compute node groups data.
14821 cluster = cfg.GetClusterInfo()
14822 ng = dict((guuid, {
14823 "name": gdata.name,
14824 "alloc_policy": gdata.alloc_policy,
14825 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14827 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14832 def _ComputeBasicNodeData(cfg, node_cfg):
14833 """Compute global node data.
14836 @returns: a dict of name: (node dict, node config)
14839 # fill in static (config-based) values
14840 node_results = dict((ninfo.name, {
14841 "tags": list(ninfo.GetTags()),
14842 "primary_ip": ninfo.primary_ip,
14843 "secondary_ip": ninfo.secondary_ip,
14844 "offline": ninfo.offline,
14845 "drained": ninfo.drained,
14846 "master_candidate": ninfo.master_candidate,
14847 "group": ninfo.group,
14848 "master_capable": ninfo.master_capable,
14849 "vm_capable": ninfo.vm_capable,
14850 "ndparams": cfg.GetNdParams(ninfo),
14852 for ninfo in node_cfg.values())
14854 return node_results
14857 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14859 """Compute global node data.
14861 @param node_results: the basic node structures as filled from the config
14864 #TODO(dynmem): compute the right data on MAX and MIN memory
14865 # make a copy of the current dict
14866 node_results = dict(node_results)
14867 for nname, nresult in node_data.items():
14868 assert nname in node_results, "Missing basic data for node %s" % nname
14869 ninfo = node_cfg[nname]
14871 if not (ninfo.offline or ninfo.drained):
14872 nresult.Raise("Can't get data for node %s" % nname)
14873 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14875 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14877 for attr in ["memory_total", "memory_free", "memory_dom0",
14878 "vg_size", "vg_free", "cpu_total"]:
14879 if attr not in remote_info:
14880 raise errors.OpExecError("Node '%s' didn't return attribute"
14881 " '%s'" % (nname, attr))
14882 if not isinstance(remote_info[attr], int):
14883 raise errors.OpExecError("Node '%s' returned invalid value"
14885 (nname, attr, remote_info[attr]))
14886 # compute memory used by primary instances
14887 i_p_mem = i_p_up_mem = 0
14888 for iinfo, beinfo in i_list:
14889 if iinfo.primary_node == nname:
14890 i_p_mem += beinfo[constants.BE_MAXMEM]
14891 if iinfo.name not in node_iinfo[nname].payload:
14894 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14895 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14896 remote_info["memory_free"] -= max(0, i_mem_diff)
14898 if iinfo.admin_state == constants.ADMINST_UP:
14899 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14901 # build the dynamic node parameters dict
14903 "total_memory": remote_info["memory_total"],
14904 "reserved_memory": remote_info["memory_dom0"],
14905 "free_memory": remote_info["memory_free"],
14906 "total_disk": remote_info["vg_size"],
14907 "free_disk": remote_info["vg_free"],
14908 "total_cpus": remote_info["cpu_total"],
14909 "i_pri_memory": i_p_mem,
14910 "i_pri_up_memory": i_p_up_mem,
14912 pnr_dyn.update(node_results[nname])
14913 node_results[nname] = pnr_dyn
14915 return node_results
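# Worked example for the free-memory adjustment in the loop above
# (illustrative numbers): a primary instance with BE_MAXMEM of 1024 MiB that
# the hypervisor currently reports as using 768 MiB gives
# i_mem_diff = 1024 - 768 = 256, so 256 MiB are subtracted from the node's
# reported memory_free to account for the instance growing back to its
# maximum; instances already at or above BE_MAXMEM subtract nothing
# (max(0, i_mem_diff)).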
14918 def _ComputeInstanceData(cluster_info, i_list):
14919 """Compute global instance data.
14923 for iinfo, beinfo in i_list:
14925 for nic in iinfo.nics:
14926 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14930 "mode": filled_params[constants.NIC_MODE],
14931 "link": filled_params[constants.NIC_LINK],
14933 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14934 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14935 nic_data.append(nic_dict)
14937 "tags": list(iinfo.GetTags()),
14938 "admin_state": iinfo.admin_state,
14939 "vcpus": beinfo[constants.BE_VCPUS],
14940 "memory": beinfo[constants.BE_MAXMEM],
14941 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
14943 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14945 "disks": [{constants.IDISK_SIZE: dsk.size,
14946 constants.IDISK_MODE: dsk.mode}
14947 for dsk in iinfo.disks],
14948 "disk_template": iinfo.disk_template,
14949 "hypervisor": iinfo.hypervisor,
14951 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14953 instance_data[iinfo.name] = pir
14955 return instance_data
14957 def _AddNewInstance(self):
14958 """Add new instance data to allocator structure.
14960 This in combination with _ComputeClusterData will create the
14961 correct structure needed as input for the allocator.
14963 The checks for the completeness of the opcode must have already been
14967 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14969 if self.disk_template in constants.DTS_INT_MIRROR:
14970 self.required_nodes = 2
14972 self.required_nodes = 1
14976 "disk_template": self.disk_template,
14979 "vcpus": self.vcpus,
14980 "memory": self.memory,
14981 "spindle_use": self.spindle_use,
14982 "disks": self.disks,
14983 "disk_space_total": disk_space,
14985 "required_nodes": self.required_nodes,
14986 "hypervisor": self.hypervisor,
14991 def _AddRelocateInstance(self):
14992 """Add relocate instance data to allocator structure.
14994 This in combination with _ComputeClusterData will create the
14995 correct structure needed as input for the allocator.
14997 The checks for the completeness of the opcode must have already been
15001 instance = self.cfg.GetInstanceInfo(self.name)
15002 if instance is None:
15003 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15004 " IAllocator" % self.name)
15006 if instance.disk_template not in constants.DTS_MIRRORED:
15007 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15008 errors.ECODE_INVAL)
15010 if instance.disk_template in constants.DTS_INT_MIRROR and \
15011 len(instance.secondary_nodes) != 1:
15012 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
15013 errors.ECODE_STATE)
15015 self.required_nodes = 1
15016 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15017 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15021 "disk_space_total": disk_space,
15022 "required_nodes": self.required_nodes,
15023 "relocate_from": self.relocate_from,
15027 def _AddNodeEvacuate(self):
15028 """Get data for node-evacuate requests.
15032 "instances": self.instances,
15033 "evac_mode": self.evac_mode,
15036 def _AddChangeGroup(self):
15037 """Get data for node-evacuate requests.
15041 "instances": self.instances,
15042 "target_groups": self.target_groups,
15045 def _BuildInputData(self, fn, keydata):
15046 """Build input data structures.
15049 self._ComputeClusterData()
15052 request["type"] = self.mode
15053 for keyname, keytype in keydata:
15054 if keyname not in request:
15055 raise errors.ProgrammerError("Request parameter %s is missing" %
15057 val = request[keyname]
15058 if not keytype(val):
15059 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15060 " validation, value %s, expected"
15061 " type %s" % (keyname, val, keytype))
15062 self.in_data["request"] = request
15064 self.in_text = serializer.Dump(self.in_data)
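# Illustrative shape of the serialized input built above (abridged, with
# made-up values; the authoritative format is the iallocator design
# document shipped with Ganeti):
#
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "cluster.example.com",
#    "cluster_tags": [],
#    "enabled_hypervisors": ["xen-pvm"],
#    "nodegroups": {...}, "nodes": {...}, "instances": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com",
#                "memory": 512, "vcpus": 1, "disks": [...], ...}}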
15066 _STRING_LIST = ht.TListOf(ht.TString)
15067 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15068 # pylint: disable=E1101
15069 # Class '...' has no 'OP_ID' member
15070 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15071 opcodes.OpInstanceMigrate.OP_ID,
15072 opcodes.OpInstanceReplaceDisks.OP_ID])
15076 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15077 ht.TItems([ht.TNonEmptyString,
15078 ht.TNonEmptyString,
15079 ht.TListOf(ht.TNonEmptyString),
15082 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15083 ht.TItems([ht.TNonEmptyString,
15086 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15087 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
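# Illustrative value accepted by _NEVAC_RESULT (made-up names): a 3-element
# list of successfully moved instances, failed instances and the jobs to
# submit, e.g.:
#
#   [[["inst1.example.com", "group2", ["node3.example.com"]]],   # moved
#    [["inst2.example.com", "not enough memory"]],               # failed
#    [[{"OP_ID": opcodes.OpInstanceMigrate.OP_ID, ...}]]]        # jobs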
15090 constants.IALLOCATOR_MODE_ALLOC:
15093 ("name", ht.TString),
15094 ("memory", ht.TInt),
15095 ("spindle_use", ht.TInt),
15096 ("disks", ht.TListOf(ht.TDict)),
15097 ("disk_template", ht.TString),
15098 ("os", ht.TString),
15099 ("tags", _STRING_LIST),
15100 ("nics", ht.TListOf(ht.TDict)),
15101 ("vcpus", ht.TInt),
15102 ("hypervisor", ht.TString),
15104 constants.IALLOCATOR_MODE_RELOC:
15105 (_AddRelocateInstance,
15106 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15108 constants.IALLOCATOR_MODE_NODE_EVAC:
15109 (_AddNodeEvacuate, [
15110 ("instances", _STRING_LIST),
15111 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15113 constants.IALLOCATOR_MODE_CHG_GROUP:
15114 (_AddChangeGroup, [
15115 ("instances", _STRING_LIST),
15116 ("target_groups", _STRING_LIST),
15120 def Run(self, name, validate=True, call_fn=None):
15121 """Run an instance allocator and return the results.
15124 if call_fn is None:
15125 call_fn = self.rpc.call_iallocator_runner
15127 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15128 result.Raise("Failure while running the iallocator script")
15130 self.out_text = result.payload
15132 self._ValidateResult()
15134 def _ValidateResult(self):
15135 """Process the allocator results.
15137 This will process the result and, if valid, save it in
15138 self.out_data and the other result attributes.
15142 rdict = serializer.Load(self.out_text)
15143 except Exception, err:
15144 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15146 if not isinstance(rdict, dict):
15147 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15149 # TODO: remove backwards compatibility in later versions
15150 if "nodes" in rdict and "result" not in rdict:
15151 rdict["result"] = rdict["nodes"]
15154 for key in "success", "info", "result":
15155 if key not in rdict:
15156 raise errors.OpExecError("Can't parse iallocator results:"
15157 " missing key '%s'" % key)
15158 setattr(self, key, rdict[key])
15160 if not self._result_check(self.result):
15161 raise errors.OpExecError("Iallocator returned invalid result,"
15162 " expected %s, got %s" %
15163 (self._result_check, self.result),
15164 errors.ECODE_INVAL)
15166 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15167 assert self.relocate_from is not None
15168 assert self.required_nodes == 1
15170 node2group = dict((name, ndata["group"])
15171 for (name, ndata) in self.in_data["nodes"].items())
15173 fn = compat.partial(self._NodesToGroups, node2group,
15174 self.in_data["nodegroups"])
15176 instance = self.cfg.GetInstanceInfo(self.name)
15177 request_groups = fn(self.relocate_from + [instance.primary_node])
15178 result_groups = fn(rdict["result"] + [instance.primary_node])
15180 if self.success and not set(result_groups).issubset(request_groups):
15181 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15182 " differ from original groups (%s)" %
15183 (utils.CommaJoin(result_groups),
15184 utils.CommaJoin(request_groups)))
15186 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15187 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15189 self.out_data = rdict
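# Illustrative shape of a successful reply after parsing (made-up values):
#
#   {"success": True,
#    "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
#
# "result" is what self._result_check validates; for relocation requests the
# group check above additionally requires the returned nodes to stay within
# the instance's original node groups.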
15192 def _NodesToGroups(node2group, groups, nodes):
15193 """Returns a list of unique group names for a list of nodes.
15195 @type node2group: dict
15196 @param node2group: Map from node name to group UUID
15198 @param groups: Group information
15200 @param nodes: Node names
15207 group_uuid = node2group[node]
15209 # Ignore unknown node
15213 group = groups[group_uuid]
15215 # Can't find group, let's use UUID
15216 group_name = group_uuid
15218 group_name = group["name"]
15220 result.add(group_name)
15222 return sorted(result)
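# Illustrative example (assumed data): with
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
# a call such as _NodesToGroups(node2group, groups, ["node2", "node1", "nodeX"])
# returns ["default", "rack2"]: unknown nodes are skipped, group UUIDs are
# translated to names where possible and the result is de-duplicated and
# sorted.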
15225 class LUTestAllocator(NoHooksLU):
15226 """Run allocator tests.
15228 This LU runs the allocator tests.
15231 def CheckPrereq(self):
15232 """Check prerequisites.
15234 This checks the opcode parameters depending on the direction and mode of the test.
15237 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15238 for attr in ["memory", "disks", "disk_template",
15239 "os", "tags", "nics", "vcpus"]:
15240 if not hasattr(self.op, attr):
15241 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15242 attr, errors.ECODE_INVAL)
15243 iname = self.cfg.ExpandInstanceName(self.op.name)
15244 if iname is not None:
15245 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15246 iname, errors.ECODE_EXISTS)
15247 if not isinstance(self.op.nics, list):
15248 raise errors.OpPrereqError("Invalid parameter 'nics'",
15249 errors.ECODE_INVAL)
15250 if not isinstance(self.op.disks, list):
15251 raise errors.OpPrereqError("Invalid parameter 'disks'",
15252 errors.ECODE_INVAL)
15253 for row in self.op.disks:
15254 if (not isinstance(row, dict) or
15255 constants.IDISK_SIZE not in row or
15256 not isinstance(row[constants.IDISK_SIZE], int) or
15257 constants.IDISK_MODE not in row or
15258 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15259 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15260 " parameter", errors.ECODE_INVAL)
15261 if self.op.hypervisor is None:
15262 self.op.hypervisor = self.cfg.GetHypervisorType()
15263 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15264 fname = _ExpandInstanceName(self.cfg, self.op.name)
15265 self.op.name = fname
15266 self.relocate_from = \
15267 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15268 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15269 constants.IALLOCATOR_MODE_NODE_EVAC):
15270 if not self.op.instances:
15271 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15272 self.op.instances = _GetWantedInstances(self, self.op.instances)
15274 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15275 self.op.mode, errors.ECODE_INVAL)
15277 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15278 if self.op.allocator is None:
15279 raise errors.OpPrereqError("Missing allocator name",
15280 errors.ECODE_INVAL)
15281 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15282 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15283 self.op.direction, errors.ECODE_INVAL)
15285 def Exec(self, feedback_fn):
15286 """Run the allocator test.
15289 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15290 ial = IAllocator(self.cfg, self.rpc,
15293 memory=self.op.memory,
15294 disks=self.op.disks,
15295 disk_template=self.op.disk_template,
15299 vcpus=self.op.vcpus,
15300 hypervisor=self.op.hypervisor,
15302 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15303 ial = IAllocator(self.cfg, self.rpc,
15306 relocate_from=list(self.relocate_from),
15308 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15309 ial = IAllocator(self.cfg, self.rpc,
15311 instances=self.op.instances,
15312 target_groups=self.op.target_groups)
15313 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15314 ial = IAllocator(self.cfg, self.rpc,
15316 instances=self.op.instances,
15317 evac_mode=self.op.evac_mode)
15319 raise errors.ProgrammerError("Uncatched mode %s in"
15320 " LUTestAllocator.Exec", self.op.mode)
15322 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15323 result = ial.in_text
15325 ial.Run(self.op.allocator, validate=False)
15326 result = ial.out_text
15330 class LUNetworkAdd(LogicalUnit):
15331 """Logical unit for creating networks.
15334 HPATH = "network-add"
15335 HTYPE = constants.HTYPE_NETWORK
15338 def BuildHooksNodes(self):
15339 """Build hooks nodes.
15342 mn = self.cfg.GetMasterNode()
15343 return ([mn], [mn])
15345 def ExpandNames(self):
15346 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15347 self.needed_locks = {}
15348 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15350 def CheckPrereq(self):
15351 """Check prerequisites.
15353 This checks that the given network name is not already defined.
15357 if self.op.network is None:
15358 raise errors.OpPrereqError("Network must be given",
15359 errors.ECODE_INVAL)
15361 uuid = self.cfg.LookupNetwork(self.op.network_name)
15364 raise errors.OpPrereqError("Network '%s' already defined" %
15365 self.op.network_name, errors.ECODE_EXISTS)
15368 def BuildHooksEnv(self):
15369 """Build hooks env.
15373 "NETWORK_NAME": self.op.network_name,
15374 "NETWORK_SUBNET": self.op.network,
15375 "NETWORK_GATEWAY": self.op.gateway,
15376 "NETWORK_SUBNET6": self.op.network6,
15377 "NETWORK_GATEWAY6": self.op.gateway6,
15378 "NETWORK_MAC_PREFIX": self.op.mac_prefix,
15379 "NETWORK_TYPE": self.op.network_type,
15383 def Exec(self, feedback_fn):
15384 """Add the ip pool to the cluster.
15387 nobj = objects.Network(name=self.op.network_name,
15388 network=self.op.network,
15389 gateway=self.op.gateway,
15390 network6=self.op.network6,
15391 gateway6=self.op.gateway6,
15392 mac_prefix=self.op.mac_prefix,
15393 network_type=self.op.network_type,
15394 uuid=self.network_uuid,
15396 # Initialize the associated address pool
15398 pool = network.AddressPool.InitializeNetwork(nobj)
15399 except errors.AddressPoolError, e:
15400 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15402 # Check if we need to reserve the nodes and the cluster master IP
15403 # These may not be allocated to any instances in routed mode, as
15404 # they wouldn't function anyway.
15405 for node in self.cfg.GetAllNodesInfo().values():
15406 for ip in [node.primary_ip, node.secondary_ip]:
15409 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15411 except errors.AddressPoolError:
15414 master_ip = self.cfg.GetClusterInfo().master_ip
15416 pool.Reserve(master_ip)
15417 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15418 except errors.AddressPoolError:
15421 if self.op.add_reserved_ips:
15422 for ip in self.op.add_reserved_ips:
15424 pool.Reserve(ip, external=True)
15425 except errors.AddressPoolError, e:
15426 raise errors.OpExecError("Cannot reserve IP %s. %s" % (ip, e))
15428 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15429 del self.remove_locks[locking.LEVEL_NETWORK]
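# Illustrative opcode for this LU (a hedged sketch; the field names mirror
# the self.op attributes used above):
#
#   op = opcodes.OpNetworkAdd(network_name="net1",
#                             network="192.0.2.0/24",
#                             gateway="192.0.2.1",
#                             network6=None, gateway6=None,
#                             mac_prefix=None, network_type=None,
#                             add_reserved_ips=["192.0.2.10"])
#
# Executing it creates the Network object, initializes its address pool and
# marks node, master and explicitly listed IPs as externally reserved.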
15432 class LUNetworkRemove(LogicalUnit):
15433 HPATH = "network-remove"
15434 HTYPE = constants.HTYPE_NETWORK
15437 def ExpandNames(self):
15438 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15440 self.needed_locks = {
15441 locking.LEVEL_NETWORK: [self.network_uuid],
15445 def CheckPrereq(self):
15446 """Check prerequisites.
15448 This checks that the given network name exists as a network and that it
15449 is not connected to any node group.
15453 if not self.network_uuid:
15454 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15455 errors.ECODE_INVAL)
15457 # Verify that the network is not connected.
15458 node_groups = [group.name
15459 for group in self.cfg.GetAllNodeGroupsInfo().values()
15460 for network in group.networks.keys()
15461 if network == self.network_uuid]
15464 self.LogWarning("Nework '%s' is connected to the following"
15465 " node groups: %s" % (self.op.network_name,
15466 utils.CommaJoin(utils.NiceSort(node_groups))))
15467 raise errors.OpPrereqError("Network still connected",
15468 errors.ECODE_STATE)
15470 def BuildHooksEnv(self):
15471 """Build hooks env.
15475 "NETWORK_NAME": self.op.network_name,
15478 def BuildHooksNodes(self):
15479 """Build hooks nodes.
15482 mn = self.cfg.GetMasterNode()
15483 return ([mn], [mn])
15485 def Exec(self, feedback_fn):
15486 """Remove the network.
15490 self.cfg.RemoveNetwork(self.network_uuid)
15491 except errors.ConfigurationError:
15492 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15493 (self.op.network_name, self.network_uuid))
15496 class LUNetworkSetParams(LogicalUnit):
15497 """Modifies the parameters of a network.
15500 HPATH = "network-modify"
15501 HTYPE = constants.HTYPE_NETWORK
15504 def CheckArguments(self):
15505 if (self.op.gateway and
15506 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15507 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15508 " at once", errors.ECODE_INVAL)
15511 def ExpandNames(self):
15512 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15513 self.network = self.cfg.GetNetwork(self.network_uuid)
15514 self.needed_locks = {
15515 locking.LEVEL_NETWORK: [self.network_uuid],
15519 if self.network is None:
15520 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15521 (self.op.network_name, self.network_uuid),
15522 errors.ECODE_INVAL)
15524 def CheckPrereq(self):
15525 """Check prerequisites.
15528 self.gateway = self.network.gateway
15529 self.network_type = self.network.network_type
15530 self.mac_prefix = self.network.mac_prefix
15531 self.network6 = self.network.network6
15532 self.gateway6 = self.network.gateway6
15534 self.pool = network.AddressPool(self.network)
15536 if self.op.gateway:
15537 if self.op.gateway == constants.VALUE_NONE:
15538 self.gateway = None
15540 self.gateway = self.op.gateway
15541 if self.pool.IsReserved(self.gateway):
15542 raise errors.OpPrereqError("%s is already reserved" %
15543 self.gateway, errors.ECODE_INVAL)
15545 if self.op.network_type:
15546 if self.op.network_type == constants.VALUE_NONE:
15547 self.network_type = None
15549 self.network_type = self.op.network_type
15551 if self.op.mac_prefix:
15552 if self.op.mac_prefix == constants.VALUE_NONE:
15553 self.mac_prefix = None
15555 self.mac_prefix = self.op.mac_prefix
15557 if self.op.gateway6:
15558 if self.op.gateway6 == constants.VALUE_NONE:
15559 self.gateway6 = None
15561 self.gateway6 = self.op.gateway6
15563 if self.op.network6:
15564 if self.op.network6 == constants.VALUE_NONE:
15565 self.network6 = None
15567 self.network6 = self.op.network6
15571 def BuildHooksEnv(self):
15572 """Build hooks env.
15576 "NETWORK_NAME": self.op.network_name,
15577 "NETWORK_SUBNET": self.network.network,
15578 "NETWORK_GATEWAY": self.gateway,
15579 "NETWORK_SUBNET6": self.network6,
15580 "NETWORK_GATEWAY6": self.gateway6,
15581 "NETWORK_MAC_PREFIX": self.mac_prefix,
15582 "NETWORK_TYPE": self.network_type,
15586 def BuildHooksNodes(self):
15587 """Build hooks nodes.
15590 mn = self.cfg.GetMasterNode()
15591 return ([mn], [mn])
15593 def Exec(self, feedback_fn):
15594 """Modifies the network.
15597 #TODO: reserve/release via temporary reservation manager
15598 # extend cfg.ReserveIp/ReleaseIp with the external flag
15599 if self.op.gateway:
15600 if self.gateway == self.network.gateway:
15601 self.LogWarning("Gateway is already %s" % self.gateway)
15604 self.pool.Reserve(self.gateway, external=True)
15605 if self.network.gateway:
15606 self.pool.Release(self.network.gateway, external=True)
15607 self.network.gateway = self.gateway
15609 if self.op.add_reserved_ips:
15610 for ip in self.op.add_reserved_ips:
15612 if self.pool.IsReserved(ip):
15613 self.LogWarning("IP %s is already reserved" % ip)
15615 self.pool.Reserve(ip, external=True)
15616 except errors.AddressPoolError, e:
15617 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15619 if self.op.remove_reserved_ips:
15620 for ip in self.op.remove_reserved_ips:
15621 if ip == self.network.gateway:
15622 self.LogWarning("Cannot unreserve Gateway's IP")
15625 if not self.pool.IsReserved(ip):
15626 self.LogWarning("IP %s is already unreserved" % ip)
15628 self.pool.Release(ip, external=True)
15629 except errors.AddressPoolError, e:
15630 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15632 if self.op.mac_prefix:
15633 self.network.mac_prefix = self.mac_prefix
15635 if self.op.network6:
15636 self.network.network6 = self.network6
15638 if self.op.gateway6:
15639 self.network.gateway6 = self.gateway6
15641 if self.op.network_type:
15642 self.network.network_type = self.network_type
15644 self.pool.Validate()
15646 self.cfg.Update(self.network, feedback_fn)
15649 class _NetworkQuery(_QueryBase):
15650 FIELDS = query.NETWORK_FIELDS
15652 def ExpandNames(self, lu):
15653 lu.needed_locks = {}
15655 self._all_networks = lu.cfg.GetAllNetworksInfo()
15656 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15659 self.wanted = [name_to_uuid[name]
15660 for name in utils.NiceSort(name_to_uuid.keys())]
15662 # Accept names to be either names or UUIDs.
15665 all_uuid = frozenset(self._all_networks.keys())
15667 for name in self.names:
15668 if name in all_uuid:
15669 self.wanted.append(name)
15670 elif name in name_to_uuid:
15671 self.wanted.append(name_to_uuid[name])
15673 missing.append(name)
15676 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15677 errors.ECODE_NOENT)
15679 def DeclareLocks(self, lu, level):
15682 def _GetQueryData(self, lu):
15683 """Computes the list of networks and their attributes.
15686 do_instances = query.NETQ_INST in self.requested_data
15687 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15688 do_stats = query.NETQ_STATS in self.requested_data
15689 cluster = lu.cfg.GetClusterInfo()
15691 network_to_groups = None
15692 network_to_instances = None
15695 # For NETQ_GROUP, we need to map network->[groups]
15697 all_groups = lu.cfg.GetAllNodeGroupsInfo()
15698 network_to_groups = dict((uuid, []) for uuid in self.wanted)
15699 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
15702 all_instances = lu.cfg.GetAllInstancesInfo()
15703 all_nodes = lu.cfg.GetAllNodesInfo()
15704 network_to_instances = dict((uuid, []) for uuid in self.wanted)
15707 for group in all_groups.values():
15709 group_nodes = [node.name for node in all_nodes.values() if
15710 node.group == group.uuid]
15711 group_instances = [instance for instance in all_instances.values()
15712 if instance.primary_node in group_nodes]
15714 for net_uuid in group.networks.keys():
15715 if net_uuid in network_to_groups:
15716 netparams = group.networks[net_uuid]
15717 mode = netparams[constants.NIC_MODE]
15718 link = netparams[constants.NIC_LINK]
15719 info = group.name + "(" + mode + ", " + link + ")"
15720 network_to_groups[net_uuid].append(info)
15723 for instance in group_instances:
15724 for nic in instance.nics:
15725 if nic.network == self._all_networks[net_uuid].name:
15726 network_to_instances[net_uuid].append(instance.name)
15731 for uuid, net in self._all_networks.items():
15732 if uuid in self.wanted:
15733 pool = network.AddressPool(net)
15735 "free_count": pool.GetFreeCount(),
15736 "reserved_count": pool.GetReservedCount(),
15737 "map": pool.GetMap(),
15738 "external_reservations": ", ".join(pool.GetExternalReservations()),
15741 return query.NetworkQueryData([self._all_networks[uuid]
15742 for uuid in self.wanted],
15744 network_to_instances,
15748 class LUNetworkQuery(NoHooksLU):
15749 """Logical unit for querying networks.
15754 def CheckArguments(self):
15755 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
15756 self.op.output_fields, False)
15758 def ExpandNames(self):
15759 self.nq.ExpandNames(self)
15761 def Exec(self, feedback_fn):
15762 return self.nq.OldStyleQuery(self)
15766 class LUNetworkConnect(LogicalUnit):
15767 def BuildHooksNodes(self):
15770 def BuildHooksEnv(self):
15774 class LUNetworkDisconnect(LogicalUnit):
15775 def BuildHooksNodes(self):
15778 def BuildHooksEnv(self):
15782 #: Query type implementations
15784 constants.QR_CLUSTER: _ClusterQuery,
15785 constants.QR_INSTANCE: _InstanceQuery,
15786 constants.QR_NODE: _NodeQuery,
15787 constants.QR_GROUP: _GroupQuery,
15788 constants.QR_NETWORK: _NetworkQuery,
15789 constants.QR_OS: _OsQuery,
15790 constants.QR_EXPORT: _ExportQuery,
15793 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15796 def _GetQueryImplementation(name):
15797 """Returns the implemtnation for a query type.
15799 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15803 return _QUERY_IMPL[name]
15805 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15806 errors.ECODE_INVAL)
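# Illustrative usage (hedged): query opcodes resolve their backend through
# this table, e.g.
#
#   impl = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   query_obj = impl(qlang.MakeSimpleFilter("name", names), fields, False)
#
# while an unknown resource name raises OpPrereqError with ECODE_INVAL as
# above.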