4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import OpenSSL

45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
63 from ganeti import runtime
64 from ganeti import network
66 import ganeti.masterd.instance # pylint: disable=W0611
69 #: Size of DRBD meta block device
73 INSTANCE_DOWN = [constants.ADMINST_DOWN]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
77 #: Instance status in which an instance can be marked as offline/online
78 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
79 constants.ADMINST_OFFLINE,
84 """Data container for LU results with jobs.
86 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
87 by L{mcpu._ProcessResult}. The latter will then submit the jobs
88 contained in the C{jobs} attribute and include the job IDs in the opcode
92 def __init__(self, jobs, **kwargs):
93 """Initializes this class.
95 Additional return values can be specified as keyword arguments.
97 @type jobs: list of lists of L{opcodes.OpCode}
98 @param jobs: A list of lists of opcode objects
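# Illustrative sketch (not part of the original module): an LU's Exec can hand
# follow-up work to the job queue by returning ResultWithJobs, e.g.:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1)],   # first job, one opcode
#             [opcodes.OpTestDelay(duration=2)]]   # second, independent job
#     return ResultWithJobs(jobs, result_key="some value")
#
# mcpu._ProcessResult then submits the jobs and includes their job IDs in the
# opcode result.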
105 class LogicalUnit(object):
106 """Logical Unit base class.
108 Subclasses must follow these rules:
109 - implement ExpandNames
110 - implement CheckPrereq (except when tasklets are used)
111 - implement Exec (except when tasklets are used)
112 - implement BuildHooksEnv
113 - implement BuildHooksNodes
114 - redefine HPATH and HTYPE
115 - optionally redefine their run requirements:
116 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
118 Note that all commands require root permissions.
120 @ivar dry_run_result: the value (if any) that will be returned to the caller
121 in dry-run mode (signalled by opcode dry_run parameter)
128 def __init__(self, processor, op, context, rpc_runner):
129 """Constructor for LogicalUnit.
131 This needs to be overridden in derived classes in order to check op
135 self.proc = processor
137 self.cfg = context.cfg
138 self.glm = context.glm
140 self.owned_locks = context.glm.list_owned
141 self.context = context
142 self.rpc = rpc_runner
143 # Dicts used to declare locking needs to mcpu
144 self.needed_locks = None
145 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
147 self.remove_locks = {}
148 # Used to force good behavior when calling helper functions
149 self.recalculate_locks = {}
151 self.Log = processor.Log # pylint: disable=C0103
152 self.LogWarning = processor.LogWarning # pylint: disable=C0103
153 self.LogInfo = processor.LogInfo # pylint: disable=C0103
154 self.LogStep = processor.LogStep # pylint: disable=C0103
155 # support for dry-run
156 self.dry_run_result = None
157 # support for generic debug attribute
158 if (not hasattr(self.op, "debug_level") or
159 not isinstance(self.op.debug_level, int)):
160 self.op.debug_level = 0
165 # Validate opcode parameters and set defaults
166 self.op.Validate(True)
168 self.CheckArguments()
170 def CheckArguments(self):
171 """Check syntactic validity for the opcode arguments.
173 This method is for doing a simple syntactic check and ensuring
174 validity of opcode parameters, without any cluster-related
175 checks. While the same can be accomplished in ExpandNames and/or
176 CheckPrereq, doing these separately is better because:
178 - ExpandNames is left as purely a lock-related function
179 - CheckPrereq is run after we have acquired locks (and possible
182 The function is allowed to change the self.op attribute so that
183 later methods no longer need to worry about missing parameters.
188 def ExpandNames(self):
189 """Expand names for this LU.
191 This method is called before starting to execute the opcode, and it should
192 update all the parameters of the opcode to their canonical form (e.g. a
193 short node name must be fully expanded after this method has successfully
194 completed). This way locking, hooks, logging, etc. can work correctly.
196 LUs which implement this method must also populate the self.needed_locks
197 member, as a dict with lock levels as keys, and a list of needed lock names
200 - use an empty dict if you don't need any lock
201 - if you don't need any lock at a particular level omit that
202 level (note that in this case C{DeclareLocks} won't be called
203 at all for that level)
204 - if you need locks at a level, but you can't calculate it in
205 this function, initialise that level with an empty list and do
206 further processing in L{LogicalUnit.DeclareLocks} (see that
207 function's docstring)
208 - don't put anything for the BGL level
209 - if you want all locks at a level use L{locking.ALL_SET} as a value
211 If you need to share locks (rather than acquire them exclusively) at one
212 level you can modify self.share_locks, setting a true value (usually 1) for
213 that level. By default locks are not shared.
215 This function can also define a list of tasklets, which then will be
216 executed in order instead of the usual LU-level CheckPrereq and Exec
217 functions, if those are not defined by the LU.
221 # Acquire all nodes and one instance
222 self.needed_locks = {
223 locking.LEVEL_NODE: locking.ALL_SET,
224 locking.LEVEL_INSTANCE: ['instance1.example.com'],
226 # Acquire just two nodes
227 self.needed_locks = {
228 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
231 self.needed_locks = {} # No, you can't leave it to the default value None
234 # The implementation of this method is mandatory only if the new LU is
235 # concurrent, so that old LUs don't need to be changed all at the same
238 self.needed_locks = {} # Exclusive LUs don't need locks.
240 raise NotImplementedError
242 def DeclareLocks(self, level):
243 """Declare LU locking needs for a level
245 While most LUs can just declare their locking needs at ExpandNames time,
246 sometimes there's the need to calculate some locks after having acquired
247 the ones before. This function is called just before acquiring locks at a
248 particular level, but after acquiring the ones at lower levels, and permits
249 such calculations. It can be used to modify self.needed_locks, and by
250 default it does nothing.
252 This function is only called if you have something already set in
253 self.needed_locks for the level.
255 @param level: Locking level which is going to be locked
256 @type level: member of L{ganeti.locking.LEVELS}
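# Illustrative sketch (an assumption, not from the original file): a typical
# DeclareLocks implementation defers node-lock calculation until the instance
# locks are already held:
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       # Only now do we know which instances were locked in ExpandNames
#       self._LockInstancesNodes(primary_only=True)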
260 def CheckPrereq(self):
261 """Check prerequisites for this LU.
263 This method should check that the prerequisites for the execution
264 of this LU are fulfilled. It can do internode communication, but
265 it should be idempotent - no cluster or system changes are
268 The method should raise errors.OpPrereqError in case something is
269 not fulfilled. Its return value is ignored.
271 This method should also update all the parameters of the opcode to
272 their canonical form if it hasn't been done by ExpandNames before.
275 if self.tasklets is not None:
276 for (idx, tl) in enumerate(self.tasklets):
277 logging.debug("Checking prerequisites for tasklet %s/%s",
278 idx + 1, len(self.tasklets))
283 def Exec(self, feedback_fn):
286 This method should implement the actual work. It should raise
287 errors.OpExecError for failures that are somewhat dealt with in
291 if self.tasklets is not None:
292 for (idx, tl) in enumerate(self.tasklets):
293 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
296 raise NotImplementedError
298 def BuildHooksEnv(self):
299 """Build hooks environment for this LU.
302 @return: Dictionary containing the environment that will be used for
303 running the hooks for this LU. The keys of the dict must not be prefixed
304 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
305 will extend the environment with additional variables. If no environment
306 should be defined, an empty dictionary should be returned (not C{None}).
307 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311 raise NotImplementedError
313 def BuildHooksNodes(self):
314 """Build list of nodes to run LU's hooks.
316 @rtype: tuple; (list, list)
317 @return: Tuple containing a list of node names on which the hook
318 should run before the execution and a list of node names on which the
319 hook should run after the execution. If there are no nodes, an empty
320 list should be returned (not None).
321 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
325 raise NotImplementedError
327 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
328 """Notify the LU about the results of its hooks.
330 This method is called every time a hooks phase is executed, and notifies
331 the Logical Unit about the hooks' result. The LU can then use it to alter
332 its result based on the hooks. By default the method does nothing and the
333 previous result is passed back unchanged but any LU can define it if it
334 wants to use the local cluster hook-scripts somehow.
336 @param phase: one of L{constants.HOOKS_PHASE_POST} or
337 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
338 @param hook_results: the results of the multi-node hooks rpc call
340 @param feedback_fn: function used to send feedback back to the caller
340 @param lu_result: the previous Exec result this LU had, or None
342 @return: the new Exec result, based on the previous result
346 # API must be kept, thus we ignore the unused-argument and
347 # could-be-a-function warnings
348 # pylint: disable=W0613,R0201
351 def _ExpandAndLockInstance(self):
352 """Helper function to expand and lock an instance.
354 Many LUs that work on an instance take its name in self.op.instance_name
355 and need to expand it and then declare the expanded name for locking. This
356 function does it, and then updates self.op.instance_name to the expanded
357 name. It also initializes needed_locks as a dict, if this hasn't been done
361 if self.needed_locks is None:
362 self.needed_locks = {}
364 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
365 "_ExpandAndLockInstance called with instance-level locks set"
366 self.op.instance_name = _ExpandInstanceName(self.cfg,
367 self.op.instance_name)
368 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
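# Illustrative usage sketch (not part of the original module): an
# instance-level LU would typically call this helper from ExpandNames and
# defer node locking to DeclareLocks:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE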
370 def _LockInstancesNodes(self, primary_only=False,
371 level=locking.LEVEL_NODE):
372 """Helper function to declare instances' nodes for locking.
374 This function should be called after locking one or more instances to lock
375 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
376 with all primary or secondary nodes for instances already locked and
377 present in self.needed_locks[locking.LEVEL_INSTANCE].
379 It should be called from DeclareLocks, and for safety only works if
380 self.recalculate_locks[locking.LEVEL_NODE] is set.
382 In the future it may grow parameters to just lock some instance's nodes, or
383 to just lock primaries or secondary nodes, if needed.
385 It should be called in DeclareLocks in a way similar to::
387 if level == locking.LEVEL_NODE:
388 self._LockInstancesNodes()
390 @type primary_only: boolean
391 @param primary_only: only lock primary nodes of locked instances
392 @param level: Which lock level to use for locking nodes
395 assert level in self.recalculate_locks, \
396 "_LockInstancesNodes helper function called with no nodes to recalculate"
398 # TODO: check if we've really been called with the instance locks held
400 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
401 # future we might want to have different behaviors depending on the value
402 # of self.recalculate_locks[locking.LEVEL_NODE]
404 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
405 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
406 wanted_nodes.append(instance.primary_node)
408 wanted_nodes.extend(instance.secondary_nodes)
410 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
411 self.needed_locks[level] = wanted_nodes
412 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
413 self.needed_locks[level].extend(wanted_nodes)
415 raise errors.ProgrammerError("Unknown recalculation mode")
417 del self.recalculate_locks[level]
420 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
421 """Simple LU which runs no hooks.
423 This LU is intended as a parent for other LogicalUnits which will
424 run no hooks, in order to reduce duplicate code.
430 def BuildHooksEnv(self):
431 """Empty BuildHooksEnv for NoHooksLu.
433 This just raises an error.
436 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
438 def BuildHooksNodes(self):
439 """Empty BuildHooksNodes for NoHooksLU.
442 raise AssertionError("BuildHooksNodes called for NoHooksLU")
446 """Tasklet base class.
448 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
449 they can mix legacy code with tasklets. Locking needs to be done in the LU,
450 tasklets know nothing about locks.
452 Subclasses must follow these rules:
453 - Implement CheckPrereq
457 def __init__(self, lu):
464 def CheckPrereq(self):
465 """Check prerequisites for this tasklets.
467 This method should check whether the prerequisites for the execution of
468 this tasklet are fulfilled. It can do internode communication, but it
469 should be idempotent - no cluster or system changes are allowed.
471 The method should raise errors.OpPrereqError in case something is not
472 fulfilled. Its return value is ignored.
474 This method should also update all parameters to their canonical form if it
475 hasn't been done before.
480 def Exec(self, feedback_fn):
481 """Execute the tasklet.
483 This method should implement the actual work. It should raise
484 errors.OpExecError for failures that are somewhat dealt with in code, or
488 raise NotImplementedError
492 """Base for query utility classes.
495 #: Attribute holding field definitions
501 def __init__(self, qfilter, fields, use_locking):
502 """Initializes this class.
505 self.use_locking = use_locking
507 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
508 namefield=self.SORT_FIELD)
509 self.requested_data = self.query.RequestedData()
510 self.names = self.query.RequestedNames()
512 # Sort only if no names were requested
513 self.sort_by_name = not self.names
515 self.do_locking = None
518 def _GetNames(self, lu, all_names, lock_level):
519 """Helper function to determine names asked for in the query.
523 names = lu.owned_locks(lock_level)
527 if self.wanted == locking.ALL_SET:
528 assert not self.names
529 # caller didn't specify names, so ordering is not important
530 return utils.NiceSort(names)
532 # caller specified names and we must keep the same order
534 assert not self.do_locking or lu.glm.is_owned(lock_level)
536 missing = set(self.wanted).difference(names)
538 raise errors.OpExecError("Some items were removed before retrieving"
539 " their data: %s" % missing)
541 # Return expanded names
544 def ExpandNames(self, lu):
545 """Expand names for this query.
547 See L{LogicalUnit.ExpandNames}.
550 raise NotImplementedError()
552 def DeclareLocks(self, lu, level):
553 """Declare locks for this query.
555 See L{LogicalUnit.DeclareLocks}.
558 raise NotImplementedError()
560 def _GetQueryData(self, lu):
561 """Collects all data for this query.
563 @return: Query data object
566 raise NotImplementedError()
568 def NewStyleQuery(self, lu):
569 """Collect data and execute query.
572 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
573 sort_by_name=self.sort_by_name)
575 def OldStyleQuery(self, lu):
576 """Collect data and execute query.
579 return self.query.OldStyleQuery(self._GetQueryData(lu),
580 sort_by_name=self.sort_by_name)
584 """Returns a dict declaring all lock levels shared.
587 return dict.fromkeys(locking.LEVELS, 1)
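# Illustrative sketch: read-only LUs typically combine this with
# locking.ALL_SET to take every lock in shared mode, as LUClusterVerifyConfig
# does further below:
#
#   def ExpandNames(self):
#     self.share_locks = _ShareAll()
#     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)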
590 def _MakeLegacyNodeInfo(data):
591 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
593 Converts the data into a single dictionary. This is fine for most use cases,
594 but some require information from more than one volume group or hypervisor.
597 (bootid, (vg_info, ), (hv_info, )) = data
599 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
604 def _AnnotateDiskParams(instance, devs, cfg):
605 """Little helper wrapper to the rpc annotation method.
607 @param instance: The instance object
608 @type devs: List of L{objects.Disk}
609 @param devs: The root devices (not any of its children!)
610 @param cfg: The config object
611 @return: The annotated disk copies
612 @see: L{rpc.AnnotateDiskParams}
615 return rpc.AnnotateDiskParams(instance.disk_template, devs,
616 cfg.GetInstanceDiskParams(instance))
619 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
621 """Checks if node groups for locked instances are still correct.
623 @type cfg: L{config.ConfigWriter}
624 @param cfg: Cluster configuration
625 @type instances: dict; string as key, L{objects.Instance} as value
626 @param instances: Dictionary, instance name as key, instance object as value
627 @type owned_groups: iterable of string
628 @param owned_groups: List of owned groups
629 @type owned_nodes: iterable of string
630 @param owned_nodes: List of owned nodes
631 @type cur_group_uuid: string or None
632 @param cur_group_uuid: Optional group UUID to check against instance's groups
635 for (name, inst) in instances.items():
636 assert owned_nodes.issuperset(inst.all_nodes), \
637 "Instance %s's nodes changed while we kept the lock" % name
639 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
641 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
642 "Instance %s has no node in group %s" % (name, cur_group_uuid)
645 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
646 """Checks if the owned node groups are still correct for an instance.
648 @type cfg: L{config.ConfigWriter}
649 @param cfg: The cluster configuration
650 @type instance_name: string
651 @param instance_name: Instance name
652 @type owned_groups: set or frozenset
653 @param owned_groups: List of currently owned node groups
656 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
658 if not owned_groups.issuperset(inst_groups):
659 raise errors.OpPrereqError("Instance %s's node groups changed since"
660 " locks were acquired, current groups are"
661 " are '%s', owning groups '%s'; retry the"
664 utils.CommaJoin(inst_groups),
665 utils.CommaJoin(owned_groups)),
671 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
672 """Checks if the instances in a node group are still correct.
674 @type cfg: L{config.ConfigWriter}
675 @param cfg: The cluster configuration
676 @type group_uuid: string
677 @param group_uuid: Node group UUID
678 @type owned_instances: set or frozenset
679 @param owned_instances: List of currently owned instances
682 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
683 if owned_instances != wanted_instances:
684 raise errors.OpPrereqError("Instances in node group '%s' changed since"
685 " locks were acquired, wanted '%s', have '%s';"
686 " retry the operation" %
688 utils.CommaJoin(wanted_instances),
689 utils.CommaJoin(owned_instances)),
692 return wanted_instances
695 def _SupportsOob(cfg, node):
696 """Tells if node supports OOB.
698 @type cfg: L{config.ConfigWriter}
699 @param cfg: The cluster configuration
700 @type node: L{objects.Node}
701 @param node: The node
702 @return: The OOB script if supported or an empty string otherwise
705 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
708 def _GetWantedNodes(lu, nodes):
709 """Returns list of checked and expanded node names.
711 @type lu: L{LogicalUnit}
712 @param lu: the logical unit on whose behalf we execute
714 @param nodes: list of node names or None for all nodes
716 @return: the list of nodes, sorted
717 @raise errors.ProgrammerError: if the nodes parameter is wrong type
721 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
723 return utils.NiceSort(lu.cfg.GetNodeList())
726 def _GetWantedInstances(lu, instances):
727 """Returns list of checked and expanded instance names.
729 @type lu: L{LogicalUnit}
730 @param lu: the logical unit on whose behalf we execute
731 @type instances: list
732 @param instances: list of instance names or None for all instances
734 @return: the list of instances, sorted
735 @raise errors.OpPrereqError: if the instances parameter is wrong type
736 @raise errors.OpPrereqError: if any of the passed instances is not found
740 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
742 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
746 def _GetUpdatedParams(old_params, update_dict,
747 use_default=True, use_none=False):
748 """Return the new version of a parameter dictionary.
750 @type old_params: dict
751 @param old_params: old parameters
752 @type update_dict: dict
753 @param update_dict: dict containing new parameter values, or
754 constants.VALUE_DEFAULT to reset the parameter to its default
756 @type use_default: boolean
757 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
758 values as 'to be deleted' values
759 @type use_none: boolean
760 @param use_none: whether to recognise C{None} values as 'to be
763 @return: the new parameter dictionary
766 params_copy = copy.deepcopy(old_params)
767 for key, val in update_dict.iteritems():
768 if ((use_default and val == constants.VALUE_DEFAULT) or
769 (use_none and val is None)):
775 params_copy[key] = val
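# Worked example (illustrative, made-up parameter names and values):
#
#   _GetUpdatedParams({"acpi": True, "pae": True},
#                     {"acpi": constants.VALUE_DEFAULT, "kernel_args": "ro"})
#   -> {"pae": True, "kernel_args": "ro"}
#
# "acpi" is removed because VALUE_DEFAULT marks it as 'to be deleted' (the
# higher-level default applies again), while "kernel_args" is simply added.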
779 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
780 """Return the new version of a instance policy.
782 @param group_policy: whether this policy applies to a group and thus
783 we should support removal of policy entries
786 use_none = use_default = group_policy
787 ipolicy = copy.deepcopy(old_ipolicy)
788 for key, value in new_ipolicy.items():
789 if key not in constants.IPOLICY_ALL_KEYS:
790 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
792 if key in constants.IPOLICY_ISPECS:
793 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
794 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
796 use_default=use_default)
798 if (not value or value == [constants.VALUE_DEFAULT] or
799 value == constants.VALUE_DEFAULT):
803 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
804 " on the cluster'" % key,
807 if key in constants.IPOLICY_PARAMETERS:
808 # FIXME: we assume all such values are float
810 ipolicy[key] = float(value)
811 except (TypeError, ValueError), err:
812 raise errors.OpPrereqError("Invalid value for attribute"
813 " '%s': '%s', error: %s" %
814 (key, value, err), errors.ECODE_INVAL)
816 # FIXME: we assume all others are lists; this should be redone
818 ipolicy[key] = list(value)
820 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
821 except errors.ConfigurationError, err:
822 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
827 def _UpdateAndVerifySubDict(base, updates, type_check):
828 """Updates and verifies a dict with sub dicts of the same type.
830 @param base: The dict with the old data
831 @param updates: The dict with the new data
832 @param type_check: Dict suitable to ForceDictType to verify correct types
833 @return: A new dict with updated and verified values
837 new = _GetUpdatedParams(old, value)
838 utils.ForceDictType(new, type_check)
841 ret = copy.deepcopy(base)
842 ret.update(dict((key, fn(base.get(key, {}), value))
843 for key, value in updates.items()))
847 def _MergeAndVerifyHvState(op_input, obj_input):
848 """Combines the hv state from an opcode with the one of the object
850 @param op_input: The input dict from the opcode
851 @param obj_input: The input dict from the objects
852 @return: The verified and updated dict
856 invalid_hvs = set(op_input) - constants.HYPER_TYPES
858 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
859 " %s" % utils.CommaJoin(invalid_hvs),
861 if obj_input is None:
863 type_check = constants.HVSTS_PARAMETER_TYPES
864 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
869 def _MergeAndVerifyDiskState(op_input, obj_input):
870 """Combines the disk state from an opcode with the one of the object
872 @param op_input: The input dict from the opcode
873 @param obj_input: The input dict from the objects
874 @return: The verified and updated dict
877 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
879 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
880 utils.CommaJoin(invalid_dst),
882 type_check = constants.DSS_PARAMETER_TYPES
883 if obj_input is None:
885 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
887 for key, value in op_input.items())
892 def _ReleaseLocks(lu, level, names=None, keep=None):
893 """Releases locks owned by an LU.
895 @type lu: L{LogicalUnit}
896 @param level: Lock level
897 @type names: list or None
898 @param names: Names of locks to release
899 @type keep: list or None
900 @param keep: Names of locks to retain
903 assert not (keep is not None and names is not None), \
904 "Only one of the 'names' and the 'keep' parameters can be given"
906 if names is not None:
907 should_release = names.__contains__
909 should_release = lambda name: name not in keep
911 should_release = None
913 owned = lu.owned_locks(level)
915 # Not owning any lock at this level, do nothing
922 # Determine which locks to release
924 if should_release(name):
929 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
931 # Release just some locks
932 lu.glm.release(level, names=release)
934 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
937 lu.glm.release(level)
939 assert not lu.glm.is_owned(level), "No locks should be owned"
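# Illustrative usage sketch (an assumption): callers typically narrow their
# node locks down to the nodes that still matter once those are known:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.op.target_node])   # drop every other node lock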
942 def _MapInstanceDisksToNodes(instances):
943 """Creates a map from (node, volume) to instance name.
945 @type instances: list of L{objects.Instance}
946 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
949 return dict(((node, vol), inst.name)
950 for inst in instances
951 for (node, vols) in inst.MapLVsByNode().items()
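# Illustrative shape of the resulting mapping (made-up names):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}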
955 def _RunPostHook(lu, node_name):
956 """Runs the post-hook for an opcode on a single node.
959 hm = lu.proc.BuildHooksManager(lu)
961 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
963 # pylint: disable=W0702
964 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
967 def _CheckOutputFields(static, dynamic, selected):
968 """Checks whether all selected fields are valid.
970 @type static: L{utils.FieldSet}
971 @param static: static fields set
972 @type dynamic: L{utils.FieldSet}
973 @param dynamic: dynamic fields set
980 delta = f.NonMatching(selected)
982 raise errors.OpPrereqError("Unknown output fields selected: %s"
983 % ",".join(delta), errors.ECODE_INVAL)
986 def _CheckGlobalHvParams(params):
987 """Validates that given hypervisor params are not global ones.
989 This will ensure that instances don't get customised versions of
993 used_globals = constants.HVC_GLOBALS.intersection(params)
995 msg = ("The following hypervisor parameters are global and cannot"
996 " be customized at instance level, please modify them at"
997 " cluster level: %s" % utils.CommaJoin(used_globals))
998 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1001 def _CheckNodeOnline(lu, node, msg=None):
1002 """Ensure that a given node is online.
1004 @param lu: the LU on behalf of which we make the check
1005 @param node: the node to check
1006 @param msg: if passed, should be a message to replace the default one
1007 @raise errors.OpPrereqError: if the node is offline
1011 msg = "Can't use offline node"
1012 if lu.cfg.GetNodeInfo(node).offline:
1013 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1016 def _CheckNodeNotDrained(lu, node):
1017 """Ensure that a given node is not drained.
1019 @param lu: the LU on behalf of which we make the check
1020 @param node: the node to check
1021 @raise errors.OpPrereqError: if the node is drained
1024 if lu.cfg.GetNodeInfo(node).drained:
1025 raise errors.OpPrereqError("Can't use drained node %s" % node,
1029 def _CheckNodeVmCapable(lu, node):
1030 """Ensure that a given node is vm capable.
1032 @param lu: the LU on behalf of which we make the check
1033 @param node: the node to check
1034 @raise errors.OpPrereqError: if the node is not vm capable
1037 if not lu.cfg.GetNodeInfo(node).vm_capable:
1038 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1042 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1043 """Ensure that a node supports a given OS.
1045 @param lu: the LU on behalf of which we make the check
1046 @param node: the node to check
1047 @param os_name: the OS to query about
1048 @param force_variant: whether to ignore variant errors
1049 @raise errors.OpPrereqError: if the node does not support the OS
1052 result = lu.rpc.call_os_get(node, os_name)
1053 result.Raise("OS '%s' not in supported OS list for node %s" %
1055 prereq=True, ecode=errors.ECODE_INVAL)
1056 if not force_variant:
1057 _CheckOSVariant(result.payload, os_name)
1060 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1061 """Ensure that a node has the given secondary ip.
1063 @type lu: L{LogicalUnit}
1064 @param lu: the LU on behalf of which we make the check
1066 @param node: the node to check
1067 @type secondary_ip: string
1068 @param secondary_ip: the ip to check
1069 @type prereq: boolean
1070 @param prereq: whether to throw a prerequisite or an execute error
1071 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1072 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1075 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1076 result.Raise("Failure checking secondary ip on node %s" % node,
1077 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1078 if not result.payload:
1079 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1080 " please fix and re-run this command" % secondary_ip)
1082 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1084 raise errors.OpExecError(msg)
1087 def _GetClusterDomainSecret():
1088 """Reads the cluster domain secret.
1091 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1095 def _CheckInstanceState(lu, instance, req_states, msg=None):
1096 """Ensure that an instance is in one of the required states.
1098 @param lu: the LU on behalf of which we make the check
1099 @param instance: the instance to check
1100 @param msg: if passed, should be a message to replace the default one
1101 @raise errors.OpPrereqError: if the instance is not in the required state
1105 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1106 if instance.admin_state not in req_states:
1107 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1108 (instance.name, instance.admin_state, msg),
1111 if constants.ADMINST_UP not in req_states:
1112 pnode = instance.primary_node
1113 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1114 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1115 prereq=True, ecode=errors.ECODE_ENVIRON)
1117 if instance.name in ins_l.payload:
1118 raise errors.OpPrereqError("Instance %s is running, %s" %
1119 (instance.name, msg), errors.ECODE_STATE)
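# Illustrative usage sketch (an assumption): LUs that must only act on stopped
# instances typically call
#
#   _CheckInstanceState(self, instance, INSTANCE_DOWN,
#                       msg="cannot reinstall")
#
# which raises OpPrereqError unless instance.admin_state is ADMINST_DOWN.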
1122 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1123 """Computes if value is in the desired range.
1125 @param name: name of the parameter for which we perform the check
1126 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1128 @param ipolicy: dictionary containing min, max and std values
1129 @param value: actual value that we want to use
1130 @return: None if the value meets the criteria, otherwise an error message
1134 if value in [None, constants.VALUE_AUTO]:
1136 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1137 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1138 if value > max_v or min_v > value:
1140 fqn = "%s/%s" % (name, qualifier)
1143 return ("%s value %s is not in range [%s, %s]" %
1144 (fqn, value, min_v, max_v))
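# Worked example (illustrative, made-up policy): with
# ipolicy[constants.ISPECS_MIN]["memory-size"] == 128 and
# ipolicy[constants.ISPECS_MAX]["memory-size"] == 4096, a value of 8192
# produces a message roughly like
# "memory-size value 8192 is not in range [128, 4096]", while 512 yields None.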
1148 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1149 nic_count, disk_sizes, spindle_use,
1150 _compute_fn=_ComputeMinMaxSpec):
1151 """Verifies ipolicy against provided specs.
1154 @param ipolicy: The ipolicy
1156 @param mem_size: The memory size
1157 @type cpu_count: int
1158 @param cpu_count: Used cpu cores
1159 @type disk_count: int
1160 @param disk_count: Number of disks used
1161 @type nic_count: int
1162 @param nic_count: Number of nics used
1163 @type disk_sizes: list of ints
1164 @param disk_sizes: Disk sizes of used disks (len must match C{disk_count})
1165 @type spindle_use: int
1166 @param spindle_use: The number of spindles this instance uses
1167 @param _compute_fn: The compute function (unittest only)
1168 @return: A list of violations, or an empty list if no violations are found
1171 assert disk_count == len(disk_sizes)
1174 (constants.ISPEC_MEM_SIZE, "", mem_size),
1175 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1176 (constants.ISPEC_DISK_COUNT, "", disk_count),
1177 (constants.ISPEC_NIC_COUNT, "", nic_count),
1178 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1179 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1180 for idx, d in enumerate(disk_sizes)]
1183 (_compute_fn(name, qualifier, ipolicy, value)
1184 for (name, qualifier, value) in test_settings))
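# Illustrative call (made-up numbers): checking a 2-vCPU, 512 MB instance with
# one NIC and two 1 GB disks on one spindle:
#
#   _ComputeIPolicySpecViolation(ipolicy, 512, 2, 2, 1, [1024, 1024], 1)
#
# returns a (possibly empty) list of human-readable violation messages.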
1187 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1188 _compute_fn=_ComputeIPolicySpecViolation):
1189 """Compute if instance meets the specs of ipolicy.
1192 @param ipolicy: The ipolicy to verify against
1193 @type instance: L{objects.Instance}
1194 @param instance: The instance to verify
1195 @param _compute_fn: The function to verify ipolicy (unittest only)
1196 @see: L{_ComputeIPolicySpecViolation}
1199 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1200 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1201 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1202 disk_count = len(instance.disks)
1203 disk_sizes = [disk.size for disk in instance.disks]
1204 nic_count = len(instance.nics)
1206 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1207 disk_sizes, spindle_use)
1210 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1211 _compute_fn=_ComputeIPolicySpecViolation):
1212 """Compute if instance specs meets the specs of ipolicy.
1215 @param ipolicy: The ipolicy to verify against
1216 @type instance_spec: dict
1217 @param instance_spec: The instance spec to verify
1218 @param _compute_fn: The function to verify ipolicy (unittest only)
1219 @see: L{_ComputeIPolicySpecViolation}
1222 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1223 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1224 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1225 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1226 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1227 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1229 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1230 disk_sizes, spindle_use)
1233 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235 _compute_fn=_ComputeIPolicyInstanceViolation):
1236 """Compute if instance meets the specs of the new target group.
1238 @param ipolicy: The ipolicy to verify
1239 @param instance: The instance object to verify
1240 @param current_group: The current group of the instance
1241 @param target_group: The new group of the instance
1242 @param _compute_fn: The function to verify ipolicy (unittest only)
1243 @see: L{_ComputeIPolicySpecViolation}
1246 if current_group == target_group:
1249 return _compute_fn(ipolicy, instance)
1252 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1253 _compute_fn=_ComputeIPolicyNodeViolation):
1254 """Checks that the target node is correct in terms of instance policy.
1256 @param ipolicy: The ipolicy to verify
1257 @param instance: The instance object to verify
1258 @param node: The new node to relocate
1259 @param ignore: Ignore violations of the ipolicy
1260 @param _compute_fn: The function to verify ipolicy (unittest only)
1261 @see: L{_ComputeIPolicySpecViolation}
1264 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1265 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1268 msg = ("Instance does not meet target node group's (%s) instance"
1269 " policy: %s") % (node.group, utils.CommaJoin(res))
1273 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1276 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1277 """Computes a set of any instances that would violate the new ipolicy.
1279 @param old_ipolicy: The current (still in-place) ipolicy
1280 @param new_ipolicy: The new (to become) ipolicy
1281 @param instances: List of instances to verify
1282 @return: A list of instances which violate the new ipolicy but
1286 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1287 _ComputeViolatingInstances(old_ipolicy, instances))
1290 def _ExpandItemName(fn, name, kind):
1291 """Expand an item name.
1293 @param fn: the function to use for expansion
1294 @param name: requested item name
1295 @param kind: text description ('Node' or 'Instance')
1296 @return: the resolved (full) name
1297 @raise errors.OpPrereqError: if the item is not found
1300 full_name = fn(name)
1301 if full_name is None:
1302 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1307 def _ExpandNodeName(cfg, name):
1308 """Wrapper over L{_ExpandItemName} for nodes."""
1309 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1312 def _ExpandInstanceName(cfg, name):
1313 """Wrapper over L{_ExpandItemName} for instance."""
1314 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1316 def _BuildNetworkHookEnv(name, network, gateway, network6, gateway6,
1317 network_type, mac_prefix, tags):
1320 env["NETWORK_NAME"] = name
1322 env["NETWORK_SUBNET"] = network
1324 env["NETWORK_GATEWAY"] = gateway
1326 env["NETWORK_SUBNET6"] = network6
1328 env["NETWORK_GATEWAY6"] = gateway6
1330 env["NETWORK_MAC_PREFIX"] = mac_prefix
1332 env["NETWORK_TYPE"] = network_type
1334 env["NETWORK_TAGS"] = " ".join(tags)
1339 def _BuildNetworkHookEnvByObject(lu, network):
1341 "name": network.name,
1342 "network": network.network,
1343 "gateway": network.gateway,
1344 "network6": network.network6,
1345 "gateway6": network.gateway6,
1346 "network_type": network.network_type,
1347 "mac_prefix": network.mac_prefix,
1348 "tags" : network.tags,
1350 return _BuildNetworkHookEnv(**args)
1353 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1354 minmem, maxmem, vcpus, nics, disk_template, disks,
1355 bep, hvp, hypervisor_name, tags):
1356 """Builds instance related env variables for hooks
1358 This builds the hook environment from individual variables.
1361 @param name: the name of the instance
1362 @type primary_node: string
1363 @param primary_node: the name of the instance's primary node
1364 @type secondary_nodes: list
1365 @param secondary_nodes: list of secondary nodes as strings
1366 @type os_type: string
1367 @param os_type: the name of the instance's OS
1368 @type status: string
1369 @param status: the desired status of the instance
1370 @type minmem: string
1371 @param minmem: the minimum memory size of the instance
1372 @type maxmem: string
1373 @param maxmem: the maximum memory size of the instance
1375 @param vcpus: the count of VCPUs the instance has
1377 @param nics: list of tuples (ip, mac, mode, link, network, netinfo) representing
1378 the NICs the instance has
1379 @type disk_template: string
1380 @param disk_template: the disk template of the instance
1382 @param disks: the list of (size, mode) pairs
1384 @param bep: the backend parameters for the instance
1386 @param hvp: the hypervisor parameters for the instance
1387 @type hypervisor_name: string
1388 @param hypervisor_name: the hypervisor for the instance
1390 @param tags: list of instance tags as strings
1392 @return: the hook environment for this instance
1397 "INSTANCE_NAME": name,
1398 "INSTANCE_PRIMARY": primary_node,
1399 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1400 "INSTANCE_OS_TYPE": os_type,
1401 "INSTANCE_STATUS": status,
1402 "INSTANCE_MINMEM": minmem,
1403 "INSTANCE_MAXMEM": maxmem,
1404 # TODO(2.7) remove deprecated "memory" value
1405 "INSTANCE_MEMORY": maxmem,
1406 "INSTANCE_VCPUS": vcpus,
1407 "INSTANCE_DISK_TEMPLATE": disk_template,
1408 "INSTANCE_HYPERVISOR": hypervisor_name,
1411 nic_count = len(nics)
1412 for idx, (ip, mac, mode, link, network, netinfo) in enumerate(nics):
1415 env["INSTANCE_NIC%d_IP" % idx] = ip
1416 env["INSTANCE_NIC%d_MAC" % idx] = mac
1417 env["INSTANCE_NIC%d_MODE" % idx] = mode
1418 env["INSTANCE_NIC%d_LINK" % idx] = link
1420 env["INSTANCE_NIC%d_NETWORK" % idx] = network
1422 nobj = objects.Network.FromDict(netinfo)
1424 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1426 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1428 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1430 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1432 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1433 if nobj.network_type:
1434 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1436 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1437 if mode == constants.NIC_MODE_BRIDGED:
1438 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1442 env["INSTANCE_NIC_COUNT"] = nic_count
1445 disk_count = len(disks)
1446 for idx, (size, mode) in enumerate(disks):
1447 env["INSTANCE_DISK%d_SIZE" % idx] = size
1448 env["INSTANCE_DISK%d_MODE" % idx] = mode
1452 env["INSTANCE_DISK_COUNT"] = disk_count
1457 env["INSTANCE_TAGS"] = " ".join(tags)
1459 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1460 for key, value in source.items():
1461 env["INSTANCE_%s_%s" % (kind, key)] = value
1465 def _NICToTuple(lu, nic):
1466 """Build a tupple of nic information.
1468 @type lu: L{LogicalUnit}
1469 @param lu: the logical unit on whose behalf we execute
1470 @type nic: L{objects.NIC}
1471 @param nic: nic to convert to hooks tuple
1474 cluster = lu.cfg.GetClusterInfo()
1477 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1478 mode = filled_params[constants.NIC_MODE]
1479 link = filled_params[constants.NIC_LINK]
1480 network = nic.network
1483 net_uuid = lu.cfg.LookupNetwork(network)
1485 nobj = lu.cfg.GetNetwork(net_uuid)
1486 netinfo = objects.Network.ToDict(nobj)
1487 return (ip, mac, mode, link, network, netinfo)
1489 def _NICListToTuple(lu, nics):
1490 """Build a list of nic information tuples.
1492 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1493 value in LUInstanceQueryData.
1495 @type lu: L{LogicalUnit}
1496 @param lu: the logical unit on whose behalf we execute
1497 @type nics: list of L{objects.NIC}
1498 @param nics: list of nics to convert to hooks tuples
1502 cluster = lu.cfg.GetClusterInfo()
1504 hooks_nics.append(_NICToTuple(lu, nic))
1507 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1508 """Builds instance related env variables for hooks from an object.
1510 @type lu: L{LogicalUnit}
1511 @param lu: the logical unit on whose behalf we execute
1512 @type instance: L{objects.Instance}
1513 @param instance: the instance for which we should build the
1515 @type override: dict
1516 @param override: dictionary with key/values that will override
1519 @return: the hook environment dictionary
1522 cluster = lu.cfg.GetClusterInfo()
1523 bep = cluster.FillBE(instance)
1524 hvp = cluster.FillHV(instance)
1526 "name": instance.name,
1527 "primary_node": instance.primary_node,
1528 "secondary_nodes": instance.secondary_nodes,
1529 "os_type": instance.os,
1530 "status": instance.admin_state,
1531 "maxmem": bep[constants.BE_MAXMEM],
1532 "minmem": bep[constants.BE_MINMEM],
1533 "vcpus": bep[constants.BE_VCPUS],
1534 "nics": _NICListToTuple(lu, instance.nics),
1535 "disk_template": instance.disk_template,
1536 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1539 "hypervisor_name": instance.hypervisor,
1540 "tags": instance.tags,
1543 args.update(override)
1544 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1547 def _AdjustCandidatePool(lu, exceptions):
1548 """Adjust the candidate pool after node operations.
1551 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1553 lu.LogInfo("Promoted nodes to master candidate role: %s",
1554 utils.CommaJoin(node.name for node in mod_list))
1555 for name in mod_list:
1556 lu.context.ReaddNode(name)
1557 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1559 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1563 def _DecideSelfPromotion(lu, exceptions=None):
1564 """Decide whether I should promote myself as a master candidate.
1567 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1568 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1569 # the new node will increase mc_max by one, so:
1570 mc_should = min(mc_should + 1, cp_size)
1571 return mc_now < mc_should
1574 def _CalculateGroupIPolicy(cluster, group):
1575 """Calculate instance policy for group.
1578 return cluster.SimpleFillIPolicy(group.ipolicy)
1581 def _ComputeViolatingInstances(ipolicy, instances):
1582 """Computes a set of instances who violates given ipolicy.
1584 @param ipolicy: The ipolicy to verify
1585 @type instances: list of L{objects.Instance}
1586 @param instances: List of instances to verify
1587 @return: A frozenset of instance names violating the ipolicy
1590 return frozenset([inst.name for inst in instances
1591 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1594 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1595 """Check that the brigdes needed by a list of nics exist.
1598 cluster = lu.cfg.GetClusterInfo()
1599 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1600 brlist = [params[constants.NIC_LINK] for params in paramslist
1601 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1603 result = lu.rpc.call_bridges_exist(target_node, brlist)
1604 result.Raise("Error checking bridges on destination node '%s'" %
1605 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1608 def _CheckInstanceBridgesExist(lu, instance, node=None):
1609 """Check that the brigdes needed by an instance exist.
1613 node = instance.primary_node
1614 _CheckNicsBridgesExist(lu, instance.nics, node)
1617 def _CheckOSVariant(os_obj, name):
1618 """Check whether an OS name conforms to the os variants specification.
1620 @type os_obj: L{objects.OS}
1621 @param os_obj: OS object to check
1623 @param name: OS name passed by the user, to check for validity
1626 variant = objects.OS.GetVariant(name)
1627 if not os_obj.supported_variants:
1629 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1630 " passed)" % (os_obj.name, variant),
1634 raise errors.OpPrereqError("OS name must include a variant",
1637 if variant not in os_obj.supported_variants:
1638 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1641 def _GetNodeInstancesInner(cfg, fn):
1642 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1645 def _GetNodeInstances(cfg, node_name):
1646 """Returns a list of all primary and secondary instances on a node.
1650 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1653 def _GetNodePrimaryInstances(cfg, node_name):
1654 """Returns primary instances on a node.
1657 return _GetNodeInstancesInner(cfg,
1658 lambda inst: node_name == inst.primary_node)
1661 def _GetNodeSecondaryInstances(cfg, node_name):
1662 """Returns secondary instances on a node.
1665 return _GetNodeInstancesInner(cfg,
1666 lambda inst: node_name in inst.secondary_nodes)
1669 def _GetStorageTypeArgs(cfg, storage_type):
1670 """Returns the arguments for a storage type.
1673 # Special case for file storage
1674 if storage_type == constants.ST_FILE:
1675 # storage.FileStorage wants a list of storage directories
1676 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1681 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1684 for dev in instance.disks:
1685 cfg.SetDiskID(dev, node_name)
1687 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1689 result.Raise("Failed to get disk status from node %s" % node_name,
1690 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1692 for idx, bdev_status in enumerate(result.payload):
1693 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1699 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1700 """Check the sanity of iallocator and node arguments and use the
1701 cluster-wide iallocator if appropriate.
1703 Check that at most one of (iallocator, node) is specified. If none is
1704 specified, then the LU's opcode's iallocator slot is filled with the
1705 cluster-wide default iallocator.
1707 @type iallocator_slot: string
1708 @param iallocator_slot: the name of the opcode iallocator slot
1709 @type node_slot: string
1710 @param node_slot: the name of the opcode target node slot
1713 node = getattr(lu.op, node_slot, None)
1714 iallocator = getattr(lu.op, iallocator_slot, None)
1716 if node is not None and iallocator is not None:
1717 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1719 elif node is None and iallocator is None:
1720 default_iallocator = lu.cfg.GetDefaultIAllocator()
1721 if default_iallocator:
1722 setattr(lu.op, iallocator_slot, default_iallocator)
1724 raise errors.OpPrereqError("No iallocator or node given and no"
1725 " cluster-wide default iallocator found;"
1726 " please specify either an iallocator or a"
1727 " node, or set a cluster-wide default"
1731 def _GetDefaultIAllocator(cfg, iallocator):
1732 """Decides on which iallocator to use.
1734 @type cfg: L{config.ConfigWriter}
1735 @param cfg: Cluster configuration object
1736 @type iallocator: string or None
1737 @param iallocator: Iallocator specified in opcode
1739 @return: Iallocator name
1743 # Use default iallocator
1744 iallocator = cfg.GetDefaultIAllocator()
1747 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1748 " opcode nor as a cluster-wide default",
1754 class LUClusterPostInit(LogicalUnit):
1755 """Logical unit for running hooks after cluster initialization.
1758 HPATH = "cluster-init"
1759 HTYPE = constants.HTYPE_CLUSTER
1761 def BuildHooksEnv(self):
1766 "OP_TARGET": self.cfg.GetClusterName(),
1769 def BuildHooksNodes(self):
1770 """Build hooks nodes.
1773 return ([], [self.cfg.GetMasterNode()])
1775 def Exec(self, feedback_fn):
1782 class LUClusterDestroy(LogicalUnit):
1783 """Logical unit for destroying the cluster.
1786 HPATH = "cluster-destroy"
1787 HTYPE = constants.HTYPE_CLUSTER
1789 def BuildHooksEnv(self):
1794 "OP_TARGET": self.cfg.GetClusterName(),
1797 def BuildHooksNodes(self):
1798 """Build hooks nodes.
1803 def CheckPrereq(self):
1804 """Check prerequisites.
1806 This checks whether the cluster is empty.
1808 Any errors are signaled by raising errors.OpPrereqError.
1811 master = self.cfg.GetMasterNode()
1813 nodelist = self.cfg.GetNodeList()
1814 if len(nodelist) != 1 or nodelist[0] != master:
1815 raise errors.OpPrereqError("There are still %d node(s) in"
1816 " this cluster." % (len(nodelist) - 1),
1818 instancelist = self.cfg.GetInstanceList()
1820 raise errors.OpPrereqError("There are still %d instance(s) in"
1821 " this cluster." % len(instancelist),
1824 def Exec(self, feedback_fn):
1825 """Destroys the cluster.
1828 master_params = self.cfg.GetMasterNetworkParameters()
1830 # Run post hooks on master node before it's removed
1831 _RunPostHook(self, master_params.name)
1833 ems = self.cfg.GetUseExternalMipScript()
1834 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1837 self.LogWarning("Error disabling the master IP address: %s",
1840 return master_params.name
1843 def _VerifyCertificate(filename):
1844 """Verifies a certificate for L{LUClusterVerifyConfig}.
1846 @type filename: string
1847 @param filename: Path to PEM file
1851 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1852 utils.ReadFile(filename))
1853 except Exception, err: # pylint: disable=W0703
1854 return (LUClusterVerifyConfig.ETYPE_ERROR,
1855 "Failed to load X509 certificate %s: %s" % (filename, err))
1858 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1859 constants.SSL_CERT_EXPIRATION_ERROR)
1862 fnamemsg = "While verifying %s: %s" % (filename, msg)
1867 return (None, fnamemsg)
1868 elif errcode == utils.CERT_WARNING:
1869 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1870 elif errcode == utils.CERT_ERROR:
1871 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1873 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1876 def _GetAllHypervisorParameters(cluster, instances):
1877 """Compute the set of all hypervisor parameters.
1879 @type cluster: L{objects.Cluster}
1880 @param cluster: the cluster object
1881 @param instances: list of L{objects.Instance}
1882 @param instances: additional instances from which to obtain parameters
1883 @rtype: list of (origin, hypervisor, parameters)
1884 @return: a list with all parameters found, indicating the hypervisor they
1885 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1890 for hv_name in cluster.enabled_hypervisors:
1891 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1893 for os_name, os_hvp in cluster.os_hvp.items():
1894 for hv_name, hv_params in os_hvp.items():
1896 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1897 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1899 # TODO: collapse identical parameter values in a single one
1900 for instance in instances:
1901 if instance.hvparams:
1902 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1903 cluster.FillHV(instance)))
1908 class _VerifyErrors(object):
1909 """Mix-in for cluster/group verify LUs.
1911 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1912 self.op and self._feedback_fn to be available.)
1916 ETYPE_FIELD = "code"
1917 ETYPE_ERROR = "ERROR"
1918 ETYPE_WARNING = "WARNING"
1920 def _Error(self, ecode, item, msg, *args, **kwargs):
1921 """Format an error message.
1923 Based on the opcode's error_codes parameter, either format a
1924 parseable error code, or a simpler error string.
1926 This must be called only from Exec and functions called from Exec.
1929 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1930 itype, etxt, _ = ecode
1931 # first complete the msg
1934 # then format the whole message
1935 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1936 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1942 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1943 # and finally report it via the feedback_fn
1944 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1946 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1947 """Log an error message if the passed condition is True.
1951 or self.op.debug_simulate_errors) # pylint: disable=E1101
1953 # If the error code is in the list of ignored errors, demote the error to a
1955 (_, etxt, _) = ecode
1956 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1957 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1960 self._Error(ecode, *args, **kwargs)
1962 # do not mark the operation as failed for WARN cases only
1963 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1964 self.bad = self.bad or cond
1967 class LUClusterVerify(NoHooksLU):
1968 """Submits all jobs necessary to verify the cluster.
1973 def ExpandNames(self):
1974 self.needed_locks = {}
1976 def Exec(self, feedback_fn):
1979 if self.op.group_name:
1980 groups = [self.op.group_name]
1981 depends_fn = lambda: None
1983 groups = self.cfg.GetNodeGroupList()
1985 # Verify global configuration
1987 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1990 # Always depend on global verification
1991 depends_fn = lambda: [(-len(jobs), [])]
1993 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1994 ignore_errors=self.op.ignore_errors,
1995 depends=depends_fn())]
1996 for group in groups)
1998 # Fix up all parameters
1999 for op in itertools.chain(*jobs): # pylint: disable=W0142
2000 op.debug_simulate_errors = self.op.debug_simulate_errors
2001 op.verbose = self.op.verbose
2002 op.error_codes = self.op.error_codes
2004 op.skip_checks = self.op.skip_checks
2005 except AttributeError:
2006 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2008 return ResultWithJobs(jobs)
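# Editor's sketch of the submitted jobs (group names hypothetical): when no
# single group is requested the result is roughly
#
#   jobs = [[OpClusterVerifyConfig(...)],
#           [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])])],
#           [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])])]]
#
# Since depends_fn() is evaluated while the job list grows, every per-group
# job carries a relative dependency that points back at the global
# configuration check submitted first.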
2011 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2012 """Verifies the cluster config.
2017 def _VerifyHVP(self, hvp_data):
2018 """Verifies locally the syntax of the hypervisor parameters.
2021 for item, hv_name, hv_params in hvp_data:
2022 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2025 hv_class = hypervisor.GetHypervisor(hv_name)
2026 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2027 hv_class.CheckParameterSyntax(hv_params)
2028 except errors.GenericError, err:
2029 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2031 def ExpandNames(self):
2032 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2033 self.share_locks = _ShareAll()
2035 def CheckPrereq(self):
2036 """Check prerequisites.
2039 # Retrieve all information
2040 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2041 self.all_node_info = self.cfg.GetAllNodesInfo()
2042 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2044 def Exec(self, feedback_fn):
2045 """Verify integrity of cluster, performing various test on nodes.
2049 self._feedback_fn = feedback_fn
2051 feedback_fn("* Verifying cluster config")
2053 for msg in self.cfg.VerifyConfig():
2054 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2056 feedback_fn("* Verifying cluster certificate files")
2058 for cert_filename in constants.ALL_CERT_FILES:
2059 (errcode, msg) = _VerifyCertificate(cert_filename)
2060 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2062 feedback_fn("* Verifying hypervisor parameters")
2064 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2065 self.all_inst_info.values()))
2067 feedback_fn("* Verifying all nodes belong to an existing group")
2069 # We do this verification here because, should this bogus circumstance
2070 # occur, it would never be caught by VerifyGroup, which only acts on
2071 # nodes/instances reachable from existing node groups.
2073 dangling_nodes = set(node.name for node in self.all_node_info.values()
2074 if node.group not in self.all_group_info)
2076 dangling_instances = {}
2077 no_node_instances = []
2079 for inst in self.all_inst_info.values():
2080 if inst.primary_node in dangling_nodes:
2081 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2082 elif inst.primary_node not in self.all_node_info:
2083 no_node_instances.append(inst.name)
2088 utils.CommaJoin(dangling_instances.get(node.name,
2090 for node in dangling_nodes]
2092 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2094 "the following nodes (and their instances) belong to a non"
2095 " existing group: %s", utils.CommaJoin(pretty_dangling))
2097 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2099 "the following instances have a non-existing primary-node:"
2100 " %s", utils.CommaJoin(no_node_instances))
2105 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2106 """Verifies the status of a node group.
2109 HPATH = "cluster-verify"
2110 HTYPE = constants.HTYPE_CLUSTER
2113 _HOOKS_INDENT_RE = re.compile("^", re.M)
2115 class NodeImage(object):
2116 """A class representing the logical and physical status of a node.
2119 @ivar name: the node name to which this object refers
2120 @ivar volumes: a structure as returned from
2121 L{ganeti.backend.GetVolumeList} (runtime)
2122 @ivar instances: a list of running instances (runtime)
2123 @ivar pinst: list of configured primary instances (config)
2124 @ivar sinst: list of configured secondary instances (config)
2125 @ivar sbp: dictionary of {primary-node: list of instances} for all
2126 instances for which this node is secondary (config)
2127 @ivar mfree: free memory, as reported by hypervisor (runtime)
2128 @ivar dfree: free disk, as reported by the node (runtime)
2129 @ivar offline: the offline status (config)
2130 @type rpc_fail: boolean
2131 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2132 not whether the individual keys were correct) (runtime)
2133 @type lvm_fail: boolean
2134 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2135 @type hyp_fail: boolean
2136 @ivar hyp_fail: whether the RPC call didn't return the instance list
2137 @type ghost: boolean
2138 @ivar ghost: whether this is a known node or not (config)
2139 @type os_fail: boolean
2140 @ivar os_fail: whether the RPC call didn't return valid OS data
2142 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2143 @type vm_capable: boolean
2144 @ivar vm_capable: whether the node can host instances
2147 def __init__(self, offline=False, name=None, vm_capable=True):
2156 self.offline = offline
2157 self.vm_capable = vm_capable
2158 self.rpc_fail = False
2159 self.lvm_fail = False
2160 self.hyp_fail = False
2162 self.os_fail = False
2165 def ExpandNames(self):
2166 # This raises errors.OpPrereqError on its own:
2167 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2169 # Get instances in node group; this is unsafe and needs verification later
2171 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2173 self.needed_locks = {
2174 locking.LEVEL_INSTANCE: inst_names,
2175 locking.LEVEL_NODEGROUP: [self.group_uuid],
2176 locking.LEVEL_NODE: [],
2179 self.share_locks = _ShareAll()
2181 def DeclareLocks(self, level):
2182 if level == locking.LEVEL_NODE:
2183 # Get members of node group; this is unsafe and needs verification later
2184 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2186 all_inst_info = self.cfg.GetAllInstancesInfo()
2188 # In Exec(), we warn about mirrored instances that have primary and
2189 # secondary living in separate node groups. To fully verify that
2190 # volumes for these instances are healthy, we will need to do an
2191 # extra call to their secondaries. We ensure here those nodes will
2193 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2194 # Important: access only the instances whose lock is owned
2195 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2196 nodes.update(all_inst_info[inst].secondary_nodes)
2198 self.needed_locks[locking.LEVEL_NODE] = nodes
2200 def CheckPrereq(self):
2201 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2202 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2204 group_nodes = set(self.group_info.members)
2206 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2209 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2211 unlocked_instances = \
2212 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2215 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2216 utils.CommaJoin(unlocked_nodes),
2219 if unlocked_instances:
2220 raise errors.OpPrereqError("Missing lock for instances: %s" %
2221 utils.CommaJoin(unlocked_instances),
2224 self.all_node_info = self.cfg.GetAllNodesInfo()
2225 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2227 self.my_node_names = utils.NiceSort(group_nodes)
2228 self.my_inst_names = utils.NiceSort(group_instances)
2230 self.my_node_info = dict((name, self.all_node_info[name])
2231 for name in self.my_node_names)
2233 self.my_inst_info = dict((name, self.all_inst_info[name])
2234 for name in self.my_inst_names)
2236 # We detect here the nodes that will need the extra RPC calls for verifying
2237 # split LV volumes; they should be locked.
2238 extra_lv_nodes = set()
2240 for inst in self.my_inst_info.values():
2241 if inst.disk_template in constants.DTS_INT_MIRROR:
2242 for nname in inst.all_nodes:
2243 if self.all_node_info[nname].group != self.group_uuid:
2244 extra_lv_nodes.add(nname)
2246 unlocked_lv_nodes = \
2247 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2249 if unlocked_lv_nodes:
2250 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2251 utils.CommaJoin(unlocked_lv_nodes),
2253 self.extra_lv_nodes = list(extra_lv_nodes)
2255 def _VerifyNode(self, ninfo, nresult):
2256 """Perform some basic validation on data returned from a node.
2258 - check the result data structure is well formed and has all the
2260 - check ganeti version
2262 @type ninfo: L{objects.Node}
2263 @param ninfo: the node to check
2264 @param nresult: the results from the node
2266 @return: whether overall this call was successful (and we can expect
2267 reasonable values in the response)
2271 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2273 # main result, nresult should be a non-empty dict
2274 test = not nresult or not isinstance(nresult, dict)
2275 _ErrorIf(test, constants.CV_ENODERPC, node,
2276 "unable to verify node: no data returned")
2280 # compares ganeti version
2281 local_version = constants.PROTOCOL_VERSION
2282 remote_version = nresult.get("version", None)
2283 test = not (remote_version and
2284 isinstance(remote_version, (list, tuple)) and
2285 len(remote_version) == 2)
2286 _ErrorIf(test, constants.CV_ENODERPC, node,
2287 "connection to node returned invalid data")
2291 test = local_version != remote_version[0]
2292 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2293 "incompatible protocol versions: master %s,"
2294 " node %s", local_version, remote_version[0])
2298 # node seems compatible, we can actually try to look into its results
2300 # full package version
2301 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2302 constants.CV_ENODEVERSION, node,
2303 "software version mismatch: master %s, node %s",
2304 constants.RELEASE_VERSION, remote_version[1],
2305 code=self.ETYPE_WARNING)
2307 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2308 if ninfo.vm_capable and isinstance(hyp_result, dict):
2309 for hv_name, hv_result in hyp_result.iteritems():
2310 test = hv_result is not None
2311 _ErrorIf(test, constants.CV_ENODEHV, node,
2312 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2314 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2315 if ninfo.vm_capable and isinstance(hvp_result, list):
2316 for item, hv_name, hv_result in hvp_result:
2317 _ErrorIf(True, constants.CV_ENODEHV, node,
2318 "hypervisor %s parameter verify failure (source %s): %s",
2319 hv_name, item, hv_result)
2321 test = nresult.get(constants.NV_NODESETUP,
2322 ["Missing NODESETUP results"])
2323 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2328 def _VerifyNodeTime(self, ninfo, nresult,
2329 nvinfo_starttime, nvinfo_endtime):
2330 """Check the node time.
2332 @type ninfo: L{objects.Node}
2333 @param ninfo: the node to check
2334 @param nresult: the remote results for the node
2335 @param nvinfo_starttime: the start time of the RPC call
2336 @param nvinfo_endtime: the end time of the RPC call
2340 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2342 ntime = nresult.get(constants.NV_TIME, None)
2344 ntime_merged = utils.MergeTime(ntime)
2345 except (ValueError, TypeError):
2346 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2349 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2350 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2351 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2352 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2356 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2357 "Node time diverges by at least %s from master node time",
2360 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2361 """Check the node LVM results.
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the remote results for the node
2366 @param vg_name: the configured VG name
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 # checks vg existence and size > 20G
2376 vglist = nresult.get(constants.NV_VGLIST, None)
2378 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2380 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2381 constants.MIN_VG_SIZE)
2382 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2385 pvlist = nresult.get(constants.NV_PVLIST, None)
2386 test = pvlist is None
2387 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2389 # check that ':' is not present in PV names, since it's a
2390 # special character for lvcreate (denotes the range of PEs to
2392 for _, pvname, owner_vg in pvlist:
2393 test = ":" in pvname
2394 _ErrorIf(test, constants.CV_ENODELVM, node,
2395 "Invalid character ':' in PV '%s' of VG '%s'",
2398 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2399 """Check the node bridges.
2401 @type ninfo: L{objects.Node}
2402 @param ninfo: the node to check
2403 @param nresult: the remote results for the node
2404 @param bridges: the expected list of bridges
2411 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2413 missing = nresult.get(constants.NV_BRIDGES, None)
2414 test = not isinstance(missing, list)
2415 _ErrorIf(test, constants.CV_ENODENET, node,
2416 "did not return valid bridge information")
2418 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2419 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2421 def _VerifyNodeUserScripts(self, ninfo, nresult):
2422 """Check the results of user scripts presence and executability on the node
2424 @type ninfo: L{objects.Node}
2425 @param ninfo: the node to check
2426 @param nresult: the remote results for the node
2431 test = constants.NV_USERSCRIPTS not in nresult
2432 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2433 "did not return user scripts information")
2435 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2437 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2438 "user scripts not present or not executable: %s" %
2439 utils.CommaJoin(sorted(broken_scripts)))
2441 def _VerifyNodeNetwork(self, ninfo, nresult):
2442 """Check the node network connectivity results.
2444 @type ninfo: L{objects.Node}
2445 @param ninfo: the node to check
2446 @param nresult: the remote results for the node
2450 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2452 test = constants.NV_NODELIST not in nresult
2453 _ErrorIf(test, constants.CV_ENODESSH, node,
2454 "node hasn't returned node ssh connectivity data")
2456 if nresult[constants.NV_NODELIST]:
2457 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2458 _ErrorIf(True, constants.CV_ENODESSH, node,
2459 "ssh communication with node '%s': %s", a_node, a_msg)
2461 test = constants.NV_NODENETTEST not in nresult
2462 _ErrorIf(test, constants.CV_ENODENET, node,
2463 "node hasn't returned node tcp connectivity data")
2465 if nresult[constants.NV_NODENETTEST]:
2466 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2468 _ErrorIf(True, constants.CV_ENODENET, node,
2469 "tcp communication with node '%s': %s",
2470 anode, nresult[constants.NV_NODENETTEST][anode])
2472 test = constants.NV_MASTERIP not in nresult
2473 _ErrorIf(test, constants.CV_ENODENET, node,
2474 "node hasn't returned node master IP reachability data")
2476 if not nresult[constants.NV_MASTERIP]:
2477 if node == self.master_node:
2478 msg = "the master node cannot reach the master IP (not configured?)"
2480 msg = "cannot reach the master IP"
2481 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2483 def _VerifyInstance(self, instance, instanceconfig, node_image,
2485 """Verify an instance.
2487 This function checks to see if the required block devices are
2488 available on the instance's node.
2491 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2492 node_current = instanceconfig.primary_node
2494 node_vol_should = {}
2495 instanceconfig.MapLVsByNode(node_vol_should)
2497 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2498 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2499 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2501 for node in node_vol_should:
2502 n_img = node_image[node]
2503 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2504 # ignore missing volumes on offline or broken nodes
2506 for volume in node_vol_should[node]:
2507 test = volume not in n_img.volumes
2508 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2509 "volume %s missing on node %s", volume, node)
2511 if instanceconfig.admin_state == constants.ADMINST_UP:
2512 pri_img = node_image[node_current]
2513 test = instance not in pri_img.instances and not pri_img.offline
2514 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2515 "instance not running on its primary node %s",
2518 diskdata = [(nname, success, status, idx)
2519 for (nname, disks) in diskstatus.items()
2520 for idx, (success, status) in enumerate(disks)]
2522 for nname, success, bdev_status, idx in diskdata:
2523 # the 'ghost node' construction in Exec() ensures that we have a
2525 snode = node_image[nname]
2526 bad_snode = snode.ghost or snode.offline
2527 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2528 not success and not bad_snode,
2529 constants.CV_EINSTANCEFAULTYDISK, instance,
2530 "couldn't retrieve status for disk/%s on %s: %s",
2531 idx, nname, bdev_status)
2532 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2533 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2534 constants.CV_EINSTANCEFAULTYDISK, instance,
2535 "disk/%s on %s is faulty", idx, nname)
2537 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2538 """Verify if there are any unknown volumes in the cluster.
2540 The .os, .swap and backup volumes are ignored. All other volumes are
2541 reported as unknown.
2543 @type reserved: L{ganeti.utils.FieldSet}
2544 @param reserved: a FieldSet of reserved volume names
2547 for node, n_img in node_image.items():
2548 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2549 self.all_node_info[node].group != self.group_uuid):
2550 # skip non-healthy nodes
2552 for volume in n_img.volumes:
2553 test = ((node not in node_vol_should or
2554 volume not in node_vol_should[node]) and
2555 not reserved.Matches(volume))
2556 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2557 "volume %s is unknown", volume)
2559 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2560 """Verify N+1 Memory Resilience.
2562 Check that if one single node dies we can still start all the
2563 instances it was primary for.
2566 cluster_info = self.cfg.GetClusterInfo()
2567 for node, n_img in node_image.items():
2568 # This code checks that every node which is now listed as
2569 # secondary has enough memory to host all instances it is
2570 # supposed to should a single other node in the cluster fail.
2571 # FIXME: not ready for failover to an arbitrary node
2572 # FIXME: does not support file-backed instances
2573 # WARNING: we currently take into account down instances as well
2574 # as up ones, considering that even if they're down someone
2575 # might want to start them even in the event of a node failure.
2576 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2577 # we're skipping nodes marked offline and nodes in other groups from
2578 # the N+1 warning, since most likely we don't have good memory
2579 # information from them; we already list instances living on such
2580 # nodes, and that's enough warning
2582 #TODO(dynmem): also consider ballooning out other instances
2583 for prinode, instances in n_img.sbp.items():
2585 for instance in instances:
2586 bep = cluster_info.FillBE(instance_cfg[instance])
2587 if bep[constants.BE_AUTO_BALANCE]:
2588 needed_mem += bep[constants.BE_MINMEM]
2589 test = n_img.mfree < needed_mem
2590 self._ErrorIf(test, constants.CV_ENODEN1, node,
2591 "not enough memory to accomodate instance failovers"
2592 " should node %s fail (%dMiB needed, %dMiB available)",
2593 prinode, needed_mem, n_img.mfree)
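# Worked example (editor's illustration, numbers hypothetical): if node B is
# secondary for instances inst1 (BE_MINMEM=1024) and inst2 (BE_MINMEM=2048),
# both auto-balanced and both with primary node A, then needed_mem for the
# (B, A) pair is 3072; with mfree=2500 reported by B the CV_ENODEN1 error
# above fires, because B could not absorb a failover from A.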
2596 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2597 (files_all, files_opt, files_mc, files_vm)):
2598 """Verifies file checksums collected from all nodes.
2600 @param errorif: Callback for reporting errors
2601 @param nodeinfo: List of L{objects.Node} objects
2602 @param master_node: Name of master node
2603 @param all_nvinfo: RPC results
2606 # Define functions determining which nodes to consider for a file
2609 (files_mc, lambda node: (node.master_candidate or
2610 node.name == master_node)),
2611 (files_vm, lambda node: node.vm_capable),
2614 # Build mapping from filename to list of nodes which should have the file
2616 for (files, fn) in files2nodefn:
2618 filenodes = nodeinfo
2620 filenodes = filter(fn, nodeinfo)
2621 nodefiles.update((filename,
2622 frozenset(map(operator.attrgetter("name"), filenodes)))
2623 for filename in files)
2625 assert set(nodefiles) == (files_all | files_mc | files_vm)
2627 fileinfo = dict((filename, {}) for filename in nodefiles)
2628 ignore_nodes = set()
2630 for node in nodeinfo:
2632 ignore_nodes.add(node.name)
2635 nresult = all_nvinfo[node.name]
2637 if nresult.fail_msg or not nresult.payload:
2640 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2642 test = not (node_files and isinstance(node_files, dict))
2643 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2644 "Node did not return file checksum data")
2646 ignore_nodes.add(node.name)
2649 # Build per-checksum mapping from filename to nodes having it
2650 for (filename, checksum) in node_files.items():
2651 assert filename in nodefiles
2652 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2654 for (filename, checksums) in fileinfo.items():
2655 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2657 # Nodes having the file
2658 with_file = frozenset(node_name
2659 for nodes in fileinfo[filename].values()
2660 for node_name in nodes) - ignore_nodes
2662 expected_nodes = nodefiles[filename] - ignore_nodes
2664 # Nodes missing file
2665 missing_file = expected_nodes - with_file
2667 if filename in files_opt:
2669 errorif(missing_file and missing_file != expected_nodes,
2670 constants.CV_ECLUSTERFILECHECK, None,
2671 "File %s is optional, but it must exist on all or no"
2672 " nodes (not found on %s)",
2673 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2675 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2676 "File %s is missing from node(s) %s", filename,
2677 utils.CommaJoin(utils.NiceSort(missing_file)))
2679 # Warn if a node has a file it shouldn't
2680 unexpected = with_file - expected_nodes
2682 constants.CV_ECLUSTERFILECHECK, None,
2683 "File %s should not exist on node(s) %s",
2684 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2686 # See if there are multiple versions of the file
2687 test = len(checksums) > 1
2689 variants = ["variant %s on %s" %
2690 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2691 for (idx, (checksum, nodes)) in
2692 enumerate(sorted(checksums.items()))]
2696 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2697 "File %s found with %s different checksums (%s)",
2698 filename, len(checksums), "; ".join(variants))
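# Editor's sketch of the bookkeeping used above (file name and checksums
# hypothetical):
#
#   fileinfo = {
#     "/path/to/config.data": {
#       "0123abcd...": set(["node1", "node2"]),
#       "89efcdab...": set(["node3"]),
#     },
#   }
#
# i.e. one dictionary per tracked file mapping each observed checksum to the
# set of nodes reporting it; more than one checksum key means the file has
# diverged between nodes.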
2700 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2702 """Verifies and the node DRBD status.
2704 @type ninfo: L{objects.Node}
2705 @param ninfo: the node to check
2706 @param nresult: the remote results for the node
2707 @param instanceinfo: the dict of instances
2708 @param drbd_helper: the configured DRBD usermode helper
2709 @param drbd_map: the DRBD map as returned by
2710 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2714 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2717 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2718 test = (helper_result is None)
2719 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2720 "no drbd usermode helper returned")
2722 status, payload = helper_result
2724 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2725 "drbd usermode helper check unsuccessful: %s", payload)
2726 test = status and (payload != drbd_helper)
2727 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2728 "wrong drbd usermode helper: %s", payload)
2730 # compute the DRBD minors
2732 for minor, instance in drbd_map[node].items():
2733 test = instance not in instanceinfo
2734 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2735 "ghost instance '%s' in temporary DRBD map", instance)
2736 # ghost instance should not be running, but otherwise we
2737 # don't give double warnings (both ghost instance and
2738 # unallocated minor in use)
2740 node_drbd[minor] = (instance, False)
2742 instance = instanceinfo[instance]
2743 node_drbd[minor] = (instance.name,
2744 instance.admin_state == constants.ADMINST_UP)
2746 # and now check them
2747 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2748 test = not isinstance(used_minors, (tuple, list))
2749 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2750 "cannot parse drbd status file: %s", str(used_minors))
2752 # we cannot check drbd status
2755 for minor, (iname, must_exist) in node_drbd.items():
2756 test = minor not in used_minors and must_exist
2757 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2758 "drbd minor %d of instance %s is not active", minor, iname)
2759 for minor in used_minors:
2760 test = minor not in node_drbd
2761 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2762 "unallocated drbd minor %d is in use", minor)
2764 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2765 """Builds the node OS structures.
2767 @type ninfo: L{objects.Node}
2768 @param ninfo: the node to check
2769 @param nresult: the remote results for the node
2770 @param nimg: the node image object
2774 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2776 remote_os = nresult.get(constants.NV_OSLIST, None)
2777 test = (not isinstance(remote_os, list) or
2778 not compat.all(isinstance(v, list) and len(v) == 7
2779 for v in remote_os))
2781 _ErrorIf(test, constants.CV_ENODEOS, node,
2782 "node hasn't returned valid OS data")
2791 for (name, os_path, status, diagnose,
2792 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2794 if name not in os_dict:
2797 # parameters is a list of lists instead of list of tuples due to
2798 # JSON lacking a real tuple type, fix it:
2799 parameters = [tuple(v) for v in parameters]
2800 os_dict[name].append((os_path, status, diagnose,
2801 set(variants), set(parameters), set(api_ver)))
2803 nimg.oslist = os_dict
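# Editor's note (illustrative): after this call nimg.oslist maps every OS
# name the node reported to a list of
#   (path, status, diagnose_msg, set(variants), set(parameters), set(api_versions))
# tuples, one per location the OS was found in; _VerifyNodeOS below trusts
# only the first entry and flags any duplicates.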
2805 def _VerifyNodeOS(self, ninfo, nimg, base):
2806 """Verifies the node OS list.
2808 @type ninfo: L{objects.Node}
2809 @param ninfo: the node to check
2810 @param nimg: the node image object
2811 @param base: the 'template' node we match against (e.g. from the master)
2815 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2817 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2819 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2820 for os_name, os_data in nimg.oslist.items():
2821 assert os_data, "Empty OS status for OS %s?!" % os_name
2822 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2823 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2824 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2825 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2826 "OS '%s' has multiple entries (first one shadows the rest): %s",
2827 os_name, utils.CommaJoin([v[0] for v in os_data]))
2828 # comparisons with the 'base' image
2829 test = os_name not in base.oslist
2830 _ErrorIf(test, constants.CV_ENODEOS, node,
2831 "Extra OS %s not present on reference node (%s)",
2835 assert base.oslist[os_name], "Base node has empty OS status?"
2836 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2838 # base OS is invalid, skipping
2840 for kind, a, b in [("API version", f_api, b_api),
2841 ("variants list", f_var, b_var),
2842 ("parameters", beautify_params(f_param),
2843 beautify_params(b_param))]:
2844 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2845 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2846 kind, os_name, base.name,
2847 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2849 # check any missing OSes
2850 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2851 _ErrorIf(missing, constants.CV_ENODEOS, node,
2852 "OSes present on reference node %s but missing on this node: %s",
2853 base.name, utils.CommaJoin(missing))
2855 def _VerifyOob(self, ninfo, nresult):
2856 """Verifies out of band functionality of a node.
2858 @type ninfo: L{objects.Node}
2859 @param ninfo: the node to check
2860 @param nresult: the remote results for the node
2864 # We just have to verify the paths on master and/or master candidates
2865 # as the oob helper is invoked on the master
2866 if ((ninfo.master_candidate or ninfo.master_capable) and
2867 constants.NV_OOB_PATHS in nresult):
2868 for path_result in nresult[constants.NV_OOB_PATHS]:
2869 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2871 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2872 """Verifies and updates the node volume data.
2874 This function will update a L{NodeImage}'s internal structures
2875 with data from the remote call.
2877 @type ninfo: L{objects.Node}
2878 @param ninfo: the node to check
2879 @param nresult: the remote results for the node
2880 @param nimg: the node image object
2881 @param vg_name: the configured VG name
2885 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2887 nimg.lvm_fail = True
2888 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2891 elif isinstance(lvdata, basestring):
2892 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2893 utils.SafeEncode(lvdata))
2894 elif not isinstance(lvdata, dict):
2895 _ErrorIf(True, constants.CV_ENODELVM, node,
2896 "rpc call to node failed (lvlist)")
2898 nimg.volumes = lvdata
2899 nimg.lvm_fail = False
2901 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2902 """Verifies and updates the node instance list.
2904 If the listing was successful, then updates this node's instance
2905 list. Otherwise, it marks the RPC call as failed for the instance
2908 @type ninfo: L{objects.Node}
2909 @param ninfo: the node to check
2910 @param nresult: the remote results for the node
2911 @param nimg: the node image object
2914 idata = nresult.get(constants.NV_INSTANCELIST, None)
2915 test = not isinstance(idata, list)
2916 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2917 "rpc call to node failed (instancelist): %s",
2918 utils.SafeEncode(str(idata)))
2920 nimg.hyp_fail = True
2922 nimg.instances = idata
2924 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2925 """Verifies and computes a node information map
2927 @type ninfo: L{objects.Node}
2928 @param ninfo: the node to check
2929 @param nresult: the remote results for the node
2930 @param nimg: the node image object
2931 @param vg_name: the configured VG name
2935 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2937 # try to read free memory (from the hypervisor)
2938 hv_info = nresult.get(constants.NV_HVINFO, None)
2939 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2940 _ErrorIf(test, constants.CV_ENODEHV, node,
2941 "rpc call to node failed (hvinfo)")
2944 nimg.mfree = int(hv_info["memory_free"])
2945 except (ValueError, TypeError):
2946 _ErrorIf(True, constants.CV_ENODERPC, node,
2947 "node returned invalid nodeinfo, check hypervisor")
2949 # FIXME: devise a free space model for file based instances as well
2950 if vg_name is not None:
2951 test = (constants.NV_VGLIST not in nresult or
2952 vg_name not in nresult[constants.NV_VGLIST])
2953 _ErrorIf(test, constants.CV_ENODELVM, node,
2954 "node didn't return data for the volume group '%s'"
2955 " - it is either missing or broken", vg_name)
2958 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2959 except (ValueError, TypeError):
2960 _ErrorIf(True, constants.CV_ENODERPC, node,
2961 "node returned invalid LVM info, check LVM status")
2963 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2964 """Gets per-disk status information for all instances.
2966 @type nodelist: list of strings
2967 @param nodelist: Node names
2968 @type node_image: dict of (name, L{objects.Node})
2969 @param node_image: Node objects
2970 @type instanceinfo: dict of (name, L{objects.Instance})
2971 @param instanceinfo: Instance objects
2972 @rtype: {instance: {node: [(success, payload)]}}
2973 @return: a dictionary of per-instance dictionaries with nodes as
2974 keys and disk information as values; the disk information is a
2975 list of tuples (success, payload)
2978 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2981 node_disks_devonly = {}
2982 diskless_instances = set()
2983 diskless = constants.DT_DISKLESS
2985 for nname in nodelist:
2986 node_instances = list(itertools.chain(node_image[nname].pinst,
2987 node_image[nname].sinst))
2988 diskless_instances.update(inst for inst in node_instances
2989 if instanceinfo[inst].disk_template == diskless)
2990 disks = [(inst, disk)
2991 for inst in node_instances
2992 for disk in instanceinfo[inst].disks]
2995 # No need to collect data
2998 node_disks[nname] = disks
3000 # _AnnotateDiskParams makes already copies of the disks
3002 for (inst, dev) in disks:
3003 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3004 self.cfg.SetDiskID(anno_disk, nname)
3005 devonly.append(anno_disk)
3007 node_disks_devonly[nname] = devonly
3009 assert len(node_disks) == len(node_disks_devonly)
3011 # Collect data from all nodes with disks
3012 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3015 assert len(result) == len(node_disks)
3019 for (nname, nres) in result.items():
3020 disks = node_disks[nname]
3023 # No data from this node
3024 data = len(disks) * [(False, "node offline")]
3027 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3028 "while getting disk information: %s", msg)
3030 # No data from this node
3031 data = len(disks) * [(False, msg)]
3034 for idx, i in enumerate(nres.payload):
3035 if isinstance(i, (tuple, list)) and len(i) == 2:
3038 logging.warning("Invalid result from node %s, entry %d: %s",
3040 data.append((False, "Invalid result from the remote node"))
3042 for ((inst, _), status) in zip(disks, data):
3043 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3045 # Add empty entries for diskless instances.
3046 for inst in diskless_instances:
3047 assert inst not in instdisk
3050 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3051 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3052 compat.all(isinstance(s, (tuple, list)) and
3053 len(s) == 2 for s in statuses)
3054 for inst, nnames in instdisk.items()
3055 for nname, statuses in nnames.items())
3056 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
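# Editor's sketch of the returned mapping (names hypothetical):
#
#   instdisk = {
#     "inst1": {"node1": [(True, <status>), (True, <status>)],
#               "node2": [(False, "node offline"), (False, "node offline")]},
#     "diskless-inst": {},
#   }
#
# one (success, payload) pair per disk on every node the disks live on, with
# diskless instances represented by an empty inner dictionary.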
3061 def _SshNodeSelector(group_uuid, all_nodes):
3062 """Create endless iterators for all potential SSH check hosts.
3065 nodes = [node for node in all_nodes
3066 if (node.group != group_uuid and
3068 keyfunc = operator.attrgetter("group")
3070 return map(itertools.cycle,
3071 [sorted(map(operator.attrgetter("name"), names))
3072 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3076 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3077 """Choose which nodes should talk to which other nodes.
3079 We will make nodes contact all nodes in their group, and one node from
3082 @warning: This algorithm has a known issue if one node group is much
3083 smaller than others (e.g. just one node). In such a case all other
3084 nodes will talk to the single node.
3087 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3088 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3090 return (online_nodes,
3091 dict((name, sorted([i.next() for i in sel]))
3092 for name in online_nodes))
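# Editor's example (node names hypothetical): for a two-group cluster this
# could return
#
#   (["node1", "node2"],
#    {"node1": ["other-group-nodeA"], "node2": ["other-group-nodeB"]})
#
# i.e. the group's own online nodes, which every node contacts, plus one
# extra target per node drawn from each foreign group by the round-robin
# cycles of _SshNodeSelector.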
3094 def BuildHooksEnv(self):
3097 Cluster-Verify hooks just ran in the post phase and their failure makes
3098 the output be logged in the verify output and the verification to fail.
3102 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3105 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3106 for node in self.my_node_info.values())
3110 def BuildHooksNodes(self):
3111 """Build hooks nodes.
3114 return ([], self.my_node_names)
3116 def Exec(self, feedback_fn):
3117 """Verify integrity of the node group, performing various test on nodes.
3120 # This method has too many local variables. pylint: disable=R0914
3121 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3123 if not self.my_node_names:
3125 feedback_fn("* Empty node group, skipping verification")
3129 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3130 verbose = self.op.verbose
3131 self._feedback_fn = feedback_fn
3133 vg_name = self.cfg.GetVGName()
3134 drbd_helper = self.cfg.GetDRBDHelper()
3135 cluster = self.cfg.GetClusterInfo()
3136 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3137 hypervisors = cluster.enabled_hypervisors
3138 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3140 i_non_redundant = [] # Non redundant instances
3141 i_non_a_balanced = [] # Non auto-balanced instances
3142 i_offline = 0 # Count of offline instances
3143 n_offline = 0 # Count of offline nodes
3144 n_drained = 0 # Count of nodes being drained
3145 node_vol_should = {}
3147 # FIXME: verify OS list
3150 filemap = _ComputeAncillaryFiles(cluster, False)
3152 # do local checksums
3153 master_node = self.master_node = self.cfg.GetMasterNode()
3154 master_ip = self.cfg.GetMasterIP()
3156 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3159 if self.cfg.GetUseExternalMipScript():
3160 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3162 node_verify_param = {
3163 constants.NV_FILELIST:
3164 utils.UniqueSequence(filename
3165 for files in filemap
3166 for filename in files),
3167 constants.NV_NODELIST:
3168 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3169 self.all_node_info.values()),
3170 constants.NV_HYPERVISOR: hypervisors,
3171 constants.NV_HVPARAMS:
3172 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3173 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3174 for node in node_data_list
3175 if not node.offline],
3176 constants.NV_INSTANCELIST: hypervisors,
3177 constants.NV_VERSION: None,
3178 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3179 constants.NV_NODESETUP: None,
3180 constants.NV_TIME: None,
3181 constants.NV_MASTERIP: (master_node, master_ip),
3182 constants.NV_OSLIST: None,
3183 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3184 constants.NV_USERSCRIPTS: user_scripts,
3187 if vg_name is not None:
3188 node_verify_param[constants.NV_VGLIST] = None
3189 node_verify_param[constants.NV_LVLIST] = vg_name
3190 node_verify_param[constants.NV_PVLIST] = [vg_name]
3191 node_verify_param[constants.NV_DRBDLIST] = None
3194 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3197 # FIXME: this needs to be changed per node-group, not cluster-wide
3199 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3200 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3201 bridges.add(default_nicpp[constants.NIC_LINK])
3202 for instance in self.my_inst_info.values():
3203 for nic in instance.nics:
3204 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3205 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3206 bridges.add(full_nic[constants.NIC_LINK])
3209 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3211 # Build our expected cluster state
3212 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3214 vm_capable=node.vm_capable))
3215 for node in node_data_list)
3219 for node in self.all_node_info.values():
3220 path = _SupportsOob(self.cfg, node)
3221 if path and path not in oob_paths:
3222 oob_paths.append(path)
3225 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3227 for instance in self.my_inst_names:
3228 inst_config = self.my_inst_info[instance]
3229 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3232 for nname in inst_config.all_nodes:
3233 if nname not in node_image:
3234 gnode = self.NodeImage(name=nname)
3235 gnode.ghost = (nname not in self.all_node_info)
3236 node_image[nname] = gnode
3238 inst_config.MapLVsByNode(node_vol_should)
3240 pnode = inst_config.primary_node
3241 node_image[pnode].pinst.append(instance)
3243 for snode in inst_config.secondary_nodes:
3244 nimg = node_image[snode]
3245 nimg.sinst.append(instance)
3246 if pnode not in nimg.sbp:
3247 nimg.sbp[pnode] = []
3248 nimg.sbp[pnode].append(instance)
3250 # At this point, we have the in-memory data structures complete,
3251 # except for the runtime information, which we'll gather next
3253 # Due to the way our RPC system works, exact response times cannot be
3254 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3255 # time before and after executing the request, we can at least have a time
3257 nvinfo_starttime = time.time()
3258 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3260 self.cfg.GetClusterName())
3261 nvinfo_endtime = time.time()
3263 if self.extra_lv_nodes and vg_name is not None:
3265 self.rpc.call_node_verify(self.extra_lv_nodes,
3266 {constants.NV_LVLIST: vg_name},
3267 self.cfg.GetClusterName())
3269 extra_lv_nvinfo = {}
3271 all_drbd_map = self.cfg.ComputeDRBDMap()
3273 feedback_fn("* Gathering disk information (%s nodes)" %
3274 len(self.my_node_names))
3275 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3278 feedback_fn("* Verifying configuration file consistency")
3280 # If not all nodes are being checked, we need to make sure the master node
3281 # and a non-checked vm_capable node are in the list.
3282 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3284 vf_nvinfo = all_nvinfo.copy()
3285 vf_node_info = list(self.my_node_info.values())
3286 additional_nodes = []
3287 if master_node not in self.my_node_info:
3288 additional_nodes.append(master_node)
3289 vf_node_info.append(self.all_node_info[master_node])
3290 # Add the first vm_capable node we find which is not included,
3291 # excluding the master node (which we already have)
3292 for node in absent_nodes:
3293 nodeinfo = self.all_node_info[node]
3294 if (nodeinfo.vm_capable and not nodeinfo.offline and
3295 node != master_node):
3296 additional_nodes.append(node)
3297 vf_node_info.append(self.all_node_info[node])
3299 key = constants.NV_FILELIST
3300 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3301 {key: node_verify_param[key]},
3302 self.cfg.GetClusterName()))
3304 vf_nvinfo = all_nvinfo
3305 vf_node_info = self.my_node_info.values()
3307 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3309 feedback_fn("* Verifying node status")
3313 for node_i in node_data_list:
3315 nimg = node_image[node]
3319 feedback_fn("* Skipping offline node %s" % (node,))
3323 if node == master_node:
3325 elif node_i.master_candidate:
3326 ntype = "master candidate"
3327 elif node_i.drained:
3333 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3335 msg = all_nvinfo[node].fail_msg
3336 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3339 nimg.rpc_fail = True
3342 nresult = all_nvinfo[node].payload
3344 nimg.call_ok = self._VerifyNode(node_i, nresult)
3345 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3346 self._VerifyNodeNetwork(node_i, nresult)
3347 self._VerifyNodeUserScripts(node_i, nresult)
3348 self._VerifyOob(node_i, nresult)
3351 self._VerifyNodeLVM(node_i, nresult, vg_name)
3352 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3355 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3356 self._UpdateNodeInstances(node_i, nresult, nimg)
3357 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3358 self._UpdateNodeOS(node_i, nresult, nimg)
3360 if not nimg.os_fail:
3361 if refos_img is None:
3363 self._VerifyNodeOS(node_i, nimg, refos_img)
3364 self._VerifyNodeBridges(node_i, nresult, bridges)
3366 # Check whether all running instances are primary for the node. (This
3367 # can no longer be done from _VerifyInstance below, since some of the
3368 # wrong instances could be from other node groups.)
3369 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3371 for inst in non_primary_inst:
3372 test = inst in self.all_inst_info
3373 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3374 "instance should not run on node %s", node_i.name)
3375 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3376 "node is running unknown instance %s", inst)
3378 for node, result in extra_lv_nvinfo.items():
3379 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3380 node_image[node], vg_name)
3382 feedback_fn("* Verifying instance status")
3383 for instance in self.my_inst_names:
3385 feedback_fn("* Verifying instance %s" % instance)
3386 inst_config = self.my_inst_info[instance]
3387 self._VerifyInstance(instance, inst_config, node_image,
3389 inst_nodes_offline = []
3391 pnode = inst_config.primary_node
3392 pnode_img = node_image[pnode]
3393 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3394 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3395 " primary node failed", instance)
3397 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3399 constants.CV_EINSTANCEBADNODE, instance,
3400 "instance is marked as running and lives on offline node %s",
3401 inst_config.primary_node)
3403 # If the instance is non-redundant we cannot survive losing its primary
3404 # node, so we are not N+1 compliant. On the other hand we have no disk
3405 # templates with more than one secondary so that situation is not well
3407 # FIXME: does not support file-backed instances
3408 if not inst_config.secondary_nodes:
3409 i_non_redundant.append(instance)
3411 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3412 constants.CV_EINSTANCELAYOUT,
3413 instance, "instance has multiple secondary nodes: %s",
3414 utils.CommaJoin(inst_config.secondary_nodes),
3415 code=self.ETYPE_WARNING)
3417 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3418 pnode = inst_config.primary_node
3419 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3420 instance_groups = {}
3422 for node in instance_nodes:
3423 instance_groups.setdefault(self.all_node_info[node].group,
3427 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3428 # Sort so that we always list the primary node first.
3429 for group, nodes in sorted(instance_groups.items(),
3430 key=lambda (_, nodes): pnode in nodes,
3433 self._ErrorIf(len(instance_groups) > 1,
3434 constants.CV_EINSTANCESPLITGROUPS,
3435 instance, "instance has primary and secondary nodes in"
3436 " different groups: %s", utils.CommaJoin(pretty_list),
3437 code=self.ETYPE_WARNING)
3439 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3440 i_non_a_balanced.append(instance)
3442 for snode in inst_config.secondary_nodes:
3443 s_img = node_image[snode]
3444 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3445 snode, "instance %s, connection to secondary node failed",
3449 inst_nodes_offline.append(snode)
3451 # warn that the instance lives on offline nodes
3452 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3453 "instance has offline secondary node(s) %s",
3454 utils.CommaJoin(inst_nodes_offline))
3455 # ... or ghost/non-vm_capable nodes
3456 for node in inst_config.all_nodes:
3457 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3458 instance, "instance lives on ghost node %s", node)
3459 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3460 instance, "instance lives on non-vm_capable node %s", node)
3462 feedback_fn("* Verifying orphan volumes")
3463 reserved = utils.FieldSet(*cluster.reserved_lvs)
3465 # We will get spurious "unknown volume" warnings if any node of this group
3466 # is secondary for an instance whose primary is in another group. To avoid
3467 # them, we find these instances and add their volumes to node_vol_should.
3468 for inst in self.all_inst_info.values():
3469 for secondary in inst.secondary_nodes:
3470 if (secondary in self.my_node_info
3471 and inst.name not in self.my_inst_info):
3472 inst.MapLVsByNode(node_vol_should)
3475 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3477 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3478 feedback_fn("* Verifying N+1 Memory redundancy")
3479 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3481 feedback_fn("* Other Notes")
3483 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3484 % len(i_non_redundant))
3486 if i_non_a_balanced:
3487 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3488 % len(i_non_a_balanced))
3491 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3494 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3497 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3501 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3502 """Analyze the post-hooks' result
3504 This method analyses the hook result, handles it, and sends some
3505 nicely-formatted feedback back to the user.
3507 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3508 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3509 @param hooks_results: the results of the multi-node hooks rpc call
3510 @param feedback_fn: function used to send feedback back to the caller
3511 @param lu_result: previous Exec result
3512 @return: the new Exec result, based on the previous result
3516 # We only really run POST phase hooks, only for non-empty groups,
3517 # and are only interested in their results
3518 if not self.my_node_names:
3521 elif phase == constants.HOOKS_PHASE_POST:
3522 # Used to change hooks' output to proper indentation
3523 feedback_fn("* Hooks Results")
3524 assert hooks_results, "invalid result from hooks"
3526 for node_name in hooks_results:
3527 res = hooks_results[node_name]
3529 test = msg and not res.offline
3530 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3531 "Communication failure in hooks execution: %s", msg)
3532 if res.offline or msg:
3533 # No need to investigate payload if node is offline or gave
3536 for script, hkr, output in res.payload:
3537 test = hkr == constants.HKR_FAIL
3538 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3539 "Script %s failed, output:", script)
3541 output = self._HOOKS_INDENT_RE.sub(" ", output)
3542 feedback_fn("%s" % output)
3548 class LUClusterVerifyDisks(NoHooksLU):
3549 """Verifies the cluster disks status.
3554 def ExpandNames(self):
3555 self.share_locks = _ShareAll()
3556 self.needed_locks = {
3557 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3560 def Exec(self, feedback_fn):
3561 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3563 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3564 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3565 for group in group_names])
3568 class LUGroupVerifyDisks(NoHooksLU):
3569 """Verifies the status of all disks in a node group.
3574 def ExpandNames(self):
3575 # Raises errors.OpPrereqError on its own if group can't be found
3576 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3578 self.share_locks = _ShareAll()
3579 self.needed_locks = {
3580 locking.LEVEL_INSTANCE: [],
3581 locking.LEVEL_NODEGROUP: [],
3582 locking.LEVEL_NODE: [],
3585 def DeclareLocks(self, level):
3586 if level == locking.LEVEL_INSTANCE:
3587 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3589 # Lock instances optimistically, needs verification once node and group
3590 # locks have been acquired
3591 self.needed_locks[locking.LEVEL_INSTANCE] = \
3592 self.cfg.GetNodeGroupInstances(self.group_uuid)
3594 elif level == locking.LEVEL_NODEGROUP:
3595 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3597 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3598 set([self.group_uuid] +
3599 # Lock all groups used by instances optimistically; this requires
3600 # going via the node before it's locked, requiring verification
3603 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3604 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3606 elif level == locking.LEVEL_NODE:
3607 # This will only lock the nodes in the group to be verified which contain
3609 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3610 self._LockInstancesNodes()
3612 # Lock all nodes in group to be verified
3613 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3614 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3615 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3617 def CheckPrereq(self):
3618 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3619 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3620 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3622 assert self.group_uuid in owned_groups
3624 # Check if locked instances are still correct
3625 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3627 # Get instance information
3628 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3630 # Check if node groups for locked instances are still correct
3631 _CheckInstancesNodeGroups(self.cfg, self.instances,
3632 owned_groups, owned_nodes, self.group_uuid)
3634 def Exec(self, feedback_fn):
3635 """Verify integrity of cluster disks.
3637 @rtype: tuple of three items
3638 @return: a tuple of (dict of node-to-node_error, list of instances
3639 which need activate-disks, dict of instance: (node, volume) for
3644 res_instances = set()
3647 nv_dict = _MapInstanceDisksToNodes([inst
3648 for inst in self.instances.values()
3649 if inst.admin_state == constants.ADMINST_UP])
3652 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3653 set(self.cfg.GetVmCapableNodeList()))
3655 node_lvs = self.rpc.call_lv_list(nodes, [])
3657 for (node, node_res) in node_lvs.items():
3658 if node_res.offline:
3661 msg = node_res.fail_msg
3663 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3664 res_nodes[node] = msg
3667 for lv_name, (_, _, lv_online) in node_res.payload.items():
3668 inst = nv_dict.pop((node, lv_name), None)
3669 if not (lv_online or inst is None):
3670 res_instances.add(inst)
3672 # any leftover items in nv_dict are missing LVs, let's arrange the data
3674 for key, inst in nv_dict.iteritems():
3675 res_missing.setdefault(inst, []).append(list(key))
3677 return (res_nodes, list(res_instances), res_missing)
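# Editor's sketch of the result shape (names hypothetical):
#
#   ({"node3": "Error enumerating LVs ..."},
#    ["inst-with-offline-lv"],
#    {"inst-missing-lv": [["node1", "xenvg/lv-name"]]})
#
# matching the (per-node errors, instances needing activate-disks,
# instance-to-missing-volumes) tuple documented in the docstring above.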
3680 class LUClusterRepairDiskSizes(NoHooksLU):
3681 """Verifies the cluster disks sizes.
3686 def ExpandNames(self):
3687 if self.op.instances:
3688 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3689 self.needed_locks = {
3690 locking.LEVEL_NODE_RES: [],
3691 locking.LEVEL_INSTANCE: self.wanted_names,
3693 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3695 self.wanted_names = None
3696 self.needed_locks = {
3697 locking.LEVEL_NODE_RES: locking.ALL_SET,
3698 locking.LEVEL_INSTANCE: locking.ALL_SET,
3700 self.share_locks = {
3701 locking.LEVEL_NODE_RES: 1,
3702 locking.LEVEL_INSTANCE: 0,
3705 def DeclareLocks(self, level):
3706 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3707 self._LockInstancesNodes(primary_only=True, level=level)
3709 def CheckPrereq(self):
3710 """Check prerequisites.
3712 This only checks the optional instance list against the existing names.
3715 if self.wanted_names is None:
3716 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3718 self.wanted_instances = \
3719 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3721 def _EnsureChildSizes(self, disk):
3722 """Ensure children of the disk have the needed disk size.
3724 This is valid mainly for DRBD8 and fixes an issue where the
3725 children have a smaller disk size than the parent.
3727 @param disk: an L{ganeti.objects.Disk} object
3730 if disk.dev_type == constants.LD_DRBD8:
3731 assert disk.children, "Empty children for DRBD8?"
3732 fchild = disk.children[0]
3733 mismatch = fchild.size < disk.size
3735 self.LogInfo("Child disk has size %d, parent %d, fixing",
3736 fchild.size, disk.size)
3737 fchild.size = disk.size
3739 # and we recurse on this child only, not on the metadev
3740 return self._EnsureChildSizes(fchild) or mismatch
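# Minimal standalone sketch of the resize rule implemented by
# _EnsureChildSizes above, using a toy disk class instead of
# L{ganeti.objects.Disk}.  Purely illustrative; none of these names exist
# upstream.
class _ToyDisk(object):
  """Tiny stand-in for objects.Disk, used only by the sketch below."""
  def __init__(self, size, children=None, is_drbd=False):
    self.size = size
    self.children = children or []
    self.is_drbd = is_drbd


def _EnsureChildSizesSketch(disk):
  """Grows the DRBD data child to the parent's size, recursively.

  Returns True if any size had to be corrected, mirroring the logic above
  (the metadata child, children[1], is deliberately left alone).
  """
  if not disk.is_drbd:
    return False
  data_child = disk.children[0]
  mismatch = data_child.size < disk.size
  if mismatch:
    data_child.size = disk.size
  return _EnsureChildSizesSketch(data_child) or mismatch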
3744 def Exec(self, feedback_fn):
3745 """Verify the size of cluster disks.
3748 # TODO: check child disks too
3749 # TODO: check differences in size between primary/secondary nodes
3751 for instance in self.wanted_instances:
3752 pnode = instance.primary_node
3753 if pnode not in per_node_disks:
3754 per_node_disks[pnode] = []
3755 for idx, disk in enumerate(instance.disks):
3756 per_node_disks[pnode].append((instance, idx, disk))
3758 assert not (frozenset(per_node_disks.keys()) -
3759 self.owned_locks(locking.LEVEL_NODE_RES)), \
3760 "Not owning correct locks"
3761 assert not self.owned_locks(locking.LEVEL_NODE)
3764 for node, dskl in per_node_disks.items():
3765 newl = [v[2].Copy() for v in dskl]
3767 self.cfg.SetDiskID(dsk, node)
3768 result = self.rpc.call_blockdev_getsize(node, newl)
3770 self.LogWarning("Failure in blockdev_getsize call to node"
3771 " %s, ignoring", node)
3773 if len(result.payload) != len(dskl):
3774 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3775 " result.payload=%s", node, len(dskl), result.payload)
3776 self.LogWarning("Invalid result from node %s, ignoring node results",
3779 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3781 self.LogWarning("Disk %d of instance %s did not return size"
3782 " information, ignoring", idx, instance.name)
3784 if not isinstance(size, (int, long)):
3785 self.LogWarning("Disk %d of instance %s did not return valid"
3786 " size information, ignoring", idx, instance.name)
3789 if size != disk.size:
3790 self.LogInfo("Disk %d of instance %s has mismatched size,"
3791 " correcting: recorded %d, actual %d", idx,
3792 instance.name, disk.size, size)
3794 self.cfg.Update(instance, feedback_fn)
3795 changed.append((instance.name, idx, size))
3796 if self._EnsureChildSizes(disk):
3797 self.cfg.Update(instance, feedback_fn)
3798 changed.append((instance.name, idx, disk.size))
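# Illustrative only: each entry appended to "changed" above is an
# (instance_name, disk_index, new_size) tuple.  A hypothetical caller could
# group the corrections per instance like this; the helper below is not part
# of the original code.
def _GroupRepairedSizesSketch(changed):
  """Returns {instance_name: [(disk_index, new_size), ...]} (sketch)."""
  per_instance = {}
  for (iname, idx, new_size) in changed:
    per_instance.setdefault(iname, []).append((idx, new_size))
  return per_instance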
3802 class LUClusterRename(LogicalUnit):
3803 """Rename the cluster.
3806 HPATH = "cluster-rename"
3807 HTYPE = constants.HTYPE_CLUSTER
3809 def BuildHooksEnv(self):
3814 "OP_TARGET": self.cfg.GetClusterName(),
3815 "NEW_NAME": self.op.name,
3818 def BuildHooksNodes(self):
3819 """Build hooks nodes.
3822 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3824 def CheckPrereq(self):
3825 """Verify that the passed name is a valid one.
3828 hostname = netutils.GetHostname(name=self.op.name,
3829 family=self.cfg.GetPrimaryIPFamily())
3831 new_name = hostname.name
3832 self.ip = new_ip = hostname.ip
3833 old_name = self.cfg.GetClusterName()
3834 old_ip = self.cfg.GetMasterIP()
3835 if new_name == old_name and new_ip == old_ip:
3836 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3837 " cluster has changed",
3839 if new_ip != old_ip:
3840 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3841 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3842 " reachable on the network" %
3843 new_ip, errors.ECODE_NOTUNIQUE)
3845 self.op.name = new_name
3847 def Exec(self, feedback_fn):
3848 """Rename the cluster.
3851 clustername = self.op.name
3854 # shutdown the master IP
3855 master_params = self.cfg.GetMasterNetworkParameters()
3856 ems = self.cfg.GetUseExternalMipScript()
3857 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3859 result.Raise("Could not disable the master role")
3862 cluster = self.cfg.GetClusterInfo()
3863 cluster.cluster_name = clustername
3864 cluster.master_ip = new_ip
3865 self.cfg.Update(cluster, feedback_fn)
3867 # update the known hosts file
3868 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3869 node_list = self.cfg.GetOnlineNodeList()
3871 node_list.remove(master_params.name)
3874 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3876 master_params.ip = new_ip
3877 result = self.rpc.call_node_activate_master_ip(master_params.name,
3879 msg = result.fail_msg
3881 self.LogWarning("Could not re-enable the master role on"
3882 " the master, please restart manually: %s", msg)
3887 def _ValidateNetmask(cfg, netmask):
3888 """Checks if a netmask is valid.
3890 @type cfg: L{config.ConfigWriter}
3891 @param cfg: The cluster configuration
3893 @param netmask: the netmask to be verified
3894 @raise errors.OpPrereqError: if the validation fails
3897 ip_family = cfg.GetPrimaryIPFamily()
3899 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3900 except errors.ProgrammerError:
3901 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3903 if not ipcls.ValidateNetmask(netmask):
3904 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3908 class LUClusterSetParams(LogicalUnit):
3909 """Change the parameters of the cluster.
3912 HPATH = "cluster-modify"
3913 HTYPE = constants.HTYPE_CLUSTER
3916 def CheckArguments(self):
3920 if self.op.uid_pool:
3921 uidpool.CheckUidPool(self.op.uid_pool)
3923 if self.op.add_uids:
3924 uidpool.CheckUidPool(self.op.add_uids)
3926 if self.op.remove_uids:
3927 uidpool.CheckUidPool(self.op.remove_uids)
3929 if self.op.master_netmask is not None:
3930 _ValidateNetmask(self.cfg, self.op.master_netmask)
3932 if self.op.diskparams:
3933 for dt_params in self.op.diskparams.values():
3934 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3936 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3937 except errors.OpPrereqError, err:
3938 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3941 def ExpandNames(self):
3942 # FIXME: in the future maybe other cluster params won't require checking on
3943 # all nodes to be modified.
3944 self.needed_locks = {
3945 locking.LEVEL_NODE: locking.ALL_SET,
3946 locking.LEVEL_INSTANCE: locking.ALL_SET,
3947 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3949 self.share_locks = {
3950 locking.LEVEL_NODE: 1,
3951 locking.LEVEL_INSTANCE: 1,
3952 locking.LEVEL_NODEGROUP: 1,
3955 def BuildHooksEnv(self):
3960 "OP_TARGET": self.cfg.GetClusterName(),
3961 "NEW_VG_NAME": self.op.vg_name,
3964 def BuildHooksNodes(self):
3965 """Build hooks nodes.
3968 mn = self.cfg.GetMasterNode()
3971 def CheckPrereq(self):
3972 """Check prerequisites.
3974 This checks whether the given parameters don't conflict and
3975 whether the given volume group is valid.
3978 if self.op.vg_name is not None and not self.op.vg_name:
3979 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3980 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3981 " instances exist", errors.ECODE_INVAL)
3983 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3984 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3985 raise errors.OpPrereqError("Cannot disable drbd helper while"
3986 " drbd-based instances exist",
3989 node_list = self.owned_locks(locking.LEVEL_NODE)
3991 # if vg_name not None, checks given volume group on all nodes
3993 vglist = self.rpc.call_vg_list(node_list)
3994 for node in node_list:
3995 msg = vglist[node].fail_msg
3997 # ignoring down node
3998 self.LogWarning("Error while gathering data on node %s"
3999 " (ignoring node): %s", node, msg)
4001 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4003 constants.MIN_VG_SIZE)
4005 raise errors.OpPrereqError("Error on node '%s': %s" %
4006 (node, vgstatus), errors.ECODE_ENVIRON)
4008 if self.op.drbd_helper:
4009 # checks given drbd helper on all nodes
4010 helpers = self.rpc.call_drbd_helper(node_list)
4011 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4013 self.LogInfo("Not checking drbd helper on offline node %s", node)
4015 msg = helpers[node].fail_msg
4017 raise errors.OpPrereqError("Error checking drbd helper on node"
4018 " '%s': %s" % (node, msg),
4019 errors.ECODE_ENVIRON)
4020 node_helper = helpers[node].payload
4021 if node_helper != self.op.drbd_helper:
4022 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4023 (node, node_helper), errors.ECODE_ENVIRON)
4025 self.cluster = cluster = self.cfg.GetClusterInfo()
4026 # validate params changes
4027 if self.op.beparams:
4028 objects.UpgradeBeParams(self.op.beparams)
4029 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4030 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4032 if self.op.ndparams:
4033 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4034 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4036 # TODO: we need a more general way to handle resetting
4037 # cluster-level parameters to default values
4038 if self.new_ndparams["oob_program"] == "":
4039 self.new_ndparams["oob_program"] = \
4040 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4042 if self.op.hv_state:
4043 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4044 self.cluster.hv_state_static)
4045 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4046 for hv, values in new_hv_state.items())
4048 if self.op.disk_state:
4049 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4050 self.cluster.disk_state_static)
4051 self.new_disk_state = \
4052 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4053 for name, values in svalues.items()))
4054 for storage, svalues in new_disk_state.items())
4057 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4060 all_instances = self.cfg.GetAllInstancesInfo().values()
4062 for group in self.cfg.GetAllNodeGroupsInfo().values():
4063 instances = frozenset([inst for inst in all_instances
4064 if compat.any(node in group.members
4065 for node in inst.all_nodes)])
4066 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4067 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4069 new_ipolicy, instances)
4071 violations.update(new)
4074 self.LogWarning("After the ipolicy change the following instances"
4075 " violate them: %s",
4076 utils.CommaJoin(utils.NiceSort(violations)))
4078 if self.op.nicparams:
4079 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4080 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4081 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4084 # check all instances for consistency
4085 for instance in self.cfg.GetAllInstancesInfo().values():
4086 for nic_idx, nic in enumerate(instance.nics):
4087 params_copy = copy.deepcopy(nic.nicparams)
4088 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4090 # check parameter syntax
4092 objects.NIC.CheckParameterSyntax(params_filled)
4093 except errors.ConfigurationError, err:
4094 nic_errors.append("Instance %s, nic/%d: %s" %
4095 (instance.name, nic_idx, err))
4097 # if we're moving instances to routed, check that they have an ip
4098 target_mode = params_filled[constants.NIC_MODE]
4099 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4100 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4101 " address" % (instance.name, nic_idx))
4103 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4104 "\n".join(nic_errors))
4106 # hypervisor list/parameters
4107 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4108 if self.op.hvparams:
4109 for hv_name, hv_dict in self.op.hvparams.items():
4110 if hv_name not in self.new_hvparams:
4111 self.new_hvparams[hv_name] = hv_dict
4113 self.new_hvparams[hv_name].update(hv_dict)
4115 # disk template parameters
4116 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4117 if self.op.diskparams:
4118 for dt_name, dt_params in self.op.diskparams.items():
4119 if dt_name not in self.new_diskparams:
4120 self.new_diskparams[dt_name] = dt_params
4122 self.new_diskparams[dt_name].update(dt_params)
4124 # os hypervisor parameters
4125 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4127 for os_name, hvs in self.op.os_hvp.items():
4128 if os_name not in self.new_os_hvp:
4129 self.new_os_hvp[os_name] = hvs
4131 for hv_name, hv_dict in hvs.items():
4132 if hv_name not in self.new_os_hvp[os_name]:
4133 self.new_os_hvp[os_name][hv_name] = hv_dict
4135 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4138 self.new_osp = objects.FillDict(cluster.osparams, {})
4139 if self.op.osparams:
4140 for os_name, osp in self.op.osparams.items():
4141 if os_name not in self.new_osp:
4142 self.new_osp[os_name] = {}
4144 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4147 if not self.new_osp[os_name]:
4148 # we removed all parameters
4149 del self.new_osp[os_name]
4151 # check the parameter validity (remote check)
4152 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4153 os_name, self.new_osp[os_name])
4155 # changes to the hypervisor list
4156 if self.op.enabled_hypervisors is not None:
4157 self.hv_list = self.op.enabled_hypervisors
4158 for hv in self.hv_list:
4159 # if the hypervisor doesn't already exist in the cluster
4160 # hvparams, we initialize it to empty, and then (in both
4161 # cases) we make sure to fill the defaults, as we might not
4162 # have a complete defaults list if the hypervisor wasn't
4164 if hv not in new_hvp:
4166 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4167 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4169 self.hv_list = cluster.enabled_hypervisors
4171 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4172 # either the enabled list has changed, or the parameters have, validate
4173 for hv_name, hv_params in self.new_hvparams.items():
4174 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4175 (self.op.enabled_hypervisors and
4176 hv_name in self.op.enabled_hypervisors)):
4177 # either this is a new hypervisor, or its parameters have changed
4178 hv_class = hypervisor.GetHypervisor(hv_name)
4179 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4180 hv_class.CheckParameterSyntax(hv_params)
4181 _CheckHVParams(self, node_list, hv_name, hv_params)
4184 # no need to check any newly-enabled hypervisors, since the
4185 # defaults have already been checked in the above code-block
4186 for os_name, os_hvp in self.new_os_hvp.items():
4187 for hv_name, hv_params in os_hvp.items():
4188 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4189 # we need to fill in the new os_hvp on top of the actual hv_p
4190 cluster_defaults = self.new_hvparams.get(hv_name, {})
4191 new_osp = objects.FillDict(cluster_defaults, hv_params)
4192 hv_class = hypervisor.GetHypervisor(hv_name)
4193 hv_class.CheckParameterSyntax(new_osp)
4194 _CheckHVParams(self, node_list, hv_name, new_osp)
4196 if self.op.default_iallocator:
4197 alloc_script = utils.FindFile(self.op.default_iallocator,
4198 constants.IALLOCATOR_SEARCH_PATH,
4200 if alloc_script is None:
4201 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4202 " specified" % self.op.default_iallocator,
4205 def Exec(self, feedback_fn):
4206 """Change the parameters of the cluster.
4209 if self.op.vg_name is not None:
4210 new_volume = self.op.vg_name
4213 if new_volume != self.cfg.GetVGName():
4214 self.cfg.SetVGName(new_volume)
4216 feedback_fn("Cluster LVM configuration already in desired"
4217 " state, not changing")
4218 if self.op.drbd_helper is not None:
4219 new_helper = self.op.drbd_helper
4222 if new_helper != self.cfg.GetDRBDHelper():
4223 self.cfg.SetDRBDHelper(new_helper)
4225 feedback_fn("Cluster DRBD helper already in desired state,"
4227 if self.op.hvparams:
4228 self.cluster.hvparams = self.new_hvparams
4230 self.cluster.os_hvp = self.new_os_hvp
4231 if self.op.enabled_hypervisors is not None:
4232 self.cluster.hvparams = self.new_hvparams
4233 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4234 if self.op.beparams:
4235 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4236 if self.op.nicparams:
4237 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4239 self.cluster.ipolicy = self.new_ipolicy
4240 if self.op.osparams:
4241 self.cluster.osparams = self.new_osp
4242 if self.op.ndparams:
4243 self.cluster.ndparams = self.new_ndparams
4244 if self.op.diskparams:
4245 self.cluster.diskparams = self.new_diskparams
4246 if self.op.hv_state:
4247 self.cluster.hv_state_static = self.new_hv_state
4248 if self.op.disk_state:
4249 self.cluster.disk_state_static = self.new_disk_state
4251 if self.op.candidate_pool_size is not None:
4252 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4253 # we need to update the pool size here, otherwise the save will fail
4254 _AdjustCandidatePool(self, [])
4256 if self.op.maintain_node_health is not None:
4257 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4258 feedback_fn("Note: CONFD was disabled at build time, node health"
4259 " maintenance is not useful (still enabling it)")
4260 self.cluster.maintain_node_health = self.op.maintain_node_health
4262 if self.op.prealloc_wipe_disks is not None:
4263 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4265 if self.op.add_uids is not None:
4266 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4268 if self.op.remove_uids is not None:
4269 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4271 if self.op.uid_pool is not None:
4272 self.cluster.uid_pool = self.op.uid_pool
4274 if self.op.default_iallocator is not None:
4275 self.cluster.default_iallocator = self.op.default_iallocator
4277 if self.op.reserved_lvs is not None:
4278 self.cluster.reserved_lvs = self.op.reserved_lvs
4280 if self.op.use_external_mip_script is not None:
4281 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4283 def helper_os(aname, mods, desc):
4285 lst = getattr(self.cluster, aname)
4286 for key, val in mods:
4287 if key == constants.DDM_ADD:
4289 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4292 elif key == constants.DDM_REMOVE:
4296 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4298 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4300 if self.op.hidden_os:
4301 helper_os("hidden_os", self.op.hidden_os, "hidden")
4303 if self.op.blacklisted_os:
4304 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4306 if self.op.master_netdev:
4307 master_params = self.cfg.GetMasterNetworkParameters()
4308 ems = self.cfg.GetUseExternalMipScript()
4309 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4310 self.cluster.master_netdev)
4311 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4313 result.Raise("Could not disable the master ip")
4314 feedback_fn("Changing master_netdev from %s to %s" %
4315 (master_params.netdev, self.op.master_netdev))
4316 self.cluster.master_netdev = self.op.master_netdev
4318 if self.op.master_netmask:
4319 master_params = self.cfg.GetMasterNetworkParameters()
4320 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4321 result = self.rpc.call_node_change_master_netmask(master_params.name,
4322 master_params.netmask,
4323 self.op.master_netmask,
4325 master_params.netdev)
4327 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4330 self.cluster.master_netmask = self.op.master_netmask
4332 self.cfg.Update(self.cluster, feedback_fn)
4334 if self.op.master_netdev:
4335 master_params = self.cfg.GetMasterNetworkParameters()
4336 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4337 self.op.master_netdev)
4338 ems = self.cfg.GetUseExternalMipScript()
4339 result = self.rpc.call_node_activate_master_ip(master_params.name,
4342 self.LogWarning("Could not re-enable the master ip on"
4343 " the master, please restart manually: %s",
4347 def _UploadHelper(lu, nodes, fname):
4348 """Helper for uploading a file and showing warnings.
4351 if os.path.exists(fname):
4352 result = lu.rpc.call_upload_file(nodes, fname)
4353 for to_node, to_result in result.items():
4354 msg = to_result.fail_msg
4356 msg = ("Copy of file %s to node %s failed: %s" %
4357 (fname, to_node, msg))
4358 lu.proc.LogWarning(msg)
4361 def _ComputeAncillaryFiles(cluster, redist):
4362 """Compute files external to Ganeti which need to be consistent.
4364 @type redist: boolean
4365 @param redist: Whether to include files which need to be redistributed
4368 # Compute files for all nodes
4370 constants.SSH_KNOWN_HOSTS_FILE,
4371 constants.CONFD_HMAC_KEY,
4372 constants.CLUSTER_DOMAIN_SECRET_FILE,
4373 constants.SPICE_CERT_FILE,
4374 constants.SPICE_CACERT_FILE,
4375 constants.RAPI_USERS_FILE,
4379 files_all.update(constants.ALL_CERT_FILES)
4380 files_all.update(ssconf.SimpleStore().GetFileList())
4382 # we need to ship at least the RAPI certificate
4383 files_all.add(constants.RAPI_CERT_FILE)
4385 if cluster.modify_etc_hosts:
4386 files_all.add(constants.ETC_HOSTS)
4388 if cluster.use_external_mip_script:
4389 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4391 # Files which are optional, these must:
4392 # - be present in one other category as well
4393 # - either exist or not exist on all nodes of that category (mc, vm all)
4395 constants.RAPI_USERS_FILE,
4398 # Files which should only be on master candidates
4402 files_mc.add(constants.CLUSTER_CONF_FILE)
4404 # Files which should only be on VM-capable nodes
4405 files_vm = set(filename
4406 for hv_name in cluster.enabled_hypervisors
4407 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4409 files_opt |= set(filename
4410 for hv_name in cluster.enabled_hypervisors
4411 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4413 # Filenames in each category must be unique
4414 all_files_set = files_all | files_mc | files_vm
4415 assert (len(all_files_set) ==
4416 sum(map(len, [files_all, files_mc, files_vm]))), \
4417 "Found file listed in more than one file list"
4419 # Optional files must be present in one other category
4420 assert all_files_set.issuperset(files_opt), \
4421 "Optional file not in a different required list"
4423 return (files_all, files_opt, files_mc, files_vm)
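# Illustrative restatement of the invariants asserted above, as a standalone
# predicate over arbitrary sets; not used anywhere in the module.
def _AncillaryFileInvariantsHoldSketch(files_all, files_opt, files_mc,
                                       files_vm):
  """True iff the categories are disjoint and optional files are covered."""
  categories = [files_all, files_mc, files_vm]
  union = set()
  for cat in categories:
    union |= set(cat)
  disjoint = len(union) == sum(len(cat) for cat in categories)
  return disjoint and union.issuperset(files_opt)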
4426 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4427 """Distribute additional files which are part of the cluster configuration.
4429 ConfigWriter takes care of distributing the config and ssconf files, but
4430 there are more files which should be distributed to all nodes. This function
4431 makes sure those are copied.
4433 @param lu: calling logical unit
4434 @param additional_nodes: list of nodes not in the config to distribute to
4435 @type additional_vm: boolean
4436 @param additional_vm: whether the additional nodes are vm-capable or not
4439 # Gather target nodes
4440 cluster = lu.cfg.GetClusterInfo()
4441 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4443 online_nodes = lu.cfg.GetOnlineNodeList()
4444 online_set = frozenset(online_nodes)
4445 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4447 if additional_nodes is not None:
4448 online_nodes.extend(additional_nodes)
4450 vm_nodes.extend(additional_nodes)
4452 # Never distribute to master node
4453 for nodelist in [online_nodes, vm_nodes]:
4454 if master_info.name in nodelist:
4455 nodelist.remove(master_info.name)
4458 (files_all, _, files_mc, files_vm) = \
4459 _ComputeAncillaryFiles(cluster, True)
4461 # Never re-distribute configuration file from here
4462 assert not (constants.CLUSTER_CONF_FILE in files_all or
4463 constants.CLUSTER_CONF_FILE in files_vm)
4464 assert not files_mc, "Master candidates not handled in this function"
4467 (online_nodes, files_all),
4468 (vm_nodes, files_vm),
4472 for (node_list, files) in filemap:
4474 _UploadHelper(lu, node_list, fname)
4477 class LUClusterRedistConf(NoHooksLU):
4478 """Force the redistribution of cluster configuration.
4480 This is a very simple LU.
4485 def ExpandNames(self):
4486 self.needed_locks = {
4487 locking.LEVEL_NODE: locking.ALL_SET,
4489 self.share_locks[locking.LEVEL_NODE] = 1
4491 def Exec(self, feedback_fn):
4492 """Redistribute the configuration.
4495 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4496 _RedistributeAncillaryFiles(self)
4499 class LUClusterActivateMasterIp(NoHooksLU):
4500 """Activate the master IP on the master node.
4503 def Exec(self, feedback_fn):
4504 """Activate the master IP.
4507 master_params = self.cfg.GetMasterNetworkParameters()
4508 ems = self.cfg.GetUseExternalMipScript()
4509 result = self.rpc.call_node_activate_master_ip(master_params.name,
4511 result.Raise("Could not activate the master IP")
4514 class LUClusterDeactivateMasterIp(NoHooksLU):
4515 """Deactivate the master IP on the master node.
4518 def Exec(self, feedback_fn):
4519 """Deactivate the master IP.
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 ems = self.cfg.GetUseExternalMipScript()
4524 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4526 result.Raise("Could not deactivate the master IP")
4529 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4530 """Sleep and poll for an instance's disk to sync.
4533 if not instance.disks or disks is not None and not disks:
4536 disks = _ExpandCheckDisks(instance, disks)
4539 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4541 node = instance.primary_node
4544 lu.cfg.SetDiskID(dev, node)
4546 # TODO: Convert to utils.Retry
4549 degr_retries = 10 # in seconds, as we sleep 1 second each time
4553 cumul_degraded = False
4554 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4555 msg = rstats.fail_msg
4557 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4560 raise errors.RemoteError("Can't contact node %s for mirror data,"
4561 " aborting." % node)
4564 rstats = rstats.payload
4566 for i, mstat in enumerate(rstats):
4568 lu.LogWarning("Can't compute data for node %s/%s",
4569 node, disks[i].iv_name)
4572 cumul_degraded = (cumul_degraded or
4573 (mstat.is_degraded and mstat.sync_percent is None))
4574 if mstat.sync_percent is not None:
4576 if mstat.estimated_time is not None:
4577 rem_time = ("%s remaining (estimated)" %
4578 utils.FormatSeconds(mstat.estimated_time))
4579 max_time = mstat.estimated_time
4581 rem_time = "no time estimate"
4582 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4583 (disks[i].iv_name, mstat.sync_percent, rem_time))
4585 # if we're done but degraded, let's do a few small retries, to
4586 # make sure we see a stable and not transient situation; therefore
4587 # we force restart of the loop
4588 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4589 logging.info("Degraded disks found, %d retries left", degr_retries)
4597 time.sleep(min(60, max_time))
4600 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4601 return not cumul_degraded
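# Simplified model of the polling loop above, with the RPC calls and logging
# stripped out so the control flow (including the degr_retries behaviour) is
# easier to follow.  poll_fn and sleep_fn are hypothetical injected callables;
# this sketch is not used by the module.
def _WaitForSyncSketch(poll_fn, sleep_fn, degr_retries=10):
  """poll_fn() -> (done, degraded); returns True when sync finished cleanly."""
  while True:
    (done, degraded) = poll_fn()
    if done and degraded and degr_retries > 0:
      # transiently degraded at the end: retry a few times before giving up
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(5)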
4604 def _BlockdevFind(lu, node, dev, instance):
4605 """Wrapper around call_blockdev_find to annotate diskparams.
4607 @param lu: A reference to the lu object
4608 @param node: The node to call out
4609 @param dev: The device to find
4610 @param instance: The instance object the device belongs to
4611 @returns The result of the rpc call
4614 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4615 return lu.rpc.call_blockdev_find(node, disk)
4618 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4619 """Wrapper around L{_CheckDiskConsistencyInner}.
4622 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4623 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4627 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4629 """Check that mirrors are not degraded.
4631 @attention: The device has to be annotated already.
4633 The ldisk parameter, if True, will change the test from the
4634 is_degraded attribute (which represents overall non-ok status for
4635 the device(s)) to the ldisk (representing the local storage status).
4638 lu.cfg.SetDiskID(dev, node)
4642 if on_primary or dev.AssembleOnSecondary():
4643 rstats = lu.rpc.call_blockdev_find(node, dev)
4644 msg = rstats.fail_msg
4646 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4648 elif not rstats.payload:
4649 lu.LogWarning("Can't find disk on node %s", node)
4653 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4655 result = result and not rstats.payload.is_degraded
4658 for child in dev.children:
4659 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4665 class LUOobCommand(NoHooksLU):
4666 """Logical unit for OOB handling.
4670 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4672 def ExpandNames(self):
4673 """Gather locks we need.
4676 if self.op.node_names:
4677 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4678 lock_names = self.op.node_names
4680 lock_names = locking.ALL_SET
4682 self.needed_locks = {
4683 locking.LEVEL_NODE: lock_names,
4686 def CheckPrereq(self):
4687 """Check prerequisites.
4690 - the node exists in the configuration
4693 Any errors are signaled by raising errors.OpPrereqError.
4697 self.master_node = self.cfg.GetMasterNode()
4699 assert self.op.power_delay >= 0.0
4701 if self.op.node_names:
4702 if (self.op.command in self._SKIP_MASTER and
4703 self.master_node in self.op.node_names):
4704 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4705 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4707 if master_oob_handler:
4708 additional_text = ("run '%s %s %s' if you want to operate on the"
4709 " master regardless") % (master_oob_handler,
4713 additional_text = "it does not support out-of-band operations"
4715 raise errors.OpPrereqError(("Operating on the master node %s is not"
4716 " allowed for %s; %s") %
4717 (self.master_node, self.op.command,
4718 additional_text), errors.ECODE_INVAL)
4720 self.op.node_names = self.cfg.GetNodeList()
4721 if self.op.command in self._SKIP_MASTER:
4722 self.op.node_names.remove(self.master_node)
4724 if self.op.command in self._SKIP_MASTER:
4725 assert self.master_node not in self.op.node_names
4727 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4729 raise errors.OpPrereqError("Node %s not found" % node_name,
4732 self.nodes.append(node)
4734 if (not self.op.ignore_status and
4735 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4736 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4737 " not marked offline") % node_name,
4740 def Exec(self, feedback_fn):
4741 """Execute OOB and return result if we expect any.
4744 master_node = self.master_node
4747 for idx, node in enumerate(utils.NiceSort(self.nodes,
4748 key=lambda node: node.name)):
4749 node_entry = [(constants.RS_NORMAL, node.name)]
4750 ret.append(node_entry)
4752 oob_program = _SupportsOob(self.cfg, node)
4755 node_entry.append((constants.RS_UNAVAIL, None))
4758 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4759 self.op.command, oob_program, node.name)
4760 result = self.rpc.call_run_oob(master_node, oob_program,
4761 self.op.command, node.name,
4765 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4766 node.name, result.fail_msg)
4767 node_entry.append((constants.RS_NODATA, None))
4770 self._CheckPayload(result)
4771 except errors.OpExecError, err:
4772 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4774 node_entry.append((constants.RS_NODATA, None))
4776 if self.op.command == constants.OOB_HEALTH:
4777 # For health we should log important events
4778 for item, status in result.payload:
4779 if status in [constants.OOB_STATUS_WARNING,
4780 constants.OOB_STATUS_CRITICAL]:
4781 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4782 item, node.name, status)
4784 if self.op.command == constants.OOB_POWER_ON:
4786 elif self.op.command == constants.OOB_POWER_OFF:
4787 node.powered = False
4788 elif self.op.command == constants.OOB_POWER_STATUS:
4789 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4790 if powered != node.powered:
4791 logging.warning(("Recorded power state (%s) of node '%s' does not"
4792 " match actual power state (%s)"), node.powered,
4795 # For configuration changing commands we should update the node
4796 if self.op.command in (constants.OOB_POWER_ON,
4797 constants.OOB_POWER_OFF):
4798 self.cfg.Update(node, feedback_fn)
4800 node_entry.append((constants.RS_NORMAL, result.payload))
4802 if (self.op.command == constants.OOB_POWER_ON and
4803 idx < len(self.nodes) - 1):
4804 time.sleep(self.op.power_delay)
4808 def _CheckPayload(self, result):
4809 """Checks if the payload is valid.
4811 @param result: RPC result
4812 @raises errors.OpExecError: If payload is not valid
4816 if self.op.command == constants.OOB_HEALTH:
4817 if not isinstance(result.payload, list):
4818 errs.append("command 'health' is expected to return a list but got %s" %
4819 type(result.payload))
4821 for item, status in result.payload:
4822 if status not in constants.OOB_STATUSES:
4823 errs.append("health item '%s' has invalid status '%s'" %
4826 if self.op.command == constants.OOB_POWER_STATUS:
4827 if not isinstance(result.payload, dict):
4828 errs.append("power-status is expected to return a dict but got %s" %
4829 type(result.payload))
4831 if self.op.command in [
4832 constants.OOB_POWER_ON,
4833 constants.OOB_POWER_OFF,
4834 constants.OOB_POWER_CYCLE,
4836 if result.payload is not None:
4837 errs.append("%s is expected to not return payload but got '%s'" %
4838 (self.op.command, result.payload))
4841 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4842 utils.CommaJoin(errs))
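# For reference, a boolean restatement of the payload shapes _CheckPayload
# above accepts for each out-of-band command; illustrative only and not used
# by the LU.
def _OobPayloadLooksValidSketch(command, payload):
  """Returns True if the payload shape matches the expectations above."""
  if command == constants.OOB_HEALTH:
    return (isinstance(payload, list) and
            all(status in constants.OOB_STATUSES
                for (_, status) in payload))
  if command == constants.OOB_POWER_STATUS:
    return isinstance(payload, dict)
  if command in (constants.OOB_POWER_ON, constants.OOB_POWER_OFF,
                 constants.OOB_POWER_CYCLE):
    return payload is None
  return True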
4845 class _OsQuery(_QueryBase):
4846 FIELDS = query.OS_FIELDS
4848 def ExpandNames(self, lu):
4849 # Lock all nodes in shared mode
4850 # Temporary removal of locks, should be reverted later
4851 # TODO: reintroduce locks when they are lighter-weight
4852 lu.needed_locks = {}
4853 #self.share_locks[locking.LEVEL_NODE] = 1
4854 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4856 # The following variables interact with _QueryBase._GetNames
4858 self.wanted = self.names
4860 self.wanted = locking.ALL_SET
4862 self.do_locking = self.use_locking
4864 def DeclareLocks(self, lu, level):
4868 def _DiagnoseByOS(rlist):
4869 """Remaps a per-node return list into an a per-os per-node dictionary
4871 @param rlist: a map with node names as keys and OS objects as values
4874 @return: a dictionary with osnames as keys and as value another
4875 map, with nodes as keys and tuples of (path, status, diagnose,
4876 variants, parameters, api_versions) as values, eg::
4878 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4879 (/srv/..., False, "invalid api")],
4880 "node2": [(/srv/..., True, "", [], [])]}
4885 # we build here the list of nodes that didn't fail the RPC (at RPC
4886 # level), so that nodes with a non-responding node daemon don't
4887 # make all OSes invalid
4888 good_nodes = [node_name for node_name in rlist
4889 if not rlist[node_name].fail_msg]
4890 for node_name, nr in rlist.items():
4891 if nr.fail_msg or not nr.payload:
4893 for (name, path, status, diagnose, variants,
4894 params, api_versions) in nr.payload:
4895 if name not in all_os:
4896 # build a list of nodes for this os containing empty lists
4897 # for each node in node_list
4899 for nname in good_nodes:
4900 all_os[name][nname] = []
4901 # convert params from [name, help] to (name, help)
4902 params = [tuple(v) for v in params]
4903 all_os[name][node_name].append((path, status, diagnose,
4904 variants, params, api_versions))
4907 def _GetQueryData(self, lu):
4908 """Computes the list of nodes and their attributes.
4911 # Locking is not used
4912 assert not (compat.any(lu.glm.is_owned(level)
4913 for level in locking.LEVELS
4914 if level != locking.LEVEL_CLUSTER) or
4915 self.do_locking or self.use_locking)
4917 valid_nodes = [node.name
4918 for node in lu.cfg.GetAllNodesInfo().values()
4919 if not node.offline and node.vm_capable]
4920 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4921 cluster = lu.cfg.GetClusterInfo()
4925 for (os_name, os_data) in pol.items():
4926 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4927 hidden=(os_name in cluster.hidden_os),
4928 blacklisted=(os_name in cluster.blacklisted_os))
4932 api_versions = set()
4934 for idx, osl in enumerate(os_data.values()):
4935 info.valid = bool(info.valid and osl and osl[0][1])
4939 (node_variants, node_params, node_api) = osl[0][3:6]
4942 variants.update(node_variants)
4943 parameters.update(node_params)
4944 api_versions.update(node_api)
4946 # Filter out inconsistent values
4947 variants.intersection_update(node_variants)
4948 parameters.intersection_update(node_params)
4949 api_versions.intersection_update(node_api)
4951 info.variants = list(variants)
4952 info.parameters = list(parameters)
4953 info.api_versions = list(api_versions)
4955 data[os_name] = info
4957 # Prepare data in requested order
4958 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4962 class LUOsDiagnose(NoHooksLU):
4963 """Logical unit for OS diagnose/query.
4969 def _BuildFilter(fields, names):
4970 """Builds a filter for querying OSes.
4973 name_filter = qlang.MakeSimpleFilter("name", names)
4975 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4976 # respective field is not requested
4977 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4978 for fname in ["hidden", "blacklisted"]
4979 if fname not in fields]
4980 if "valid" not in fields:
4981 status_filter.append([qlang.OP_TRUE, "valid"])
4984 status_filter.insert(0, qlang.OP_AND)
4986 status_filter = None
4988 if name_filter and status_filter:
4989 return [qlang.OP_AND, name_filter, status_filter]
4993 return status_filter
4995 def CheckArguments(self):
4996 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4997 self.op.output_fields, False)
4999 def ExpandNames(self):
5000 self.oq.ExpandNames(self)
5002 def Exec(self, feedback_fn):
5003 return self.oq.OldStyleQuery(self)
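# Illustrative only: the qlang filter that _BuildFilter above produces for
# output_fields=["name"] and names=["debian-image"], assuming the standard
# qlang operators used elsewhere in this file.  The function is a sketch and
# is not called anywhere.
def _ExampleOsFilterSketch():
  """Returns the combined name/status filter for a sample OS query."""
  name_filter = qlang.MakeSimpleFilter("name", ["debian-image"])
  status_filter = [qlang.OP_AND,
                   [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
                   [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
                   [qlang.OP_TRUE, "valid"]]
  return [qlang.OP_AND, name_filter, status_filter]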
5006 class _ExtStorageQuery(_QueryBase):
5007 FIELDS = query.EXTSTORAGE_FIELDS
5009 def ExpandNames(self, lu):
5010 # Lock all nodes in shared mode
5011 # Temporary removal of locks, should be reverted later
5012 # TODO: reintroduce locks when they are lighter-weight
5013 lu.needed_locks = {}
5014 #self.share_locks[locking.LEVEL_NODE] = 1
5015 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5017 # The following variables interact with _QueryBase._GetNames
5019 self.wanted = self.names
5021 self.wanted = locking.ALL_SET
5023 self.do_locking = self.use_locking
5025 def DeclareLocks(self, lu, level):
5029 def _DiagnoseByProvider(rlist):
5030 """Remaps a per-node return list into an a per-provider per-node dictionary
5032 @param rlist: a map with node names as keys and ExtStorage objects as values
5035 @return: a dictionary with extstorage providers as keys and as
5036 value another map, with nodes as keys and tuples of
5037 (path, status, diagnose, parameters) as values, eg::
5039 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5040 "node2": [(/srv/..., False, "missing file")]
5041 "node3": [(/srv/..., True, "", [])]
5046 # we build here the list of nodes that didn't fail the RPC (at RPC
5047 # level), so that nodes with a non-responding node daemon don't
5048 # make all OSes invalid
5049 good_nodes = [node_name for node_name in rlist
5050 if not rlist[node_name].fail_msg]
5051 for node_name, nr in rlist.items():
5052 if nr.fail_msg or not nr.payload:
5054 for (name, path, status, diagnose, params) in nr.payload:
5055 if name not in all_es:
5056 # build a list of nodes for this os containing empty lists
5057 # for each node in node_list
5059 for nname in good_nodes:
5060 all_es[name][nname] = []
5061 # convert params from [name, help] to (name, help)
5062 params = [tuple(v) for v in params]
5063 all_es[name][node_name].append((path, status, diagnose, params))
5066 def _GetQueryData(self, lu):
5067 """Computes the list of nodes and their attributes.
5070 # Locking is not used
5071 assert not (compat.any(lu.glm.is_owned(level)
5072 for level in locking.LEVELS
5073 if level != locking.LEVEL_CLUSTER) or
5074 self.do_locking or self.use_locking)
5076 valid_nodes = [node.name
5077 for node in lu.cfg.GetAllNodesInfo().values()
5078 if not node.offline and node.vm_capable]
5079 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5083 nodegroup_list = lu.cfg.GetNodeGroupList()
5085 for (es_name, es_data) in pol.items():
5086 # For every provider compute the nodegroup validity.
5087 # To do this we need to check the validity of each node in es_data
5088 # and then construct the corresponding nodegroup dict:
5089 # { nodegroup1: status
5090 # nodegroup2: status
5093 for nodegroup in nodegroup_list:
5094 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5096 nodegroup_nodes = ndgrp.members
5097 nodegroup_name = ndgrp.name
5100 for node in nodegroup_nodes:
5101 if node in valid_nodes:
5102 if es_data[node] != []:
5103 node_status = es_data[node][0][1]
5104 node_statuses.append(node_status)
5106 node_statuses.append(False)
5108 if False in node_statuses:
5109 ndgrp_data[nodegroup_name] = False
5111 ndgrp_data[nodegroup_name] = True
5113 # Compute the provider's parameters
5115 for idx, esl in enumerate(es_data.values()):
5116 valid = bool(esl and esl[0][1])
5120 node_params = esl[0][3]
5123 parameters.update(node_params)
5125 # Filter out inconsistent values
5126 parameters.intersection_update(node_params)
5128 params = list(parameters)
5130 # Now fill all the info for this provider
5131 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5132 nodegroup_status=ndgrp_data,
5135 data[es_name] = info
5137 # Prepare data in requested order
5138 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5142 class LUExtStorageDiagnose(NoHooksLU):
5143 """Logical unit for ExtStorage diagnose/query.
5148 def CheckArguments(self):
5149 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5150 self.op.output_fields, False)
5152 def ExpandNames(self):
5153 self.eq.ExpandNames(self)
5155 def Exec(self, feedback_fn):
5156 return self.eq.OldStyleQuery(self)
5159 class LUNodeRemove(LogicalUnit):
5160 """Logical unit for removing a node.
5163 HPATH = "node-remove"
5164 HTYPE = constants.HTYPE_NODE
5166 def BuildHooksEnv(self):
5171 "OP_TARGET": self.op.node_name,
5172 "NODE_NAME": self.op.node_name,
5175 def BuildHooksNodes(self):
5176 """Build hooks nodes.
5178 This doesn't run on the target node in the pre phase as a failed
5179 node would then be impossible to remove.
5182 all_nodes = self.cfg.GetNodeList()
5184 all_nodes.remove(self.op.node_name)
5187 return (all_nodes, all_nodes)
5189 def CheckPrereq(self):
5190 """Check prerequisites.
5193 - the node exists in the configuration
5194 - it does not have primary or secondary instances
5195 - it's not the master
5197 Any errors are signaled by raising errors.OpPrereqError.
5200 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5201 node = self.cfg.GetNodeInfo(self.op.node_name)
5202 assert node is not None
5204 masternode = self.cfg.GetMasterNode()
5205 if node.name == masternode:
5206 raise errors.OpPrereqError("Node is the master node, failover to another"
5207 " node is required", errors.ECODE_INVAL)
5209 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5210 if node.name in instance.all_nodes:
5211 raise errors.OpPrereqError("Instance %s is still running on the node,"
5212 " please remove first" % instance_name,
5214 self.op.node_name = node.name
5217 def Exec(self, feedback_fn):
5218 """Removes the node from the cluster.
5222 logging.info("Stopping the node daemon and removing configs from node %s",
5225 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5227 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5230 # Promote nodes to master candidate as needed
5231 _AdjustCandidatePool(self, exceptions=[node.name])
5232 self.context.RemoveNode(node.name)
5234 # Run post hooks on the node before it's removed
5235 _RunPostHook(self, node.name)
5237 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5238 msg = result.fail_msg
5240 self.LogWarning("Errors encountered on the remote node while leaving"
5241 " the cluster: %s", msg)
5243 # Remove node from our /etc/hosts
5244 if self.cfg.GetClusterInfo().modify_etc_hosts:
5245 master_node = self.cfg.GetMasterNode()
5246 result = self.rpc.call_etc_hosts_modify(master_node,
5247 constants.ETC_HOSTS_REMOVE,
5249 result.Raise("Can't update hosts file with new host data")
5250 _RedistributeAncillaryFiles(self)
5253 class _NodeQuery(_QueryBase):
5254 FIELDS = query.NODE_FIELDS
5256 def ExpandNames(self, lu):
5257 lu.needed_locks = {}
5258 lu.share_locks = _ShareAll()
5261 self.wanted = _GetWantedNodes(lu, self.names)
5263 self.wanted = locking.ALL_SET
5265 self.do_locking = (self.use_locking and
5266 query.NQ_LIVE in self.requested_data)
5269 # If any non-static field is requested we need to lock the nodes
5270 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5272 def DeclareLocks(self, lu, level):
5275 def _GetQueryData(self, lu):
5276 """Computes the list of nodes and their attributes.
5279 all_info = lu.cfg.GetAllNodesInfo()
5281 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5283 # Gather data as requested
5284 if query.NQ_LIVE in self.requested_data:
5285 # filter out non-vm_capable nodes
5286 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5288 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5289 [lu.cfg.GetHypervisorType()])
5290 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5291 for (name, nresult) in node_data.items()
5292 if not nresult.fail_msg and nresult.payload)
5296 if query.NQ_INST in self.requested_data:
5297 node_to_primary = dict([(name, set()) for name in nodenames])
5298 node_to_secondary = dict([(name, set()) for name in nodenames])
5300 inst_data = lu.cfg.GetAllInstancesInfo()
5302 for inst in inst_data.values():
5303 if inst.primary_node in node_to_primary:
5304 node_to_primary[inst.primary_node].add(inst.name)
5305 for secnode in inst.secondary_nodes:
5306 if secnode in node_to_secondary:
5307 node_to_secondary[secnode].add(inst.name)
5309 node_to_primary = None
5310 node_to_secondary = None
5312 if query.NQ_OOB in self.requested_data:
5313 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5314 for name, node in all_info.iteritems())
5318 if query.NQ_GROUP in self.requested_data:
5319 groups = lu.cfg.GetAllNodeGroupsInfo()
5323 return query.NodeQueryData([all_info[name] for name in nodenames],
5324 live_data, lu.cfg.GetMasterNode(),
5325 node_to_primary, node_to_secondary, groups,
5326 oob_support, lu.cfg.GetClusterInfo())
5329 class LUNodeQuery(NoHooksLU):
5330 """Logical unit for querying nodes.
5333 # pylint: disable=W0142
5336 def CheckArguments(self):
5337 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5338 self.op.output_fields, self.op.use_locking)
5340 def ExpandNames(self):
5341 self.nq.ExpandNames(self)
5343 def DeclareLocks(self, level):
5344 self.nq.DeclareLocks(self, level)
5346 def Exec(self, feedback_fn):
5347 return self.nq.OldStyleQuery(self)
5350 class LUNodeQueryvols(NoHooksLU):
5351 """Logical unit for getting volumes on node(s).
5355 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5356 _FIELDS_STATIC = utils.FieldSet("node")
5358 def CheckArguments(self):
5359 _CheckOutputFields(static=self._FIELDS_STATIC,
5360 dynamic=self._FIELDS_DYNAMIC,
5361 selected=self.op.output_fields)
5363 def ExpandNames(self):
5364 self.share_locks = _ShareAll()
5365 self.needed_locks = {}
5367 if not self.op.nodes:
5368 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5370 self.needed_locks[locking.LEVEL_NODE] = \
5371 _GetWantedNodes(self, self.op.nodes)
5373 def Exec(self, feedback_fn):
5374 """Computes the list of nodes and their attributes.
5377 nodenames = self.owned_locks(locking.LEVEL_NODE)
5378 volumes = self.rpc.call_node_volumes(nodenames)
5380 ilist = self.cfg.GetAllInstancesInfo()
5381 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5384 for node in nodenames:
5385 nresult = volumes[node]
5388 msg = nresult.fail_msg
5390 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5393 node_vols = sorted(nresult.payload,
5394 key=operator.itemgetter("dev"))
5396 for vol in node_vols:
5398 for field in self.op.output_fields:
5401 elif field == "phys":
5405 elif field == "name":
5407 elif field == "size":
5408 val = int(float(vol["size"]))
5409 elif field == "instance":
5410 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5412 raise errors.ParameterError(field)
5413 node_output.append(str(val))
5415 output.append(node_output)
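# Standalone restatement of the per-volume row construction above; handy for
# seeing the expected field values without reading the loop.  "phys" is
# assumed to map to the payload's "dev" entry, matching the sort key used
# above.  Sketch only, not used by the LU.
def _FormatVolumeRowSketch(node, vol, vol2inst, output_fields):
  """Builds one row of strings for a single logical volume."""
  row = []
  for field in output_fields:
    if field == "node":
      val = node
    elif field == "phys":
      val = vol["dev"]
    elif field == "vg":
      val = vol["vg"]
    elif field == "name":
      val = vol["name"]
    elif field == "size":
      val = int(float(vol["size"]))
    elif field == "instance":
      val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
    else:
      raise errors.ParameterError(field)
    row.append(str(val))
  return row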
5420 class LUNodeQueryStorage(NoHooksLU):
5421 """Logical unit for getting information on storage units on node(s).
5424 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5427 def CheckArguments(self):
5428 _CheckOutputFields(static=self._FIELDS_STATIC,
5429 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5430 selected=self.op.output_fields)
5432 def ExpandNames(self):
5433 self.share_locks = _ShareAll()
5434 self.needed_locks = {}
5437 self.needed_locks[locking.LEVEL_NODE] = \
5438 _GetWantedNodes(self, self.op.nodes)
5440 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5442 def Exec(self, feedback_fn):
5443 """Computes the list of nodes and their attributes.
5446 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5448 # Always get name to sort by
5449 if constants.SF_NAME in self.op.output_fields:
5450 fields = self.op.output_fields[:]
5452 fields = [constants.SF_NAME] + self.op.output_fields
5454 # Never ask for node or type as it's only known to the LU
5455 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5456 while extra in fields:
5457 fields.remove(extra)
5459 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5460 name_idx = field_idx[constants.SF_NAME]
5462 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5463 data = self.rpc.call_storage_list(self.nodes,
5464 self.op.storage_type, st_args,
5465 self.op.name, fields)
5469 for node in utils.NiceSort(self.nodes):
5470 nresult = data[node]
5474 msg = nresult.fail_msg
5476 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5479 rows = dict([(row[name_idx], row) for row in nresult.payload])
5481 for name in utils.NiceSort(rows.keys()):
5486 for field in self.op.output_fields:
5487 if field == constants.SF_NODE:
5489 elif field == constants.SF_TYPE:
5490 val = self.op.storage_type
5491 elif field in field_idx:
5492 val = row[field_idx[field]]
5494 raise errors.ParameterError(field)
5503 class _InstanceQuery(_QueryBase):
5504 FIELDS = query.INSTANCE_FIELDS
5506 def ExpandNames(self, lu):
5507 lu.needed_locks = {}
5508 lu.share_locks = _ShareAll()
5511 self.wanted = _GetWantedInstances(lu, self.names)
5513 self.wanted = locking.ALL_SET
5515 self.do_locking = (self.use_locking and
5516 query.IQ_LIVE in self.requested_data)
5518 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5519 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5520 lu.needed_locks[locking.LEVEL_NODE] = []
5521 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5523 self.do_grouplocks = (self.do_locking and
5524 query.IQ_NODES in self.requested_data)
5526 def DeclareLocks(self, lu, level):
5528 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5529 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5531 # Lock all groups used by instances optimistically; this requires going
5532 # via the node before it's locked, requiring verification later on
5533 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5535 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5536 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5537 elif level == locking.LEVEL_NODE:
5538 lu._LockInstancesNodes() # pylint: disable=W0212
5541 def _CheckGroupLocks(lu):
5542 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5543 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5545 # Check if node groups for locked instances are still correct
5546 for instance_name in owned_instances:
5547 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5549 def _GetQueryData(self, lu):
5550 """Computes the list of instances and their attributes.
5553 if self.do_grouplocks:
5554 self._CheckGroupLocks(lu)
5556 cluster = lu.cfg.GetClusterInfo()
5557 all_info = lu.cfg.GetAllInstancesInfo()
5559 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5561 instance_list = [all_info[name] for name in instance_names]
5562 nodes = frozenset(itertools.chain(*(inst.all_nodes
5563 for inst in instance_list)))
5564 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5567 wrongnode_inst = set()
5569 # Gather data as requested
5570 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5572 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5574 result = node_data[name]
5576 # offline nodes will be in both lists
5577 assert result.fail_msg
5578 offline_nodes.append(name)
5580 bad_nodes.append(name)
5581 elif result.payload:
5582 for inst in result.payload:
5583 if inst in all_info:
5584 if all_info[inst].primary_node == name:
5585 live_data.update(result.payload)
5587 wrongnode_inst.add(inst)
5589 # orphan instance; we don't list it here as we don't
5590 # handle this case yet in the output of instance listing
5591 logging.warning("Orphan instance '%s' found on node %s",
5593 # else no instance is alive
5597 if query.IQ_DISKUSAGE in self.requested_data:
5598 disk_usage = dict((inst.name,
5599 _ComputeDiskSize(inst.disk_template,
5600 [{constants.IDISK_SIZE: disk.size}
5601 for disk in inst.disks]))
5602 for inst in instance_list)
5606 if query.IQ_CONSOLE in self.requested_data:
5608 for inst in instance_list:
5609 if inst.name in live_data:
5610 # Instance is running
5611 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5613 consinfo[inst.name] = None
5614 assert set(consinfo.keys()) == set(instance_names)
5618 if query.IQ_NODES in self.requested_data:
5619 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5621 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5622 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5623 for uuid in set(map(operator.attrgetter("group"),
5629 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5630 disk_usage, offline_nodes, bad_nodes,
5631 live_data, wrongnode_inst, consinfo,
5635 class LUQuery(NoHooksLU):
5636 """Query for resources/items of a certain kind.
5639 # pylint: disable=W0142
5642 def CheckArguments(self):
5643 qcls = _GetQueryImplementation(self.op.what)
5645 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5647 def ExpandNames(self):
5648 self.impl.ExpandNames(self)
5650 def DeclareLocks(self, level):
5651 self.impl.DeclareLocks(self, level)
5653 def Exec(self, feedback_fn):
5654 return self.impl.NewStyleQuery(self)
5657 class LUQueryFields(NoHooksLU):
5658 """Query for resources/items of a certain kind.
5661 # pylint: disable=W0142
5664 def CheckArguments(self):
5665 self.qcls = _GetQueryImplementation(self.op.what)
5667 def ExpandNames(self):
5668 self.needed_locks = {}
5670 def Exec(self, feedback_fn):
5671 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5674 class LUNodeModifyStorage(NoHooksLU):
5675 """Logical unit for modifying a storage volume on a node.
5680 def CheckArguments(self):
5681 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5683 storage_type = self.op.storage_type
5686 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5688 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5689 " modified" % storage_type,
5692 diff = set(self.op.changes.keys()) - modifiable
5694 raise errors.OpPrereqError("The following fields can not be modified for"
5695 " storage units of type '%s': %r" %
5696 (storage_type, list(diff)),
5699 def ExpandNames(self):
5700 self.needed_locks = {
5701 locking.LEVEL_NODE: self.op.node_name,
5704 def Exec(self, feedback_fn):
5705 """Computes the list of nodes and their attributes.
5708 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5709 result = self.rpc.call_storage_modify(self.op.node_name,
5710 self.op.storage_type, st_args,
5711 self.op.name, self.op.changes)
5712 result.Raise("Failed to modify storage unit '%s' on %s" %
5713 (self.op.name, self.op.node_name))
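# Illustrative example (the exact field constants are assumptions based on the
# constants module): for an LVM physical volume the typical modifiable field
# is the allocatable flag, e.g.
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sdb1",
#                               changes={constants.SF_ALLOCATABLE: False})
# CheckArguments() rejects any field not listed in MODIFIABLE_STORAGE_FIELDS.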
5716 class LUNodeAdd(LogicalUnit):
5717 """Logical unit for adding node to the cluster.
5721 HTYPE = constants.HTYPE_NODE
5722 _NFLAGS = ["master_capable", "vm_capable"]
5724 def CheckArguments(self):
5725 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5726 # validate/normalize the node name
5727 self.hostname = netutils.GetHostname(name=self.op.node_name,
5728 family=self.primary_ip_family)
5729 self.op.node_name = self.hostname.name
5731 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5732 raise errors.OpPrereqError("Cannot readd the master node",
5735 if self.op.readd and self.op.group:
5736 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5737 " being readded", errors.ECODE_INVAL)
5739 def BuildHooksEnv(self):
5742 This will run on all nodes before, and on all nodes + the new node after.
5746 "OP_TARGET": self.op.node_name,
5747 "NODE_NAME": self.op.node_name,
5748 "NODE_PIP": self.op.primary_ip,
5749 "NODE_SIP": self.op.secondary_ip,
5750 "MASTER_CAPABLE": str(self.op.master_capable),
5751 "VM_CAPABLE": str(self.op.vm_capable),
5754 def BuildHooksNodes(self):
5755 """Build hooks nodes.
5758 # Exclude added node
5759 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5760 post_nodes = pre_nodes + [self.op.node_name, ]
5762 return (pre_nodes, post_nodes)
5764 def CheckPrereq(self):
5765 """Check prerequisites.
5768 - the new node is not already in the config
5770 - its parameters (single/dual homed) match the cluster
5772 Any errors are signaled by raising errors.OpPrereqError.
5776 hostname = self.hostname
5777 node = hostname.name
5778 primary_ip = self.op.primary_ip = hostname.ip
5779 if self.op.secondary_ip is None:
5780 if self.primary_ip_family == netutils.IP6Address.family:
5781 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5782 " IPv4 address must be given as secondary",
5784 self.op.secondary_ip = primary_ip
5786 secondary_ip = self.op.secondary_ip
5787 if not netutils.IP4Address.IsValid(secondary_ip):
5788 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5789 " address" % secondary_ip, errors.ECODE_INVAL)
5791 node_list = cfg.GetNodeList()
5792 if not self.op.readd and node in node_list:
5793 raise errors.OpPrereqError("Node %s is already in the configuration" %
5794 node, errors.ECODE_EXISTS)
5795 elif self.op.readd and node not in node_list:
5796 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5799 self.changed_primary_ip = False
5801 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5802 if self.op.readd and node == existing_node_name:
5803 if existing_node.secondary_ip != secondary_ip:
5804 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5805 " address configuration as before",
5807 if existing_node.primary_ip != primary_ip:
5808 self.changed_primary_ip = True
5812 if (existing_node.primary_ip == primary_ip or
5813 existing_node.secondary_ip == primary_ip or
5814 existing_node.primary_ip == secondary_ip or
5815 existing_node.secondary_ip == secondary_ip):
5816 raise errors.OpPrereqError("New node ip address(es) conflict with"
5817 " existing node %s" % existing_node.name,
5818 errors.ECODE_NOTUNIQUE)
5820 # After this 'if' block, None is no longer a valid value for the
5821 # _capable op attributes
5823 old_node = self.cfg.GetNodeInfo(node)
5824 assert old_node is not None, "Can't retrieve locked node %s" % node
5825 for attr in self._NFLAGS:
5826 if getattr(self.op, attr) is None:
5827 setattr(self.op, attr, getattr(old_node, attr))
5829 for attr in self._NFLAGS:
5830 if getattr(self.op, attr) is None:
5831 setattr(self.op, attr, True)
5833 if self.op.readd and not self.op.vm_capable:
5834 pri, sec = cfg.GetNodeInstances(node)
5836 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5837 " flag set to false, but it already holds"
5838 " instances" % node,
5841 # check that the type of the node (single versus dual homed) is the
5842 # same as for the master
5843 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5844 master_singlehomed = myself.secondary_ip == myself.primary_ip
5845 newbie_singlehomed = secondary_ip == primary_ip
5846 if master_singlehomed != newbie_singlehomed:
5847 if master_singlehomed:
5848 raise errors.OpPrereqError("The master has no secondary ip but the"
5849 " new node has one",
5852 raise errors.OpPrereqError("The master has a secondary ip but the"
5853 " new node doesn't have one",
5856 # checks reachability
5857 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5858 raise errors.OpPrereqError("Node not reachable by ping",
5859 errors.ECODE_ENVIRON)
5861 if not newbie_singlehomed:
5862 # check reachability from my secondary ip to newbie's secondary ip
5863 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5864 source=myself.secondary_ip):
5865 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5866 " based ping to node daemon port",
5867 errors.ECODE_ENVIRON)
5874 if self.op.master_capable:
5875 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5877 self.master_candidate = False
5880 self.new_node = old_node
5882 node_group = cfg.LookupNodeGroup(self.op.group)
5883 self.new_node = objects.Node(name=node,
5884 primary_ip=primary_ip,
5885 secondary_ip=secondary_ip,
5886 master_candidate=self.master_candidate,
5887 offline=False, drained=False,
5890 if self.op.ndparams:
5891 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5893 if self.op.hv_state:
5894 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5896 if self.op.disk_state:
5897 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5899 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5900 # it a property on the base class.
5901 result = rpc.DnsOnlyRunner().call_version([node])[node]
5902 result.Raise("Can't get version information from node %s" % node)
5903 if constants.PROTOCOL_VERSION == result.payload:
5904 logging.info("Communication to node %s fine, sw version %s match",
5905 node, result.payload)
5907 raise errors.OpPrereqError("Version mismatch master version %s,"
5908 " node version %s" %
5909 (constants.PROTOCOL_VERSION, result.payload),
5910 errors.ECODE_ENVIRON)
5912 def Exec(self, feedback_fn):
5913 """Adds the new node to the cluster.
5916 new_node = self.new_node
5917 node = new_node.name
5919 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5922 # We are adding a new node, so we assume it's powered
5923 new_node.powered = True
5925 # for re-adds, reset the offline/drained/master-candidate flags;
5926 # we need to reset here, otherwise offline would prevent RPC calls
5927 # later in the procedure; this also means that if the re-add
5928 # fails, we are left with a non-offlined, broken node
5930 new_node.drained = new_node.offline = False # pylint: disable=W0201
5931 self.LogInfo("Readding a node, the offline/drained flags were reset")
5932 # if we demote the node, we do cleanup later in the procedure
5933 new_node.master_candidate = self.master_candidate
5934 if self.changed_primary_ip:
5935 new_node.primary_ip = self.op.primary_ip
5937 # copy the master/vm_capable flags
5938 for attr in self._NFLAGS:
5939 setattr(new_node, attr, getattr(self.op, attr))
5941 # notify the user about any possible mc promotion
5942 if new_node.master_candidate:
5943 self.LogInfo("Node will be a master candidate")
5945 if self.op.ndparams:
5946 new_node.ndparams = self.op.ndparams
5948 new_node.ndparams = {}
5950 if self.op.hv_state:
5951 new_node.hv_state_static = self.new_hv_state
5953 if self.op.disk_state:
5954 new_node.disk_state_static = self.new_disk_state
5956 # Add node to our /etc/hosts, and add key to known_hosts
5957 if self.cfg.GetClusterInfo().modify_etc_hosts:
5958 master_node = self.cfg.GetMasterNode()
5959 result = self.rpc.call_etc_hosts_modify(master_node,
5960 constants.ETC_HOSTS_ADD,
5963 result.Raise("Can't update hosts file with new host data")
5965 if new_node.secondary_ip != new_node.primary_ip:
5966 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5969 node_verify_list = [self.cfg.GetMasterNode()]
5970 node_verify_param = {
5971 constants.NV_NODELIST: ([node], {}),
5972 # TODO: do a node-net-test as well?
5975 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5976 self.cfg.GetClusterName())
5977 for verifier in node_verify_list:
5978 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5979 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5981 for failed in nl_payload:
5982 feedback_fn("ssh/hostname verification failed"
5983 " (checking from %s): %s" %
5984 (verifier, nl_payload[failed]))
5985 raise errors.OpExecError("ssh/hostname verification failed")
5988 _RedistributeAncillaryFiles(self)
5989 self.context.ReaddNode(new_node)
5990 # make sure we redistribute the config
5991 self.cfg.Update(new_node, feedback_fn)
5992 # and make sure the new node will not have old files around
5993 if not new_node.master_candidate:
5994 result = self.rpc.call_node_demote_from_mc(new_node.name)
5995 msg = result.fail_msg
5997 self.LogWarning("Node failed to demote itself from master"
5998 " candidate status: %s" % msg)
6000 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6001 additional_vm=self.op.vm_capable)
6002 self.context.AddNode(new_node, self.proc.GetECId())
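# Illustrative summary of the flow above: check prerequisites (name/IP
# resolution, homing, reachability, protocol version), optionally update
# /etc/hosts, verify SSH/hostname connectivity from the master, redistribute
# ancillary files and finally register the node in the configuration via
# AddNode(); in the readd case the existing Node object is reused, the
# offline/drained flags are reset and ReaddNode() is called instead.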
6005 class LUNodeSetParams(LogicalUnit):
6006 """Modifies the parameters of a node.
6008 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6009 to the node role (as _ROLE_*)
6010 @cvar _R2F: a dictionary from node role to tuples of flags
6011 @cvar _FLAGS: a list of attribute names corresponding to the flags
6014 HPATH = "node-modify"
6015 HTYPE = constants.HTYPE_NODE
6017 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6019 (True, False, False): _ROLE_CANDIDATE,
6020 (False, True, False): _ROLE_DRAINED,
6021 (False, False, True): _ROLE_OFFLINE,
6022 (False, False, False): _ROLE_REGULAR,
6024 _R2F = dict((v, k) for k, v in _F2R.items())
6025 _FLAGS = ["master_candidate", "drained", "offline"]
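# Illustrative example of the role mapping above: draining a node that is
# currently a master candidate changes its flag tuple from
# (True, False, False) to (False, True, False), i.e. from _ROLE_CANDIDATE to
# _ROLE_DRAINED; _R2F is the inverse mapping and is used in Exec() to turn
# the computed new role back into concrete flag values.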
6027 def CheckArguments(self):
6028 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6029 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6030 self.op.master_capable, self.op.vm_capable,
6031 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6033 if all_mods.count(None) == len(all_mods):
6034 raise errors.OpPrereqError("Please pass at least one modification",
6036 if all_mods.count(True) > 1:
6037 raise errors.OpPrereqError("Can't set the node into more than one"
6038 " state at the same time",
6041 # Boolean value that tells us whether we might be demoting from MC
6042 self.might_demote = (self.op.master_candidate == False or
6043 self.op.offline == True or
6044 self.op.drained == True or
6045 self.op.master_capable == False)
6047 if self.op.secondary_ip:
6048 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6049 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6050 " address" % self.op.secondary_ip,
6053 self.lock_all = self.op.auto_promote and self.might_demote
6054 self.lock_instances = self.op.secondary_ip is not None
6056 def _InstanceFilter(self, instance):
6057 """Filter for getting affected instances.
6060 return (instance.disk_template in constants.DTS_INT_MIRROR and
6061 self.op.node_name in instance.all_nodes)
6063 def ExpandNames(self):
6065 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6067 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6069 # Since modifying a node can have severe effects on currently running
6070 # operations, the resource lock is acquired at least in shared mode
6071 self.needed_locks[locking.LEVEL_NODE_RES] = \
6072 self.needed_locks[locking.LEVEL_NODE]
6074 # Get node resource and instance locks in shared mode; they are not used
6075 # for anything but read-only access
6076 self.share_locks[locking.LEVEL_NODE_RES] = 1
6077 self.share_locks[locking.LEVEL_INSTANCE] = 1
6079 if self.lock_instances:
6080 self.needed_locks[locking.LEVEL_INSTANCE] = \
6081 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6083 def BuildHooksEnv(self):
6086 This runs on the master node.
6090 "OP_TARGET": self.op.node_name,
6091 "MASTER_CANDIDATE": str(self.op.master_candidate),
6092 "OFFLINE": str(self.op.offline),
6093 "DRAINED": str(self.op.drained),
6094 "MASTER_CAPABLE": str(self.op.master_capable),
6095 "VM_CAPABLE": str(self.op.vm_capable),
6098 def BuildHooksNodes(self):
6099 """Build hooks nodes.
6102 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6105 def CheckPrereq(self):
6106 """Check prerequisites.
6108 This only checks the instance list against the existing names.
6111 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6113 if self.lock_instances:
6114 affected_instances = \
6115 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6117 # Verify instance locks
6118 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6119 wanted_instances = frozenset(affected_instances.keys())
6120 if wanted_instances - owned_instances:
6121 raise errors.OpPrereqError("Instances affected by changing node %s's"
6122 " secondary IP address have changed since"
6123 " locks were acquired, wanted '%s', have"
6124 " '%s'; retry the operation" %
6126 utils.CommaJoin(wanted_instances),
6127 utils.CommaJoin(owned_instances)),
6130 affected_instances = None
6132 if (self.op.master_candidate is not None or
6133 self.op.drained is not None or
6134 self.op.offline is not None):
6135 # we can't change the master's node flags
6136 if self.op.node_name == self.cfg.GetMasterNode():
6137 raise errors.OpPrereqError("The master role can be changed"
6138 " only via master-failover",
6141 if self.op.master_candidate and not node.master_capable:
6142 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6143 " it a master candidate" % node.name,
6146 if self.op.vm_capable == False:
6147 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6149 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6150 " the vm_capable flag" % node.name,
6153 if node.master_candidate and self.might_demote and not self.lock_all:
6154 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6155 # check if, after removing the current node, we would be short of master candidates
6157 (mc_remaining, mc_should, _) = \
6158 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6159 if mc_remaining < mc_should:
6160 raise errors.OpPrereqError("Not enough master candidates, please"
6161 " pass auto promote option to allow"
6162 " promotion (--auto-promote or RAPI"
6163 " auto_promote=True)", errors.ECODE_STATE)
6165 self.old_flags = old_flags = (node.master_candidate,
6166 node.drained, node.offline)
6167 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6168 self.old_role = old_role = self._F2R[old_flags]
6170 # Check for ineffective changes
6171 for attr in self._FLAGS:
6172 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
6173 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6174 setattr(self.op, attr, None)
6176 # Past this point, any flag change to False means a transition
6177 # away from the respective state, as only real changes are kept
6179 # TODO: We might query the real power state if it supports OOB
6180 if _SupportsOob(self.cfg, node):
6181 if self.op.offline is False and not (node.powered or
6182 self.op.powered == True):
6183 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6184 " offline status can be reset") %
6186 elif self.op.powered is not None:
6187 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6188 " as it does not support out-of-band"
6189 " handling") % self.op.node_name)
6191 # If the node is being de-offlined or un-drained, we'll MC ourselves if needed
6192 if (self.op.drained == False or self.op.offline == False or
6193 (self.op.master_capable and not node.master_capable)):
6194 if _DecideSelfPromotion(self):
6195 self.op.master_candidate = True
6196 self.LogInfo("Auto-promoting node to master candidate")
6198 # If we're no longer master capable, we'll demote ourselves from MC
6199 if self.op.master_capable == False and node.master_candidate:
6200 self.LogInfo("Demoting from master candidate")
6201 self.op.master_candidate = False
6204 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6205 if self.op.master_candidate:
6206 new_role = self._ROLE_CANDIDATE
6207 elif self.op.drained:
6208 new_role = self._ROLE_DRAINED
6209 elif self.op.offline:
6210 new_role = self._ROLE_OFFLINE
6211 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6212 # False is still in new flags, which means we're un-setting (the
6213 # current) flag
6214 new_role = self._ROLE_REGULAR
6215 else: # no new flags, nothing, keep old role
6218 self.new_role = new_role
6220 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6221 # Trying to transition out of offline status
6222 result = self.rpc.call_version([node.name])[node.name]
6224 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6225 " to report its version: %s" %
6226 (node.name, result.fail_msg),
6229 self.LogWarning("Transitioning node from offline to online state"
6230 " without using re-add. Please make sure the node"
6233 if self.op.secondary_ip:
6234 # Ok even without locking, because this can't be changed by any LU
6235 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6236 master_singlehomed = master.secondary_ip == master.primary_ip
6237 if master_singlehomed and self.op.secondary_ip:
6238 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6239 " homed cluster", errors.ECODE_INVAL)
6241 assert not (frozenset(affected_instances) -
6242 self.owned_locks(locking.LEVEL_INSTANCE))
6245 if affected_instances:
6246 raise errors.OpPrereqError("Cannot change secondary IP address:"
6247 " offline node has instances (%s)"
6248 " configured to use it" %
6249 utils.CommaJoin(affected_instances.keys()))
6251 # On online nodes, check that no instances are running, and that
6252 # the node has the new ip and we can reach it.
6253 for instance in affected_instances.values():
6254 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6255 msg="cannot change secondary ip")
6257 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6258 if master.name != node.name:
6259 # check reachability from master secondary ip to new secondary ip
6260 if not netutils.TcpPing(self.op.secondary_ip,
6261 constants.DEFAULT_NODED_PORT,
6262 source=master.secondary_ip):
6263 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6264 " based ping to node daemon port",
6265 errors.ECODE_ENVIRON)
6267 if self.op.ndparams:
6268 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6269 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6270 self.new_ndparams = new_ndparams
6272 if self.op.hv_state:
6273 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6274 self.node.hv_state_static)
6276 if self.op.disk_state:
6277 self.new_disk_state = \
6278 _MergeAndVerifyDiskState(self.op.disk_state,
6279 self.node.disk_state_static)
6281 def Exec(self, feedback_fn):
6286 old_role = self.old_role
6287 new_role = self.new_role
6291 if self.op.ndparams:
6292 node.ndparams = self.new_ndparams
6294 if self.op.powered is not None:
6295 node.powered = self.op.powered
6297 if self.op.hv_state:
6298 node.hv_state_static = self.new_hv_state
6300 if self.op.disk_state:
6301 node.disk_state_static = self.new_disk_state
6303 for attr in ["master_capable", "vm_capable"]:
6304 val = getattr(self.op, attr)
6306 setattr(node, attr, val)
6307 result.append((attr, str(val)))
6309 if new_role != old_role:
6310 # Tell the node to demote itself, if no longer MC and not offline
6311 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6312 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6314 self.LogWarning("Node failed to demote itself: %s", msg)
6316 new_flags = self._R2F[new_role]
6317 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6319 result.append((desc, str(nf)))
6320 (node.master_candidate, node.drained, node.offline) = new_flags
6322 # we locked all nodes, so we adjust the candidate pool before updating this node
6324 _AdjustCandidatePool(self, [node.name])
6326 if self.op.secondary_ip:
6327 node.secondary_ip = self.op.secondary_ip
6328 result.append(("secondary_ip", self.op.secondary_ip))
6330 # this will trigger configuration file update, if needed
6331 self.cfg.Update(node, feedback_fn)
6333 # this will trigger job queue propagation or cleanup if the mc
6334 # flag changed
6335 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6336 self.context.ReaddNode(node)
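# Illustrative example (the opcode name follows the usual LU<->Op naming
# convention): marking a master candidate as drained would be requested as
#   opcodes.OpNodeSetParams(node_name="node2.example.com", drained=True)
# which moves the role from _ROLE_CANDIDATE to _ROLE_DRAINED; Exec() then
# asks the node to demote itself from MC and, if all node locks are held,
# adjusts the candidate pool.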
6341 class LUNodePowercycle(NoHooksLU):
6342 """Powercycles a node.
6347 def CheckArguments(self):
6348 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6349 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6350 raise errors.OpPrereqError("The node is the master and the force"
6351 " parameter was not set",
6354 def ExpandNames(self):
6355 """Locking for PowercycleNode.
6357 This is a last-resort option and shouldn't block on other
6358 jobs. Therefore, we grab no locks.
6361 self.needed_locks = {}
6363 def Exec(self, feedback_fn):
6367 result = self.rpc.call_node_powercycle(self.op.node_name,
6368 self.cfg.GetHypervisorType())
6369 result.Raise("Failed to schedule the reboot")
6370 return result.payload
6373 class LUClusterQuery(NoHooksLU):
6374 """Query cluster configuration.
6379 def ExpandNames(self):
6380 self.needed_locks = {}
6382 def Exec(self, feedback_fn):
6383 """Return cluster config.
6386 cluster = self.cfg.GetClusterInfo()
6389 # Filter just for enabled hypervisors
6390 for os_name, hv_dict in cluster.os_hvp.items():
6391 os_hvp[os_name] = {}
6392 for hv_name, hv_params in hv_dict.items():
6393 if hv_name in cluster.enabled_hypervisors:
6394 os_hvp[os_name][hv_name] = hv_params
6396 # Convert ip_family to ip_version
6397 primary_ip_version = constants.IP4_VERSION
6398 if cluster.primary_ip_family == netutils.IP6Address.family:
6399 primary_ip_version = constants.IP6_VERSION
6402 "software_version": constants.RELEASE_VERSION,
6403 "protocol_version": constants.PROTOCOL_VERSION,
6404 "config_version": constants.CONFIG_VERSION,
6405 "os_api_version": max(constants.OS_API_VERSIONS),
6406 "export_version": constants.EXPORT_VERSION,
6407 "architecture": runtime.GetArchInfo(),
6408 "name": cluster.cluster_name,
6409 "master": cluster.master_node,
6410 "default_hypervisor": cluster.primary_hypervisor,
6411 "enabled_hypervisors": cluster.enabled_hypervisors,
6412 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6413 for hypervisor_name in cluster.enabled_hypervisors]),
6415 "beparams": cluster.beparams,
6416 "osparams": cluster.osparams,
6417 "ipolicy": cluster.ipolicy,
6418 "nicparams": cluster.nicparams,
6419 "ndparams": cluster.ndparams,
6420 "diskparams": cluster.diskparams,
6421 "candidate_pool_size": cluster.candidate_pool_size,
6422 "master_netdev": cluster.master_netdev,
6423 "master_netmask": cluster.master_netmask,
6424 "use_external_mip_script": cluster.use_external_mip_script,
6425 "volume_group_name": cluster.volume_group_name,
6426 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6427 "file_storage_dir": cluster.file_storage_dir,
6428 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6429 "maintain_node_health": cluster.maintain_node_health,
6430 "ctime": cluster.ctime,
6431 "mtime": cluster.mtime,
6432 "uuid": cluster.uuid,
6433 "tags": list(cluster.GetTags()),
6434 "uid_pool": cluster.uid_pool,
6435 "default_iallocator": cluster.default_iallocator,
6436 "reserved_lvs": cluster.reserved_lvs,
6437 "primary_ip_version": primary_ip_version,
6438 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6439 "hidden_os": cluster.hidden_os,
6440 "blacklisted_os": cluster.blacklisted_os,
6446 class LUClusterConfigQuery(NoHooksLU):
6447 """Return configuration values.
6452 def CheckArguments(self):
6453 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6455 def ExpandNames(self):
6456 self.cq.ExpandNames(self)
6458 def DeclareLocks(self, level):
6459 self.cq.DeclareLocks(self, level)
6461 def Exec(self, feedback_fn):
6462 result = self.cq.OldStyleQuery(self)
6464 assert len(result) == 1
6469 class _ClusterQuery(_QueryBase):
6470 FIELDS = query.CLUSTER_FIELDS
6472 #: Do not sort (there is only one item)
6475 def ExpandNames(self, lu):
6476 lu.needed_locks = {}
6478 # The following variables interact with _QueryBase._GetNames
6479 self.wanted = locking.ALL_SET
6480 self.do_locking = self.use_locking
6483 raise errors.OpPrereqError("Can not use locking for cluster queries",
6486 def DeclareLocks(self, lu, level):
6489 def _GetQueryData(self, lu):
6490 """Computes the list of nodes and their attributes.
6493 # Locking is not used
6494 assert not (compat.any(lu.glm.is_owned(level)
6495 for level in locking.LEVELS
6496 if level != locking.LEVEL_CLUSTER) or
6497 self.do_locking or self.use_locking)
6499 if query.CQ_CONFIG in self.requested_data:
6500 cluster = lu.cfg.GetClusterInfo()
6502 cluster = NotImplemented
6504 if query.CQ_QUEUE_DRAINED in self.requested_data:
6505 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6507 drain_flag = NotImplemented
6509 if query.CQ_WATCHER_PAUSE in self.requested_data:
6510 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6512 watcher_pause = NotImplemented
6514 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
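# Note: NotImplemented is used above as a sentinel meaning "this piece of
# data was not requested", so the query layer can tell "not collected" apart
# from legitimate values such as None or False.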
6517 class LUInstanceActivateDisks(NoHooksLU):
6518 """Bring up an instance's disks.
6523 def ExpandNames(self):
6524 self._ExpandAndLockInstance()
6525 self.needed_locks[locking.LEVEL_NODE] = []
6526 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6528 def DeclareLocks(self, level):
6529 if level == locking.LEVEL_NODE:
6530 self._LockInstancesNodes()
6532 def CheckPrereq(self):
6533 """Check prerequisites.
6535 This checks that the instance is in the cluster.
6538 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6539 assert self.instance is not None, \
6540 "Cannot retrieve locked instance %s" % self.op.instance_name
6541 _CheckNodeOnline(self, self.instance.primary_node)
6543 def Exec(self, feedback_fn):
6544 """Activate the disks.
6547 disks_ok, disks_info = \
6548 _AssembleInstanceDisks(self, self.instance,
6549 ignore_size=self.op.ignore_size)
6551 raise errors.OpExecError("Cannot activate block devices")
6556 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6557 ignore_size=False, check=True):
6558 """Prepare the block devices for an instance.
6560 This sets up the block devices on all nodes.
6562 @type lu: L{LogicalUnit}
6563 @param lu: the logical unit on whose behalf we execute
6564 @type instance: L{objects.Instance}
6565 @param instance: the instance for whose disks we assemble
6566 @type disks: list of L{objects.Disk} or None
6567 @param disks: which disks to assemble (or all, if None)
6568 @type ignore_secondaries: boolean
6569 @param ignore_secondaries: if true, errors on secondary nodes
6570 won't result in an error return from the function
6571 @type ignore_size: boolean
6572 @param ignore_size: if true, the current known size of the disk
6573 will not be used during the disk activation, useful for cases
6574 when the size is wrong
6575 @return: False if the operation failed, otherwise a list of
6576 (host, instance_visible_name, node_visible_name)
6577 with the mapping from node devices to instance devices
6582 iname = instance.name
6584 disks = _ExpandCheckDisks(instance, disks)
6586 # With the two-pass mechanism we try to reduce the window of
6587 # opportunity for the race condition of switching DRBD to primary
6588 # before handshaking occurred, but we do not eliminate it
6590 # The proper fix would be to wait (with some limits) until the
6591 # connection has been made and drbd transitions from WFConnection
6592 # into any other network-connected state (Connected, SyncTarget,
6593 # SyncSource, etc.)
6595 # 1st pass, assemble on all nodes in secondary mode
6596 for idx, inst_disk in enumerate(disks):
6597 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6599 node_disk = node_disk.Copy()
6600 node_disk.UnsetSize()
6601 lu.cfg.SetDiskID(node_disk, node)
6602 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6604 msg = result.fail_msg
6606 is_offline_secondary = (node in instance.secondary_nodes and
6608 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6609 " (is_primary=False, pass=1): %s",
6610 inst_disk.iv_name, node, msg)
6611 if not (ignore_secondaries or is_offline_secondary):
6614 # FIXME: race condition on drbd migration to primary
6616 # 2nd pass, do only the primary node
6617 for idx, inst_disk in enumerate(disks):
6620 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6621 if node != instance.primary_node:
6624 node_disk = node_disk.Copy()
6625 node_disk.UnsetSize()
6626 lu.cfg.SetDiskID(node_disk, node)
6627 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6629 msg = result.fail_msg
6631 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6632 " (is_primary=True, pass=2): %s",
6633 inst_disk.iv_name, node, msg)
6636 dev_path = result.payload
6638 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6640 # leave the disks configured for the primary node
6641 # this is a workaround that would be fixed better by
6642 # improving the logical/physical id handling
6644 lu.cfg.SetDiskID(disk, instance.primary_node)
6646 return disks_ok, device_info
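# Minimal usage sketch (mirroring _StartInstanceDisks() and
# LUInstanceActivateDisks above):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
# device_info is only meaningful when disks_ok is True.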
6649 def _StartInstanceDisks(lu, instance, force):
6650 """Start the disks of an instance.
6653 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6654 ignore_secondaries=force)
6656 _ShutdownInstanceDisks(lu, instance)
6657 if force is not None and not force:
6658 lu.proc.LogWarning("", hint="If the message above refers to a"
6660 " you can retry the operation using '--force'.")
6661 raise errors.OpExecError("Disk consistency error")
6664 class LUInstanceDeactivateDisks(NoHooksLU):
6665 """Shutdown an instance's disks.
6670 def ExpandNames(self):
6671 self._ExpandAndLockInstance()
6672 self.needed_locks[locking.LEVEL_NODE] = []
6673 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6675 def DeclareLocks(self, level):
6676 if level == locking.LEVEL_NODE:
6677 self._LockInstancesNodes()
6679 def CheckPrereq(self):
6680 """Check prerequisites.
6682 This checks that the instance is in the cluster.
6685 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6686 assert self.instance is not None, \
6687 "Cannot retrieve locked instance %s" % self.op.instance_name
6689 def Exec(self, feedback_fn):
6690 """Deactivate the disks
6693 instance = self.instance
6695 _ShutdownInstanceDisks(self, instance)
6697 _SafeShutdownInstanceDisks(self, instance)
6700 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6701 """Shutdown block devices of an instance.
6703 This function checks if an instance is running, before calling
6704 _ShutdownInstanceDisks.
6707 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6708 _ShutdownInstanceDisks(lu, instance, disks=disks)
6711 def _ExpandCheckDisks(instance, disks):
6712 """Return the instance disks selected by the disks list
6714 @type disks: list of L{objects.Disk} or None
6715 @param disks: selected disks
6716 @rtype: list of L{objects.Disk}
6717 @return: selected instance disks to act on
6721 return instance.disks
6723 if not set(disks).issubset(instance.disks):
6724 raise errors.ProgrammerError("Can only act on disks belonging to the"
6729 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6730 """Shutdown block devices of an instance.
6732 This does the shutdown on all nodes of the instance.
6734 If ignore_primary is false, errors on the primary node are
6739 disks = _ExpandCheckDisks(instance, disks)
6742 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6743 lu.cfg.SetDiskID(top_disk, node)
6744 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6745 msg = result.fail_msg
6747 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6748 disk.iv_name, node, msg)
6749 if ((node == instance.primary_node and not ignore_primary) or
6750 (node != instance.primary_node and not result.offline)):
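# Note: shutdown failures are only treated as errors when they occur on the
# primary node (unless ignore_primary is set) or on an online secondary;
# failures reported for offline secondaries are merely logged as warnings.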
6755 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6756 """Checks if a node has enough free memory.
6758 This function checks if a given node has the needed amount of free
6759 memory. In case the node has less memory or we cannot get the
6760 information from the node, this function raises an OpPrereqError
6763 @type lu: C{LogicalUnit}
6764 @param lu: a logical unit from which we get configuration data
6766 @param node: the node to check
6767 @type reason: C{str}
6768 @param reason: string to use in the error message
6769 @type requested: C{int}
6770 @param requested: the amount of memory in MiB to check for
6771 @type hypervisor_name: C{str}
6772 @param hypervisor_name: the hypervisor to ask for memory stats
6774 @return: node current free memory
6775 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6776 we cannot check the node
6779 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6780 nodeinfo[node].Raise("Can't get data from node %s" % node,
6781 prereq=True, ecode=errors.ECODE_ENVIRON)
6782 (_, _, (hv_info, )) = nodeinfo[node].payload
6784 free_mem = hv_info.get("memory_free", None)
6785 if not isinstance(free_mem, int):
6786 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6787 " was '%s'" % (node, free_mem),
6788 errors.ECODE_ENVIRON)
6789 if requested > free_mem:
6790 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6791 " needed %s MiB, available %s MiB" %
6792 (node, reason, requested, free_mem),
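# Usage sketch (mirroring LUInstanceStartup.CheckPrereq below):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
# i.e. the caller passes the amount of memory in MiB that the operation is
# about to consume, and the helper raises OpPrereqError if the node cannot
# provide it.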
6797 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6798 """Checks if nodes have enough free disk space in the all VGs.
6800 This function checks if all given nodes have the needed amount of
6801 free disk. In case any node has less disk or we cannot get the
6802 information from the node, this function raises an OpPrereqError
6805 @type lu: C{LogicalUnit}
6806 @param lu: a logical unit from which we get configuration data
6807 @type nodenames: C{list}
6808 @param nodenames: the list of node names to check
6809 @type req_sizes: C{dict}
6810 @param req_sizes: the hash of vg and corresponding amount of disk in
6812 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6813 or we cannot check the node
6816 for vg, req_size in req_sizes.items():
6817 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
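# Illustrative example: req_sizes maps a volume group name to the space that
# will be needed on it, e.g. {"xenvg": 10240} to require 10 GiB on "xenvg";
# each entry is checked separately via _CheckNodesFreeDiskOnVG().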
6820 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6821 """Checks if nodes have enough free disk space in the specified VG.
6823 This function checks if all given nodes have the needed amount of
6824 free disk. In case any node has less disk or we cannot get the
6825 information from the node, this function raises an OpPrereqError
6828 @type lu: C{LogicalUnit}
6829 @param lu: a logical unit from which we get configuration data
6830 @type nodenames: C{list}
6831 @param nodenames: the list of node names to check
6833 @param vg: the volume group to check
6834 @type requested: C{int}
6835 @param requested: the amount of disk in MiB to check for
6836 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6837 or we cannot check the node
6840 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6841 for node in nodenames:
6842 info = nodeinfo[node]
6843 info.Raise("Cannot get current information from node %s" % node,
6844 prereq=True, ecode=errors.ECODE_ENVIRON)
6845 (_, (vg_info, ), _) = info.payload
6846 vg_free = vg_info.get("vg_free", None)
6847 if not isinstance(vg_free, int):
6848 raise errors.OpPrereqError("Can't compute free disk space on node"
6849 " %s for vg %s, result was '%s'" %
6850 (node, vg, vg_free), errors.ECODE_ENVIRON)
6851 if requested > vg_free:
6852 raise errors.OpPrereqError("Not enough disk space on target node %s"
6853 " vg %s: required %d MiB, available %d MiB" %
6854 (node, vg, requested, vg_free),
6858 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6859 """Checks if nodes have enough physical CPUs
6861 This function checks if all given nodes have the needed number of
6862 physical CPUs. In case any node has fewer CPUs or we cannot get the
6863 information from the node, this function raises an OpPrereqError
6866 @type lu: C{LogicalUnit}
6867 @param lu: a logical unit from which we get configuration data
6868 @type nodenames: C{list}
6869 @param nodenames: the list of node names to check
6870 @type requested: C{int}
6871 @param requested: the minimum acceptable number of physical CPUs
6872 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6873 or we cannot check the node
6876 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6877 for node in nodenames:
6878 info = nodeinfo[node]
6879 info.Raise("Cannot get current information from node %s" % node,
6880 prereq=True, ecode=errors.ECODE_ENVIRON)
6881 (_, _, (hv_info, )) = info.payload
6882 num_cpus = hv_info.get("cpu_total", None)
6883 if not isinstance(num_cpus, int):
6884 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6885 " on node %s, result was '%s'" %
6886 (node, num_cpus), errors.ECODE_ENVIRON)
6887 if requested > num_cpus:
6888 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6889 "required" % (node, num_cpus, requested),
6893 class LUInstanceStartup(LogicalUnit):
6894 """Starts an instance.
6897 HPATH = "instance-start"
6898 HTYPE = constants.HTYPE_INSTANCE
6901 def CheckArguments(self):
6903 if self.op.beparams:
6904 # fill the beparams dict
6905 objects.UpgradeBeParams(self.op.beparams)
6906 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6908 def ExpandNames(self):
6909 self._ExpandAndLockInstance()
6910 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6912 def DeclareLocks(self, level):
6913 if level == locking.LEVEL_NODE_RES:
6914 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6916 def BuildHooksEnv(self):
6919 This runs on master, primary and secondary nodes of the instance.
6923 "FORCE": self.op.force,
6926 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6930 def BuildHooksNodes(self):
6931 """Build hooks nodes.
6934 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6937 def CheckPrereq(self):
6938 """Check prerequisites.
6940 This checks that the instance is in the cluster.
6943 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6944 assert self.instance is not None, \
6945 "Cannot retrieve locked instance %s" % self.op.instance_name
6948 if self.op.hvparams:
6949 # check hypervisor parameter syntax (locally)
6950 cluster = self.cfg.GetClusterInfo()
6951 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6952 filled_hvp = cluster.FillHV(instance)
6953 filled_hvp.update(self.op.hvparams)
6954 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6955 hv_type.CheckParameterSyntax(filled_hvp)
6956 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6958 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6960 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6962 if self.primary_offline and self.op.ignore_offline_nodes:
6963 self.proc.LogWarning("Ignoring offline primary node")
6965 if self.op.hvparams or self.op.beparams:
6966 self.proc.LogWarning("Overridden parameters are ignored")
6968 _CheckNodeOnline(self, instance.primary_node)
6970 bep = self.cfg.GetClusterInfo().FillBE(instance)
6971 bep.update(self.op.beparams)
6973 # check bridges existence
6974 _CheckInstanceBridgesExist(self, instance)
6976 remote_info = self.rpc.call_instance_info(instance.primary_node,
6978 instance.hypervisor)
6979 remote_info.Raise("Error checking node %s" % instance.primary_node,
6980 prereq=True, ecode=errors.ECODE_ENVIRON)
6981 if not remote_info.payload: # not running already
6982 _CheckNodeFreeMemory(self, instance.primary_node,
6983 "starting instance %s" % instance.name,
6984 bep[constants.BE_MINMEM], instance.hypervisor)
6986 def Exec(self, feedback_fn):
6987 """Start the instance.
6990 instance = self.instance
6991 force = self.op.force
6993 if not self.op.no_remember:
6994 self.cfg.MarkInstanceUp(instance.name)
6996 if self.primary_offline:
6997 assert self.op.ignore_offline_nodes
6998 self.proc.LogInfo("Primary node offline, marked instance as started")
7000 node_current = instance.primary_node
7002 _StartInstanceDisks(self, instance, force)
7005 self.rpc.call_instance_start(node_current,
7006 (instance, self.op.hvparams,
7008 self.op.startup_paused)
7009 msg = result.fail_msg
7011 _ShutdownInstanceDisks(self, instance)
7012 raise errors.OpExecError("Could not start instance: %s" % msg)
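# Note: with no_remember set the new administrative state is not recorded in
# the configuration, and when the primary node is offline (allowed only
# together with ignore_offline_nodes) the instance is merely marked as
# started; no start RPC is attempted against the offline node.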
7015 class LUInstanceReboot(LogicalUnit):
7016 """Reboot an instance.
7019 HPATH = "instance-reboot"
7020 HTYPE = constants.HTYPE_INSTANCE
7023 def ExpandNames(self):
7024 self._ExpandAndLockInstance()
7026 def BuildHooksEnv(self):
7029 This runs on master, primary and secondary nodes of the instance.
7033 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7034 "REBOOT_TYPE": self.op.reboot_type,
7035 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7038 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7042 def BuildHooksNodes(self):
7043 """Build hooks nodes.
7046 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7049 def CheckPrereq(self):
7050 """Check prerequisites.
7052 This checks that the instance is in the cluster.
7055 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7056 assert self.instance is not None, \
7057 "Cannot retrieve locked instance %s" % self.op.instance_name
7058 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7059 _CheckNodeOnline(self, instance.primary_node)
7061 # check bridges existence
7062 _CheckInstanceBridgesExist(self, instance)
7064 def Exec(self, feedback_fn):
7065 """Reboot the instance.
7068 instance = self.instance
7069 ignore_secondaries = self.op.ignore_secondaries
7070 reboot_type = self.op.reboot_type
7072 remote_info = self.rpc.call_instance_info(instance.primary_node,
7074 instance.hypervisor)
7075 remote_info.Raise("Error checking node %s" % instance.primary_node)
7076 instance_running = bool(remote_info.payload)
7078 node_current = instance.primary_node
7080 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7081 constants.INSTANCE_REBOOT_HARD]:
7082 for disk in instance.disks:
7083 self.cfg.SetDiskID(disk, node_current)
7084 result = self.rpc.call_instance_reboot(node_current, instance,
7086 self.op.shutdown_timeout)
7087 result.Raise("Could not reboot instance")
7089 if instance_running:
7090 result = self.rpc.call_instance_shutdown(node_current, instance,
7091 self.op.shutdown_timeout)
7092 result.Raise("Could not shutdown instance for full reboot")
7093 _ShutdownInstanceDisks(self, instance)
7095 self.LogInfo("Instance %s was already stopped, starting now",
7097 _StartInstanceDisks(self, instance, ignore_secondaries)
7098 result = self.rpc.call_instance_start(node_current,
7099 (instance, None, None), False)
7100 msg = result.fail_msg
7102 _ShutdownInstanceDisks(self, instance)
7103 raise errors.OpExecError("Could not start instance for"
7104 " full reboot: %s" % msg)
7106 self.cfg.MarkInstanceUp(instance.name)
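# Note: soft and hard reboots of a running instance are delegated to a single
# instance_reboot RPC, while a full reboot (or rebooting an instance that is
# not running) is implemented as shutdown + disk deactivation, followed by
# disk activation + start; in all cases the instance ends up marked as up.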
7109 class LUInstanceShutdown(LogicalUnit):
7110 """Shutdown an instance.
7113 HPATH = "instance-stop"
7114 HTYPE = constants.HTYPE_INSTANCE
7117 def ExpandNames(self):
7118 self._ExpandAndLockInstance()
7120 def BuildHooksEnv(self):
7123 This runs on master, primary and secondary nodes of the instance.
7126 env = _BuildInstanceHookEnvByObject(self, self.instance)
7127 env["TIMEOUT"] = self.op.timeout
7130 def BuildHooksNodes(self):
7131 """Build hooks nodes.
7134 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7137 def CheckPrereq(self):
7138 """Check prerequisites.
7140 This checks that the instance is in the cluster.
7143 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7144 assert self.instance is not None, \
7145 "Cannot retrieve locked instance %s" % self.op.instance_name
7147 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7149 self.primary_offline = \
7150 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7152 if self.primary_offline and self.op.ignore_offline_nodes:
7153 self.proc.LogWarning("Ignoring offline primary node")
7155 _CheckNodeOnline(self, self.instance.primary_node)
7157 def Exec(self, feedback_fn):
7158 """Shutdown the instance.
7161 instance = self.instance
7162 node_current = instance.primary_node
7163 timeout = self.op.timeout
7165 if not self.op.no_remember:
7166 self.cfg.MarkInstanceDown(instance.name)
7168 if self.primary_offline:
7169 assert self.op.ignore_offline_nodes
7170 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7172 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7173 msg = result.fail_msg
7175 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7177 _ShutdownInstanceDisks(self, instance)
7180 class LUInstanceReinstall(LogicalUnit):
7181 """Reinstall an instance.
7184 HPATH = "instance-reinstall"
7185 HTYPE = constants.HTYPE_INSTANCE
7188 def ExpandNames(self):
7189 self._ExpandAndLockInstance()
7191 def BuildHooksEnv(self):
7194 This runs on master, primary and secondary nodes of the instance.
7197 return _BuildInstanceHookEnvByObject(self, self.instance)
7199 def BuildHooksNodes(self):
7200 """Build hooks nodes.
7203 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7206 def CheckPrereq(self):
7207 """Check prerequisites.
7209 This checks that the instance is in the cluster and is not running.
7212 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7213 assert instance is not None, \
7214 "Cannot retrieve locked instance %s" % self.op.instance_name
7215 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7216 " offline, cannot reinstall")
7218 if instance.disk_template == constants.DT_DISKLESS:
7219 raise errors.OpPrereqError("Instance '%s' has no disks" %
7220 self.op.instance_name,
7222 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7224 if self.op.os_type is not None:
7226 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7227 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7228 instance_os = self.op.os_type
7230 instance_os = instance.os
7232 nodelist = list(instance.all_nodes)
7234 if self.op.osparams:
7235 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7236 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7237 self.os_inst = i_osdict # the new dict (without defaults)
7241 self.instance = instance
7243 def Exec(self, feedback_fn):
7244 """Reinstall the instance.
7247 inst = self.instance
7249 if self.op.os_type is not None:
7250 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7251 inst.os = self.op.os_type
7252 # Write to configuration
7253 self.cfg.Update(inst, feedback_fn)
7255 _StartInstanceDisks(self, inst, None)
7257 feedback_fn("Running the instance OS create scripts...")
7258 # FIXME: pass debug option from opcode to backend
7259 result = self.rpc.call_instance_os_add(inst.primary_node,
7260 (inst, self.os_inst), True,
7261 self.op.debug_level)
7262 result.Raise("Could not install OS for instance %s on node %s" %
7263 (inst.name, inst.primary_node))
7265 _ShutdownInstanceDisks(self, inst)
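# Note: the disks are activated only for the duration of the OS create
# scripts and are shut down again afterwards; the instance itself is never
# started by this LU.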
7268 class LUInstanceRecreateDisks(LogicalUnit):
7269 """Recreate an instance's missing disks.
7272 HPATH = "instance-recreate-disks"
7273 HTYPE = constants.HTYPE_INSTANCE
7276 _MODIFYABLE = frozenset([
7277 constants.IDISK_SIZE,
7278 constants.IDISK_MODE,
7281 # New or changed disk parameters may have different semantics
7282 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7283 constants.IDISK_ADOPT,
7285 # TODO: Implement support for changing the VG while recreating
7287 constants.IDISK_METAVG,
7288 constants.IDISK_PROVIDER,
7291 def CheckArguments(self):
7292 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7293 # Normalize and convert deprecated list of disk indices
7294 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7296 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7298 raise errors.OpPrereqError("Some disks have been specified more than"
7299 " once: %s" % utils.CommaJoin(duplicates),
7302 for (idx, params) in self.op.disks:
7303 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7304 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7306 raise errors.OpPrereqError("Parameters for disk %s try to change"
7307 " unmodifyable parameter(s): %s" %
7308 (idx, utils.CommaJoin(unsupported)),
7311 def ExpandNames(self):
7312 self._ExpandAndLockInstance()
7313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7315 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7316 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7318 self.needed_locks[locking.LEVEL_NODE] = []
7319 self.needed_locks[locking.LEVEL_NODE_RES] = []
7321 def DeclareLocks(self, level):
7322 if level == locking.LEVEL_NODE:
7323 # if we replace the nodes, we only need to lock the old primary,
7324 # otherwise we need to lock all nodes for disk re-creation
7325 primary_only = bool(self.op.nodes)
7326 self._LockInstancesNodes(primary_only=primary_only)
7327 elif level == locking.LEVEL_NODE_RES:
7329 self.needed_locks[locking.LEVEL_NODE_RES] = \
7330 self.needed_locks[locking.LEVEL_NODE][:]
7332 def BuildHooksEnv(self):
7335 This runs on master, primary and secondary nodes of the instance.
7338 return _BuildInstanceHookEnvByObject(self, self.instance)
7340 def BuildHooksNodes(self):
7341 """Build hooks nodes.
7344 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7347 def CheckPrereq(self):
7348 """Check prerequisites.
7350 This checks that the instance is in the cluster and is not running.
7353 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7354 assert instance is not None, \
7355 "Cannot retrieve locked instance %s" % self.op.instance_name
7357 if len(self.op.nodes) != len(instance.all_nodes):
7358 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7359 " %d replacement nodes were specified" %
7360 (instance.name, len(instance.all_nodes),
7361 len(self.op.nodes)),
7363 assert instance.disk_template != constants.DT_DRBD8 or \
7364 len(self.op.nodes) == 2
7365 assert instance.disk_template != constants.DT_PLAIN or \
7366 len(self.op.nodes) == 1
7367 primary_node = self.op.nodes[0]
7369 primary_node = instance.primary_node
7370 _CheckNodeOnline(self, primary_node)
7372 if instance.disk_template == constants.DT_DISKLESS:
7373 raise errors.OpPrereqError("Instance '%s' has no disks" %
7374 self.op.instance_name, errors.ECODE_INVAL)
7376 # if we replace nodes *and* the old primary is offline, we don't
7378 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7379 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7380 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7381 if not (self.op.nodes and old_pnode.offline):
7382 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7383 msg="cannot recreate disks")
7386 self.disks = dict(self.op.disks)
7388 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7390 maxidx = max(self.disks.keys())
7391 if maxidx >= len(instance.disks):
7392 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7395 if (self.op.nodes and
7396 sorted(self.disks.keys()) != range(len(instance.disks))):
7397 raise errors.OpPrereqError("Can't recreate disks partially and"
7398 " change the nodes at the same time",
7401 self.instance = instance
7403 def Exec(self, feedback_fn):
7404 """Recreate the disks.
7407 instance = self.instance
7409 assert (self.owned_locks(locking.LEVEL_NODE) ==
7410 self.owned_locks(locking.LEVEL_NODE_RES))
7413 mods = [] # keeps track of needed changes
7415 for idx, disk in enumerate(instance.disks):
7417 changes = self.disks[idx]
7419 # Disk should not be recreated
7423 # update secondaries for disks, if needed
7424 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7425 # need to update the nodes and minors
7426 assert len(self.op.nodes) == 2
7427 assert len(disk.logical_id) == 6 # otherwise disk internals
7429 (_, _, old_port, _, _, old_secret) = disk.logical_id
7430 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7431 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7432 new_minors[0], new_minors[1], old_secret)
7433 assert len(disk.logical_id) == len(new_id)
7437 mods.append((idx, new_id, changes))
7439 # now that we have passed all asserts above, we can apply the mods
7440 # in a single run (to avoid partial changes)
7441 for idx, new_id, changes in mods:
7442 disk = instance.disks[idx]
7443 if new_id is not None:
7444 assert disk.dev_type == constants.LD_DRBD8
7445 disk.logical_id = new_id
7447 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7448 mode=changes.get(constants.IDISK_MODE, None))
7450 # change primary node, if needed
7452 instance.primary_node = self.op.nodes[0]
7453 self.LogWarning("Changing the instance's nodes, you will have to"
7454 " remove any disks left on the older nodes manually")
7457 self.cfg.Update(instance, feedback_fn)
7459 _CreateDisks(self, instance, to_skip=to_skip)
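# Illustrative example: self.op.disks accepts either a plain list of indices
# (deprecated; normalized in CheckArguments above) or a list of
# (index, params) pairs, e.g. [(0, {constants.IDISK_SIZE: 20480})] to
# recreate disk 0 with a 20 GiB size; only IDISK_SIZE and IDISK_MODE may be
# changed while recreating.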
7462 class LUInstanceRename(LogicalUnit):
7463 """Rename an instance.
7466 HPATH = "instance-rename"
7467 HTYPE = constants.HTYPE_INSTANCE
7469 def CheckArguments(self):
7473 if self.op.ip_check and not self.op.name_check:
7474 # TODO: make the ip check more flexible and not depend on the name check
7475 raise errors.OpPrereqError("IP address check requires a name check",
7478 def BuildHooksEnv(self):
7481 This runs on master, primary and secondary nodes of the instance.
7484 env = _BuildInstanceHookEnvByObject(self, self.instance)
7485 env["INSTANCE_NEW_NAME"] = self.op.new_name
7488 def BuildHooksNodes(self):
7489 """Build hooks nodes.
7492 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7495 def CheckPrereq(self):
7496 """Check prerequisites.
7498 This checks that the instance is in the cluster and is not running.
7501 self.op.instance_name = _ExpandInstanceName(self.cfg,
7502 self.op.instance_name)
7503 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7504 assert instance is not None
7505 _CheckNodeOnline(self, instance.primary_node)
7506 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7507 msg="cannot rename")
7508 self.instance = instance
7510 new_name = self.op.new_name
7511 if self.op.name_check:
7512 hostname = netutils.GetHostname(name=new_name)
7513 if hostname.name != new_name:
7514 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7516 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7517 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7518 " same as given hostname '%s'") %
7519 (hostname.name, self.op.new_name),
7521 new_name = self.op.new_name = hostname.name
7522 if (self.op.ip_check and
7523 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7524 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7525 (hostname.ip, new_name),
7526 errors.ECODE_NOTUNIQUE)
7528 instance_list = self.cfg.GetInstanceList()
7529 if new_name in instance_list and new_name != instance.name:
7530 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7531 new_name, errors.ECODE_EXISTS)
7533 def Exec(self, feedback_fn):
7534 """Rename the instance.
7537 inst = self.instance
7538 old_name = inst.name
7540 rename_file_storage = False
7541 if (inst.disk_template in constants.DTS_FILEBASED and
7542 self.op.new_name != inst.name):
7543 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7544 rename_file_storage = True
7546 self.cfg.RenameInstance(inst.name, self.op.new_name)
7547 # Change the instance lock. This is definitely safe while we hold the BGL.
7548 # Otherwise the new lock would have to be added in acquired mode.
7550 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7551 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7553 # re-read the instance from the configuration after rename
7554 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7556 if rename_file_storage:
7557 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7558 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7559 old_file_storage_dir,
7560 new_file_storage_dir)
7561 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7562 " (but the instance has been renamed in Ganeti)" %
7563 (inst.primary_node, old_file_storage_dir,
7564 new_file_storage_dir))
7566 _StartInstanceDisks(self, inst, None)
try:
7568 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7569 old_name, self.op.debug_level)
7570 msg = result.fail_msg
if msg:
7572 msg = ("Could not run OS rename script for instance %s on node %s"
7573 " (but the instance has been renamed in Ganeti): %s" %
7574 (inst.name, inst.primary_node, msg))
7575 self.proc.LogWarning(msg)
finally:
7577 _ShutdownInstanceDisks(self, inst)
return inst.name
7582 class LUInstanceRemove(LogicalUnit):
7583 """Remove an instance.
7586 HPATH = "instance-remove"
7587 HTYPE = constants.HTYPE_INSTANCE
7590 def ExpandNames(self):
7591 self._ExpandAndLockInstance()
7592 self.needed_locks[locking.LEVEL_NODE] = []
7593 self.needed_locks[locking.LEVEL_NODE_RES] = []
7594 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7596 def DeclareLocks(self, level):
7597 if level == locking.LEVEL_NODE:
7598 self._LockInstancesNodes()
7599 elif level == locking.LEVEL_NODE_RES:
7601 self.needed_locks[locking.LEVEL_NODE_RES] = \
7602 self.needed_locks[locking.LEVEL_NODE][:]
7604 def BuildHooksEnv(self):
7607 This runs on master, primary and secondary nodes of the instance.
7610 env = _BuildInstanceHookEnvByObject(self, self.instance)
7611 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
return env
7614 def BuildHooksNodes(self):
7615 """Build hooks nodes.
7618 nl = [self.cfg.GetMasterNode()]
7619 nl_post = list(self.instance.all_nodes) + nl
7620 return (nl, nl_post)
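# Illustrative note (hypothetical node names, not part of the original code):
# for an instance with primary "node1" and secondary "node2", the tuple built
# above would be
#   (["master.example.com"], ["node1", "node2", "master.example.com"])
# i.e. the pre hooks run only on the master, while the post hooks also run on
# the nodes that hosted the instance.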
7622 def CheckPrereq(self):
7623 """Check prerequisites.
7625 This checks that the instance is in the cluster.
7628 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7629 assert self.instance is not None, \
7630 "Cannot retrieve locked instance %s" % self.op.instance_name
7632 def Exec(self, feedback_fn):
7633 """Remove the instance.
7636 instance = self.instance
7637 logging.info("Shutting down instance %s on node %s",
7638 instance.name, instance.primary_node)
7640 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7641 self.op.shutdown_timeout)
7642 msg = result.fail_msg
if msg:
7644 if self.op.ignore_failures:
7645 feedback_fn("Warning: can't shutdown instance: %s" % msg)
else:
7647 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
7649 (instance.name, instance.primary_node, msg))
7651 assert (self.owned_locks(locking.LEVEL_NODE) ==
7652 self.owned_locks(locking.LEVEL_NODE_RES))
7653 assert not (set(instance.all_nodes) -
7654 self.owned_locks(locking.LEVEL_NODE)), \
7655 "Not owning correct locks"
7657 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7660 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7661 """Utility function to remove an instance.
7664 logging.info("Removing block devices for instance %s", instance.name)
7666 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7667 if not ignore_failures:
7668 raise errors.OpExecError("Can't remove instance's disks")
7669 feedback_fn("Warning: can't remove instance's disks")
7671 logging.info("Removing instance %s out of cluster config", instance.name)
7673 lu.cfg.RemoveInstance(instance.name)
7675 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7676 "Instance lock removal conflict"
7678 # Remove lock for the instance
7679 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7682 class LUInstanceQuery(NoHooksLU):
7683 """Logical unit for querying instances.
7686 # pylint: disable=W0142
7689 def CheckArguments(self):
7690 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7691 self.op.output_fields, self.op.use_locking)
7693 def ExpandNames(self):
7694 self.iq.ExpandNames(self)
7696 def DeclareLocks(self, level):
7697 self.iq.DeclareLocks(self, level)
7699 def Exec(self, feedback_fn):
7700 return self.iq.OldStyleQuery(self)
7703 class LUInstanceFailover(LogicalUnit):
7704 """Failover an instance.
7707 HPATH = "instance-failover"
7708 HTYPE = constants.HTYPE_INSTANCE
7711 def CheckArguments(self):
7712 """Check the arguments.
7715 self.iallocator = getattr(self.op, "iallocator", None)
7716 self.target_node = getattr(self.op, "target_node", None)
7718 def ExpandNames(self):
7719 self._ExpandAndLockInstance()
7721 if self.op.target_node is not None:
7722 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7724 self.needed_locks[locking.LEVEL_NODE] = []
7725 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7727 self.needed_locks[locking.LEVEL_NODE_RES] = []
7728 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7730 ignore_consistency = self.op.ignore_consistency
7731 shutdown_timeout = self.op.shutdown_timeout
7732 self._migrater = TLMigrateInstance(self, self.op.instance_name,
cleanup=False,
failover=True,
7735 ignore_consistency=ignore_consistency,
7736 shutdown_timeout=shutdown_timeout,
7737 ignore_ipolicy=self.op.ignore_ipolicy)
7738 self.tasklets = [self._migrater]
7740 def DeclareLocks(self, level):
7741 if level == locking.LEVEL_NODE:
7742 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7743 if instance.disk_template in constants.DTS_EXT_MIRROR:
7744 if self.op.target_node is None:
7745 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
else:
7747 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7748 self.op.target_node]
7749 del self.recalculate_locks[locking.LEVEL_NODE]
else:  # not an externally mirrored disk template
7751 self._LockInstancesNodes()
7752 elif level == locking.LEVEL_NODE_RES:
7754 self.needed_locks[locking.LEVEL_NODE_RES] = \
7755 self.needed_locks[locking.LEVEL_NODE][:]
7757 def BuildHooksEnv(self):
7760 This runs on master, primary and secondary nodes of the instance.
7763 instance = self._migrater.instance
7764 source_node = instance.primary_node
7765 target_node = self.op.target_node
env = {
7767 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7768 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7769 "OLD_PRIMARY": source_node,
7770 "NEW_PRIMARY": target_node,
}
7773 if instance.disk_template in constants.DTS_INT_MIRROR:
7774 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7775 env["NEW_SECONDARY"] = source_node
else:
7777 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7779 env.update(_BuildInstanceHookEnvByObject(self, instance))
return env
7783 def BuildHooksNodes(self):
7784 """Build hooks nodes.
7787 instance = self._migrater.instance
7788 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7789 return (nl, nl + [instance.primary_node])
7792 class LUInstanceMigrate(LogicalUnit):
7793 """Migrate an instance.
7795 This is migration without shutting down, compared to the failover,
7796 which is done with shutdown.
7799 HPATH = "instance-migrate"
7800 HTYPE = constants.HTYPE_INSTANCE
7803 def ExpandNames(self):
7804 self._ExpandAndLockInstance()
7806 if self.op.target_node is not None:
7807 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7809 self.needed_locks[locking.LEVEL_NODE] = []
7810 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7812 self.needed_locks[locking.LEVEL_NODE_RES] = []
7813 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
self._migrater = \
7816 TLMigrateInstance(self, self.op.instance_name,
7817 cleanup=self.op.cleanup,
failover=False,
7819 fallback=self.op.allow_failover,
7820 allow_runtime_changes=self.op.allow_runtime_changes,
7821 ignore_ipolicy=self.op.ignore_ipolicy)
7822 self.tasklets = [self._migrater]
7824 def DeclareLocks(self, level):
7825 if level == locking.LEVEL_NODE:
7826 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7827 if instance.disk_template in constants.DTS_EXT_MIRROR:
7828 if self.op.target_node is None:
7829 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
else:
7831 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7832 self.op.target_node]
7833 del self.recalculate_locks[locking.LEVEL_NODE]
else:  # not an externally mirrored disk template
7835 self._LockInstancesNodes()
7836 elif level == locking.LEVEL_NODE_RES:
7838 self.needed_locks[locking.LEVEL_NODE_RES] = \
7839 self.needed_locks[locking.LEVEL_NODE][:]
7841 def BuildHooksEnv(self):
7844 This runs on master, primary and secondary nodes of the instance.
7847 instance = self._migrater.instance
7848 source_node = instance.primary_node
7849 target_node = self.op.target_node
7850 env = _BuildInstanceHookEnvByObject(self, instance)
env.update({
7852 "MIGRATE_LIVE": self._migrater.live,
7853 "MIGRATE_CLEANUP": self.op.cleanup,
7854 "OLD_PRIMARY": source_node,
7855 "NEW_PRIMARY": target_node,
7856 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
})
7859 if instance.disk_template in constants.DTS_INT_MIRROR:
7860 env["OLD_SECONDARY"] = target_node
7861 env["NEW_SECONDARY"] = source_node
else:
7863 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
return env
7867 def BuildHooksNodes(self):
7868 """Build hooks nodes.
7871 instance = self._migrater.instance
7872 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7873 return (nl, nl + [instance.primary_node])
7876 class LUInstanceMove(LogicalUnit):
7877 """Move an instance by data-copying.
7880 HPATH = "instance-move"
7881 HTYPE = constants.HTYPE_INSTANCE
7884 def ExpandNames(self):
7885 self._ExpandAndLockInstance()
7886 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7887 self.op.target_node = target_node
7888 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7889 self.needed_locks[locking.LEVEL_NODE_RES] = []
7890 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7892 def DeclareLocks(self, level):
7893 if level == locking.LEVEL_NODE:
7894 self._LockInstancesNodes(primary_only=True)
7895 elif level == locking.LEVEL_NODE_RES:
7897 self.needed_locks[locking.LEVEL_NODE_RES] = \
7898 self.needed_locks[locking.LEVEL_NODE][:]
7900 def BuildHooksEnv(self):
7903 This runs on master, primary and secondary nodes of the instance.
env = {
7907 "TARGET_NODE": self.op.target_node,
7908 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
}
7910 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
return env
7913 def BuildHooksNodes(self):
7914 """Build hooks nodes.
"""
nl = [
7918 self.cfg.GetMasterNode(),
7919 self.instance.primary_node,
7920 self.op.target_node,
]
return (nl, nl)
7924 def CheckPrereq(self):
7925 """Check prerequisites.
7927 This checks that the instance is in the cluster.
7930 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7931 assert self.instance is not None, \
7932 "Cannot retrieve locked instance %s" % self.op.instance_name
7934 node = self.cfg.GetNodeInfo(self.op.target_node)
7935 assert node is not None, \
7936 "Cannot retrieve locked node %s" % self.op.target_node
7938 self.target_node = target_node = node.name
7940 if target_node == instance.primary_node:
7941 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7942 (instance.name, target_node),
7945 bep = self.cfg.GetClusterInfo().FillBE(instance)
7947 for idx, dsk in enumerate(instance.disks):
7948 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7949 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7950 " cannot copy" % idx, errors.ECODE_STATE)
7952 _CheckNodeOnline(self, target_node)
7953 _CheckNodeNotDrained(self, target_node)
7954 _CheckNodeVmCapable(self, target_node)
7955 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7956 self.cfg.GetNodeGroup(node.group))
7957 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7958 ignore=self.op.ignore_ipolicy)
7960 if instance.admin_state == constants.ADMINST_UP:
7961 # check memory requirements on the secondary node
7962 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7963 instance.name, bep[constants.BE_MAXMEM],
7964 instance.hypervisor)
else:
7966 self.LogInfo("Not checking memory on the secondary node as"
7967 " instance will not be started")
7969 # check bridge existance
7970 _CheckInstanceBridgesExist(self, instance, node=target_node)
7972 def Exec(self, feedback_fn):
7973 """Move an instance.
7975 The move is done by shutting it down on its present node, copying
7976 the data over (slow) and starting it on the new node.
7979 instance = self.instance
7981 source_node = instance.primary_node
7982 target_node = self.target_node
7984 self.LogInfo("Shutting down instance %s on source node %s",
7985 instance.name, source_node)
7987 assert (self.owned_locks(locking.LEVEL_NODE) ==
7988 self.owned_locks(locking.LEVEL_NODE_RES))
7990 result = self.rpc.call_instance_shutdown(source_node, instance,
7991 self.op.shutdown_timeout)
7992 msg = result.fail_msg
if msg:
7994 if self.op.ignore_consistency:
7995 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7996 " Proceeding anyway. Please make sure node"
7997 " %s is down. Error details: %s",
7998 instance.name, source_node, source_node, msg)
else:
8000 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
8002 (instance.name, source_node, msg))
8004 # create the target disks
try:
8006 _CreateDisks(self, instance, target_node=target_node)
8007 except errors.OpExecError:
8008 self.LogWarning("Device creation failed, reverting...")
try:
8010 _RemoveDisks(self, instance, target_node=target_node)
finally:
8012 self.cfg.ReleaseDRBDMinors(instance.name)
raise
8015 cluster_name = self.cfg.GetClusterInfo().cluster_name
errs = []
8018 # activate, get path, copy the data over
8019 for idx, disk in enumerate(instance.disks):
8020 self.LogInfo("Copying data for disk %d", idx)
8021 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8022 instance.name, True, idx)
if result.fail_msg:
8024 self.LogWarning("Can't assemble newly created disk %d: %s",
8025 idx, result.fail_msg)
8026 errs.append(result.fail_msg)
break
8028 dev_path = result.payload
8029 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8030 target_node, dev_path,
cluster_name)
if result.fail_msg:
8033 self.LogWarning("Can't copy data over for disk %d: %s",
8034 idx, result.fail_msg)
8035 errs.append(result.fail_msg)
break
if errs:
8039 self.LogWarning("Some disks failed to copy, aborting")
try:
8041 _RemoveDisks(self, instance, target_node=target_node)
finally:
8043 self.cfg.ReleaseDRBDMinors(instance.name)
8044 raise errors.OpExecError("Errors during disk copy: %s" %
",".join(errs))
8047 instance.primary_node = target_node
8048 self.cfg.Update(instance, feedback_fn)
8050 self.LogInfo("Removing the disks on the original node")
8051 _RemoveDisks(self, instance, target_node=source_node)
8053 # Only start the instance if it's marked as up
8054 if instance.admin_state == constants.ADMINST_UP:
8055 self.LogInfo("Starting instance %s on node %s",
8056 instance.name, target_node)
8058 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8059 ignore_secondaries=True)
if not disks_ok:
8061 _ShutdownInstanceDisks(self, instance)
8062 raise errors.OpExecError("Can't activate the instance's disks")
8064 result = self.rpc.call_instance_start(target_node,
8065 (instance, None, None), False)
8066 msg = result.fail_msg
if msg:
8068 _ShutdownInstanceDisks(self, instance)
8069 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8070 (instance.name, target_node, msg))
8073 class LUNodeMigrate(LogicalUnit):
8074 """Migrate all instances from a node.
8077 HPATH = "node-migrate"
8078 HTYPE = constants.HTYPE_NODE
8081 def CheckArguments(self):
8084 def ExpandNames(self):
8085 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8087 self.share_locks = _ShareAll()
8088 self.needed_locks = {
8089 locking.LEVEL_NODE: [self.op.node_name],
}
8092 def BuildHooksEnv(self):
8095 This runs on the master, the primary and all the secondaries.
return {
8099 "NODE_NAME": self.op.node_name,
8100 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
}
8103 def BuildHooksNodes(self):
8104 """Build hooks nodes.
8107 nl = [self.cfg.GetMasterNode()]
return (nl, nl)
8110 def CheckPrereq(self):
8113 def Exec(self, feedback_fn):
8114 # Prepare jobs for migration instances
8115 allow_runtime_changes = self.op.allow_runtime_changes
jobs = [
8117 [opcodes.OpInstanceMigrate(instance_name=inst.name,
mode=self.op.mode,
live=self.op.live,
8120 iallocator=self.op.iallocator,
8121 target_node=self.op.target_node,
8122 allow_runtime_changes=allow_runtime_changes,
8123 ignore_ipolicy=self.op.ignore_ipolicy)]
8124 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8127 # TODO: Run iallocator in this opcode and pass correct placement options to
8128 # OpInstanceMigrate. Since other jobs can modify the cluster between
8129 # running the iallocator and the actual migration, a good consistency model
8130 # will have to be found.
8132 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8133 frozenset([self.op.node_name]))
8135 return ResultWithJobs(jobs)
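# Sketch of the returned structure (hypothetical instance names): with two
# primary instances on the node, the C{jobs} list built above would look like
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#     ]
#
# i.e. one single-opcode job per instance, so each migration is scheduled as
# its own job.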
8138 class TLMigrateInstance(Tasklet):
8139 """Tasklet class for instance migration.
8142 @ivar live: whether the migration will be done live or non-live;
8143 this variable is initalized only after CheckPrereq has run
8144 @type cleanup: boolean
8145 @ivar cleanup: Wheater we cleanup from a failed migration
8146 @type iallocator: string
8147 @ivar iallocator: The iallocator used to determine target_node
8148 @type target_node: string
8149 @ivar target_node: If given, the target_node to reallocate the instance to
8150 @type failover: boolean
8151 @ivar failover: Whether operation results in failover or migration
8152 @type fallback: boolean
8153 @ivar fallback: Whether fallback to failover is allowed if migration not
8155 @type ignore_consistency: boolean
8156 @ivar ignore_consistency: Wheter we should ignore consistency between source
8158 @type shutdown_timeout: int
8159 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8160 @type ignore_ipolicy: bool
8161 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8166 _MIGRATION_POLL_INTERVAL = 1 # seconds
8167 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8169 def __init__(self, lu, instance_name, cleanup=False,
8170 failover=False, fallback=False,
8171 ignore_consistency=False,
8172 allow_runtime_changes=True,
8173 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8174 ignore_ipolicy=False):
8175 """Initializes this class.
8178 Tasklet.__init__(self, lu)
8181 self.instance_name = instance_name
8182 self.cleanup = cleanup
8183 self.live = False # will be overridden later
8184 self.failover = failover
8185 self.fallback = fallback
8186 self.ignore_consistency = ignore_consistency
8187 self.shutdown_timeout = shutdown_timeout
8188 self.ignore_ipolicy = ignore_ipolicy
8189 self.allow_runtime_changes = allow_runtime_changes
8191 def CheckPrereq(self):
8192 """Check prerequisites.
8194 This checks that the instance is in the cluster.
8197 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8198 instance = self.cfg.GetInstanceInfo(instance_name)
8199 assert instance is not None
8200 self.instance = instance
8201 cluster = self.cfg.GetClusterInfo()
8203 if (not self.cleanup and
8204 not instance.admin_state == constants.ADMINST_UP and
8205 not self.failover and self.fallback):
8206 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8207 " switching to failover")
8208 self.failover = True
8210 if instance.disk_template not in constants.DTS_MIRRORED:
if self.failover:
text = "failovers"
else:
text = "migrations"
8215 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8216 " %s" % (instance.disk_template, text),
errors.ECODE_STATE)
8219 if instance.disk_template in constants.DTS_EXT_MIRROR:
8220 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8222 if self.lu.op.iallocator:
8223 self._RunAllocator()
else:
8225 # We set self.target_node as it is required by
# BuildHooksEnv
8227 self.target_node = self.lu.op.target_node
8229 # Check that the target node is correct in terms of instance policy
8230 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8231 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8232 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8233 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8234 ignore=self.ignore_ipolicy)
8236 # self.target_node is already populated, either directly or by the
8238 target_node = self.target_node
8239 if self.target_node == instance.primary_node:
8240 raise errors.OpPrereqError("Cannot migrate instance %s"
8241 " to its primary (%s)" %
8242 (instance.name, instance.primary_node))
8244 if len(self.lu.tasklets) == 1:
8245 # It is safe to release locks only when we're the only tasklet
8247 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8248 keep=[instance.primary_node, self.target_node])
else:  # internally mirrored disk template
8251 secondary_nodes = instance.secondary_nodes
8252 if not secondary_nodes:
8253 raise errors.ConfigurationError("No secondary node but using"
8254 " %s disk template" %
8255 instance.disk_template)
8256 target_node = secondary_nodes[0]
8257 if self.lu.op.iallocator or (self.lu.op.target_node and
8258 self.lu.op.target_node != target_node):
if self.failover:
8260 text = "failed over"
else:
text = "migrated"
8263 raise errors.OpPrereqError("Instances with disk template %s cannot"
8264 " be %s to arbitrary nodes"
8265 " (neither an iallocator nor a target"
8266 " node can be passed)" %
8267 (instance.disk_template, text),
errors.ECODE_INVAL)
8269 nodeinfo = self.cfg.GetNodeInfo(target_node)
8270 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8271 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8272 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8273 ignore=self.ignore_ipolicy)
8275 i_be = cluster.FillBE(instance)
8277 # check memory requirements on the secondary node
8278 if (not self.cleanup and
8279 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8280 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8281 "migrating instance %s" %
instance.name,
8283 i_be[constants.BE_MINMEM],
8284 instance.hypervisor)
else:
8286 self.lu.LogInfo("Not checking memory on the secondary node as"
8287 " instance will not be started")
8289 # check if failover must be forced instead of migration
8290 if (not self.cleanup and not self.failover and
8291 i_be[constants.BE_ALWAYS_FAILOVER]):
if self.fallback:
8293 self.lu.LogInfo("Instance configured to always failover; fallback"
" to failover")
8295 self.failover = True
else:
8297 raise errors.OpPrereqError("This instance has been configured to"
8298 " always failover, please allow failover",
errors.ECODE_STATE)
8301 # check bridge existance
8302 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8304 if not self.cleanup:
8305 _CheckNodeNotDrained(self.lu, target_node)
8306 if not self.failover:
8307 result = self.rpc.call_instance_migratable(instance.primary_node,
instance)
8309 if result.fail_msg and self.fallback:
8310 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
" failover")
8312 self.failover = True
else:
8314 result.Raise("Can't migrate, please use failover",
8315 prereq=True, ecode=errors.ECODE_STATE)
8317 assert not (self.failover and self.cleanup)
8319 if not self.failover:
8320 if self.lu.op.live is not None and self.lu.op.mode is not None:
8321 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8322 " parameters are accepted",
errors.ECODE_INVAL)
8324 if self.lu.op.live is not None:
if self.lu.op.live:
8326 self.lu.op.mode = constants.HT_MIGRATION_LIVE
else:
8328 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8329 # reset the 'live' parameter to None so that repeated
8330 # invocations of CheckPrereq do not raise an exception
8331 self.lu.op.live = None
8332 elif self.lu.op.mode is None:
8333 # read the default value from the hypervisor
8334 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8335 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8337 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
else:
8339 # Failover is never live
self.live = False
8342 if not (self.failover or self.cleanup):
8343 remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
8345 instance.hypervisor)
8346 remote_info.Raise("Error checking instance on node %s" %
8347 instance.primary_node)
8348 instance_running = bool(remote_info.payload)
8349 if instance_running:
8350 self.current_mem = int(remote_info.payload["memory"])
8352 def _RunAllocator(self):
8353 """Run the allocator based on input opcode.
8356 # FIXME: add a self.ignore_ipolicy option
8357 ial = IAllocator(self.cfg, self.rpc,
8358 mode=constants.IALLOCATOR_MODE_RELOC,
8359 name=self.instance_name,
8360 relocate_from=[self.instance.primary_node],
)
8363 ial.Run(self.lu.op.iallocator)
if not ial.success:
8366 raise errors.OpPrereqError("Can't compute nodes using"
8367 " iallocator '%s': %s" %
8368 (self.lu.op.iallocator, ial.info),
errors.ECODE_NORES)
8370 if len(ial.result) != ial.required_nodes:
8371 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8372 " of nodes (%s), required %s" %
8373 (self.lu.op.iallocator, len(ial.result),
8374 ial.required_nodes), errors.ECODE_FAULT)
8375 self.target_node = ial.result[0]
8376 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8377 self.instance_name, self.lu.op.iallocator,
8378 utils.CommaJoin(ial.result))
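# Example (hypothetical values): for a relocation request the iallocator must
# return exactly ial.required_nodes names; for an instance migration that is
# a single node, e.g. ial.result == ["node3.example.com"], which is why only
# ial.result[0] is used as the target node above.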
8380 def _WaitUntilSync(self):
8381 """Poll with custom rpc for disk sync.
8383 This uses our own step-based rpc call.
8386 self.feedback_fn("* wait until resync is done")
all_done = False
while not all_done:
all_done = True
8390 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
self.nodes_ip,
8392 (self.instance.disks,
self.instance))
min_percent = 100
8395 for node, nres in result.items():
8396 nres.Raise("Cannot resync disks on node %s" % node)
8397 node_done, node_percent = nres.payload
8398 all_done = all_done and node_done
8399 if node_percent is not None:
8400 min_percent = min(min_percent, node_percent)
if not all_done:
8402 if min_percent < 100:
8403 self.feedback_fn(" - progress: %.1f%%" % min_percent)
time.sleep(2)
8406 def _EnsureSecondary(self, node):
8407 """Demote a node to secondary.
8410 self.feedback_fn("* switching node %s to secondary mode" % node)
8412 for dev in self.instance.disks:
8413 self.cfg.SetDiskID(dev, node)
8415 result = self.rpc.call_blockdev_close(node, self.instance.name,
8416 self.instance.disks)
8417 result.Raise("Cannot change disk to secondary on node %s" % node)
8419 def _GoStandalone(self):
8420 """Disconnect from the network.
8423 self.feedback_fn("* changing into standalone mode")
8424 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8425 self.instance.disks)
8426 for node, nres in result.items():
8427 nres.Raise("Cannot disconnect disks node %s" % node)
8429 def _GoReconnect(self, multimaster):
8430 """Reconnect to the network.
"""
if multimaster:
msg = "dual-master"
else:
8436 msg = "single-master"
8437 self.feedback_fn("* changing disks into %s mode" % msg)
8438 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8439 (self.instance.disks, self.instance),
8440 self.instance.name, multimaster)
8441 for node, nres in result.items():
8442 nres.Raise("Cannot change disks config on node %s" % node)
8444 def _ExecCleanup(self):
8445 """Try to cleanup after a failed migration.
8447 The cleanup is done by:
8448 - check that the instance is running only on one node
8449 (and update the config if needed)
8450 - change disks on its secondary node to secondary
8451 - wait until disks are fully synchronized
8452 - disconnect from the network
8453 - change disks into single-master mode
8454 - wait again until disks are fully synchronized
8457 instance = self.instance
8458 target_node = self.target_node
8459 source_node = self.source_node
8461 # check running on only one node
8462 self.feedback_fn("* checking where the instance actually runs"
8463 " (if this hangs, the hypervisor might be in"
8465 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8466 for node, result in ins_l.items():
8467 result.Raise("Can't contact node %s" % node)
8469 runningon_source = instance.name in ins_l[source_node].payload
8470 runningon_target = instance.name in ins_l[target_node].payload
8472 if runningon_source and runningon_target:
8473 raise errors.OpExecError("Instance seems to be running on two nodes,"
8474 " or the hypervisor is confused; you will have"
8475 " to ensure manually that it runs only on one"
8476 " and restart this operation")
8478 if not (runningon_source or runningon_target):
8479 raise errors.OpExecError("Instance does not seem to be running at all;"
8480 " in this case it's safer to repair by"
8481 " running 'gnt-instance stop' to ensure disk"
8482 " shutdown, and then restarting it")
8484 if runningon_target:
8485 # the migration has actually succeeded, we need to update the config
8486 self.feedback_fn("* instance running on secondary node (%s),"
8487 " updating config" % target_node)
8488 instance.primary_node = target_node
8489 self.cfg.Update(instance, self.feedback_fn)
8490 demoted_node = source_node
8492 self.feedback_fn("* instance confirmed to be running on its"
8493 " primary node (%s)" % source_node)
8494 demoted_node = target_node
8496 if instance.disk_template in constants.DTS_INT_MIRROR:
8497 self._EnsureSecondary(demoted_node)
try:
8499 self._WaitUntilSync()
8500 except errors.OpExecError:
8501 # we ignore here errors, since if the device is standalone, it
8502 # won't be able to sync
pass
8504 self._GoStandalone()
8505 self._GoReconnect(False)
8506 self._WaitUntilSync()
8508 self.feedback_fn("* done")
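# Sketch of the DRBD transitions driven by the cleanup above (drbd8 layout
# assumed):
#   _EnsureSecondary(demoted_node)  -> close the device on the demoted node
#   _GoStandalone()                 -> disconnect the disks from the network
#   _GoReconnect(False)             -> reconnect in single-master mode
#   _WaitUntilSync()                -> wait for the resulting resync
# _ExecMigration below reuses the same helpers, but reconnects with
# multimaster=True while the memory transfer is running.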
8510 def _RevertDiskStatus(self):
8511 """Try to revert the disk status after a failed migration.
8514 target_node = self.target_node
8515 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
return
try:
8519 self._EnsureSecondary(target_node)
8520 self._GoStandalone()
8521 self._GoReconnect(False)
8522 self._WaitUntilSync()
8523 except errors.OpExecError, err:
8524 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8525 " please try to recover the instance manually;"
8526 " error '%s'" % str(err))
8528 def _AbortMigration(self):
8529 """Call the hypervisor code to abort a started migration.
8532 instance = self.instance
8533 target_node = self.target_node
8534 source_node = self.source_node
8535 migration_info = self.migration_info
8537 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
instance,
migration_info,
False)
8541 abort_msg = abort_result.fail_msg
if abort_msg:
8543 logging.error("Aborting migration failed on target node %s: %s",
8544 target_node, abort_msg)
8545 # Don't raise an exception here, as we stil have to try to revert the
8546 # disk status, even if this step failed.
8548 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8549 instance, False, self.live)
8550 abort_msg = abort_result.fail_msg
if abort_msg:
8552 logging.error("Aborting migration failed on source node %s: %s",
8553 source_node, abort_msg)
8555 def _ExecMigration(self):
8556 """Migrate an instance.
8558 The migrate is done by:
8559 - change the disks into dual-master mode
8560 - wait until disks are fully synchronized again
8561 - migrate the instance
8562 - change disks on the new secondary node (the old primary) to secondary
8563 - wait until disks are fully synchronized
8564 - change disks into single-master mode
8567 instance = self.instance
8568 target_node = self.target_node
8569 source_node = self.source_node
8571 # Check for hypervisor version mismatch and warn the user.
8572 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8573 None, [self.instance.hypervisor])
8574 for ninfo in nodeinfo.values():
8575 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8577 (_, _, (src_info, )) = nodeinfo[source_node].payload
8578 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8580 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8581 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8582 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8583 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8584 if src_version != dst_version:
8585 self.feedback_fn("* warning: hypervisor version mismatch between"
8586 " source (%s) and target (%s) node" %
8587 (src_version, dst_version))
8589 self.feedback_fn("* checking disk consistency between source and target")
8590 for (idx, dev) in enumerate(instance.disks):
8591 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8592 raise errors.OpExecError("Disk %s is degraded or not fully"
8593 " synchronized on target node,"
8594 " aborting migration" % idx)
8596 if self.current_mem > self.tgt_free_mem:
8597 if not self.allow_runtime_changes:
8598 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8599 " free memory to fit instance %s on target"
8600 " node %s (have %dMB, need %dMB)" %
8601 (instance.name, target_node,
8602 self.tgt_free_mem, self.current_mem))
8603 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8604 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
instance,
self.tgt_free_mem)
8607 rpcres.Raise("Cannot modify instance runtime memory")
8609 # First get the migration information from the remote node
8610 result = self.rpc.call_migration_info(source_node, instance)
8611 msg = result.fail_msg
if msg:
8613 log_err = ("Failed fetching source migration information from %s: %s" %
(source_node, msg))
8615 logging.error(log_err)
8616 raise errors.OpExecError(log_err)
8618 self.migration_info = migration_info = result.payload
8620 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8621 # Then switch the disks to master/master mode
8622 self._EnsureSecondary(target_node)
8623 self._GoStandalone()
8624 self._GoReconnect(True)
8625 self._WaitUntilSync()
8627 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8628 result = self.rpc.call_accept_instance(target_node,
instance,
migration_info,
8631 self.nodes_ip[target_node])
8633 msg = result.fail_msg
if msg:
8635 logging.error("Instance pre-migration failed, trying to revert"
8636 " disk status: %s", msg)
8637 self.feedback_fn("Pre-migration failed, aborting")
8638 self._AbortMigration()
8639 self._RevertDiskStatus()
8640 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8641 (instance.name, msg))
8643 self.feedback_fn("* migrating instance to %s" % target_node)
8644 result = self.rpc.call_instance_migrate(source_node, instance,
8645 self.nodes_ip[target_node],
self.live)
8647 msg = result.fail_msg
if msg:
8649 logging.error("Instance migration failed, trying to revert"
8650 " disk status: %s", msg)
8651 self.feedback_fn("Migration failed, aborting")
8652 self._AbortMigration()
8653 self._RevertDiskStatus()
8654 raise errors.OpExecError("Could not migrate instance %s: %s" %
8655 (instance.name, msg))
8657 self.feedback_fn("* starting memory transfer")
8658 last_feedback = time.time()
while True:
8660 result = self.rpc.call_instance_get_migration_status(source_node,
instance)
8662 msg = result.fail_msg
8663 ms = result.payload # MigrationStatus instance
8664 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8665 logging.error("Instance migration failed, trying to revert"
8666 " disk status: %s", msg)
8667 self.feedback_fn("Migration failed, aborting")
8668 self._AbortMigration()
8669 self._RevertDiskStatus()
8670 raise errors.OpExecError("Could not migrate instance %s: %s" %
8671 (instance.name, msg))
8673 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8674 self.feedback_fn("* memory transfer complete")
break
8677 if (utils.TimeoutExpired(last_feedback,
8678 self._MIGRATION_FEEDBACK_INTERVAL) and
8679 ms.transferred_ram is not None):
8680 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8681 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8682 last_feedback = time.time()
8684 time.sleep(self._MIGRATION_POLL_INTERVAL)
8686 result = self.rpc.call_instance_finalize_migration_src(source_node,
instance,
True,
self.live)
8690 msg = result.fail_msg
if msg:
8692 logging.error("Instance migration succeeded, but finalization failed"
8693 " on the source node: %s", msg)
8694 raise errors.OpExecError("Could not finalize instance migration: %s" %
msg)
8697 instance.primary_node = target_node
8699 # distribute new instance config to the other nodes
8700 self.cfg.Update(instance, self.feedback_fn)
8702 result = self.rpc.call_instance_finalize_migration_dst(target_node,
instance,
migration_info,
True)
8706 msg = result.fail_msg
if msg:
8708 logging.error("Instance migration succeeded, but finalization failed"
8709 " on the target node: %s", msg)
8710 raise errors.OpExecError("Could not finalize instance migration: %s" %
msg)
8713 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8714 self._EnsureSecondary(source_node)
8715 self._WaitUntilSync()
8716 self._GoStandalone()
8717 self._GoReconnect(False)
8718 self._WaitUntilSync()
8720 # If the instance's disk template is `rbd' or `ext' and there was a
8721 # successful migration, unmap the device from the source node.
8722 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
8723 disks = _ExpandCheckDisks(instance, instance.disks)
8724 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
for disk in disks:
8726 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8727 msg = result.fail_msg
if msg:
8729 logging.error("Migration was successful, but couldn't unmap the"
8730 " block device %s on source node %s: %s",
8731 disk.iv_name, source_node, msg)
8732 logging.error("You need to unmap the device %s manually on %s",
8733 disk.iv_name, source_node)
8735 self.feedback_fn("* done")
8737 def _ExecFailover(self):
8738 """Failover an instance.
8740 The failover is done by shutting it down on its present node and
8741 starting it on the secondary.
8744 instance = self.instance
8745 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8747 source_node = instance.primary_node
8748 target_node = self.target_node
8750 if instance.admin_state == constants.ADMINST_UP:
8751 self.feedback_fn("* checking disk consistency between source and target")
8752 for (idx, dev) in enumerate(instance.disks):
8753 # for drbd, these are drbd over lvm
8754 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
False):
8756 if primary_node.offline:
8757 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
" target node %s" %
8759 (primary_node.name, idx, target_node))
8760 elif not self.ignore_consistency:
8761 raise errors.OpExecError("Disk %s is degraded on target node,"
8762 " aborting failover" % idx)
8764 self.feedback_fn("* not checking disk consistency as instance is not"
" running")
8767 self.feedback_fn("* shutting down instance on source node")
8768 logging.info("Shutting down instance %s on node %s",
8769 instance.name, source_node)
8771 result = self.rpc.call_instance_shutdown(source_node, instance,
8772 self.shutdown_timeout)
8773 msg = result.fail_msg
if msg:
8775 if self.ignore_consistency or primary_node.offline:
8776 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8777 " proceeding anyway; please make sure node"
8778 " %s is down; error details: %s",
8779 instance.name, source_node, source_node, msg)
else:
8781 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
8783 (instance.name, source_node, msg))
8785 self.feedback_fn("* deactivating the instance's disks on source node")
8786 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8787 raise errors.OpExecError("Can't shut down the instance's disks")
8789 instance.primary_node = target_node
8790 # distribute new instance config to the other nodes
8791 self.cfg.Update(instance, self.feedback_fn)
8793 # Only start the instance if it's marked as up
8794 if instance.admin_state == constants.ADMINST_UP:
8795 self.feedback_fn("* activating the instance's disks on target node %s" %
target_node)
8797 logging.info("Starting instance %s on node %s",
8798 instance.name, target_node)
8800 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8801 ignore_secondaries=True)
if not disks_ok:
8803 _ShutdownInstanceDisks(self.lu, instance)
8804 raise errors.OpExecError("Can't activate the instance's disks")
8806 self.feedback_fn("* starting the instance on the target node %s" %
target_node)
8808 result = self.rpc.call_instance_start(target_node, (instance, None, None),
False)
8810 msg = result.fail_msg
if msg:
8812 _ShutdownInstanceDisks(self.lu, instance)
8813 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8814 (instance.name, target_node, msg))
8816 def Exec(self, feedback_fn):
8817 """Perform the migration.
8820 self.feedback_fn = feedback_fn
8821 self.source_node = self.instance.primary_node
8823 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8824 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8825 self.target_node = self.instance.secondary_nodes[0]
8826 # Otherwise self.target_node has been populated either
8827 # directly, or through an iallocator.
8829 self.all_nodes = [self.source_node, self.target_node]
8830 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8831 in self.cfg.GetMultiNodeInfo(self.all_nodes))
if self.failover:
8834 feedback_fn("Failover instance %s" % self.instance.name)
8835 self._ExecFailover()
else:
8837 feedback_fn("Migrating instance %s" % self.instance.name)
if self.cleanup:
8840 return self._ExecCleanup()
else:
8842 return self._ExecMigration()
8845 def _CreateBlockDev(lu, node, instance, device, force_create, info,
force_open):
8847 """Wrapper around L{_CreateBlockDevInner}.
8849 This method annotates the root device first.
8852 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8853 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
force_open)
8857 def _CreateBlockDevInner(lu, node, instance, device, force_create,
info, force_open):
8859 """Create a tree of block devices on a given node.
8861 If this device type has to be created on secondaries, create it and
8864 If not, just recurse to children keeping the same 'force' value.
8866 @attention: The device has to be annotated already.
8868 @param lu: the lu on whose behalf we execute
8869 @param node: the node on which to create the device
8870 @type instance: L{objects.Instance}
8871 @param instance: the instance which owns the device
8872 @type device: L{objects.Disk}
8873 @param device: the device to create
8874 @type force_create: boolean
8875 @param force_create: whether to force creation of this device; this
8876 will be change to True whenever we find a device which has
8877 CreateOnSecondary() attribute
8878 @param info: the extra 'metadata' we should attach to the device
8879 (this will be represented as a LVM tag)
8880 @type force_open: boolean
8881 @param force_open: this parameter will be passes to the
8882 L{backend.BlockdevCreate} function where it specifies
8883 whether we run on primary or not, and it affects both
8884 the child assembly and the device own Open() execution
8887 if device.CreateOnSecondary():
force_create = True
if device.children:
8891 for child in device.children:
8892 _CreateBlockDevInner(lu, node, instance, child, force_create,
info, force_open)
8895 if not force_create:
return
8898 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8901 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8902 """Create a single block device on a given node.
8904 This will not recurse over children of the device, so they must be
8907 @param lu: the lu on whose behalf we execute
8908 @param node: the node on which to create the device
8909 @type instance: L{objects.Instance}
8910 @param instance: the instance which owns the device
8911 @type device: L{objects.Disk}
8912 @param device: the device to create
8913 @param info: the extra 'metadata' we should attach to the device
8914 (this will be represented as a LVM tag)
8915 @type force_open: boolean
8916 @param force_open: this parameter will be passes to the
8917 L{backend.BlockdevCreate} function where it specifies
8918 whether we run on primary or not, and it affects both
8919 the child assembly and the device own Open() execution
8922 lu.cfg.SetDiskID(device, node)
8923 result = lu.rpc.call_blockdev_create(node, device, device.size,
8924 instance.name, force_open, info)
8925 result.Raise("Can't create block device %s on"
8926 " node %s for instance %s" % (device, node, instance.name))
8927 if device.physical_id is None:
8928 device.physical_id = result.payload
8931 def _GenerateUniqueNames(lu, exts):
8932 """Generate a suitable LV name.
8934 This will generate a logical volume name for the given instance.
"""
results = []
for val in exts:
8939 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8940 results.append("%s%s" % (new_id, val))
return results
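# Example (hypothetical generated id): _GenerateUniqueNames(lu,
# [".disk0_data", ".disk0_meta"]) returns one name per suffix, e.g.
#   ["4ba43e6a-....disk0_data", "4ba43e6a-....disk0_meta"]
# which become the LV names of a DRBD disk's data and metadata volumes.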
8943 def _GetPCIInfo(lu, dev_type):
8946 if hasattr(lu, 'hotplug_info'):
8947 info = lu.hotplug_info
8948 elif hasattr(lu, 'instance') and hasattr(lu.instance, 'hotplug_info'):
8949 return lu.cfg.GetPCIInfo(lu.instance.name, dev_type)
else:
info = None
if info:
8952 idx = getattr(info, dev_type)
8953 setattr(info, dev_type, idx+1)
8954 pci = info.pci_pool.pop()
return idx, pci
return None, None
8960 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8961 iv_name, p_minor, s_minor):
8962 """Generate a drbd8 device complete with its children.
8965 assert len(vgnames) == len(names) == 2
8966 port = lu.cfg.AllocatePort()
8967 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8969 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8970 logical_id=(vgnames[0], names[0]),
params={})
8972 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8973 logical_id=(vgnames[1], names[1]),
params={})
8976 disk_idx, pci = _GetPCIInfo(lu, 'disks')
8977 drbd_dev = objects.Disk(idx=disk_idx, pci=pci,
8978 dev_type=constants.LD_DRBD8, size=size,
8979 logical_id=(primary, secondary, port,
p_minor, s_minor,
shared_secret),
8982 children=[dev_data, dev_meta],
8983 iv_name=iv_name, params={})
return drbd_dev
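# Layout note: the drbd8 device built above stores its addressing data in the
# 6-tuple
#   logical_id = (primary, secondary, port, p_minor, s_minor, shared_secret)
# which is the same layout LUInstanceRecreateDisks relies on when it rebuilds
# the tuple with new nodes and freshly allocated minors.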
8987 _DISK_TEMPLATE_NAME_PREFIX = {
8988 constants.DT_PLAIN: "",
8989 constants.DT_RBD: ".rbd",
8990 constants.DT_EXT: ".ext",
}
8994 _DISK_TEMPLATE_DEVICE_TYPE = {
8995 constants.DT_PLAIN: constants.LD_LV,
8996 constants.DT_FILE: constants.LD_FILE,
8997 constants.DT_SHARED_FILE: constants.LD_FILE,
8998 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8999 constants.DT_RBD: constants.LD_RBD,
9000 constants.DT_EXT: constants.LD_EXT,
}
9004 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
9005 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
9006 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9007 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9008 """Generate the entire disk layout for a given template type.
9011 #TODO: compute space requirements
9013 vgname = lu.cfg.GetVGName()
9014 disk_count = len(disk_info)
disks = []
9017 if template_name == constants.DT_DISKLESS:
pass
9019 elif template_name == constants.DT_DRBD8:
9020 if len(secondary_nodes) != 1:
9021 raise errors.ProgrammerError("Wrong template configuration")
9022 remote_node = secondary_nodes[0]
9023 minors = lu.cfg.AllocateDRBDMinor(
9024 [primary_node, remote_node] * len(disk_info), instance_name)
9026 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
full_disk_params)
9028 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
names = []
9031 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9032 for i in range(disk_count)]):
9033 names.append(lv_prefix + "_data")
9034 names.append(lv_prefix + "_meta")
9035 for idx, disk in enumerate(disk_info):
9036 disk_index = idx + base_index
9037 data_vg = disk.get(constants.IDISK_VG, vgname)
9038 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9039 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9040 disk[constants.IDISK_SIZE],
9042 names[idx * 2:idx * 2 + 2],
9043 "disk/%d" % disk_index,
9044 minors[idx * 2], minors[idx * 2 + 1])
9045 disk_dev.mode = disk[constants.IDISK_MODE]
9046 disks.append(disk_dev)
else:
if secondary_nodes:
9049 raise errors.ProgrammerError("Wrong template configuration")
9051 if template_name == constants.DT_FILE:
_req_file_storage()
9053 elif template_name == constants.DT_SHARED_FILE:
9054 _req_shr_file_storage()
9056 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9057 if name_prefix is None:
names = None
else:
9060 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9061 (name_prefix, base_index + i)
9062 for i in range(disk_count)])
9064 if template_name == constants.DT_PLAIN:
9065 def logical_id_fn(idx, _, disk):
9066 vg = disk.get(constants.IDISK_VG, vgname)
9067 return (vg, names[idx])
9068 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9070 lambda _, disk_index, disk: (file_driver,
9071 "%s/disk%d" % (file_storage_dir,
9073 elif template_name == constants.DT_BLOCK:
9075 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9076 disk[constants.IDISK_ADOPT])
9077 elif template_name == constants.DT_RBD:
9078 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9079 elif template_name == constants.DT_EXT:
9080 def logical_id_fn(idx, _, disk):
9081 provider = disk.get(constants.IDISK_PROVIDER, None)
9082 if provider is None:
9083 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9084 " not found", constants.DT_EXT,
9085 constants.IDISK_PROVIDER)
9086 return (provider, names[idx])
9088 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9090 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9092 for idx, disk in enumerate(disk_info):
params = {}
9094 # Only for the Ext template add disk_info to params
9095 if template_name == constants.DT_EXT:
9096 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
for key in disk:
9098 if key not in constants.IDISK_PARAMS:
9099 params[key] = disk[key]
9100 disk_index = idx + base_index
9101 size = disk[constants.IDISK_SIZE]
9102 feedback_fn("* disk %s, size %s" %
9103 (disk_index, utils.FormatUnit(size, "h")))
9105 disk_idx, pci = _GetPCIInfo(lu, 'disks')
9107 disks.append(objects.Disk(dev_type=dev_type, size=size,
9108 logical_id=logical_id_fn(idx, disk_index, disk),
9109 iv_name="disk/%d" % disk_index,
9110 mode=disk[constants.IDISK_MODE],
9111 params=params, idx=disk_idx, pci=pci))
return disks
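# Usage sketch (hypothetical arguments): for a plain LVM instance with a
# single 10 GiB disk, this helper would be invoked roughly as
#
#   disks = _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
#                                 "node1", [], [{constants.IDISK_SIZE: 10240,
#                                                constants.IDISK_MODE: "rw"}],
#                                 None, None, 0, feedback_fn, {})
#
# and would return a single LD_LV objects.Disk whose logical_id is
# (volume_group, generated_lv_name).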
9116 def _GetInstanceInfoText(instance):
9117 """Compute the text that should be added to the disk's metadata.
9120 return "originstname+%s" % instance.name
9123 def _CalcEta(time_taken, written, total_size):
9124 """Calculates the ETA based on size written and total size.
9126 @param time_taken: The time taken so far
9127 @param written: amount written so far
9128 @param total_size: The total size of data to be written
9129 @return: The remaining time in seconds
9132 avg_time = time_taken / float(written)
9133 return (total_size - written) * avg_time
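# Worked example: if 512 MiB out of a 4096 MiB disk were written in 60
# seconds, _CalcEta(60, 512, 4096) == (4096 - 512) * (60 / 512.0) == 420.0,
# i.e. roughly seven more minutes remain.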
9136 def _WipeDisks(lu, instance):
9137 """Wipes instance disks.
9139 @type lu: L{LogicalUnit}
9140 @param lu: the logical unit on whose behalf we execute
9141 @type instance: L{objects.Instance}
9142 @param instance: the instance whose disks we should create
9143 @return: the success of the wipe
9146 node = instance.primary_node
9148 for device in instance.disks:
9149 lu.cfg.SetDiskID(device, node)
9151 logging.info("Pause sync of instance %s disks", instance.name)
9152 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9153 (instance.disks, instance),
True)
9155 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
9157 for idx, success in enumerate(result.payload):
if not success:
9159 logging.warn("pause-sync of instance %s for disks %d failed",
instance.name, idx)
9163 for idx, device in enumerate(instance.disks):
9164 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9165 # MAX_WIPE_CHUNK at max
9166 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9167 constants.MIN_WIPE_CHUNK_PERCENT)
9168 # we _must_ make this an int, otherwise rounding errors will
9170 wipe_chunk_size = int(wipe_chunk_size)
9172 lu.LogInfo("* Wiping disk %d", idx)
9173 logging.info("Wiping disk %d for instance %s, node %s using"
9174 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
offset = 0
size = device.size
last_output = 0
9179 start_time = time.time()
9181 while offset < size:
9182 wipe_size = min(wipe_chunk_size, size - offset)
9183 logging.debug("Wiping disk %d, offset %s, chunk %s",
9184 idx, offset, wipe_size)
9185 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
wipe_size)
9187 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9188 (idx, offset, wipe_size))
offset += wipe_size
now = time.time()
9191 if now - last_output >= 60:
9192 eta = _CalcEta(now - start_time, offset, size)
9193 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9194 (offset / float(size) * 100, utils.FormatSeconds(eta)))
last_output = now
9197 logging.info("Resume sync of instance %s disks", instance.name)
9199 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9200 (instance.disks, instance),
False)
if result.fail_msg:
9204 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9205 " please have a look at the status and troubleshoot"
9206 " the issue: %s", node, result.fail_msg)
else:
9208 for idx, success in enumerate(result.payload):
if not success:
9210 lu.LogWarning("Resume sync of disk %d failed, please have a"
9211 " look at the status and troubleshoot the issue", idx)
9212 logging.warn("resume-sync of instance %s for disks %d failed",
instance.name, idx)
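# Worked example for the chunk sizing above (assuming the usual constant
# values MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB): a
# 2048 MiB disk is wiped in chunks of int(min(1024, 2048 / 100.0 * 10)) ==
# 204 MiB, while a very large disk is capped at the 1024 MiB maximum.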
9216 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9217 """Create all disks for an instance.
9219 This abstracts away some work from AddInstance.
9221 @type lu: L{LogicalUnit}
9222 @param lu: the logical unit on whose behalf we execute
9223 @type instance: L{objects.Instance}
9224 @param instance: the instance whose disks we should create
9226 @param to_skip: list of indices to skip
9227 @type target_node: string
9228 @param target_node: if passed, overrides the target node for creation
9230 @return: the success of the creation
9233 info = _GetInstanceInfoText(instance)
9234 if target_node is None:
9235 pnode = instance.primary_node
9236 all_nodes = instance.all_nodes
else:
pnode = target_node
all_nodes = [pnode]
9241 if instance.disk_template in constants.DTS_FILEBASED:
9242 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9243 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9245 result.Raise("Failed to create directory '%s' on"
9246 " node %s" % (file_storage_dir, pnode))
9248 # Note: this needs to be kept in sync with adding of disks in
9249 # LUInstanceSetParams
9250 for idx, device in enumerate(instance.disks):
9251 if to_skip and idx in to_skip:
continue
9253 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9255 for node in all_nodes:
9256 f_create = node == pnode
9257 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9260 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9261 """Remove all disks for an instance.
9263 This abstracts away some work from `AddInstance()` and
9264 `RemoveInstance()`. Note that in case some of the devices couldn't
9265 be removed, the removal will continue with the other ones (compare
9266 with `_CreateDisks()`).
9268 @type lu: L{LogicalUnit}
9269 @param lu: the logical unit on whose behalf we execute
9270 @type instance: L{objects.Instance}
9271 @param instance: the instance whose disks we should remove
9272 @type target_node: string
9273 @param target_node: used to override the node on which to remove the disks
9275 @return: the success of the removal
9278 logging.info("Removing block devices for instance %s", instance.name)
all_result = True
9281 ports_to_release = set()
9282 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9283 for (idx, device) in enumerate(anno_disks):
if target_node:
9285 edata = [(target_node, device)]
else:
9287 edata = device.ComputeNodeTree(instance.primary_node)
9288 for node, disk in edata:
9289 lu.cfg.SetDiskID(disk, node)
9290 result = lu.rpc.call_blockdev_remove(node, disk)
if result.fail_msg:
9292 lu.LogWarning("Could not remove disk %s on node %s,"
9293 " continuing anyway: %s", idx, node, result.fail_msg)
9294 if not (result.offline and node != instance.primary_node):
all_result = False
9297 # if this is a DRBD disk, return its port to the pool
9298 if device.dev_type in constants.LDS_DRBD:
9299 ports_to_release.add(device.logical_id[2])
9301 if all_result or ignore_failures:
9302 for port in ports_to_release:
9303 lu.cfg.AddTcpUdpPort(port)
9305 if instance.disk_template == constants.DT_FILE:
9306 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
if target_node:
tgt = target_node
else:
9310 tgt = instance.primary_node
9311 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
if result.fail_msg:
9313 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9314 file_storage_dir, instance.primary_node, result.fail_msg)
return all_result
9320 def _ComputeDiskSizePerVG(disk_template, disks):
9321 """Compute disk size requirements in the volume group
9324 def _compute(disks, payload):
9325 """Universal algorithm.
"""
vgs = {}
for disk in disks:
9330 vgs[disk[constants.IDISK_VG]] = \
9331 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
return vgs
9335 # Required free disk space as a function of disk and swap space
req_size_dict = {
9337 constants.DT_DISKLESS: {},
9338 constants.DT_PLAIN: _compute(disks, 0),
9339 # 128 MB are added for drbd metadata for each disk
9340 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9341 constants.DT_FILE: {},
9342 constants.DT_SHARED_FILE: {},
}
9345 if disk_template not in req_size_dict:
9346 raise errors.ProgrammerError("Disk template '%s' size requirement"
9347 " is unknown" % disk_template)
9349 return req_size_dict[disk_template]
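# Example (hypothetical disks): for two 1024 MiB DRBD disks in volume group
# "xenvg",
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_SIZE: 1024,
#                           constants.IDISK_VG: "xenvg"},
#                          {constants.IDISK_SIZE: 1024,
#                           constants.IDISK_VG: "xenvg"}])
# is meant to return a dict keyed by VG name with the space needed there,
# counting DRBD_META_SIZE of metadata on top of each disk's size.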
9352 def _ComputeDiskSize(disk_template, disks):
9353 """Compute disk size requirements according to disk template
9356 # Required free disk space as a function of disk and swap space
9357 req_size_dict = {
9358 constants.DT_DISKLESS: None,
9359 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9360 # 128 MB are added for drbd metadata for each disk
9361 constants.DT_DRBD8:
9362 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9363 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9364 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9365 constants.DT_BLOCK: 0,
9366 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9367 constants.DT_EXT: sum(d[constants.IDISK_SIZE] for d in disks),
9368 }
9370 if disk_template not in req_size_dict:
9371 raise errors.ProgrammerError("Disk template '%s' size requirement"
9372 " is unknown" % disk_template)
9374 return req_size_dict[disk_template]
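# Illustrative example: for the drbd template and disks of 1024 MB and
# 2048 MB this returns (1024 + 128) + (2048 + 128) = 3328 MB, whereas the
# plain template would return just 1024 + 2048 = 3072 MB.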
9377 def _FilterVmNodes(lu, nodenames):
9378 """Filters out non-vm_capable nodes from a list.
9380 @type lu: L{LogicalUnit}
9381 @param lu: the logical unit for which we check
9382 @type nodenames: list
9383 @param nodenames: the list of nodes on which we should check
9385 @return: the list of vm-capable nodes
9388 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9389 return [name for name in nodenames if name not in vm_nodes]
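# The result preserves the order of nodenames; only nodes the configuration
# marks as not vm_capable are dropped.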
9392 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9393 """Hypervisor parameter validation.
9395 This function abstracts the hypervisor parameter validation to be
9396 used in both instance create and instance modify.
9398 @type lu: L{LogicalUnit}
9399 @param lu: the logical unit for which we check
9400 @type nodenames: list
9401 @param nodenames: the list of nodes on which we should check
9402 @type hvname: string
9403 @param hvname: the name of the hypervisor we should use
9404 @type hvparams: dict
9405 @param hvparams: the parameters which we need to check
9406 @raise errors.OpPrereqError: if the parameters are not valid
9409 nodenames = _FilterVmNodes(lu, nodenames)
9411 cluster = lu.cfg.GetClusterInfo()
9412 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9414 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9415 for node in nodenames:
9416 info = hvinfo[node]
9417 if info.offline:
9418 continue
9419 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9422 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9423 """OS parameters validation.
9425 @type lu: L{LogicalUnit}
9426 @param lu: the logical unit for which we check
9427 @type required: boolean
9428 @param required: whether the validation should fail if the OS is not
9429 found
9430 @type nodenames: list
9431 @param nodenames: the list of nodes on which we should check
9432 @type osname: string
9433 @param osname: the name of the OS we should use
9434 @type osparams: dict
9435 @param osparams: the parameters which we need to check
9436 @raise errors.OpPrereqError: if the parameters are not valid
9439 nodenames = _FilterVmNodes(lu, nodenames)
9440 result = lu.rpc.call_os_validate(nodenames, required, osname,
9441 [constants.OS_VALIDATE_PARAMETERS],
9443 for node, nres in result.items():
9444 # we don't check for offline cases since this should be run only
9445 # against the master node and/or an instance's nodes
9446 nres.Raise("OS Parameters validation failed on node %s" % node)
9447 if not nres.payload:
9448 lu.LogInfo("OS %s not found on node %s, validation skipped",
9449 osname, node)
9452 class LUInstanceCreate(LogicalUnit):
9453 """Create an instance.
9456 HPATH = "instance-add"
9457 HTYPE = constants.HTYPE_INSTANCE
9460 def CheckArguments(self):
9464 # do not require name_check to ease forward/backward compatibility
9466 if self.op.no_install and self.op.start:
9467 self.LogInfo("No-installation mode selected, disabling startup")
9468 self.op.start = False
9469 # validate/normalize the instance name
9470 self.op.instance_name = \
9471 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9473 if self.op.ip_check and not self.op.name_check:
9474 # TODO: make the ip check more flexible and not depend on the name check
9475 raise errors.OpPrereqError("Cannot do IP address check without a name"
9476 " check", errors.ECODE_INVAL)
9478 # check nics' parameter names
9479 for nic in self.op.nics:
9480 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9482 # check disks. parameter names and consistent adopt/no-adopt strategy
9483 has_adopt = has_no_adopt = False
9484 for disk in self.op.disks:
9485 if self.op.disk_template != constants.DT_EXT:
9486 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9487 if constants.IDISK_ADOPT in disk:
9488 has_adopt = True
9489 else:
9490 has_no_adopt = True
9491 if has_adopt and has_no_adopt:
9492 raise errors.OpPrereqError("Either all disks are adopted or none is",
9493 errors.ECODE_INVAL)
9494 if has_adopt:
9495 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9496 raise errors.OpPrereqError("Disk adoption is not supported for the"
9497 " '%s' disk template" %
9498 self.op.disk_template,
9500 if self.op.iallocator is not None:
9501 raise errors.OpPrereqError("Disk adoption not allowed with an"
9502 " iallocator script", errors.ECODE_INVAL)
9503 if self.op.mode == constants.INSTANCE_IMPORT:
9504 raise errors.OpPrereqError("Disk adoption not allowed for"
9505 " instance import", errors.ECODE_INVAL)
9507 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9508 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9509 " but no 'adopt' parameter given" %
9510 self.op.disk_template,
9513 self.adopt_disks = has_adopt
9515 # instance name verification
9516 if self.op.name_check:
9517 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9518 self.op.instance_name = self.hostname1.name
9519 # used in CheckPrereq for ip ping check
9520 self.check_ip = self.hostname1.ip
9521 else:
9522 self.check_ip = None
9524 # file storage checks
9525 if (self.op.file_driver and
9526 not self.op.file_driver in constants.FILE_DRIVER):
9527 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9528 self.op.file_driver, errors.ECODE_INVAL)
9530 if self.op.disk_template == constants.DT_FILE:
9531 opcodes.RequireFileStorage()
9532 elif self.op.disk_template == constants.DT_SHARED_FILE:
9533 opcodes.RequireSharedFileStorage()
9535 ### Node/iallocator related checks
9536 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9538 if self.op.pnode is not None:
9539 if self.op.disk_template in constants.DTS_INT_MIRROR:
9540 if self.op.snode is None:
9541 raise errors.OpPrereqError("The networked disk templates need"
9542 " a mirror node", errors.ECODE_INVAL)
9544 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9545 " template")
9546 self.op.snode = None
9548 self._cds = _GetClusterDomainSecret()
9550 if self.op.mode == constants.INSTANCE_IMPORT:
9551 # On import force_variant must be True, because if we forced it at
9552 # initial install, our only chance when importing it back is that it
9554 self.op.force_variant = True
9556 if self.op.no_install:
9557 self.LogInfo("No-installation mode has no effect during import")
9559 elif self.op.mode == constants.INSTANCE_CREATE:
9560 if self.op.os_type is None:
9561 raise errors.OpPrereqError("No guest OS specified",
9563 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9564 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9565 " installation" % self.op.os_type,
9567 if self.op.disk_template is None:
9568 raise errors.OpPrereqError("No disk template specified",
9571 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9572 # Check handshake to ensure both clusters have the same domain secret
9573 src_handshake = self.op.source_handshake
9574 if not src_handshake:
9575 raise errors.OpPrereqError("Missing source handshake",
9578 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9581 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9584 # Load and check source CA
9585 self.source_x509_ca_pem = self.op.source_x509_ca
9586 if not self.source_x509_ca_pem:
9587 raise errors.OpPrereqError("Missing source X509 CA",
9591 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9593 except OpenSSL.crypto.Error, err:
9594 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9595 (err, ), errors.ECODE_INVAL)
9597 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9598 if errcode is not None:
9599 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9602 self.source_x509_ca = cert
9604 src_instance_name = self.op.source_instance_name
9605 if not src_instance_name:
9606 raise errors.OpPrereqError("Missing source instance name",
9609 self.source_instance_name = \
9610 netutils.GetHostname(name=src_instance_name).name
9613 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9614 self.op.mode, errors.ECODE_INVAL)
9616 def ExpandNames(self):
9617 """ExpandNames for CreateInstance.
9619 Figure out the right locks for instance creation.
9622 self.needed_locks = {}
9624 instance_name = self.op.instance_name
9625 # this is just a preventive check, but someone might still add this
9626 # instance in the meantime, and creation will fail at lock-add time
9627 if instance_name in self.cfg.GetInstanceList():
9628 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9629 instance_name, errors.ECODE_EXISTS)
9631 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9633 if self.op.iallocator:
9634 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9635 # specifying a group on instance creation and then selecting nodes from
9637 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9638 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9640 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9641 nodelist = [self.op.pnode]
9642 if self.op.snode is not None:
9643 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9644 nodelist.append(self.op.snode)
9645 self.needed_locks[locking.LEVEL_NODE] = nodelist
9646 # Lock resources of instance's primary and secondary nodes (copy to
9647 # prevent accidental modification)
9648 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9650 # in case of import lock the source node too
9651 if self.op.mode == constants.INSTANCE_IMPORT:
9652 src_node = self.op.src_node
9653 src_path = self.op.src_path
9655 if src_path is None:
9656 self.op.src_path = src_path = self.op.instance_name
9658 if src_node is None:
9659 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9660 self.op.src_node = None
9661 if os.path.isabs(src_path):
9662 raise errors.OpPrereqError("Importing an instance from a path"
9663 " requires a source node option",
9664 errors.ECODE_INVAL)
9665 else:
9666 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9667 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9668 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9669 if not os.path.isabs(src_path):
9670 self.op.src_path = src_path = \
9671 utils.PathJoin(constants.EXPORT_DIR, src_path)
9673 def _RunAllocator(self):
9674 """Run the allocator based on input opcode.
9677 #TODO Export network to iallocator so that it chooses a pnode
9678 # in a nodegroup that has the desired network connected to
9679 nics = [n.ToDict() for n in self.nics]
9680 ial = IAllocator(self.cfg, self.rpc,
9681 mode=constants.IALLOCATOR_MODE_ALLOC,
9682 name=self.op.instance_name,
9683 disk_template=self.op.disk_template,
9686 vcpus=self.be_full[constants.BE_VCPUS],
9687 memory=self.be_full[constants.BE_MAXMEM],
9688 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9691 hypervisor=self.op.hypervisor,
9694 ial.Run(self.op.iallocator)
9696 if not ial.success:
9697 raise errors.OpPrereqError("Can't compute nodes using"
9698 " iallocator '%s': %s" %
9699 (self.op.iallocator, ial.info),
9701 if len(ial.result) != ial.required_nodes:
9702 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9703 " of nodes (%s), required %s" %
9704 (self.op.iallocator, len(ial.result),
9705 ial.required_nodes), errors.ECODE_FAULT)
9706 self.op.pnode = ial.result[0]
9707 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9708 self.op.instance_name, self.op.iallocator,
9709 utils.CommaJoin(ial.result))
9710 if ial.required_nodes == 2:
9711 self.op.snode = ial.result[1]
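# The first node returned by the allocator becomes the primary node; when the
# allocator was asked for two nodes, the second one becomes the secondary.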
9713 def BuildHooksEnv(self):
9716 This runs on master, primary and secondary nodes of the instance.
9719 env = {
9720 "ADD_MODE": self.op.mode,
9721 }
9722 if self.op.mode == constants.INSTANCE_IMPORT:
9723 env["SRC_NODE"] = self.op.src_node
9724 env["SRC_PATH"] = self.op.src_path
9725 env["SRC_IMAGES"] = self.src_images
9727 env.update(_BuildInstanceHookEnv(
9728 name=self.op.instance_name,
9729 primary_node=self.op.pnode,
9730 secondary_nodes=self.secondaries,
9731 status=self.op.start,
9732 os_type=self.op.os_type,
9733 minmem=self.be_full[constants.BE_MINMEM],
9734 maxmem=self.be_full[constants.BE_MAXMEM],
9735 vcpus=self.be_full[constants.BE_VCPUS],
9736 nics=_NICListToTuple(self, self.nics),
9737 disk_template=self.op.disk_template,
9738 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9739 for d in self.disks],
9742 hypervisor_name=self.op.hypervisor,
9744 ))
9746 return env
9748 def BuildHooksNodes(self):
9749 """Build hooks nodes.
9752 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9753 return nl, nl
9755 def _ReadExportInfo(self):
9756 """Reads the export information from disk.
9758 It will override the opcode source node and path with the actual
9759 information, if these two were not specified before.
9761 @return: the export information
9764 assert self.op.mode == constants.INSTANCE_IMPORT
9766 src_node = self.op.src_node
9767 src_path = self.op.src_path
9769 if src_node is None:
9770 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9771 exp_list = self.rpc.call_export_list(locked_nodes)
9773 for node in exp_list:
9774 if exp_list[node].fail_msg:
9776 if src_path in exp_list[node].payload:
9778 self.op.src_node = src_node = node
9779 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9783 raise errors.OpPrereqError("No export found for relative path %s" %
9784 src_path, errors.ECODE_INVAL)
9786 _CheckNodeOnline(self, src_node)
9787 result = self.rpc.call_export_info(src_node, src_path)
9788 result.Raise("No export or invalid export found in dir %s" % src_path)
9790 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9791 if not export_info.has_section(constants.INISECT_EXP):
9792 raise errors.ProgrammerError("Corrupted export config",
9793 errors.ECODE_ENVIRON)
9795 ei_version = export_info.get(constants.INISECT_EXP, "version")
9796 if (int(ei_version) != constants.EXPORT_VERSION):
9797 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9798 (ei_version, constants.EXPORT_VERSION),
9799 errors.ECODE_ENVIRON)
9802 def _ReadExportParams(self, einfo):
9803 """Use export parameters as defaults.
9805 In case the opcode doesn't specify (as in override) some instance
9806 parameters, then try to use them from the export information, if
9807 that declares them.
9810 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9812 if self.op.disk_template is None:
9813 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9814 self.op.disk_template = einfo.get(constants.INISECT_INS,
9816 if self.op.disk_template not in constants.DISK_TEMPLATES:
9817 raise errors.OpPrereqError("Disk template specified in configuration"
9818 " file is not one of the allowed values:"
9819 " %s" % " ".join(constants.DISK_TEMPLATES))
9821 raise errors.OpPrereqError("No disk template specified and the export"
9822 " is missing the disk_template information",
9825 if not self.op.disks:
9827 # TODO: import the disk iv_name too
9828 for idx in range(constants.MAX_DISKS):
9829 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9830 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9831 disks.append({constants.IDISK_SIZE: disk_sz})
9832 self.op.disks = disks
9833 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9834 raise errors.OpPrereqError("No disk info specified and the export"
9835 " is missing the disk information",
9838 if not self.op.nics:
9840 for idx in range(constants.MAX_NICS):
9841 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9843 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9844 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9851 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9852 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9854 if (self.op.hypervisor is None and
9855 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9856 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9858 if einfo.has_section(constants.INISECT_HYP):
9859 # use the export parameters but do not override the ones
9860 # specified by the user
9861 for name, value in einfo.items(constants.INISECT_HYP):
9862 if name not in self.op.hvparams:
9863 self.op.hvparams[name] = value
9865 if einfo.has_section(constants.INISECT_BEP):
9866 # use the parameters, without overriding
9867 for name, value in einfo.items(constants.INISECT_BEP):
9868 if name not in self.op.beparams:
9869 self.op.beparams[name] = value
9870 # Compatibility for the old "memory" be param
9871 if name == constants.BE_MEMORY:
9872 if constants.BE_MAXMEM not in self.op.beparams:
9873 self.op.beparams[constants.BE_MAXMEM] = value
9874 if constants.BE_MINMEM not in self.op.beparams:
9875 self.op.beparams[constants.BE_MINMEM] = value
9877 # try to read the parameters old style, from the main section
9878 for name in constants.BES_PARAMETERS:
9879 if (name not in self.op.beparams and
9880 einfo.has_option(constants.INISECT_INS, name)):
9881 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9883 if einfo.has_section(constants.INISECT_OSP):
9884 # use the parameters, without overriding
9885 for name, value in einfo.items(constants.INISECT_OSP):
9886 if name not in self.op.osparams:
9887 self.op.osparams[name] = value
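# Precedence summary for imports: values given in the opcode always win; the
# export only fills in hypervisor, backend and OS parameters (and disks/NICs)
# that the user did not specify.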
9889 def _RevertToDefaults(self, cluster):
9890 """Revert the instance parameters to the default values.
9894 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9895 for name in self.op.hvparams.keys():
9896 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9897 del self.op.hvparams[name]
9899 be_defs = cluster.SimpleFillBE({})
9900 for name in self.op.beparams.keys():
9901 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9902 del self.op.beparams[name]
9904 nic_defs = cluster.SimpleFillNIC({})
9905 for nic in self.op.nics:
9906 for name in constants.NICS_PARAMETERS:
9907 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9910 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9911 for name in self.op.osparams.keys():
9912 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9913 del self.op.osparams[name]
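# After this the opcode only carries parameters that differ from the cluster
# defaults, so the new instance does not store redundant per-instance values.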
9915 def _CalculateFileStorageDir(self):
9916 """Calculate final instance file storage dir.
9919 # file storage dir calculation/check
9920 self.instance_file_storage_dir = None
9921 if self.op.disk_template in constants.DTS_FILEBASED:
9922 # build the full file storage dir path
9923 joinargs = []
9925 if self.op.disk_template == constants.DT_SHARED_FILE:
9926 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9927 else:
9928 get_fsd_fn = self.cfg.GetFileStorageDir
9930 cfg_storagedir = get_fsd_fn()
9931 if not cfg_storagedir:
9932 raise errors.OpPrereqError("Cluster file storage dir not defined")
9933 joinargs.append(cfg_storagedir)
9935 if self.op.file_storage_dir is not None:
9936 joinargs.append(self.op.file_storage_dir)
9938 joinargs.append(self.op.instance_name)
9940 # pylint: disable=W0142
9941 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
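# The resulting path is <cluster (shared) file storage dir>[/<opcode
# file_storage_dir>]/<instance name>, for example (illustrative only)
# /srv/ganeti/file-storage/inst1.example.com.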
9943 def CheckPrereq(self): # pylint: disable=R0914
9944 """Check prerequisites.
9947 self._CalculateFileStorageDir()
9949 if self.op.mode == constants.INSTANCE_IMPORT:
9950 export_info = self._ReadExportInfo()
9951 self._ReadExportParams(export_info)
9952 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9953 else:
9954 self._old_instance_name = None
9956 if (not self.cfg.GetVGName() and
9957 self.op.disk_template not in constants.DTS_NOT_LVM):
9958 raise errors.OpPrereqError("Cluster does not support lvm-based"
9959 " instances", errors.ECODE_STATE)
9961 if (self.op.hypervisor is None or
9962 self.op.hypervisor == constants.VALUE_AUTO):
9963 self.op.hypervisor = self.cfg.GetHypervisorType()
9965 cluster = self.cfg.GetClusterInfo()
9966 enabled_hvs = cluster.enabled_hypervisors
9967 if self.op.hypervisor not in enabled_hvs:
9968 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9969 " cluster (%s)" % (self.op.hypervisor,
9970 ",".join(enabled_hvs)),
9973 # Check tag validity
9974 for tag in self.op.tags:
9975 objects.TaggableObject.ValidateTag(tag)
9977 # check hypervisor parameter syntax (locally)
9978 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9979 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9981 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9982 hv_type.CheckParameterSyntax(filled_hvp)
9983 self.hv_full = filled_hvp
9984 # check that we don't specify global parameters on an instance
9985 _CheckGlobalHvParams(self.op.hvparams)
9987 # fill and remember the beparams dict
9988 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9989 for param, value in self.op.beparams.iteritems():
9990 if value == constants.VALUE_AUTO:
9991 self.op.beparams[param] = default_beparams[param]
9992 objects.UpgradeBeParams(self.op.beparams)
9993 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9994 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9996 # build os parameters
9997 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9999 # now that hvp/bep are in final format, let's reset to defaults,
10001 if self.op.identify_defaults:
10002 self._RevertToDefaults(cluster)
10004 self.hotplug_info = None
10005 if self.op.hotplug:
10006 self.hotplug_info = objects.HotplugInfo(disks=0, nics=0,
10007 pci_pool=list(range(16,32)))
10009 self.nics = []
10010 for idx, nic in enumerate(self.op.nics):
10011 nic_mode_req = nic.get(constants.INIC_MODE, None)
10012 nic_mode = nic_mode_req
10013 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
10014 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
10016 net = nic.get(constants.INIC_NETWORK, None)
10017 link = nic.get(constants.NIC_LINK, None)
10018 ip = nic.get(constants.INIC_IP, None)
10020 if net is None or net.lower() == constants.VALUE_NONE:
10021 net = None
10022 else:
10023 if nic_mode_req is not None or link is not None:
10024 raise errors.OpPrereqError("If network is given, no mode or link"
10025 " is allowed to be passed",
10026 errors.ECODE_INVAL)
10028 # ip validity checks
10029 if ip is None or ip.lower() == constants.VALUE_NONE:
10030 nic_ip = None
10031 elif ip.lower() == constants.VALUE_AUTO:
10032 if not self.op.name_check:
10033 raise errors.OpPrereqError("IP address set to auto but name checks"
10034 " have been skipped",
10035 errors.ECODE_INVAL)
10036 nic_ip = self.hostname1.ip
10037 else:
10038 # We defer pool operations until later, so that the iallocator has
10039 # filled in the instance's node(s)
10040 if ip.lower() == constants.NIC_IP_POOL:
10041 if net is None:
10042 raise errors.OpPrereqError("if ip=pool, parameter network"
10043 " must be passed too",
10044 errors.ECODE_INVAL)
10046 elif not netutils.IPAddress.IsValid(ip):
10047 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
10048 errors.ECODE_INVAL)
10052 # TODO: check the ip address for uniqueness
10053 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
10054 raise errors.OpPrereqError("Routed nic mode requires an ip address",
10055 errors.ECODE_INVAL)
10057 # MAC address verification
10058 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
10059 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10060 mac = utils.NormalizeAndValidateMac(mac)
10062 try:
10063 self.cfg.ReserveMAC(mac, self.proc.GetECId())
10064 except errors.ReservationError:
10065 raise errors.OpPrereqError("MAC address %s already in use"
10066 " in cluster" % mac,
10067 errors.ECODE_NOTUNIQUE)
10069 # Build nic parameters
10070 nicparams = {}
10071 if nic_mode_req:
10072 nicparams[constants.NIC_MODE] = nic_mode
10073 if link:
10074 nicparams[constants.NIC_LINK] = link
10076 check_params = cluster.SimpleFillNIC(nicparams)
10077 objects.NIC.CheckParameterSyntax(check_params)
10078 nic_idx, pci = _GetPCIInfo(self, 'nics')
10079 self.nics.append(objects.NIC(idx=nic_idx, pci=pci,
10080 mac=mac, ip=nic_ip, network=net,
10081 nicparams=check_params))
10083 # disk checks/pre-build
10084 default_vg = self.cfg.GetVGName()
10085 self.disks = []
10086 for disk in self.op.disks:
10087 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
10088 if mode not in constants.DISK_ACCESS_SET:
10089 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
10090 mode, errors.ECODE_INVAL)
10091 size = disk.get(constants.IDISK_SIZE, None)
10092 if size is None:
10093 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
10094 try:
10095 size = int(size)
10096 except (TypeError, ValueError):
10097 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
10098 errors.ECODE_INVAL)
10100 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10101 if ext_provider and self.op.disk_template != constants.DT_EXT:
10102 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10103 " disk template, not %s" %
10104 (constants.IDISK_PROVIDER, constants.DT_EXT,
10105 self.op.disk_template), errors.ECODE_INVAL)
10107 data_vg = disk.get(constants.IDISK_VG, default_vg)
10108 new_disk = {
10109 constants.IDISK_SIZE: size,
10110 constants.IDISK_MODE: mode,
10111 constants.IDISK_VG: data_vg,
10112 }
10114 if constants.IDISK_METAVG in disk:
10115 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10116 if constants.IDISK_ADOPT in disk:
10117 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10119 # For extstorage, demand the `provider' option and add any
10120 # additional parameters (ext-params) to the dict
10121 if self.op.disk_template == constants.DT_EXT:
10122 if ext_provider:
10123 new_disk[constants.IDISK_PROVIDER] = ext_provider
10124 for key in disk:
10125 if key not in constants.IDISK_PARAMS:
10126 new_disk[key] = disk[key]
10127 else:
10128 raise errors.OpPrereqError("Missing provider for template '%s'" %
10129 constants.DT_EXT, errors.ECODE_INVAL)
10131 self.disks.append(new_disk)
10133 if self.op.mode == constants.INSTANCE_IMPORT:
10134 disk_images = []
10135 for idx in range(len(self.disks)):
10136 option = "disk%d_dump" % idx
10137 if export_info.has_option(constants.INISECT_INS, option):
10138 # FIXME: are the old os-es, disk sizes, etc. useful?
10139 export_name = export_info.get(constants.INISECT_INS, option)
10140 image = utils.PathJoin(self.op.src_path, export_name)
10141 disk_images.append(image)
10142 else:
10143 disk_images.append(False)
10145 self.src_images = disk_images
10147 if self.op.instance_name == self._old_instance_name:
10148 for idx, nic in enumerate(self.nics):
10149 if nic.mac == constants.VALUE_AUTO:
10150 nic_mac_ini = "nic%d_mac" % idx
10151 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10153 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10155 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10156 if self.op.ip_check:
10157 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10158 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10159 (self.check_ip, self.op.instance_name),
10160 errors.ECODE_NOTUNIQUE)
10162 #### mac address generation
10163 # By generating here the mac address both the allocator and the hooks get
10164 # the real final mac address rather than the 'auto' or 'generate' value.
10165 # There is a race condition between the generation and the instance object
10166 # creation, which means that we know the mac is valid now, but we're not
10167 # sure it will be when we actually add the instance. If things go bad
10168 # adding the instance will abort because of a duplicate mac, and the
10169 # creation job will fail.
10170 for nic in self.nics:
10171 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10172 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10176 if self.op.iallocator is not None:
10177 self._RunAllocator()
10179 # Release all unneeded node locks
10180 _ReleaseLocks(self, locking.LEVEL_NODE,
10181 keep=filter(None, [self.op.pnode, self.op.snode,
10182 self.op.src_node]))
10183 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10184 keep=filter(None, [self.op.pnode, self.op.snode,
10185 self.op.src_node]))
10187 #### node related checks
10189 # check primary node
10190 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10191 assert self.pnode is not None, \
10192 "Cannot retrieve locked node %s" % self.op.pnode
10193 if pnode.offline:
10194 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10195 pnode.name, errors.ECODE_STATE)
10196 if pnode.drained:
10197 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10198 pnode.name, errors.ECODE_STATE)
10199 if not pnode.vm_capable:
10200 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10201 " '%s'" % pnode.name, errors.ECODE_STATE)
10203 self.secondaries = []
10205 # Fill in any IPs from IP pools. This must happen here, because we need to
10206 # know the nic's primary node, as specified by the iallocator
10207 for idx, nic in enumerate(self.nics):
10208 net = nic.network
10209 if net is not None:
10210 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10211 if netparams is None:
10212 raise errors.OpPrereqError("No netparams found for network"
10213 " %s, probably not connected to"
10214 " node %s's nodegroup" %
10215 (net, self.pnode.name),
10216 errors.ECODE_INVAL)
10217 self.LogInfo("NIC/%d inherits netparams %s" %
10218 (idx, netparams.values()))
10219 nic.nicparams = dict(netparams)
10220 if nic.ip is not None:
10221 filled_params = cluster.SimpleFillNIC(nic.nicparams)
10222 if nic.ip.lower() == constants.NIC_IP_POOL:
10223 try:
10224 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10225 except errors.ReservationError:
10226 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10227 " from the address pool" % idx,
10228 errors.ECODE_STATE)
10229 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10230 else:
10231 try:
10232 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10233 except errors.ReservationError:
10234 raise errors.OpPrereqError("IP address %s already in use"
10235 " or does not belong to network %s" %
10236 (nic.ip, net),
10237 errors.ECODE_NOTUNIQUE)
10238 else:
10239 # net is None, ip None or given
10240 if self.op.conflicts_check:
10241 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10244 # mirror node verification
10245 if self.op.disk_template in constants.DTS_INT_MIRROR:
10246 if self.op.snode == pnode.name:
10247 raise errors.OpPrereqError("The secondary node cannot be the"
10248 " primary node", errors.ECODE_INVAL)
10249 _CheckNodeOnline(self, self.op.snode)
10250 _CheckNodeNotDrained(self, self.op.snode)
10251 _CheckNodeVmCapable(self, self.op.snode)
10252 self.secondaries.append(self.op.snode)
10254 snode = self.cfg.GetNodeInfo(self.op.snode)
10255 if pnode.group != snode.group:
10256 self.LogWarning("The primary and secondary nodes are in two"
10257 " different node groups; the disk parameters"
10258 " from the first disk's node group will be"
10261 nodenames = [pnode.name] + self.secondaries
10263 # Verify instance specs
10264 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10265 ispec = {
10266 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10267 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10268 constants.ISPEC_DISK_COUNT: len(self.disks),
10269 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10270 constants.ISPEC_NIC_COUNT: len(self.nics),
10271 constants.ISPEC_SPINDLE_USE: spindle_use,
10272 }
10274 group_info = self.cfg.GetNodeGroup(pnode.group)
10275 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10276 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10277 if not self.op.ignore_ipolicy and res:
10278 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10279 " policy: %s") % (pnode.group,
10280 utils.CommaJoin(res)),
10281 errors.ECODE_INVAL)
10283 if not self.adopt_disks:
10284 if self.op.disk_template == constants.DT_RBD:
10285 # _CheckRADOSFreeSpace() is just a placeholder.
10286 # Any function that checks prerequisites can be placed here.
10287 # Check if there is enough space on the RADOS cluster.
10288 _CheckRADOSFreeSpace()
10289 elif self.op.disk_template == constants.DT_EXT:
10290 # FIXME: Function that checks prereqs if needed
10291 pass
10292 else:
10293 # Check lv size requirements, if not adopting
10294 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10295 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10297 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10298 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10299 disk[constants.IDISK_ADOPT])
10300 for disk in self.disks])
10301 if len(all_lvs) != len(self.disks):
10302 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10303 errors.ECODE_INVAL)
10304 for lv_name in all_lvs:
10305 try:
10306 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10307 # to ReserveLV use the same syntax
10308 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10309 except errors.ReservationError:
10310 raise errors.OpPrereqError("LV named %s used by another instance" %
10311 lv_name, errors.ECODE_NOTUNIQUE)
10313 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10314 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10316 node_lvs = self.rpc.call_lv_list([pnode.name],
10317 vg_names.payload.keys())[pnode.name]
10318 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10319 node_lvs = node_lvs.payload
10321 delta = all_lvs.difference(node_lvs.keys())
10322 if delta:
10323 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10324 utils.CommaJoin(delta),
10325 errors.ECODE_INVAL)
10326 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10327 if online_lvs:
10328 raise errors.OpPrereqError("Online logical volumes found, cannot"
10329 " adopt: %s" % utils.CommaJoin(online_lvs),
10330 errors.ECODE_STATE)
10331 # update the size of disk based on what is found
10332 for dsk in self.disks:
10333 dsk[constants.IDISK_SIZE] = \
10334 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10335 dsk[constants.IDISK_ADOPT])][0]))
10337 elif self.op.disk_template == constants.DT_BLOCK:
10338 # Normalize and de-duplicate device paths
10339 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10340 for disk in self.disks])
10341 if len(all_disks) != len(self.disks):
10342 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10343 errors.ECODE_INVAL)
10344 baddisks = [d for d in all_disks
10345 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10346 if baddisks:
10347 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10348 " cannot be adopted" %
10349 (", ".join(baddisks),
10350 constants.ADOPTABLE_BLOCKDEV_ROOT),
10351 errors.ECODE_INVAL)
10353 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10354 list(all_disks))[pnode.name]
10355 node_disks.Raise("Cannot get block device information from node %s" %
10356 pnode.name)
10357 node_disks = node_disks.payload
10358 delta = all_disks.difference(node_disks.keys())
10359 if delta:
10360 raise errors.OpPrereqError("Missing block device(s): %s" %
10361 utils.CommaJoin(delta),
10362 errors.ECODE_INVAL)
10363 for dsk in self.disks:
10364 dsk[constants.IDISK_SIZE] = \
10365 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
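# For both adoption flavours (logical volumes and block devices) the sizes
# given in the opcode are replaced by the sizes actually reported by the
# primary node, truncated to an integer number of megabytes.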
10367 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10369 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10370 # check OS parameters (remotely)
10371 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10373 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10375 #TODO: _CheckExtParams (remotely)
10376 # Check parameters for extstorage
10378 # memory check on primary node
10379 #TODO(dynmem): use MINMEM for checking
10381 _CheckNodeFreeMemory(self, self.pnode.name,
10382 "creating instance %s" % self.op.instance_name,
10383 self.be_full[constants.BE_MAXMEM],
10384 self.op.hypervisor)
10386 self.dry_run_result = list(nodenames)
10388 def Exec(self, feedback_fn):
10389 """Create and add the instance to the cluster.
10392 instance = self.op.instance_name
10393 pnode_name = self.pnode.name
10395 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10396 self.owned_locks(locking.LEVEL_NODE)), \
10397 "Node locks differ from node resource locks"
10399 ht_kind = self.op.hypervisor
10400 if ht_kind in constants.HTS_REQ_PORT:
10401 network_port = self.cfg.AllocatePort()
10403 network_port = None
10405 # This is ugly but we got a chicken-egg problem here
10406 # We can only take the group disk parameters, as the instance
10407 # has no disks yet (we are generating them right here).
10408 node = self.cfg.GetNodeInfo(pnode_name)
10409 nodegroup = self.cfg.GetNodeGroup(node.group)
10410 disks = _GenerateDiskTemplate(self,
10411 self.op.disk_template,
10412 instance, pnode_name,
10415 self.instance_file_storage_dir,
10416 self.op.file_driver,
10419 self.cfg.GetGroupDiskParams(nodegroup))
10421 iobj = objects.Instance(name=instance, os=self.op.os_type,
10422 primary_node=pnode_name,
10423 nics=self.nics, disks=disks,
10424 disk_template=self.op.disk_template,
10425 admin_state=constants.ADMINST_DOWN,
10426 network_port=network_port,
10427 beparams=self.op.beparams,
10428 hvparams=self.op.hvparams,
10429 hypervisor=self.op.hypervisor,
10430 osparams=self.op.osparams,
10431 hotplug_info=self.hotplug_info,
10435 for tag in self.op.tags:
10436 iobj.AddTag(tag)
10438 if self.adopt_disks:
10439 if self.op.disk_template == constants.DT_PLAIN:
10440 # rename LVs to the newly-generated names; we need to construct
10441 # 'fake' LV disks with the old data, plus the new unique_id
10442 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10443 rename_to = []
10444 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10445 rename_to.append(t_dsk.logical_id)
10446 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10447 self.cfg.SetDiskID(t_dsk, pnode_name)
10448 result = self.rpc.call_blockdev_rename(pnode_name,
10449 zip(tmp_disks, rename_to))
10450 result.Raise("Failed to rename adopted LVs")
10451 else:
10452 feedback_fn("* creating instance disks...")
10453 try:
10454 _CreateDisks(self, iobj)
10455 except errors.OpExecError:
10456 self.LogWarning("Device creation failed, reverting...")
10457 try:
10458 _RemoveDisks(self, iobj)
10459 finally:
10460 self.cfg.ReleaseDRBDMinors(instance)
10461 raise
10463 feedback_fn("adding instance %s to cluster config" % instance)
10465 self.cfg.AddInstance(iobj, self.proc.GetECId())
10467 # Declare that we don't want to remove the instance lock anymore, as we've
10468 # added the instance to the config
10469 del self.remove_locks[locking.LEVEL_INSTANCE]
10471 if self.op.mode == constants.INSTANCE_IMPORT:
10472 # Release unused nodes
10473 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10475 # Release all nodes
10476 _ReleaseLocks(self, locking.LEVEL_NODE)
10478 disk_abort = False
10479 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10480 feedback_fn("* wiping instance disks...")
10481 try:
10482 _WipeDisks(self, iobj)
10483 except errors.OpExecError, err:
10484 logging.exception("Wiping disks failed")
10485 self.LogWarning("Wiping instance disks failed (%s)", err)
10486 disk_abort = True
10488 if disk_abort:
10489 # Something is already wrong with the disks, don't do anything else
10490 pass
10491 elif self.op.wait_for_sync:
10492 disk_abort = not _WaitForSync(self, iobj)
10493 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10494 # make sure the disks are not degraded (still sync-ing is ok)
10495 feedback_fn("* checking mirrors status")
10496 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10497 else:
10498 disk_abort = False
10500 if disk_abort:
10501 _RemoveDisks(self, iobj)
10502 self.cfg.RemoveInstance(iobj.name)
10503 # Make sure the instance lock gets removed
10504 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10505 raise errors.OpExecError("There are some degraded disks for"
10506 " this instance")
10508 # Release all node resource locks
10509 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10511 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10512 # we need to set the disks ID to the primary node, since the
10513 # preceding code might or might have not done it, depending on
10514 # disk template and other options
10515 for disk in iobj.disks:
10516 self.cfg.SetDiskID(disk, pnode_name)
10517 if self.op.mode == constants.INSTANCE_CREATE:
10518 if not self.op.no_install:
10519 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10520 not self.op.wait_for_sync)
10521 if pause_sync:
10522 feedback_fn("* pausing disk sync to install instance OS")
10523 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10526 for idx, success in enumerate(result.payload):
10527 if not success:
10528 logging.warn("pause-sync of instance %s for disk %d failed",
10529 instance, idx)
10531 feedback_fn("* running the instance OS create scripts...")
10532 # FIXME: pass debug option from opcode to backend
10533 os_add_result = \
10534 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10535 self.op.debug_level)
10536 if pause_sync:
10537 feedback_fn("* resuming disk sync")
10538 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10541 for idx, success in enumerate(result.payload):
10542 if not success:
10543 logging.warn("resume-sync of instance %s for disk %d failed",
10544 instance, idx)
10546 os_add_result.Raise("Could not add os for instance %s"
10547 " on node %s" % (instance, pnode_name))
10550 if self.op.mode == constants.INSTANCE_IMPORT:
10551 feedback_fn("* running the instance OS import scripts...")
10553 transfers = []
10555 for idx, image in enumerate(self.src_images):
10556 if not image:
10557 continue
10559 # FIXME: pass debug option from opcode to backend
10560 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10561 constants.IEIO_FILE, (image, ),
10562 constants.IEIO_SCRIPT,
10563 (iobj.disks[idx], idx),
10565 transfers.append(dt)
10567 import_result = \
10568 masterd.instance.TransferInstanceData(self, feedback_fn,
10569 self.op.src_node, pnode_name,
10570 self.pnode.secondary_ip,
10571 iobj, transfers)
10572 if not compat.all(import_result):
10573 self.LogWarning("Some disks for instance %s on node %s were not"
10574 " imported successfully" % (instance, pnode_name))
10576 rename_from = self._old_instance_name
10578 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10579 feedback_fn("* preparing remote import...")
10580 # The source cluster will stop the instance before attempting to make
10581 # a connection. In some cases stopping an instance can take a long
10582 # time, hence the shutdown timeout is added to the connection
10584 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10585 self.op.source_shutdown_timeout)
10586 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10588 assert iobj.primary_node == self.pnode.name
10589 disk_results = \
10590 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10591 self.source_x509_ca,
10592 self._cds, timeouts)
10593 if not compat.all(disk_results):
10594 # TODO: Should the instance still be started, even if some disks
10595 # failed to import (valid for local imports, too)?
10596 self.LogWarning("Some disks for instance %s on node %s were not"
10597 " imported successfully" % (instance, pnode_name))
10599 rename_from = self.source_instance_name
10601 else:
10602 # also checked in the prereq part
10603 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10604 % self.op.mode)
10606 # Run rename script on newly imported instance
10607 assert iobj.name == instance
10608 feedback_fn("Running rename script for %s" % instance)
10609 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10610 rename_from,
10611 self.op.debug_level)
10612 if result.fail_msg:
10613 self.LogWarning("Failed to run rename script for %s on node"
10614 " %s: %s" % (instance, pnode_name, result.fail_msg))
10616 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10618 if self.op.start:
10619 iobj.admin_state = constants.ADMINST_UP
10620 self.cfg.Update(iobj, feedback_fn)
10621 logging.info("Starting instance %s on node %s", instance, pnode_name)
10622 feedback_fn("* starting instance...")
10623 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10625 result.Raise("Could not start instance")
10627 return list(iobj.all_nodes)
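# Exec returns the list of all nodes used by the new instance; this list is
# the result of the opcode.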
10630 def _CheckRADOSFreeSpace():
10631 """Compute disk size requirements inside the RADOS cluster.
10634 # For the RADOS cluster we assume there is always enough space.
10635 pass
10638 class LUInstanceConsole(NoHooksLU):
10639 """Connect to an instance's console.
10641 This is somewhat special in that it returns the command line that
10642 you need to run on the master node in order to connect to the
10643 console.
10648 def ExpandNames(self):
10649 self.share_locks = _ShareAll()
10650 self._ExpandAndLockInstance()
10652 def CheckPrereq(self):
10653 """Check prerequisites.
10655 This checks that the instance is in the cluster.
10658 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10659 assert self.instance is not None, \
10660 "Cannot retrieve locked instance %s" % self.op.instance_name
10661 _CheckNodeOnline(self, self.instance.primary_node)
10663 def Exec(self, feedback_fn):
10664 """Connect to the console of an instance
10667 instance = self.instance
10668 node = instance.primary_node
10670 node_insts = self.rpc.call_instance_list([node],
10671 [instance.hypervisor])[node]
10672 node_insts.Raise("Can't get node information from %s" % node)
10674 if instance.name not in node_insts.payload:
10675 if instance.admin_state == constants.ADMINST_UP:
10676 state = constants.INSTST_ERRORDOWN
10677 elif instance.admin_state == constants.ADMINST_DOWN:
10678 state = constants.INSTST_ADMINDOWN
10680 state = constants.INSTST_ADMINOFFLINE
10681 raise errors.OpExecError("Instance %s is not running (state %s)" %
10682 (instance.name, state))
10684 logging.debug("Connecting to console of %s on %s", instance.name, node)
10686 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10689 def _GetInstanceConsole(cluster, instance):
10690 """Returns console information for an instance.
10692 @type cluster: L{objects.Cluster}
10693 @type instance: L{objects.Instance}
10697 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10698 # beparams and hvparams are passed separately, to avoid editing the
10699 # instance and then saving the defaults in the instance itself.
10700 hvparams = cluster.FillHV(instance)
10701 beparams = cluster.FillBE(instance)
10702 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10704 assert console.instance == instance.name
10705 assert console.Validate()
10707 return console.ToDict()
10710 class LUInstanceReplaceDisks(LogicalUnit):
10711 """Replace the disks of an instance.
10714 HPATH = "mirrors-replace"
10715 HTYPE = constants.HTYPE_INSTANCE
10718 def CheckArguments(self):
10719 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10720 self.op.iallocator)
10722 def ExpandNames(self):
10723 self._ExpandAndLockInstance()
10725 assert locking.LEVEL_NODE not in self.needed_locks
10726 assert locking.LEVEL_NODE_RES not in self.needed_locks
10727 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10729 assert self.op.iallocator is None or self.op.remote_node is None, \
10730 "Conflicting options"
10732 if self.op.remote_node is not None:
10733 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10735 # Warning: do not remove the locking of the new secondary here
10736 # unless DRBD8.AddChildren is changed to work in parallel;
10737 # currently it doesn't since parallel invocations of
10738 # FindUnusedMinor will conflict
10739 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10740 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10741 else:
10742 self.needed_locks[locking.LEVEL_NODE] = []
10743 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10745 if self.op.iallocator is not None:
10746 # iallocator will select a new node in the same group
10747 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10749 self.needed_locks[locking.LEVEL_NODE_RES] = []
10751 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10752 self.op.iallocator, self.op.remote_node,
10753 self.op.disks, False, self.op.early_release,
10754 self.op.ignore_ipolicy)
10756 self.tasklets = [self.replacer]
10758 def DeclareLocks(self, level):
10759 if level == locking.LEVEL_NODEGROUP:
10760 assert self.op.remote_node is None
10761 assert self.op.iallocator is not None
10762 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10764 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10765 # Lock all groups used by instance optimistically; this requires going
10766 # via the node before it's locked, requiring verification later on
10767 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10768 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10770 elif level == locking.LEVEL_NODE:
10771 if self.op.iallocator is not None:
10772 assert self.op.remote_node is None
10773 assert not self.needed_locks[locking.LEVEL_NODE]
10775 # Lock member nodes of all locked groups
10776 self.needed_locks[locking.LEVEL_NODE] = [node_name
10777 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10778 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10779 else:
10780 self._LockInstancesNodes()
10781 elif level == locking.LEVEL_NODE_RES:
10783 self.needed_locks[locking.LEVEL_NODE_RES] = \
10784 self.needed_locks[locking.LEVEL_NODE]
10786 def BuildHooksEnv(self):
10787 """Build hooks env.
10789 This runs on the master, the primary and all the secondaries.
10792 instance = self.replacer.instance
10793 env = {
10794 "MODE": self.op.mode,
10795 "NEW_SECONDARY": self.op.remote_node,
10796 "OLD_SECONDARY": instance.secondary_nodes[0],
10797 }
10798 env.update(_BuildInstanceHookEnvByObject(self, instance))
10800 return env
10801 def BuildHooksNodes(self):
10802 """Build hooks nodes.
10805 instance = self.replacer.instance
10806 nl = [
10807 self.cfg.GetMasterNode(),
10808 instance.primary_node,
10809 ]
10810 if self.op.remote_node is not None:
10811 nl.append(self.op.remote_node)
10813 return nl, nl
10814 def CheckPrereq(self):
10815 """Check prerequisites.
10818 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10819 self.op.iallocator is None)
10821 # Verify if node group locks are still correct
10822 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10823 if owned_groups:
10824 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10826 return LogicalUnit.CheckPrereq(self)
10829 class TLReplaceDisks(Tasklet):
10830 """Replaces disks for an instance.
10832 Note: Locking is not within the scope of this class.
10835 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10836 disks, delay_iallocator, early_release, ignore_ipolicy):
10837 """Initializes this class.
10840 Tasklet.__init__(self, lu)
10842 # Parameters
10843 self.instance_name = instance_name
10844 self.mode = mode
10845 self.iallocator_name = iallocator_name
10846 self.remote_node = remote_node
10847 self.disks = disks
10848 self.delay_iallocator = delay_iallocator
10849 self.early_release = early_release
10850 self.ignore_ipolicy = ignore_ipolicy
10853 self.instance = None
10854 self.new_node = None
10855 self.target_node = None
10856 self.other_node = None
10857 self.remote_node_info = None
10858 self.node_secondary_ip = None
10861 def CheckArguments(mode, remote_node, iallocator):
10862 """Helper function for users of this class.
10865 # check for valid parameter combination
10866 if mode == constants.REPLACE_DISK_CHG:
10867 if remote_node is None and iallocator is None:
10868 raise errors.OpPrereqError("When changing the secondary either an"
10869 " iallocator script must be used or the"
10870 " new node given", errors.ECODE_INVAL)
10872 if remote_node is not None and iallocator is not None:
10873 raise errors.OpPrereqError("Give either the iallocator or the new"
10874 " secondary, not both", errors.ECODE_INVAL)
10876 elif remote_node is not None or iallocator is not None:
10877 # Not replacing the secondary
10878 raise errors.OpPrereqError("The iallocator and new node options can"
10879 " only be used when changing the"
10880 " secondary node", errors.ECODE_INVAL)
10883 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10884 """Compute a new secondary node using an IAllocator.
10887 ial = IAllocator(lu.cfg, lu.rpc,
10888 mode=constants.IALLOCATOR_MODE_RELOC,
10889 name=instance_name,
10890 relocate_from=list(relocate_from))
10892 ial.Run(iallocator_name)
10894 if not ial.success:
10895 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10896 " %s" % (iallocator_name, ial.info),
10897 errors.ECODE_NORES)
10899 if len(ial.result) != ial.required_nodes:
10900 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10901 " of nodes (%s), required %s" %
10903 len(ial.result), ial.required_nodes),
10904 errors.ECODE_FAULT)
10906 remote_node_name = ial.result[0]
10908 lu.LogInfo("Selected new secondary for instance '%s': %s",
10909 instance_name, remote_node_name)
10911 return remote_node_name
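# In relocation mode the allocator is expected to return exactly one node,
# which will be used as the new secondary for the DRBD disks.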
10913 def _FindFaultyDisks(self, node_name):
10914 """Wrapper for L{_FindFaultyInstanceDisks}.
10917 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10920 def _CheckDisksActivated(self, instance):
10921 """Checks if the instance disks are activated.
10923 @param instance: The instance to check disks
10924 @return: True if they are activated, False otherwise
10927 nodes = instance.all_nodes
10929 for idx, dev in enumerate(instance.disks):
10930 for node in nodes:
10931 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10932 self.cfg.SetDiskID(dev, node)
10934 result = _BlockdevFind(self, node, dev, instance)
10936 if result.offline:
10937 continue
10938 elif result.fail_msg or not result.payload:
10939 return False
10941 return True
10943 def CheckPrereq(self):
10944 """Check prerequisites.
10946 This checks that the instance is in the cluster.
10949 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10950 assert instance is not None, \
10951 "Cannot retrieve locked instance %s" % self.instance_name
10953 if instance.disk_template != constants.DT_DRBD8:
10954 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10955 " instances", errors.ECODE_INVAL)
10957 if len(instance.secondary_nodes) != 1:
10958 raise errors.OpPrereqError("The instance has a strange layout,"
10959 " expected one secondary but found %d" %
10960 len(instance.secondary_nodes),
10961 errors.ECODE_FAULT)
10963 if not self.delay_iallocator:
10964 self._CheckPrereq2()
10966 def _CheckPrereq2(self):
10967 """Check prerequisites, second part.
10969 This function should always be part of CheckPrereq. It was separated and is
10970 now called from Exec because during node evacuation iallocator was only
10971 called with an unmodified cluster model, not taking planned changes into
10972 account.
10975 instance = self.instance
10976 secondary_node = instance.secondary_nodes[0]
10978 if self.iallocator_name is None:
10979 remote_node = self.remote_node
10980 else:
10981 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10982 instance.name, instance.secondary_nodes)
10984 if remote_node is None:
10985 self.remote_node_info = None
10986 else:
10987 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10988 "Remote node '%s' is not locked" % remote_node
10990 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10991 assert self.remote_node_info is not None, \
10992 "Cannot retrieve locked node %s" % remote_node
10994 if remote_node == self.instance.primary_node:
10995 raise errors.OpPrereqError("The specified node is the primary node of"
10996 " the instance", errors.ECODE_INVAL)
10998 if remote_node == secondary_node:
10999 raise errors.OpPrereqError("The specified node is already the"
11000 " secondary node of the instance",
11001 errors.ECODE_INVAL)
11003 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11004 constants.REPLACE_DISK_CHG):
11005 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11006 errors.ECODE_INVAL)
11008 if self.mode == constants.REPLACE_DISK_AUTO:
11009 if not self._CheckDisksActivated(instance):
11010 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11011 " first" % self.instance_name,
11012 errors.ECODE_STATE)
11013 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11014 faulty_secondary = self._FindFaultyDisks(secondary_node)
11016 if faulty_primary and faulty_secondary:
11017 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11018 " one node and can not be repaired"
11019 " automatically" % self.instance_name,
11020 errors.ECODE_STATE)
11022 if faulty_primary:
11023 self.disks = faulty_primary
11024 self.target_node = instance.primary_node
11025 self.other_node = secondary_node
11026 check_nodes = [self.target_node, self.other_node]
11027 elif faulty_secondary:
11028 self.disks = faulty_secondary
11029 self.target_node = secondary_node
11030 self.other_node = instance.primary_node
11031 check_nodes = [self.target_node, self.other_node]
11032 else:
11033 self.disks = []
11034 check_nodes = []
11036 else:
11037 # Non-automatic modes
11038 if self.mode == constants.REPLACE_DISK_PRI:
11039 self.target_node = instance.primary_node
11040 self.other_node = secondary_node
11041 check_nodes = [self.target_node, self.other_node]
11043 elif self.mode == constants.REPLACE_DISK_SEC:
11044 self.target_node = secondary_node
11045 self.other_node = instance.primary_node
11046 check_nodes = [self.target_node, self.other_node]
11048 elif self.mode == constants.REPLACE_DISK_CHG:
11049 self.new_node = remote_node
11050 self.other_node = instance.primary_node
11051 self.target_node = secondary_node
11052 check_nodes = [self.new_node, self.other_node]
11054 _CheckNodeNotDrained(self.lu, remote_node)
11055 _CheckNodeVmCapable(self.lu, remote_node)
11057 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11058 assert old_node_info is not None
11059 if old_node_info.offline and not self.early_release:
11060 # doesn't make sense to delay the release
11061 self.early_release = True
11062 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11063 " early-release mode", secondary_node)
11065 else:
11066 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11067 self.mode)
11069 # If not specified, all disks should be replaced
11070 if not self.disks:
11071 self.disks = range(len(self.instance.disks))
11073 # TODO: This is ugly, but right now we can't distinguish between internally
11074 # submitted opcodes and external ones. We should fix that.
11075 if self.remote_node_info:
11076 # We change the node; let's verify that it still meets the instance policy
11077 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11078 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
11079 new_group_info)
11080 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11081 ignore=self.ignore_ipolicy)
11083 for node in check_nodes:
11084 _CheckNodeOnline(self.lu, node)
11086 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11087 self.other_node,
11088 self.target_node]
11089 if node_name is not None)
11091 # Release unneeded node and node resource locks
11092 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11093 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11095 # Release any owned node group
11096 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11097 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11099 # Check whether disks are valid
11100 for disk_idx in self.disks:
11101 instance.FindDisk(disk_idx)
11103 # Get secondary node IP addresses
11104 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11105 in self.cfg.GetMultiNodeInfo(touched_nodes))
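# Sketch of the resulting mapping (hypothetical node names and addresses):
#   self.node_secondary_ip == {"node1.example.com": "192.0.2.1",
#                              "node2.example.com": "192.0.2.2"}
# i.e. every node touched by the replacement mapped to its secondary IP; the
# DRBD disconnect/attach RPCs below use these addresses to build endpoints.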
11107 def Exec(self, feedback_fn):
11108 """Execute disk replacement.
11110 This dispatches the disk replacement to the appropriate handler.
11113 if self.delay_iallocator:
11114 self._CheckPrereq2()
11117 # Verify owned locks before starting operation
11118 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11119 assert set(owned_nodes) == set(self.node_secondary_ip), \
11120 ("Incorrect node locks, owning %s, expected %s" %
11121 (owned_nodes, self.node_secondary_ip.keys()))
11122 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11123 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11125 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11126 assert list(owned_instances) == [self.instance_name], \
11127 "Instance '%s' not locked" % self.instance_name
11129 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11130 "Should not own any node group lock at this point"
11132 if not self.disks:
11133 feedback_fn("No disks need replacement")
11134 return
11136 feedback_fn("Replacing disk(s) %s for %s" %
11137 (utils.CommaJoin(self.disks), self.instance.name))
11139 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11141 # Activate the instance disks if we're replacing them on a down instance
11142 if activate_disks:
11143 _StartInstanceDisks(self.lu, self.instance, True)
11146 # Should we replace the secondary node?
11147 if self.new_node is not None:
11148 fn = self._ExecDrbd8Secondary
11149 else:
11150 fn = self._ExecDrbd8DiskOnly
11152 result = fn(feedback_fn)
11154 # Deactivate the instance disks if we're replacing them on a
11155 # down instance
11156 if activate_disks:
11157 _SafeShutdownInstanceDisks(self.lu, self.instance)
11159 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11162 # Verify owned locks
11163 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11164 nodes = frozenset(self.node_secondary_ip)
11165 assert ((self.early_release and not owned_nodes) or
11166 (not self.early_release and not (set(owned_nodes) - nodes))), \
11167 ("Not owning the correct locks, early_release=%s, owned=%r,"
11168 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11172 def _CheckVolumeGroup(self, nodes):
11173 self.lu.LogInfo("Checking volume groups")
11175 vgname = self.cfg.GetVGName()
11177 # Make sure volume group exists on all involved nodes
11178 results = self.rpc.call_vg_list(nodes)
11179 if not results:
11180 raise errors.OpExecError("Can't list volume groups on the nodes")
11182 for node in nodes:
11183 res = results[node]
11184 res.Raise("Error checking node %s" % node)
11185 if vgname not in res.payload:
11186 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11187 (vgname, node))
11189 def _CheckDisksExistence(self, nodes):
11190 # Check disk existence
11191 for idx, dev in enumerate(self.instance.disks):
11192 if idx not in self.disks:
11196 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11197 self.cfg.SetDiskID(dev, node)
11199 result = _BlockdevFind(self, node, dev, self.instance)
11201 msg = result.fail_msg
11202 if msg or not result.payload:
11204 msg = "disk not found"
11205 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11208 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11209 for idx, dev in enumerate(self.instance.disks):
11210 if idx not in self.disks:
11213 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11216 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11217 on_primary, ldisk=ldisk):
11218 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11219 " replace disks for instance %s" %
11220 (node_name, self.instance.name))
11222 def _CreateNewStorage(self, node_name):
11223 """Create new storage on the primary or secondary node.
11225 This is only used for same-node replaces, not for changing the
11226 secondary node, hence we don't want to modify the existing disk.
11229 iv_names = {}
11231 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11232 for idx, dev in enumerate(disks):
11233 if idx not in self.disks:
11236 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11238 self.cfg.SetDiskID(dev, node_name)
11240 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11241 names = _GenerateUniqueNames(self.lu, lv_names)
11243 (data_disk, meta_disk) = dev.children
11244 vg_data = data_disk.logical_id[0]
11245 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11246 logical_id=(vg_data, names[0]),
11247 params=data_disk.params)
11248 vg_meta = meta_disk.logical_id[0]
11249 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
11250 logical_id=(vg_meta, names[1]),
11251 params=meta_disk.params)
11253 new_lvs = [lv_data, lv_meta]
11254 old_lvs = [child.Copy() for child in dev.children]
11255 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11257 # we pass force_create=True to force the LVM creation
11258 for new_lv in new_lvs:
11259 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11260 _GetInstanceInfoText(self.instance), False)
11262 return iv_names
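# Minimal sketch of the mapping built above (hypothetical LV names):
#   iv_names == {"disk/0": (drbd_disk,
#                           [old_data_lv, old_meta_lv],   # LVs currently attached
#                           [new_data_lv, new_meta_lv])}  # freshly created LVs
# _CheckDevices() and _RemoveOldStorage() below consume exactly this
# (dev, old_lvs, new_lvs) layout.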
11264 def _CheckDevices(self, node_name, iv_names):
11265 for name, (dev, _, _) in iv_names.iteritems():
11266 self.cfg.SetDiskID(dev, node_name)
11268 result = _BlockdevFind(self, node_name, dev, self.instance)
11270 msg = result.fail_msg
11271 if msg or not result.payload:
11273 msg = "disk not found"
11274 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11277 if result.payload.is_degraded:
11278 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11280 def _RemoveOldStorage(self, node_name, iv_names):
11281 for name, (_, old_lvs, _) in iv_names.iteritems():
11282 self.lu.LogInfo("Remove logical volumes for %s" % name)
11284 for lv in old_lvs:
11285 self.cfg.SetDiskID(lv, node_name)
11287 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11288 if msg:
11289 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11290 hint="remove unused LVs manually")
11292 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11293 """Replace a disk on the primary or secondary for DRBD 8.
11295 The algorithm for replace is quite complicated:
11297 1. for each disk to be replaced:
11299 1. create new LVs on the target node with unique names
11300 1. detach old LVs from the drbd device
11301 1. rename old LVs to name_replaced.<time_t>
11302 1. rename new LVs to old LVs
11303 1. attach the new LVs (with the old names now) to the drbd device
11305 1. wait for sync across all devices
11307 1. for each modified disk:
11309 1. remove old LVs (which have the name name_replaced.<time_t>)
11311 Failures are not very well handled.
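As an illustrative sketch with hypothetical volume group and LV names,
replacing disk/0 whose data LV is xenvg/aaaa.disk0_data roughly does:

  xenvg/bbbb.disk0_data created with a unique name
  xenvg/aaaa.disk0_data renamed to xenvg/aaaa.disk0_data_replaced-1400000000
  xenvg/bbbb.disk0_data renamed to xenvg/aaaa.disk0_data and re-attached to
  the DRBD device; the *_replaced-<time_t> volumes are removed after the sync.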
11314 steps_total = 6
11316 # Step: check device activation
11317 self.lu.LogStep(1, steps_total, "Check device existence")
11318 self._CheckDisksExistence([self.other_node, self.target_node])
11319 self._CheckVolumeGroup([self.target_node, self.other_node])
11321 # Step: check other node consistency
11322 self.lu.LogStep(2, steps_total, "Check peer consistency")
11323 self._CheckDisksConsistency(self.other_node,
11324 self.other_node == self.instance.primary_node,
11327 # Step: create new storage
11328 self.lu.LogStep(3, steps_total, "Allocate new storage")
11329 iv_names = self._CreateNewStorage(self.target_node)
11331 # Step: for each lv, detach+rename*2+attach
11332 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11333 for dev, old_lvs, new_lvs in iv_names.itervalues():
11334 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11336 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11338 result.Raise("Can't detach drbd from local storage on node"
11339 " %s for device %s" % (self.target_node, dev.iv_name))
11341 #cfg.Update(instance)
11343 # ok, we created the new LVs, so now we know we have the needed
11344 # storage; as such, we proceed on the target node to rename
11345 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11346 # using the assumption that logical_id == physical_id (which in
11347 # turn is the unique_id on that node)
11349 # FIXME(iustin): use a better name for the replaced LVs
11350 temp_suffix = int(time.time())
11351 ren_fn = lambda d, suff: (d.physical_id[0],
11352 d.physical_id[1] + "_replaced-%s" % suff)
11354 # Build the rename list based on what LVs exist on the node
11355 rename_old_to_new = []
11356 for to_ren in old_lvs:
11357 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11358 if not result.fail_msg and result.payload:
11360 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11362 self.lu.LogInfo("Renaming the old LVs on the target node")
11363 result = self.rpc.call_blockdev_rename(self.target_node,
11364 rename_old_to_new)
11365 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11367 # Now we rename the new LVs to the old LVs
11368 self.lu.LogInfo("Renaming the new LVs on the target node")
11369 rename_new_to_old = [(new, old.physical_id)
11370 for old, new in zip(old_lvs, new_lvs)]
11371 result = self.rpc.call_blockdev_rename(self.target_node,
11372 rename_new_to_old)
11373 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11375 # Intermediate steps of in memory modifications
11376 for old, new in zip(old_lvs, new_lvs):
11377 new.logical_id = old.logical_id
11378 self.cfg.SetDiskID(new, self.target_node)
11380 # We need to modify old_lvs so that removal later removes the
11381 # right LVs, not the newly added ones; note that old_lvs is a
11383 for disk in old_lvs:
11384 disk.logical_id = ren_fn(disk, temp_suffix)
11385 self.cfg.SetDiskID(disk, self.target_node)
11387 # Now that the new lvs have the old name, we can add them to the device
11388 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11389 result = self.rpc.call_blockdev_addchildren(self.target_node,
11390 (dev, self.instance), new_lvs)
11391 msg = result.fail_msg
11392 if msg:
11393 for new_lv in new_lvs:
11394 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11395 new_lv).fail_msg
11396 if msg2:
11397 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11398 hint=("cleanup manually the unused logical"
11399 " volumes"))
11400 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11402 cstep = itertools.count(5)
11404 if self.early_release:
11405 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11406 self._RemoveOldStorage(self.target_node, iv_names)
11407 # TODO: Check if releasing locks early still makes sense
11408 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11410 # Release all resource locks except those used by the instance
11411 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11412 keep=self.node_secondary_ip.keys())
11414 # Release all node locks while waiting for sync
11415 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11417 # TODO: Can the instance lock be downgraded here? Take the optional disk
11418 # shutdown in the caller into consideration.
11421 # This can fail as the old devices are degraded and _WaitForSync
11422 # does a combined result over all disks, so we don't check its return value
11423 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11424 _WaitForSync(self.lu, self.instance)
11426 # Check all devices manually
11427 self._CheckDevices(self.instance.primary_node, iv_names)
11429 # Step: remove old storage
11430 if not self.early_release:
11431 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11432 self._RemoveOldStorage(self.target_node, iv_names)
11434 def _ExecDrbd8Secondary(self, feedback_fn):
11435 """Replace the secondary node for DRBD 8.
11437 The algorithm for replace is quite complicated:
11438 - for all disks of the instance:
11439 - create new LVs on the new node with same names
11440 - shutdown the drbd device on the old secondary
11441 - disconnect the drbd network on the primary
11442 - create the drbd device on the new secondary
11443 - network attach the drbd on the primary, using an artifice:
11444 the drbd code for Attach() will connect to the network if it
11445 finds a device which is connected to the good local disks but
11446 not network enabled
11447 - wait for sync across all devices
11448 - remove all disks from the old secondary
11450 Failures are not very well handled.
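Sketch of the logical_id handling with hypothetical values: an existing disk
carries (pnode, old_snode, 11000, p_minor, s_minor, secret). The device is
first created on the new secondary with
(pnode, new_snode, None, p_minor, new_minor, secret), i.e. without a port, so
it comes up without networking; the configuration is later switched to
(pnode, new_snode, 11000, p_minor, new_minor, secret) before drbd_attach_net
reconnects the primary and the new secondary.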
11453 steps_total = 6
11455 pnode = self.instance.primary_node
11457 # Step: check device activation
11458 self.lu.LogStep(1, steps_total, "Check device existence")
11459 self._CheckDisksExistence([self.instance.primary_node])
11460 self._CheckVolumeGroup([self.instance.primary_node])
11462 # Step: check other node consistency
11463 self.lu.LogStep(2, steps_total, "Check peer consistency")
11464 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11466 # Step: create new storage
11467 self.lu.LogStep(3, steps_total, "Allocate new storage")
11468 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11469 for idx, dev in enumerate(disks):
11470 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11471 (self.new_node, idx))
11472 # we pass force_create=True to force LVM creation
11473 for new_lv in dev.children:
11474 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11475 True, _GetInstanceInfoText(self.instance), False)
11477 # Step 4: drbd minors and drbd setup changes
11478 # after this, we must manually remove the drbd minors on both the
11479 # error and the success paths
11480 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11481 minors = self.cfg.AllocateDRBDMinor([self.new_node
11482 for dev in self.instance.disks],
11483 self.instance.name)
11484 logging.debug("Allocated minors %r", minors)
11486 iv_names = {}
11487 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11488 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11489 (self.new_node, idx))
11490 # create new devices on new_node; note that we create two IDs:
11491 # one without port, so the drbd will be activated without
11492 # networking information on the new node at this stage, and one
11493 # with network, for the latter activation in step 4
11494 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11495 if self.instance.primary_node == o_node1:
11496 p_minor = o_minor1
11497 else:
11498 assert self.instance.primary_node == o_node2, "Three-node instance?"
11499 p_minor = o_minor2
11501 new_alone_id = (self.instance.primary_node, self.new_node, None,
11502 p_minor, new_minor, o_secret)
11503 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11504 p_minor, new_minor, o_secret)
11506 iv_names[idx] = (dev, dev.children, new_net_id)
11507 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11508 new_net_id)
11509 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11510 logical_id=new_alone_id,
11511 children=dev.children,
11512 size=dev.size,
11513 params={})
11514 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11515 self.cfg)
11516 try:
11517 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11518 anno_new_drbd,
11519 _GetInstanceInfoText(self.instance), False)
11520 except errors.GenericError:
11521 self.cfg.ReleaseDRBDMinors(self.instance.name)
11522 raise
11524 # We have new devices, shutdown the drbd on the old secondary
11525 for idx, dev in enumerate(self.instance.disks):
11526 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11527 self.cfg.SetDiskID(dev, self.target_node)
11528 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11529 (dev, self.instance)).fail_msg
11530 if msg:
11531 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11532 " node: %s" % (idx, msg),
11533 hint=("Please cleanup this device manually as"
11534 " soon as possible"))
11536 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11537 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11538 self.instance.disks)[pnode]
11540 msg = result.fail_msg
11541 if msg:
11542 # detaches didn't succeed (unlikely)
11543 self.cfg.ReleaseDRBDMinors(self.instance.name)
11544 raise errors.OpExecError("Can't detach the disks from the network on"
11545 " old node: %s" % (msg,))
11547 # if we managed to detach at least one, we update all the disks of
11548 # the instance to point to the new secondary
11549 self.lu.LogInfo("Updating instance configuration")
11550 for dev, _, new_logical_id in iv_names.itervalues():
11551 dev.logical_id = new_logical_id
11552 self.cfg.SetDiskID(dev, self.instance.primary_node)
11554 self.cfg.Update(self.instance, feedback_fn)
11556 # Release all node locks (the configuration has been updated)
11557 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11559 # and now perform the drbd attach
11560 self.lu.LogInfo("Attaching primary drbds to new secondary"
11561 " (standalone => connected)")
11562 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11563 self.new_node],
11564 self.node_secondary_ip,
11565 (self.instance.disks, self.instance),
11566 self.instance.name,
11567 False)
11568 for to_node, to_result in result.items():
11569 msg = to_result.fail_msg
11570 if msg:
11571 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11572 to_node, msg,
11573 hint=("please do a gnt-instance info to see the"
11574 " status of disks"))
11576 cstep = itertools.count(5)
11578 if self.early_release:
11579 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11580 self._RemoveOldStorage(self.target_node, iv_names)
11581 # TODO: Check if releasing locks early still makes sense
11582 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11584 # Release all resource locks except those used by the instance
11585 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11586 keep=self.node_secondary_ip.keys())
11588 # TODO: Can the instance lock be downgraded here? Take the optional disk
11589 # shutdown in the caller into consideration.
11592 # This can fail as the old devices are degraded and _WaitForSync
11593 # does a combined result over all disks, so we don't check its return value
11594 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11595 _WaitForSync(self.lu, self.instance)
11597 # Check all devices manually
11598 self._CheckDevices(self.instance.primary_node, iv_names)
11600 # Step: remove old storage
11601 if not self.early_release:
11602 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11603 self._RemoveOldStorage(self.target_node, iv_names)
11606 class LURepairNodeStorage(NoHooksLU):
11607 """Repairs the volume group on a node.
11612 def CheckArguments(self):
11613 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11615 storage_type = self.op.storage_type
11617 if (constants.SO_FIX_CONSISTENCY not in
11618 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11619 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11620 " repaired" % storage_type,
11621 errors.ECODE_INVAL)
11623 def ExpandNames(self):
11624 self.needed_locks = {
11625 locking.LEVEL_NODE: [self.op.node_name],
11628 def _CheckFaultyDisks(self, instance, node_name):
11629 """Ensure faulty disks abort the opcode or at least warn."""
11630 try:
11631 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11632 node_name, True):
11633 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11634 " node '%s'" % (instance.name, node_name),
11635 errors.ECODE_STATE)
11636 except errors.OpPrereqError, err:
11637 if self.op.ignore_consistency:
11638 self.proc.LogWarning(str(err.args[0]))
11639 else:
11640 raise
11642 def CheckPrereq(self):
11643 """Check prerequisites.
11646 # Check whether any instance on this node has faulty disks
11647 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11648 if inst.admin_state != constants.ADMINST_UP:
11649 continue
11650 check_nodes = set(inst.all_nodes)
11651 check_nodes.discard(self.op.node_name)
11652 for inst_node_name in check_nodes:
11653 self._CheckFaultyDisks(inst, inst_node_name)
11655 def Exec(self, feedback_fn):
11656 feedback_fn("Repairing storage unit '%s' on %s ..." %
11657 (self.op.name, self.op.node_name))
11659 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11660 result = self.rpc.call_storage_execute(self.op.node_name,
11661 self.op.storage_type, st_args,
11663 constants.SO_FIX_CONSISTENCY)
11664 result.Raise("Failed to repair storage unit '%s' on %s" %
11665 (self.op.name, self.op.node_name))
11668 class LUNodeEvacuate(NoHooksLU):
11669 """Evacuates instances off a list of nodes.
11674 _MODE2IALLOCATOR = {
11675 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11676 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11677 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11679 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11680 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11681 constants.IALLOCATOR_NEVAC_MODES)
11683 def CheckArguments(self):
11684 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11686 def ExpandNames(self):
11687 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11689 if self.op.remote_node is not None:
11690 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11691 assert self.op.remote_node
11693 if self.op.remote_node == self.op.node_name:
11694 raise errors.OpPrereqError("Can not use evacuated node as a new"
11695 " secondary node", errors.ECODE_INVAL)
11697 if self.op.mode != constants.NODE_EVAC_SEC:
11698 raise errors.OpPrereqError("Without the use of an iallocator only"
11699 " secondary instances can be evacuated",
11700 errors.ECODE_INVAL)
11703 self.share_locks = _ShareAll()
11704 self.needed_locks = {
11705 locking.LEVEL_INSTANCE: [],
11706 locking.LEVEL_NODEGROUP: [],
11707 locking.LEVEL_NODE: [],
11710 # Determine nodes (via group) optimistically, needs verification once locks
11711 # have been acquired
11712 self.lock_nodes = self._DetermineNodes()
11714 def _DetermineNodes(self):
11715 """Gets the list of nodes to operate on.
11718 if self.op.remote_node is None:
11719 # Iallocator will choose any node(s) in the same group
11720 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11721 else:
11722 group_nodes = frozenset([self.op.remote_node])
11724 # Determine nodes to be locked
11725 return set([self.op.node_name]) | group_nodes
11727 def _DetermineInstances(self):
11728 """Builds list of instances to operate on.
11731 assert self.op.mode in constants.NODE_EVAC_MODES
11733 if self.op.mode == constants.NODE_EVAC_PRI:
11734 # Primary instances only
11735 inst_fn = _GetNodePrimaryInstances
11736 assert self.op.remote_node is None, \
11737 "Evacuating primary instances requires iallocator"
11738 elif self.op.mode == constants.NODE_EVAC_SEC:
11739 # Secondary instances only
11740 inst_fn = _GetNodeSecondaryInstances
11741 else:
11742 # All instances
11743 assert self.op.mode == constants.NODE_EVAC_ALL
11744 inst_fn = _GetNodeInstances
11745 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11747 raise errors.OpPrereqError("Due to an issue with the iallocator"
11748 " interface it is not possible to evacuate"
11749 " all instances at once; specify explicitly"
11750 " whether to evacuate primary or secondary"
11752 errors.ECODE_INVAL)
11754 return inst_fn(self.cfg, self.op.node_name)
11756 def DeclareLocks(self, level):
11757 if level == locking.LEVEL_INSTANCE:
11758 # Lock instances optimistically, needs verification once node and group
11759 # locks have been acquired
11760 self.needed_locks[locking.LEVEL_INSTANCE] = \
11761 set(i.name for i in self._DetermineInstances())
11763 elif level == locking.LEVEL_NODEGROUP:
11764 # Lock node groups for all potential target nodes optimistically, needs
11765 # verification once nodes have been acquired
11766 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11767 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11769 elif level == locking.LEVEL_NODE:
11770 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11772 def CheckPrereq(self):
11774 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11775 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11776 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11778 need_nodes = self._DetermineNodes()
11780 if not owned_nodes.issuperset(need_nodes):
11781 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11782 " locks were acquired, current nodes are"
11783 " '%s', used to be '%s'; retry the"
11784 " operation" %
11785 (self.op.node_name,
11786 utils.CommaJoin(need_nodes),
11787 utils.CommaJoin(owned_nodes)),
11788 errors.ECODE_STATE)
11790 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11791 if owned_groups != wanted_groups:
11792 raise errors.OpExecError("Node groups changed since locks were acquired,"
11793 " current groups are '%s', used to be '%s';"
11794 " retry the operation" %
11795 (utils.CommaJoin(wanted_groups),
11796 utils.CommaJoin(owned_groups)))
11798 # Determine affected instances
11799 self.instances = self._DetermineInstances()
11800 self.instance_names = [i.name for i in self.instances]
11802 if set(self.instance_names) != owned_instances:
11803 raise errors.OpExecError("Instances on node '%s' changed since locks"
11804 " were acquired, current instances are '%s',"
11805 " used to be '%s'; retry the operation" %
11806 (self.op.node_name,
11807 utils.CommaJoin(self.instance_names),
11808 utils.CommaJoin(owned_instances)))
11810 if self.instance_names:
11811 self.LogInfo("Evacuating instances from node '%s': %s",
11812 self.op.node_name,
11813 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11814 else:
11815 self.LogInfo("No instances to evacuate from node '%s'",
11816 self.op.node_name)
11818 if self.op.remote_node is not None:
11819 for i in self.instances:
11820 if i.primary_node == self.op.remote_node:
11821 raise errors.OpPrereqError("Node %s is the primary node of"
11822 " instance %s, cannot use it as"
11823 " a new secondary node" %
11824 (self.op.remote_node, i.name),
11825 errors.ECODE_INVAL)
11827 def Exec(self, feedback_fn):
11828 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11830 if not self.instance_names:
11831 # No instances to evacuate
11832 jobs = []
11834 elif self.op.iallocator is not None:
11835 # TODO: Implement relocation to other group
11836 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11837 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11838 instances=list(self.instance_names))
11840 ial.Run(self.op.iallocator)
11842 if not ial.success:
11843 raise errors.OpPrereqError("Can't compute node evacuation using"
11844 " iallocator '%s': %s" %
11845 (self.op.iallocator, ial.info),
11846 errors.ECODE_NORES)
11848 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11850 elif self.op.remote_node is not None:
11851 assert self.op.mode == constants.NODE_EVAC_SEC
11852 jobs = [
11853 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11854 remote_node=self.op.remote_node,
11855 disks=[],
11856 mode=constants.REPLACE_DISK_CHG,
11857 early_release=self.op.early_release)]
11858 for instance_name in self.instance_names
11859 ]
11861 else:
11862 raise errors.ProgrammerError("No iallocator or remote node")
11864 return ResultWithJobs(jobs)
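# Minimal sketch of the value returned in the remote_node case (hypothetical
# instance names): one single-opcode job per affected instance, e.g.
#   jobs == [[OpInstanceReplaceDisks(instance_name="inst1", ...)],
#            [OpInstanceReplaceDisks(instance_name="inst2", ...)]]
# so each instance is evacuated by its own job.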
11867 def _SetOpEarlyRelease(early_release, op):
11868 """Sets C{early_release} flag on opcodes if available.
11871 try:
11872 op.early_release = early_release
11873 except AttributeError:
11874 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11876 return op
11879 def _NodeEvacDest(use_nodes, group, nodes):
11880 """Returns group or nodes depending on caller's choice.
11883 if use_nodes:
11884 return utils.CommaJoin(nodes)
11885 else:
11886 return group
11889 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11890 """Unpacks the result of change-group and node-evacuate iallocator requests.
11892 Handles iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11893 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11895 @type lu: L{LogicalUnit}
11896 @param lu: Logical unit instance
11897 @type alloc_result: tuple/list
11898 @param alloc_result: Result from iallocator
11899 @type early_release: bool
11900 @param early_release: Whether to release locks early if possible
11901 @type use_nodes: bool
11902 @param use_nodes: Whether to display node names instead of groups
11905 (moved, failed, jobs) = alloc_result
11907 if failed:
11908 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11909 for (name, reason) in failed)
11910 lu.LogWarning("Unable to evacuate instances %s", failreason)
11911 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11913 if moved:
11914 lu.LogInfo("Instances to be moved: %s",
11915 utils.CommaJoin("%s (to %s)" %
11916 (name, _NodeEvacDest(use_nodes, group, nodes))
11917 for (name, group, nodes) in moved))
11919 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11920 map(opcodes.OpCode.LoadOpCode, ops))
11921 for ops in jobs]
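# Illustrative alloc_result as unpacked above (hypothetical values):
#   moved  = [("inst1", "group1", ["node3", "node4"])]
#   failed = [("inst2", "not enough memory")]
#   jobs   = [[<serialized opcode dict>, ...], ...]
# Every inner list of serialized opcodes becomes one job; OpCode.LoadOpCode
# rebuilds the opcode objects and _SetOpEarlyRelease propagates the
# early_release flag where the opcode supports it.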
11924 class LUInstanceGrowDisk(LogicalUnit):
11925 """Grow a disk of an instance.
11928 HPATH = "disk-grow"
11929 HTYPE = constants.HTYPE_INSTANCE
11932 def ExpandNames(self):
11933 self._ExpandAndLockInstance()
11934 self.needed_locks[locking.LEVEL_NODE] = []
11935 self.needed_locks[locking.LEVEL_NODE_RES] = []
11936 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11937 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11939 def DeclareLocks(self, level):
11940 if level == locking.LEVEL_NODE:
11941 self._LockInstancesNodes()
11942 elif level == locking.LEVEL_NODE_RES:
11944 self.needed_locks[locking.LEVEL_NODE_RES] = \
11945 self.needed_locks[locking.LEVEL_NODE][:]
11947 def BuildHooksEnv(self):
11948 """Build hooks env.
11950 This runs on the master, the primary and all the secondaries.
11953 env = {
11954 "DISK": self.op.disk,
11955 "AMOUNT": self.op.amount,
11956 "ABSOLUTE": self.op.absolute,
11957 }
11958 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11959 return env
11961 def BuildHooksNodes(self):
11962 """Build hooks nodes.
11965 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11966 return (nl, nl)
11968 def CheckPrereq(self):
11969 """Check prerequisites.
11971 This checks that the instance is in the cluster.
11974 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11975 assert instance is not None, \
11976 "Cannot retrieve locked instance %s" % self.op.instance_name
11977 nodenames = list(instance.all_nodes)
11978 for node in nodenames:
11979 _CheckNodeOnline(self, node)
11981 self.instance = instance
11983 if instance.disk_template not in constants.DTS_GROWABLE:
11984 raise errors.OpPrereqError("Instance's disk layout does not support"
11985 " growing", errors.ECODE_INVAL)
11987 self.disk = instance.FindDisk(self.op.disk)
11989 if self.op.absolute:
11990 self.target = self.op.amount
11991 self.delta = self.target - self.disk.size
11992 if self.delta < 0:
11993 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11994 "current disk size (%s)" %
11995 (utils.FormatUnit(self.target, "h"),
11996 utils.FormatUnit(self.disk.size, "h")),
11997 errors.ECODE_STATE)
11998 else:
11999 self.delta = self.op.amount
12000 self.target = self.disk.size + self.delta
12001 if self.delta < 0:
12002 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12003 utils.FormatUnit(self.delta, "h"),
12004 errors.ECODE_INVAL)
12006 if instance.disk_template not in (constants.DT_FILE,
12007 constants.DT_SHARED_FILE,
12008 constants.DT_EXT):
12010 # TODO: check the free disk space for file, when that feature will be
12011 # supported
12012 _CheckNodesFreeDiskPerVG(self, nodenames,
12013 self.disk.ComputeGrowth(self.delta))
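# Worked example of the size computation above (hypothetical values, in MiB):
# with a 10240 MiB disk,
#   absolute=False, amount=2048  -> delta=2048,  target=12288
#   absolute=True,  amount=20480 -> delta=10240, target=20480
# and a negative delta is rejected in either branch.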
12015 def Exec(self, feedback_fn):
12016 """Execute disk grow.
12019 instance = self.instance
12020 disk = self.disk
12022 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12023 assert (self.owned_locks(locking.LEVEL_NODE) ==
12024 self.owned_locks(locking.LEVEL_NODE_RES))
12026 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12027 if not disks_ok:
12028 raise errors.OpExecError("Cannot activate block device to grow")
12030 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12031 (self.op.disk, instance.name,
12032 utils.FormatUnit(self.delta, "h"),
12033 utils.FormatUnit(self.target, "h")))
12035 # First run all grow ops in dry-run mode
12036 for node in instance.all_nodes:
12037 self.cfg.SetDiskID(disk, node)
12038 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12040 result.Raise("Grow request failed to node %s" % node)
12042 # We know that (as far as we can test) operations across different
12043 # nodes will succeed, time to run it for real
12044 for node in instance.all_nodes:
12045 self.cfg.SetDiskID(disk, node)
12046 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12048 result.Raise("Grow request failed to node %s" % node)
12050 # TODO: Rewrite code to work properly
12051 # DRBD goes into sync mode for a short amount of time after executing the
12052 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
12053 # calling "resize" in sync mode fails. Sleeping for a short amount of
12054 # time is a work-around.
12055 time.sleep(5)
12057 disk.RecordGrow(self.delta)
12058 self.cfg.Update(instance, feedback_fn)
12060 # Changes have been recorded, release node lock
12061 _ReleaseLocks(self, locking.LEVEL_NODE)
12063 # Downgrade lock while waiting for sync
12064 self.glm.downgrade(locking.LEVEL_INSTANCE)
12066 if self.op.wait_for_sync:
12067 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12068 if disk_abort:
12069 self.proc.LogWarning("Disk sync-ing has not returned a good"
12070 " status; please check the instance")
12071 if instance.admin_state != constants.ADMINST_UP:
12072 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12073 elif instance.admin_state != constants.ADMINST_UP:
12074 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12075 " not supposed to be running because no wait for"
12076 " sync mode was requested")
12078 assert self.owned_locks(locking.LEVEL_NODE_RES)
12079 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12082 class LUInstanceQueryData(NoHooksLU):
12083 """Query runtime instance data.
12088 def ExpandNames(self):
12089 self.needed_locks = {}
12091 # Use locking if requested or when non-static information is wanted
12092 if not (self.op.static or self.op.use_locking):
12093 self.LogWarning("Non-static data requested, locks need to be acquired")
12094 self.op.use_locking = True
12096 if self.op.instances or not self.op.use_locking:
12097 # Expand instance names right here
12098 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12099 else:
12100 # Will use acquired locks
12101 self.wanted_names = None
12103 if self.op.use_locking:
12104 self.share_locks = _ShareAll()
12106 if self.wanted_names is None:
12107 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12108 else:
12109 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12111 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12112 self.needed_locks[locking.LEVEL_NODE] = []
12113 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12115 def DeclareLocks(self, level):
12116 if self.op.use_locking:
12117 if level == locking.LEVEL_NODEGROUP:
12118 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12120 # Lock all groups used by instances optimistically; this requires going
12121 # via the node before it's locked, requiring verification later on
12122 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12123 frozenset(group_uuid
12124 for instance_name in owned_instances
12125 for group_uuid in
12126 self.cfg.GetInstanceNodeGroups(instance_name))
12128 elif level == locking.LEVEL_NODE:
12129 self._LockInstancesNodes()
12131 def CheckPrereq(self):
12132 """Check prerequisites.
12134 This only checks the optional instance list against the existing names.
12137 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12138 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12139 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12141 if self.wanted_names is None:
12142 assert self.op.use_locking, "Locking was not used"
12143 self.wanted_names = owned_instances
12145 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12147 if self.op.use_locking:
12148 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12151 assert not (owned_instances or owned_groups or owned_nodes)
12153 self.wanted_instances = instances.values()
12155 def _ComputeBlockdevStatus(self, node, instance, dev):
12156 """Returns the status of a block device
12159 if self.op.static or not node:
12160 return None
12162 self.cfg.SetDiskID(dev, node)
12164 result = self.rpc.call_blockdev_find(node, dev)
12168 result.Raise("Can't compute disk status for %s" % instance.name)
12170 status = result.payload
12174 return (status.dev_path, status.major, status.minor,
12175 status.sync_percent, status.estimated_time,
12176 status.is_degraded, status.ldisk_status)
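# Sketch of the returned tuple (hypothetical values):
#   ("/dev/drbd0", 147, 0, 99.5, 3, False, "ok")
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); None is returned when only static data was requested or the
# node could not be queried.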
12178 def _ComputeDiskStatus(self, instance, snode, dev):
12179 """Compute block device status.
12182 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12184 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12186 def _ComputeDiskStatusInner(self, instance, snode, dev):
12187 """Compute block device status.
12189 @attention: The device has to be annotated already.
12192 if dev.dev_type in constants.LDS_DRBD:
12193 # we change the snode then (otherwise we use the one passed in)
12194 if dev.logical_id[0] == instance.primary_node:
12195 snode = dev.logical_id[1]
12196 else:
12197 snode = dev.logical_id[0]
12199 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12200 instance, dev)
12201 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12204 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12211 "iv_name": dev.iv_name,
12212 "dev_type": dev.dev_type,
12213 "logical_id": dev.logical_id,
12214 "physical_id": dev.physical_id,
12215 "pstatus": dev_pstatus,
12216 "sstatus": dev_sstatus,
12217 "children": dev_children,
12222 def Exec(self, feedback_fn):
12223 """Gather and return data"""
12224 result = {}
12226 cluster = self.cfg.GetClusterInfo()
12228 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12229 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12231 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12232 for node in nodes.values()))
12234 group2name_fn = lambda uuid: groups[uuid].name
12236 for instance in self.wanted_instances:
12237 pnode = nodes[instance.primary_node]
12239 if self.op.static or pnode.offline:
12240 remote_state = None
12241 if pnode.offline:
12242 self.LogWarning("Primary node %s is marked offline, returning static"
12243 " information only for instance %s" %
12244 (pnode.name, instance.name))
12245 else:
12246 remote_info = self.rpc.call_instance_info(instance.primary_node,
12247 instance.name,
12248 instance.hypervisor)
12249 remote_info.Raise("Error checking node %s" % instance.primary_node)
12250 remote_info = remote_info.payload
12251 if remote_info and "state" in remote_info:
12252 remote_state = "up"
12253 else:
12254 if instance.admin_state == constants.ADMINST_UP:
12255 remote_state = "down"
12256 else:
12257 remote_state = instance.admin_state
12259 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12260 instance.disks)
12262 snodes_group_uuids = [nodes[snode_name].group
12263 for snode_name in instance.secondary_nodes]
12265 result[instance.name] = {
12266 "name": instance.name,
12267 "config_state": instance.admin_state,
12268 "run_state": remote_state,
12269 "pnode": instance.primary_node,
12270 "pnode_group_uuid": pnode.group,
12271 "pnode_group_name": group2name_fn(pnode.group),
12272 "snodes": instance.secondary_nodes,
12273 "snodes_group_uuids": snodes_group_uuids,
12274 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12276 # this happens to be the same format used for hooks
12277 "nics": _NICListToTuple(self, instance.nics),
12278 "disk_template": instance.disk_template,
12280 "hypervisor": instance.hypervisor,
12281 "network_port": instance.network_port,
12282 "hv_instance": instance.hvparams,
12283 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12284 "be_instance": instance.beparams,
12285 "be_actual": cluster.FillBE(instance),
12286 "os_instance": instance.osparams,
12287 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12288 "serial_no": instance.serial_no,
12289 "mtime": instance.mtime,
12290 "ctime": instance.ctime,
12291 "uuid": instance.uuid,
12292 }
12294 return result
12297 def PrepareContainerMods(mods, private_fn):
12298 """Prepares a list of container modifications by adding a private data field.
12300 @type mods: list of tuples; (operation, index, parameters)
12301 @param mods: List of modifications
12302 @type private_fn: callable or None
12303 @param private_fn: Callable for constructing a private data field for a
12308 if private_fn is None:
12309 fn = lambda: None
12310 else:
12311 fn = private_fn
12313 return [(op, idx, params, fn()) for (op, idx, params) in mods]
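# Minimal usage sketch (hypothetical parameters):
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 1024})], None)
#   -> [(constants.DDM_ADD, -1, {"size": 1024}, None)]
# With a private_fn such as _InstNicModPrivate the fourth element is a fresh
# private data object per modification.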
12316 #: Type description for changes as returned by L{ApplyContainerMods}'s
12318 _TApplyContModsCbChanges = \
12319 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12320 ht.TNonEmptyString,
12325 def ApplyContainerMods(kind, container, chgdesc, mods,
12326 create_fn, modify_fn, remove_fn):
12327 """Applies descriptions in C{mods} to C{container}.
12330 @param kind: One-word item description
12331 @type container: list
12332 @param container: Container to modify
12333 @type chgdesc: None or list
12334 @param chgdesc: List of applied changes
12336 @param mods: Modifications as returned by L{PrepareContainerMods}
12337 @type create_fn: callable
12338 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12339 receives absolute item index, parameters and private data object as added
12340 by L{PrepareContainerMods}, returns tuple containing new item and changes
12342 @type modify_fn: callable
12343 @param modify_fn: Callback for modifying an existing item
12344 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12345 and private data object as added by L{PrepareContainerMods}, returns
12347 @type remove_fn: callable
12348 @param remove_fn: Callback on removing item; receives absolute item index,
12349 item and private data object as added by L{PrepareContainerMods}
12352 for (op, idx, params, private) in mods:
12355 absidx = len(container) - 1
12357 raise IndexError("Not accepting negative indices other than -1")
12358 elif idx > len(container):
12359 raise IndexError("Got %s index %s, but there are only %s" %
12360 (kind, idx, len(container)))
12366 if op == constants.DDM_ADD:
12367 # Calculate where item will be added
12369 addidx = len(container)
12373 if create_fn is None:
12376 (item, changes) = create_fn(addidx, params, private)
12379 container.append(item)
12382 assert idx <= len(container)
12383 # list.insert does so before the specified index
12384 container.insert(idx, item)
12386 # Retrieve existing item
12388 item = container[absidx]
12390 raise IndexError("Invalid %s index %s" % (kind, idx))
12392 if op == constants.DDM_REMOVE:
12395 if remove_fn is not None:
12396 remove_fn(absidx, item, private)
12398 #TODO: include a hotplugged msg in changes
12399 changes = [("%s/%s" % (kind, absidx), "remove")]
12401 assert container[absidx] == item
12402 del container[absidx]
12403 elif op == constants.DDM_MODIFY:
12404 if modify_fn is not None:
12405 #TODO: include a hotplugged msg in changes
12406 changes = modify_fn(absidx, item, params, private)
12409 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12411 assert _TApplyContModsCbChanges(changes)
12413 if not (chgdesc is None or changes is None):
12414 chgdesc.extend(changes)
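# Illustrative call (hypothetical callback names): applying prepared NIC
# modifications to an instance and collecting a change description:
#   chgdesc = []
#   ApplyContainerMods("nic", instance.nics, chgdesc, nicmod,
#                      create_nic_fn, modify_nic_fn, None)
# afterwards chgdesc contains (name, value) pairs such as
#   [("nic.0/mode", "bridged"), ("nic/1", "remove")]
# matching the _TApplyContModsCbChanges shape above.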
12417 def _UpdateIvNames(base_index, disks):
12418 """Updates the C{iv_name} attribute of disks.
12420 @type disks: list of L{objects.Disk}
12423 for (idx, disk) in enumerate(disks):
12424 disk.iv_name = "disk/%s" % (base_index + idx, )
12427 class _InstNicModPrivate:
12428 """Data structure for network interface modifications.
12430 Used by L{LUInstanceSetParams}.
12433 def __init__(self):
12438 class LUInstanceSetParams(LogicalUnit):
12439 """Modifies an instances's parameters.
12442 HPATH = "instance-modify"
12443 HTYPE = constants.HTYPE_INSTANCE
12447 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12448 assert ht.TList(mods)
12449 assert not mods or len(mods[0]) in (2, 3)
12451 if mods and len(mods[0]) == 2:
12455 for op, params in mods:
12456 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12457 result.append((op, -1, params))
12461 raise errors.OpPrereqError("Only one %s add or remove operation is"
12462 " supported at a time" % kind,
12463 errors.ECODE_INVAL)
12465 result.append((constants.DDM_MODIFY, op, params))
12467 assert verify_fn(result)
12474 def _CheckMods(kind, mods, key_types, item_fn):
12475 """Ensures requested disk/NIC modifications are valid.
12478 for (op, _, params) in mods:
12479 assert ht.TDict(params)
12481 # If key_types is an empty dict, we assume we have an 'ext' template
12482 # and thus do not ForceDictType
12483 if key_types:
12484 utils.ForceDictType(params, key_types)
12486 if op == constants.DDM_REMOVE:
12488 raise errors.OpPrereqError("No settings should be passed when"
12489 " removing a %s" % kind,
12490 errors.ECODE_INVAL)
12491 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12492 item_fn(op, params)
12494 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12497 def _VerifyDiskModification(op, params):
12498 """Verifies a disk modification.
12501 if op == constants.DDM_ADD:
12502 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12503 if mode not in constants.DISK_ACCESS_SET:
12504 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12505 errors.ECODE_INVAL)
12507 size = params.get(constants.IDISK_SIZE, None)
12508 if size is None:
12509 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12510 constants.IDISK_SIZE, errors.ECODE_INVAL)
12512 try:
12513 size = int(size)
12514 except (TypeError, ValueError), err:
12515 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12516 errors.ECODE_INVAL)
12518 params[constants.IDISK_SIZE] = size
12520 elif op == constants.DDM_MODIFY:
12521 if constants.IDISK_SIZE in params:
12522 raise errors.OpPrereqError("Disk size change not possible, use"
12523 " grow-disk", errors.ECODE_INVAL)
12524 if constants.IDISK_MODE not in params:
12525 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
12526 " modification supported, but missing",
12527 errors.ECODE_NOENT)
12528 if len(params) > 1:
12529 raise errors.OpPrereqError("Disk modification doesn't support"
12530 " additional arbitrary parameters",
12531 errors.ECODE_INVAL)
12534 def _VerifyNicModification(op, params):
12535 """Verifies a network interface modification.
12538 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12539 ip = params.get(constants.INIC_IP, None)
12540 req_net = params.get(constants.INIC_NETWORK, None)
12541 link = params.get(constants.NIC_LINK, None)
12542 mode = params.get(constants.NIC_MODE, None)
12543 if req_net is not None:
12544 if req_net.lower() == constants.VALUE_NONE:
12545 params[constants.INIC_NETWORK] = None
12547 elif link is not None or mode is not None:
12548 raise errors.OpPrereqError("If a network is given,"
12549 " mode or link should not be given",
12550 errors.ECODE_INVAL)
12552 if op == constants.DDM_ADD:
12553 macaddr = params.get(constants.INIC_MAC, None)
12554 if macaddr is None:
12555 params[constants.INIC_MAC] = constants.VALUE_AUTO
12558 if ip.lower() == constants.VALUE_NONE:
12559 params[constants.INIC_IP] = None
12561 if ip.lower() == constants.NIC_IP_POOL:
12562 if op == constants.DDM_ADD and req_net is None:
12563 raise errors.OpPrereqError("If ip=pool, parameter network"
12564 " must be passed too",
12565 errors.ECODE_INVAL)
12567 if not netutils.IPAddress.IsValid(ip):
12568 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12569 errors.ECODE_INVAL)
12571 if constants.INIC_MAC in params:
12572 macaddr = params[constants.INIC_MAC]
12573 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12574 macaddr = utils.NormalizeAndValidateMac(macaddr)
12576 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12577 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12578 " modifying an existing NIC",
12579 errors.ECODE_INVAL)
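# Examples of modifications accepted by the checks above (hypothetical values):
#   (constants.DDM_ADD,    -1, {"ip": "pool", "network": "net1"})
#   (constants.DDM_MODIFY,  0, {"link": "br0", "mode": "bridged"})
# whereas ip="pool" on an add without a network, or a network combined with an
# explicit link/mode, is rejected.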
12581 def CheckArguments(self):
12582 if not (self.op.nics or self.op.disks or self.op.disk_template or
12583 self.op.hvparams or self.op.beparams or self.op.os_name or
12584 self.op.offline is not None or self.op.runtime_mem):
12585 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12587 if self.op.hvparams:
12588 _CheckGlobalHvParams(self.op.hvparams)
12590 if self.op.allow_arbit_params:
12592 self._UpgradeDiskNicMods("disk", self.op.disks,
12593 opcodes.OpInstanceSetParams.TestExtDiskModifications)
12596 self._UpgradeDiskNicMods("disk", self.op.disks,
12597 opcodes.OpInstanceSetParams.TestDiskModifications)
12600 self._UpgradeDiskNicMods("NIC", self.op.nics,
12601 opcodes.OpInstanceSetParams.TestNicModifications)
12603 # Check disk modifications
12604 if self.op.allow_arbit_params:
12605 self._CheckMods("disk", self.op.disks, {},
12606 self._VerifyDiskModification)
12607 else:
12608 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12609 self._VerifyDiskModification)
12611 if self.op.disks and self.op.disk_template is not None:
12612 raise errors.OpPrereqError("Disk template conversion and other disk"
12613 " changes not supported at the same time",
12614 errors.ECODE_INVAL)
12616 if (self.op.disk_template and
12617 self.op.disk_template in constants.DTS_INT_MIRROR and
12618 self.op.remote_node is None):
12619 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12620 " one requires specifying a secondary node",
12621 errors.ECODE_INVAL)
12623 # Check NIC modifications
12624 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12625 self._VerifyNicModification)
12627 def ExpandNames(self):
12628 self._ExpandAndLockInstance()
12629 # Can't even acquire node locks in shared mode as upcoming changes in
12630 # Ganeti 2.6 will start to modify the node object on disk conversion
12631 self.needed_locks[locking.LEVEL_NODE] = []
12632 self.needed_locks[locking.LEVEL_NODE_RES] = []
12633 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12635 def DeclareLocks(self, level):
12636 # TODO: Acquire group lock in shared mode (disk parameters)
12637 if level == locking.LEVEL_NODE:
12638 self._LockInstancesNodes()
12639 if self.op.disk_template and self.op.remote_node:
12640 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12641 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12642 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12644 self.needed_locks[locking.LEVEL_NODE_RES] = \
12645 self.needed_locks[locking.LEVEL_NODE][:]
12647 def BuildHooksEnv(self):
12648 """Build hooks env.
12650 This runs on the master, primary and secondaries.
12653 args = {}
12654 if constants.BE_MINMEM in self.be_new:
12655 args["minmem"] = self.be_new[constants.BE_MINMEM]
12656 if constants.BE_MAXMEM in self.be_new:
12657 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12658 if constants.BE_VCPUS in self.be_new:
12659 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12660 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12661 # information at all.
12663 if self._new_nics is not None:
12665 nics = []
12666 for nic in self._new_nics:
12667 n = copy.deepcopy(nic)
12668 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12669 n.nicparams = nicparams
12670 nics.append(_NICToTuple(self, n))
12672 args["nics"] = nics
12674 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12675 if self.op.disk_template:
12676 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12677 if self.op.runtime_mem:
12678 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12680 return env
12682 def BuildHooksNodes(self):
12683 """Build hooks nodes.
12686 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12687 return (nl, nl)
12689 def _PrepareNicModification(self, params, private, old_ip, old_net,
12690 old_params, cluster, pnode):
12692 update_params_dict = dict([(key, params[key])
12693 for key in constants.NICS_PARAMETERS
12694 if key in params])
12696 req_link = update_params_dict.get(constants.NIC_LINK, None)
12697 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12699 new_net = params.get(constants.INIC_NETWORK, old_net)
12700 if new_net is not None:
12701 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12702 if netparams is None:
12703 raise errors.OpPrereqError("No netparams found for the network"
12704 " %s, probably not connected." % new_net,
12705 errors.ECODE_INVAL)
12706 new_params = dict(netparams)
12708 new_params = _GetUpdatedParams(old_params, update_params_dict)
12710 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12712 new_filled_params = cluster.SimpleFillNIC(new_params)
12713 objects.NIC.CheckParameterSyntax(new_filled_params)
12715 new_mode = new_filled_params[constants.NIC_MODE]
12716 if new_mode == constants.NIC_MODE_BRIDGED:
12717 bridge = new_filled_params[constants.NIC_LINK]
12718 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12720 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12722 self.warn.append(msg)
12724 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12726 elif new_mode == constants.NIC_MODE_ROUTED:
12727 ip = params.get(constants.INIC_IP, old_ip)
12728 if ip is None:
12729 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12730 " on a routed NIC", errors.ECODE_INVAL)
12732 if constants.INIC_MAC in params:
12733 mac = params[constants.INIC_MAC]
12735 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12736 errors.ECODE_INVAL)
12737 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12738 # otherwise generate the MAC address
12739 params[constants.INIC_MAC] = \
12740 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12742 # or validate/reserve the current one
12744 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12745 except errors.ReservationError:
12746 raise errors.OpPrereqError("MAC address '%s' already in use"
12747 " in cluster" % mac,
12748 errors.ECODE_NOTUNIQUE)
12749 elif new_net != old_net:
12750 def get_net_prefix(net):
12752 uuid = self.cfg.LookupNetwork(net)
12754 nobj = self.cfg.GetNetwork(uuid)
12755 return nobj.mac_prefix
12757 new_prefix = get_net_prefix(new_net)
12758 old_prefix = get_net_prefix(old_net)
12759 if old_prefix != new_prefix:
12760 params[constants.INIC_MAC] = \
12761 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12763 #if there is a change in nic-network configuration
12764 new_ip = params.get(constants.INIC_IP, old_ip)
12765 if (new_ip, new_net) != (old_ip, old_net):
12768 if new_ip.lower() == constants.NIC_IP_POOL:
12770 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12771 except errors.ReservationError:
12772 raise errors.OpPrereqError("Unable to get a free IP"
12773 " from the address pool",
12774 errors.ECODE_STATE)
12775 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12776 params[constants.INIC_IP] = new_ip
12777 elif new_ip != old_ip or new_net != old_net:
12779 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12780 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12781 except errors.ReservationError:
12782 raise errors.OpPrereqError("IP %s not available in network %s" %
12784 errors.ECODE_NOTUNIQUE)
12785 elif new_ip.lower() == constants.NIC_IP_POOL:
12786 raise errors.OpPrereqError("ip=pool, but no network found",
12790 if self.op.conflicts_check:
12791 _CheckForConflictingIp(self, new_ip, pnode)
12796 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
12797 except errors.AddressPoolError:
12798 logging.warning("Release IP %s not contained in network %s",
12801 # there are no changes in (net, ip) tuple
12802 elif (old_net is not None and
12803 (req_link is not None or req_mode is not None)):
12804 raise errors.OpPrereqError("Not allowed to change link or mode of"
12805 " a NIC that is connected to a network.",
12806 errors.ECODE_INVAL)
12808 logging.info("new_params %s", new_params)
12809 logging.info("new_filled_params %s", new_filled_params)
12810 private.params = new_params
12811 private.filled = new_filled_params
12813 def CheckPrereq(self):
12814 """Check prerequisites.
12816 This only checks the instance list against the existing names.
12819 # checking the new params on the primary/secondary nodes
12821 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12822 cluster = self.cluster = self.cfg.GetClusterInfo()
12823 assert self.instance is not None, \
12824 "Cannot retrieve locked instance %s" % self.op.instance_name
12825 pnode = instance.primary_node
12826 nodelist = list(instance.all_nodes)
12827 pnode_info = self.cfg.GetNodeInfo(pnode)
12828 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12830 # Prepare disk/NIC modifications
12831 self.diskmod = PrepareContainerMods(self.op.disks, None)
12832 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12833 logging.info("nicmod %s", self.nicmod)
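# Each entry in self.diskmod/self.nicmod is roughly an (op, index, params)
# tuple as produced by PrepareContainerMods; mod[0] is the DDM_* operation
# and mod[2] the parameter dict inspected below.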
12835 # Check the validity of the `provider' parameter
12836 if instance.disk_template in constants.DT_EXT:
12837 for mod in self.diskmod:
12838 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
12839 if mod[0] == constants.DDM_ADD:
12840 if ext_provider is None:
12841 raise errors.OpPrereqError("Instance template is '%s' and parameter"
12842 " '%s' missing, during disk add" %
12843 (instance.disk_template,
12844 constants.IDISK_PROVIDER),
12845 errors.ECODE_NOENT)
12846 elif mod[0] == constants.DDM_MODIFY:
12847 if ext_provider:
12848 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
12849 " modification" % constants.IDISK_PROVIDER,
12850 errors.ECODE_INVAL)
12851 else:
12852 for mod in self.diskmod:
12853 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
12854 if ext_provider is not None:
12855 raise errors.OpPrereqError("Parameter '%s' is only valid for instances"
12856 " of type '%s'" % (constants.IDISK_PROVIDER,
12857 constants.DT_EXT), errors.ECODE_INVAL)
12860 if self.op.os_name and not self.op.force:
12861 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12862 self.op.force_variant)
12863 instance_os = self.op.os_name
12864 else:
12865 instance_os = instance.os
12867 assert not (self.op.disk_template and self.op.disks), \
12868 "Can't modify disk template and apply disk changes at the same time"
12870 if self.op.disk_template:
12871 if instance.disk_template == self.op.disk_template:
12872 raise errors.OpPrereqError("Instance already has disk template %s" %
12873 instance.disk_template, errors.ECODE_INVAL)
12875 if (instance.disk_template,
12876 self.op.disk_template) not in self._DISK_CONVERSIONS:
12877 raise errors.OpPrereqError("Unsupported disk template conversion from"
12878 " %s to %s" % (instance.disk_template,
12879 self.op.disk_template),
12880 errors.ECODE_INVAL)
12881 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12882 msg="cannot change disk template")
12883 if self.op.disk_template in constants.DTS_INT_MIRROR:
12884 if self.op.remote_node == pnode:
12885 raise errors.OpPrereqError("Given new secondary node %s is the same"
12886 " as the primary node of the instance" %
12887 self.op.remote_node, errors.ECODE_STATE)
12888 _CheckNodeOnline(self, self.op.remote_node)
12889 _CheckNodeNotDrained(self, self.op.remote_node)
12890 # FIXME: here we assume that the old instance type is DT_PLAIN
12891 assert instance.disk_template == constants.DT_PLAIN
12892 disks = [{constants.IDISK_SIZE: d.size,
12893 constants.IDISK_VG: d.logical_id[0]}
12894 for d in instance.disks]
12895 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12896 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12898 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12899 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12900 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12901 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12902 ignore=self.op.ignore_ipolicy)
12903 if pnode_info.group != snode_info.group:
12904 self.LogWarning("The primary and secondary nodes are in two"
12905 " different node groups; the disk parameters"
12906 " from the first disk's node group will be"
12907 " used")
12909 # hvparams processing
12910 if self.op.hvparams:
12911 hv_type = instance.hypervisor
12912 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12913 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12914 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12917 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12918 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12919 self.hv_proposed = self.hv_new = hv_new # the new actual values
12920 self.hv_inst = i_hvdict # the new dict (without defaults)
12921 else:
12922 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12923 instance.hvparams)
12924 self.hv_new = self.hv_inst = {}
12926 # beparams processing
12927 if self.op.beparams:
12928 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12930 objects.UpgradeBeParams(i_bedict)
12931 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12932 be_new = cluster.SimpleFillBE(i_bedict)
12933 self.be_proposed = self.be_new = be_new # the new actual values
12934 self.be_inst = i_bedict # the new dict (without defaults)
12935 else:
12936 self.be_new = self.be_inst = {}
12937 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12938 be_old = cluster.FillBE(instance)
12940 # CPU param validation -- checking every time a parameter is
12941 # changed to cover all cases where either CPU mask or vcpus have
12942 # changed
12943 if (constants.BE_VCPUS in self.be_proposed and
12944 constants.HV_CPU_MASK in self.hv_proposed):
12945 cpu_list = \
12946 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12947 # Verify mask is consistent with number of vCPUs. Can skip this
12948 # test if only 1 entry in the CPU mask, which means same mask
12949 # is applied to all vCPUs.
12950 if (len(cpu_list) > 1 and
12951 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12952 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12954 (self.be_proposed[constants.BE_VCPUS],
12955 self.hv_proposed[constants.HV_CPU_MASK]),
12956 errors.ECODE_INVAL)
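# Illustrative example (not taken from the code above): with vcpus=3 a
# multi-CPU mask such as "0-1:2:all" has one entry per vCPU and would pass
# this check, while a two-entry mask like "0-1:2" would not.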
12958 # Only perform this test if a new CPU mask is given
12959 if constants.HV_CPU_MASK in self.hv_new:
12960 # Calculate the largest CPU number requested
12961 max_requested_cpu = max(map(max, cpu_list))
12962 # Check that all of the instance's nodes have enough physical CPUs to
12963 # satisfy the requested CPU mask
12964 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12965 max_requested_cpu + 1, instance.hypervisor)
12967 # osparams processing
12968 if self.op.osparams:
12969 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12970 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12971 self.os_inst = i_osdict # the new dict (without defaults)
12972 else:
12973 self.os_inst = {}
12975 self.warn = []
12977 #TODO(dynmem): do the appropriate check involving MINMEM
12978 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12979 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12980 mem_check_list = [pnode]
12981 if be_new[constants.BE_AUTO_BALANCE]:
12982 # either we changed auto_balance to yes or it was from before
12983 mem_check_list.extend(instance.secondary_nodes)
12984 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12985 instance.hypervisor)
12986 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12987 [instance.hypervisor])
12988 pninfo = nodeinfo[pnode]
12989 msg = pninfo.fail_msg
12990 if msg:
12991 # Assume the primary node is unreachable and go ahead
12992 self.warn.append("Can't get info from primary node %s: %s" %
12993 (pnode, msg))
12994 else:
12995 (_, _, (pnhvinfo, )) = pninfo.payload
12996 if not isinstance(pnhvinfo.get("memory_free", None), int):
12997 self.warn.append("Node data from primary node %s doesn't contain"
12998 " free memory information" % pnode)
12999 elif instance_info.fail_msg:
13000 self.warn.append("Can't get instance runtime information: %s" %
13001 instance_info.fail_msg)
13002 else:
13003 if instance_info.payload:
13004 current_mem = int(instance_info.payload["memory"])
13005 else:
13006 # Assume instance not running
13007 # (there is a slight race condition here, but it's not very
13008 # probable, and we have no other way to check)
13009 # TODO: Describe race condition
13010 current_mem = 0
13011 #TODO(dynmem): do the appropriate check involving MINMEM
13012 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13013 pnhvinfo["memory_free"])
13014 if miss_mem > 0:
13015 raise errors.OpPrereqError("This change will prevent the instance"
13016 " from starting, due to %d MB of memory"
13017 " missing on its primary node" %
13018 miss_mem,
13019 errors.ECODE_NORES)
13021 if be_new[constants.BE_AUTO_BALANCE]:
13022 for node, nres in nodeinfo.items():
13023 if node not in instance.secondary_nodes:
13024 continue
13025 nres.Raise("Can't get info from secondary node %s" % node,
13026 prereq=True, ecode=errors.ECODE_STATE)
13027 (_, _, (nhvinfo, )) = nres.payload
13028 if not isinstance(nhvinfo.get("memory_free", None), int):
13029 raise errors.OpPrereqError("Secondary node %s didn't return free"
13030 " memory information" % node,
13031 errors.ECODE_STATE)
13032 #TODO(dynmem): do the appropriate check involving MINMEM
13033 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13034 raise errors.OpPrereqError("This change will prevent the instance"
13035 " from failover to its secondary node"
13036 " %s, due to not enough memory" % node,
13037 errors.ECODE_STATE)
13039 if self.op.runtime_mem:
13040 remote_info = self.rpc.call_instance_info(instance.primary_node,
13041 instance.name,
13042 instance.hypervisor)
13043 remote_info.Raise("Error checking node %s" % instance.primary_node)
13044 if not remote_info.payload: # not running already
13045 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
13046 errors.ECODE_STATE)
13048 current_memory = remote_info.payload["memory"]
13049 if (not self.op.force and
13050 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13051 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13052 raise errors.OpPrereqError("Instance %s must have memory between %d"
13053 " and %d MB of memory unless --force is"
13054 " given" % (instance.name,
13055 self.be_proposed[constants.BE_MINMEM],
13056 self.be_proposed[constants.BE_MAXMEM]),
13057 errors.ECODE_INVAL)
13059 if self.op.runtime_mem > current_memory:
13060 _CheckNodeFreeMemory(self, instance.primary_node,
13061 "ballooning memory for instance %s" %
13062 instance.name,
13063 self.op.runtime_mem - current_memory,
13064 instance.hypervisor)
13066 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13067 raise errors.OpPrereqError("Disk operations not supported for"
13068 " diskless instances",
13069 errors.ECODE_INVAL)
13071 def _PrepareNicCreate(_, params, private):
13072 self._PrepareNicModification(params, private, None, None,
13073 {}, cluster, pnode)
13074 return (None, None)
13076 def _PrepareNicMod(_, nic, params, private):
13077 self._PrepareNicModification(params, private, nic.ip, nic.network,
13078 nic.nicparams, cluster, pnode)
13081 def _PrepareNicRemove(_, params, private):
13082 ip = params.ip
13083 net = params.network
13084 if net is not None and ip is not None:
13085 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13087 # Verify NIC changes (operating on copy)
13088 nics = instance.nics[:]
13089 ApplyContainerMods("NIC", nics, None, self.nicmod,
13090 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13091 if len(nics) > constants.MAX_NICS:
13092 raise errors.OpPrereqError("Instance has too many network interfaces"
13093 " (%d), cannot add more" % constants.MAX_NICS,
13094 errors.ECODE_STATE)
13097 # Verify disk changes (operating on a copy)
13098 disks = instance.disks[:]
13099 ApplyContainerMods("disk", disks, None, self.diskmod,
13101 if len(disks) > constants.MAX_DISKS:
13102 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13103 " more" % constants.MAX_DISKS,
13104 errors.ECODE_STATE)
13106 if self.op.offline is not None:
13107 if self.op.offline:
13108 msg = "can't change to offline"
13109 else:
13110 msg = "can't change to online"
13111 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13113 # Pre-compute NIC changes (necessary to use result in hooks)
13114 self._nic_chgdesc = []
13115 if self.nicmod:
13116 # Operate on copies as this is still in prereq
13117 nics = [nic.Copy() for nic in instance.nics]
13118 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13119 self._CreateNewNic, self._ApplyNicMods,
13120 self._RemoveNic)
13121 self._new_nics = nics
13122 else:
13123 self._new_nics = None
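# A value of None for self._new_nics signals to Exec() that no NIC changes
# were requested, so instance.nics is left untouched there.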
13126 def _ConvertPlainToDrbd(self, feedback_fn):
13127 """Converts an instance from plain to drbd.
13130 feedback_fn("Converting template to drbd")
13131 instance = self.instance
13132 pnode = instance.primary_node
13133 snode = self.op.remote_node
13135 assert instance.disk_template == constants.DT_PLAIN
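# Conversion outline: create the new DRBD data/meta volumes, rename the
# existing LVs so they become the DRBD data children, assemble the DRBD
# devices on top, then update the configuration and wait for sync.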
13137 # create a fake disk info for _GenerateDiskTemplate
13138 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13139 constants.IDISK_VG: d.logical_id[0]}
13140 for d in instance.disks]
13141 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13142 instance.name, pnode, [snode],
13143 disk_info, None, None, 0, feedback_fn,
13145 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13147 info = _GetInstanceInfoText(instance)
13148 feedback_fn("Creating additional volumes...")
13149 # first, create the missing data and meta devices
13150 for disk in anno_disks:
13151 # unfortunately this is... not too nice
13152 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13154 for child in disk.children:
13155 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13156 # at this stage, all new LVs have been created, we can rename the
13158 feedback_fn("Renaming original volumes...")
13159 rename_list = [(o, n.children[0].logical_id)
13160 for (o, n) in zip(instance.disks, new_disks)]
13161 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13162 result.Raise("Failed to rename original LVs")
13164 feedback_fn("Initializing DRBD devices...")
13165 # all child devices are in place, we can now create the DRBD devices
13166 for disk in anno_disks:
13167 for node in [pnode, snode]:
13168 f_create = node == pnode
13169 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13171 # at this point, the instance has been modified
13172 instance.disk_template = constants.DT_DRBD8
13173 instance.disks = new_disks
13174 self.cfg.Update(instance, feedback_fn)
13176 # Release node locks while waiting for sync
13177 _ReleaseLocks(self, locking.LEVEL_NODE)
13179 # disks are created, waiting for sync
13180 disk_abort = not _WaitForSync(self, instance,
13181 oneshot=not self.op.wait_for_sync)
13182 if disk_abort:
13183 raise errors.OpExecError("There are some degraded disks for"
13184 " this instance, please cleanup manually")
13186 # Node resource locks will be released by caller
13188 def _ConvertDrbdToPlain(self, feedback_fn):
13189 """Converts an instance from drbd to plain.
13192 instance = self.instance
13194 assert len(instance.secondary_nodes) == 1
13195 assert instance.disk_template == constants.DT_DRBD8
13197 pnode = instance.primary_node
13198 snode = instance.secondary_nodes[0]
13199 feedback_fn("Converting template to plain")
13201 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13202 new_disks = [d.children[0] for d in instance.disks]
13204 # copy over size and mode
13205 for parent, child in zip(old_disks, new_disks):
13206 child.size = parent.size
13207 child.mode = parent.mode
13209 # this is a DRBD disk, return its port to the pool
13210 # NOTE: this must be done right before the call to cfg.Update!
13211 for disk in old_disks:
13212 tcp_port = disk.logical_id[2]
13213 self.cfg.AddTcpUdpPort(tcp_port)
13215 # update instance structure
13216 instance.disks = new_disks
13217 instance.disk_template = constants.DT_PLAIN
13218 self.cfg.Update(instance, feedback_fn)
13220 # Release locks in case removing disks takes a while
13221 _ReleaseLocks(self, locking.LEVEL_NODE)
13223 feedback_fn("Removing volumes on the secondary node...")
13224 for disk in old_disks:
13225 self.cfg.SetDiskID(disk, snode)
13226 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13227 if msg:
13228 self.LogWarning("Could not remove block device %s on node %s,"
13229 " continuing anyway: %s", disk.iv_name, snode, msg)
13231 feedback_fn("Removing unneeded volumes on the primary node...")
13232 for idx, disk in enumerate(old_disks):
13233 meta = disk.children[1]
13234 self.cfg.SetDiskID(meta, pnode)
13235 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13236 if msg:
13237 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13238 " continuing anyway: %s", idx, pnode, msg)
13240 def _CreateNewDisk(self, idx, params, _):
13241 """Creates a new disk.
13244 instance = self.instance
13247 if instance.disk_template in constants.DTS_FILEBASED:
13248 (file_driver, file_path) = instance.disks[0].logical_id
13249 file_path = os.path.dirname(file_path)
13250 else:
13251 file_driver = file_path = None
13253 disk = \
13254 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13255 instance.primary_node, instance.secondary_nodes,
13256 [params], file_path, file_driver, idx,
13257 self.Log, self.diskparams)[0]
13259 info = _GetInstanceInfoText(instance)
13261 logging.info("Creating volume %s for instance %s",
13262 disk.iv_name, instance.name)
13263 # Note: this needs to be kept in sync with _CreateDisks
13265 for node in instance.all_nodes:
13266 f_create = (node == instance.primary_node)
13267 try:
13268 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13269 except errors.OpExecError, err:
13270 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13271 disk.iv_name, disk, node, err)
13273 if self.op.hotplug and disk.pci:
13274 disk_ok, device_info = _AssembleInstanceDisks(self, self.instance,
13275 [disk], check=False)
13276 _, _, dev_path = device_info[0]
13277 result = self.rpc.call_hot_add_disk(self.instance.primary_node,
13278 self.instance, disk, dev_path, idx)
13280 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13284 def _ModifyDisk(idx, disk, params, _):
13285 """Modifies a disk.
13287 """
13288 disk.mode = params[constants.IDISK_MODE]
13290 return [
13291 ("disk.mode/%d" % idx, disk.mode),
13292 ]
13294 def _RemoveDisk(self, idx, root, _):
13298 #TODO: log warning in case hotplug is not possible
13300 if root.pci and not self.op.hotplug:
13301 raise errors.OpPrereqError("Cannot remove a disk that has"
13303 " without removing it with hotplug",
13304 errors.ECODE_INVAL)
13305 if self.op.hotplug and root.pci:
13306 self.rpc.call_hot_del_disk(self.instance.primary_node,
13307 self.instance, root, idx)
13308 _ShutdownInstanceDisks(self, self.instance, [root])
13309 self.cfg.UpdatePCIInfo(self.instance.name, root.pci)
13311 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13312 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13313 self.cfg.SetDiskID(disk, node)
13314 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13316 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13317 " continuing anyway", idx, node, msg)
13319 # if this is a DRBD disk, return its port to the pool
13320 if root.dev_type in constants.LDS_DRBD:
13321 self.cfg.AddTcpUdpPort(root.logical_id[2])
13323 def _CreateNewNic(self, idx, params, private):
13324 """Creates data structure for a new network interface.
13327 mac = params[constants.INIC_MAC]
13328 ip = params.get(constants.INIC_IP, None)
13329 network = params.get(constants.INIC_NETWORK, None)
13330 #TODO: not private.filled?? can a nic have no nicparams??
13331 nicparams = private.filled
13333 nic = objects.NIC(mac=mac, ip=ip, network=network, nicparams=nicparams)
13335 #TODO: log warning in case hotplug is not possible
13338 if self.op.hotplug:
13339 nic_idx, pci = _GetPCIInfo(self, 'nics')
13342 result = self.rpc.call_hot_add_nic(self.instance.primary_node,
13343 self.instance, nic, idx)
13346 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13347 (mac, ip, private.filled[constants.NIC_MODE],
13348 private.filled[constants.NIC_LINK],
13353 def _ApplyNicMods(self, idx, nic, params, private):
13354 """Modifies a network interface.
13356 """
13357 changes = []
13359 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13360 if key in params:
13361 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13362 setattr(nic, key, params[key])
13364 if private.filled:
13365 nic.nicparams = private.filled
13367 for (key, val) in nic.nicparams.items():
13368 changes.append(("nic.%s/%d" % (key, idx), val))
13370 #TODO: log warning in case hotplug is not possible
13372 if self.op.hotplug and nic.pci:
13373 self.rpc.call_hot_del_nic(self.instance.primary_node,
13374 self.instance, nic, idx)
13375 result = self.rpc.call_hot_add_nic(self.instance.primary_node,
13376 self.instance, nic, idx)
13378 return changes
13379 def _RemoveNic(self, idx, nic, private):
13380 if nic.pci and not self.op.hotplug:
13381 raise errors.OpPrereqError("Cannot remove a nic that has been hotplugged"
13382 " without removing it with hotplug",
13383 errors.ECODE_INVAL)
13384 #TODO: log warning in case hotplug is not possible
13386 if self.op.hotplug and nic.pci:
13387 self.rpc.call_hot_del_nic(self.instance.primary_node,
13388 self.instance, nic, idx)
13389 self.cfg.UpdatePCIInfo(self.instance.name, nic.pci)
13392 def Exec(self, feedback_fn):
13393 """Modifies an instance.
13395 All parameters take effect only at the next restart of the instance.
13398 # Process here the warnings from CheckPrereq, as we don't have a
13399 # feedback_fn there.
13400 # TODO: Replace with self.LogWarning
13401 for warn in self.warn:
13402 feedback_fn("WARNING: %s" % warn)
13404 assert ((self.op.disk_template is None) ^
13405 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13406 "Not owning any node resource locks"
13408 result = []
13409 instance = self.instance
13412 if self.op.runtime_mem:
13413 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13414 instance,
13415 self.op.runtime_mem)
13416 rpcres.Raise("Cannot modify instance runtime memory")
13417 result.append(("runtime_memory", self.op.runtime_mem))
13419 # Apply disk changes
13420 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13421 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13422 _UpdateIvNames(0, instance.disks)
13424 if self.op.disk_template:
13425 if __debug__:
13426 check_nodes = set(instance.all_nodes)
13427 if self.op.remote_node:
13428 check_nodes.add(self.op.remote_node)
13429 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13430 owned = self.owned_locks(level)
13431 assert not (check_nodes - owned), \
13432 ("Not owning the correct locks, owning %r, expected at least %r" %
13433 (owned, check_nodes))
13435 r_shut = _ShutdownInstanceDisks(self, instance)
13436 if not r_shut:
13437 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13438 " proceed with disk template conversion")
13439 mode = (instance.disk_template, self.op.disk_template)
13440 try:
13441 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13442 except:
13443 self.cfg.ReleaseDRBDMinors(instance.name)
13444 raise
13445 result.append(("disk_template", self.op.disk_template))
13447 assert instance.disk_template == self.op.disk_template, \
13448 ("Expected disk template '%s', found '%s'" %
13449 (self.op.disk_template, instance.disk_template))
13451 # Release node and resource locks if there are any (they might already have
13452 # been released during disk conversion)
13453 _ReleaseLocks(self, locking.LEVEL_NODE)
13454 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13456 # Apply NIC changes
13457 if self._new_nics is not None:
13458 instance.nics = self._new_nics
13459 result.extend(self._nic_chgdesc)
13462 if self.op.hvparams:
13463 instance.hvparams = self.hv_inst
13464 for key, val in self.op.hvparams.iteritems():
13465 result.append(("hv/%s" % key, val))
13468 if self.op.beparams:
13469 instance.beparams = self.be_inst
13470 for key, val in self.op.beparams.iteritems():
13471 result.append(("be/%s" % key, val))
13474 if self.op.os_name:
13475 instance.os = self.op.os_name
13478 if self.op.osparams:
13479 instance.osparams = self.os_inst
13480 for key, val in self.op.osparams.iteritems():
13481 result.append(("os/%s" % key, val))
13483 if self.op.offline is None:
13484 # Ignore
13485 pass
13486 elif self.op.offline:
13487 # Mark instance as offline
13488 self.cfg.MarkInstanceOffline(instance.name)
13489 result.append(("admin_state", constants.ADMINST_OFFLINE))
13490 else:
13491 # Mark instance as online, but stopped
13492 self.cfg.MarkInstanceDown(instance.name)
13493 result.append(("admin_state", constants.ADMINST_DOWN))
13495 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13497 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13498 self.owned_locks(locking.LEVEL_NODE)), \
13499 "All node locks should have been released by now"
13501 return result
13503 _DISK_CONVERSIONS = {
13504 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13505 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13506 }
13509 class LUInstanceChangeGroup(LogicalUnit):
13510 HPATH = "instance-change-group"
13511 HTYPE = constants.HTYPE_INSTANCE
13514 def ExpandNames(self):
13515 self.share_locks = _ShareAll()
13516 self.needed_locks = {
13517 locking.LEVEL_NODEGROUP: [],
13518 locking.LEVEL_NODE: [],
13519 }
13521 self._ExpandAndLockInstance()
13523 if self.op.target_groups:
13524 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13525 self.op.target_groups)
13526 else:
13527 self.req_target_uuids = None
13529 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13531 def DeclareLocks(self, level):
13532 if level == locking.LEVEL_NODEGROUP:
13533 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13535 if self.req_target_uuids:
13536 lock_groups = set(self.req_target_uuids)
13537 else:
13538 # Lock all groups used by instance optimistically; this requires going
13539 # via the node before it's locked, requiring verification later on
13540 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13541 lock_groups.update(instance_groups)
13542 else:
13543 # No target groups, need to lock all of them
13544 lock_groups = locking.ALL_SET
13546 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13548 elif level == locking.LEVEL_NODE:
13549 if self.req_target_uuids:
13550 # Lock all nodes used by instances
13551 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13552 self._LockInstancesNodes()
13554 # Lock all nodes in all potential target groups
13555 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13556 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13557 member_nodes = [node_name
13558 for group in lock_groups
13559 for node_name in self.cfg.GetNodeGroup(group).members]
13560 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13561 else:
13562 # Lock all nodes as all groups are potential targets
13563 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
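# Without explicit target groups the iallocator may pick any group, so the
# nodes of every group (here: all nodes) have to be locked up front.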
13565 def CheckPrereq(self):
13566 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13567 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13568 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13570 assert (self.req_target_uuids is None or
13571 owned_groups.issuperset(self.req_target_uuids))
13572 assert owned_instances == set([self.op.instance_name])
13574 # Get instance information
13575 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13577 # Check if node groups for locked instance are still correct
13578 assert owned_nodes.issuperset(self.instance.all_nodes), \
13579 ("Instance %s's nodes changed while we kept the lock" %
13580 self.op.instance_name)
13582 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13583 owned_groups)
13585 if self.req_target_uuids:
13586 # User requested specific target groups
13587 self.target_uuids = frozenset(self.req_target_uuids)
13588 else:
13589 # All groups except those used by the instance are potential targets
13590 self.target_uuids = owned_groups - inst_groups
13592 conflicting_groups = self.target_uuids & inst_groups
13593 if conflicting_groups:
13594 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13595 " used by the instance '%s'" %
13596 (utils.CommaJoin(conflicting_groups),
13597 self.op.instance_name),
13598 errors.ECODE_INVAL)
13600 if not self.target_uuids:
13601 raise errors.OpPrereqError("There are no possible target groups",
13602 errors.ECODE_INVAL)
13604 def BuildHooksEnv(self):
13605 """Build hooks env.
13607 """
13608 assert self.target_uuids
13610 env = {
13611 "TARGET_GROUPS": " ".join(self.target_uuids),
13612 }
13614 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13616 return env
13618 def BuildHooksNodes(self):
13619 """Build hooks nodes.
13622 mn = self.cfg.GetMasterNode()
13623 return ([mn], [mn])
13625 def Exec(self, feedback_fn):
13626 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13628 assert instances == [self.op.instance_name], "Instance not locked"
13630 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13631 instances=instances, target_groups=list(self.target_uuids))
13633 ial.Run(self.op.iallocator)
13635 if not ial.success:
13636 raise errors.OpPrereqError("Can't compute solution for changing group of"
13637 " instance '%s' using iallocator '%s': %s" %
13638 (self.op.instance_name, self.op.iallocator,
13640 errors.ECODE_NORES)
13642 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13644 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13645 " instance '%s'", len(jobs), self.op.instance_name)
13647 return ResultWithJobs(jobs)
13650 class LUBackupQuery(NoHooksLU):
13651 """Query the exports list
13656 def CheckArguments(self):
13657 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13658 ["node", "export"], self.op.use_locking)
13660 def ExpandNames(self):
13661 self.expq.ExpandNames(self)
13663 def DeclareLocks(self, level):
13664 self.expq.DeclareLocks(self, level)
13666 def Exec(self, feedback_fn):
13667 result = {}
13669 for (node, expname) in self.expq.OldStyleQuery(self):
13670 if expname is None:
13671 result[node] = False
13672 else:
13673 result.setdefault(node, []).append(expname)
13675 return result
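# The old-style result maps each node name either to False (its export list
# could not be fetched) or to the list of export names found on it.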
13678 class _ExportQuery(_QueryBase):
13679 FIELDS = query.EXPORT_FIELDS
13681 #: The node name is not a unique key for this query
13682 SORT_FIELD = "node"
13684 def ExpandNames(self, lu):
13685 lu.needed_locks = {}
13687 # The following variables interact with _QueryBase._GetNames
13688 if self.names:
13689 self.wanted = _GetWantedNodes(lu, self.names)
13690 else:
13691 self.wanted = locking.ALL_SET
13693 self.do_locking = self.use_locking
13695 if self.do_locking:
13696 lu.share_locks = _ShareAll()
13697 lu.needed_locks = {
13698 locking.LEVEL_NODE: self.wanted,
13699 }
13701 def DeclareLocks(self, lu, level):
13702 pass
13704 def _GetQueryData(self, lu):
13705 """Computes the list of nodes and their attributes.
13708 # Locking is not used
13710 assert not (compat.any(lu.glm.is_owned(level)
13711 for level in locking.LEVELS
13712 if level != locking.LEVEL_CLUSTER) or
13713 self.do_locking or self.use_locking)
13715 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13717 result = []
13719 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13720 if nres.fail_msg:
13721 result.append((node, None))
13722 else:
13723 result.extend((node, expname) for expname in nres.payload)
13725 return result
13728 class LUBackupPrepare(NoHooksLU):
13729 """Prepares an instance for an export and returns useful information.
13734 def ExpandNames(self):
13735 self._ExpandAndLockInstance()
13737 def CheckPrereq(self):
13738 """Check prerequisites.
13741 instance_name = self.op.instance_name
13743 self.instance = self.cfg.GetInstanceInfo(instance_name)
13744 assert self.instance is not None, \
13745 "Cannot retrieve locked instance %s" % self.op.instance_name
13746 _CheckNodeOnline(self, self.instance.primary_node)
13748 self._cds = _GetClusterDomainSecret()
13750 def Exec(self, feedback_fn):
13751 """Prepares an instance for an export.
13754 instance = self.instance
13756 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13757 salt = utils.GenerateSecret(8)
13759 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13760 result = self.rpc.call_x509_cert_create(instance.primary_node,
13761 constants.RIE_CERT_VALIDITY)
13762 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13764 (name, cert_pem) = result.payload
13766 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13767 cert_pem)
13769 return {
13770 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13771 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13772 salt),
13773 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13774 }
13776 return None
13779 class LUBackupExport(LogicalUnit):
13780 """Export an instance to an image in the cluster.
13783 HPATH = "instance-export"
13784 HTYPE = constants.HTYPE_INSTANCE
13787 def CheckArguments(self):
13788 """Check the arguments.
13791 self.x509_key_name = self.op.x509_key_name
13792 self.dest_x509_ca_pem = self.op.destination_x509_ca
13794 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13795 if not self.x509_key_name:
13796 raise errors.OpPrereqError("Missing X509 key name for encryption",
13797 errors.ECODE_INVAL)
13799 if not self.dest_x509_ca_pem:
13800 raise errors.OpPrereqError("Missing destination X509 CA",
13801 errors.ECODE_INVAL)
13803 def ExpandNames(self):
13804 self._ExpandAndLockInstance()
13806 # Lock all nodes for local exports
13807 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13808 # FIXME: lock only instance primary and destination node
13810 # Sad but true, for now we have do lock all nodes, as we don't know where
13811 # the previous export might be, and in this LU we search for it and
13812 # remove it from its current node. In the future we could fix this by:
13813 # - making a tasklet to search (share-lock all), then create the
13814 # new one, then one to remove, after
13815 # - removing the removal operation altogether
13816 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13818 def DeclareLocks(self, level):
13819 """Last minute lock declaration."""
13820 # All nodes are locked anyway, so nothing to do here.
13822 def BuildHooksEnv(self):
13823 """Build hooks env.
13825 This will run on the master, primary node and target node.
13829 "EXPORT_MODE": self.op.mode,
13830 "EXPORT_NODE": self.op.target_node,
13831 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13832 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13833 # TODO: Generic function for boolean env variables
13834 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13835 }
13837 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13839 return env
13841 def BuildHooksNodes(self):
13842 """Build hooks nodes.
13845 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13847 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13848 nl.append(self.op.target_node)
13850 return (nl, nl)
13852 def CheckPrereq(self):
13853 """Check prerequisites.
13855 This checks that the instance and node names are valid.
13858 instance_name = self.op.instance_name
13860 self.instance = self.cfg.GetInstanceInfo(instance_name)
13861 assert self.instance is not None, \
13862 "Cannot retrieve locked instance %s" % self.op.instance_name
13863 _CheckNodeOnline(self, self.instance.primary_node)
13865 if (self.op.remove_instance and
13866 self.instance.admin_state == constants.ADMINST_UP and
13867 not self.op.shutdown):
13868 raise errors.OpPrereqError("Can not remove instance without shutting it"
13871 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13872 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13873 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13874 assert self.dst_node is not None
13876 _CheckNodeOnline(self, self.dst_node.name)
13877 _CheckNodeNotDrained(self, self.dst_node.name)
13880 self.dest_disk_info = None
13881 self.dest_x509_ca = None
13883 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13884 self.dst_node = None
13886 if len(self.op.target_node) != len(self.instance.disks):
13887 raise errors.OpPrereqError(("Received destination information for %s"
13888 " disks, but instance %s has %s disks") %
13889 (len(self.op.target_node), instance_name,
13890 len(self.instance.disks)),
13891 errors.ECODE_INVAL)
13893 cds = _GetClusterDomainSecret()
13895 # Check X509 key name
13896 try:
13897 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13898 except (TypeError, ValueError), err:
13899 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13901 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13902 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13903 errors.ECODE_INVAL)
13905 # Load and verify CA
13906 try:
13907 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13908 except OpenSSL.crypto.Error, err:
13909 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13910 (err, ), errors.ECODE_INVAL)
13912 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13913 if errcode is not None:
13914 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13915 (msg, ), errors.ECODE_INVAL)
13917 self.dest_x509_ca = cert
13919 # Verify target information
13920 disk_info = []
13921 for idx, disk_data in enumerate(self.op.target_node):
13922 try:
13923 (host, port, magic) = \
13924 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13925 except errors.GenericError, err:
13926 raise errors.OpPrereqError("Target info for disk %s: %s" %
13927 (idx, err), errors.ECODE_INVAL)
13929 disk_info.append((host, port, magic))
13931 assert len(disk_info) == len(self.op.target_node)
13932 self.dest_disk_info = disk_info
13934 else:
13935 raise errors.ProgrammerError("Unhandled export mode %r" %
13936 self.op.mode)
13938 # instance disk type verification
13939 # TODO: Implement export support for file-based disks
13940 for disk in self.instance.disks:
13941 if disk.dev_type == constants.LD_FILE:
13942 raise errors.OpPrereqError("Export not supported for instances with"
13943 " file-based disks", errors.ECODE_INVAL)
13945 def _CleanupExports(self, feedback_fn):
13946 """Removes exports of current instance from all other nodes.
13948 If an instance in a cluster with nodes A..D was exported to node C, its
13949 exports will be removed from the nodes A, B and D.
13952 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13954 nodelist = self.cfg.GetNodeList()
13955 nodelist.remove(self.dst_node.name)
13957 # on one-node clusters nodelist will be empty after the removal
13958 # if we proceed the backup would be removed because OpBackupQuery
13959 # substitutes an empty list with the full cluster node list.
13960 iname = self.instance.name
13961 if nodelist:
13962 feedback_fn("Removing old exports for instance %s" % iname)
13963 exportlist = self.rpc.call_export_list(nodelist)
13964 for node in exportlist:
13965 if exportlist[node].fail_msg:
13966 continue
13967 if iname in exportlist[node].payload:
13968 msg = self.rpc.call_export_remove(node, iname).fail_msg
13969 if msg:
13970 self.LogWarning("Could not remove older export for instance %s"
13971 " on node %s: %s", iname, node, msg)
13973 def Exec(self, feedback_fn):
13974 """Export an instance to an image in the cluster.
13977 assert self.op.mode in constants.EXPORT_MODES
13979 instance = self.instance
13980 src_node = instance.primary_node
13982 if self.op.shutdown:
13983 # shutdown the instance, but not the disks
13984 feedback_fn("Shutting down instance %s" % instance.name)
13985 result = self.rpc.call_instance_shutdown(src_node, instance,
13986 self.op.shutdown_timeout)
13987 # TODO: Maybe ignore failures if ignore_remove_failures is set
13988 result.Raise("Could not shutdown instance %s on"
13989 " node %s" % (instance.name, src_node))
13991 # set the disks ID correctly since call_instance_start needs the
13992 # correct drbd minor to create the symlinks
13993 for disk in instance.disks:
13994 self.cfg.SetDiskID(disk, src_node)
13996 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13998 if activate_disks:
13999 # Activate the instance disks if we're exporting a stopped instance
14000 feedback_fn("Activating disks for %s" % instance.name)
14001 _StartInstanceDisks(self, instance, None)
14003 try:
14004 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14005 instance)
14007 helper.CreateSnapshots()
14008 try:
14009 if (self.op.shutdown and
14010 instance.admin_state == constants.ADMINST_UP and
14011 not self.op.remove_instance):
14012 assert not activate_disks
14013 feedback_fn("Starting instance %s" % instance.name)
14014 result = self.rpc.call_instance_start(src_node,
14015 (instance, None, None), False)
14016 msg = result.fail_msg
14017 if msg:
14018 feedback_fn("Failed to start instance: %s" % msg)
14019 _ShutdownInstanceDisks(self, instance)
14020 raise errors.OpExecError("Could not start instance: %s" % msg)
14022 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14023 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14024 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14025 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14026 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14028 (key_name, _, _) = self.x509_key_name
14030 dest_ca_pem = \
14031 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14032 self.dest_x509_ca)
14034 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14035 key_name, dest_ca_pem,
14036 timeouts)
14037 finally:
14038 helper.Cleanup()
14040 # Check for backwards compatibility
14041 assert len(dresults) == len(instance.disks)
14042 assert compat.all(isinstance(i, bool) for i in dresults), \
14043 "Not all results are boolean: %r" % dresults
14045 finally:
14046 if activate_disks:
14047 feedback_fn("Deactivating disks for %s" % instance.name)
14048 _ShutdownInstanceDisks(self, instance)
14050 if not (compat.all(dresults) and fin_resu):
14051 failures = []
14052 if not fin_resu:
14053 failures.append("export finalization")
14054 if not compat.all(dresults):
14055 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14056 if not dsk)
14057 failures.append("disk export: disk(s) %s" % fdsk)
14059 raise errors.OpExecError("Export failed, errors in %s" %
14060 utils.CommaJoin(failures))
14062 # At this point, the export was successful, we can cleanup/finish
14064 # Remove instance if requested
14065 if self.op.remove_instance:
14066 feedback_fn("Removing instance %s" % instance.name)
14067 _RemoveInstance(self, feedback_fn, instance,
14068 self.op.ignore_remove_failures)
14070 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14071 self._CleanupExports(feedback_fn)
14073 return fin_resu, dresults
14076 class LUBackupRemove(NoHooksLU):
14077 """Remove exports related to the named instance.
14082 def ExpandNames(self):
14083 self.needed_locks = {}
14084 # We need all nodes to be locked in order for RemoveExport to work, but we
14085 # don't need to lock the instance itself, as nothing will happen to it (and
14086 # we can remove exports also for a removed instance)
14087 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14089 def Exec(self, feedback_fn):
14090 """Remove any export.
14092 """
14093 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14094 # If the instance was not found we'll try with the name that was passed in.
14095 # This will only work if it was an FQDN, though.
14096 fqdn_warn = False
14097 if not instance_name:
14098 fqdn_warn = True
14099 instance_name = self.op.instance_name
14101 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14102 exportlist = self.rpc.call_export_list(locked_nodes)
14103 found = False
14104 for node in exportlist:
14105 msg = exportlist[node].fail_msg
14106 if msg:
14107 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14108 continue
14109 if instance_name in exportlist[node].payload:
14110 found = True
14111 result = self.rpc.call_export_remove(node, instance_name)
14112 msg = result.fail_msg
14113 if msg:
14114 logging.error("Could not remove export for instance %s"
14115 " on node %s: %s", instance_name, node, msg)
14117 if fqdn_warn and not found:
14118 feedback_fn("Export not found. If trying to remove an export belonging"
14119 " to a deleted instance please use its Fully Qualified"
14120 " Domain Name.")
14123 class LUGroupAdd(LogicalUnit):
14124 """Logical unit for creating node groups.
14127 HPATH = "group-add"
14128 HTYPE = constants.HTYPE_GROUP
14131 def ExpandNames(self):
14132 # We need the new group's UUID here so that we can create and acquire the
14133 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14134 # that it should not check whether the UUID exists in the configuration.
14135 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14136 self.needed_locks = {}
14137 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14139 def CheckPrereq(self):
14140 """Check prerequisites.
14142 This checks that the given group name is not an existing node group
14143 already.
14145 """
14146 try:
14147 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14148 except errors.OpPrereqError:
14149 pass
14150 else:
14151 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14152 " node group (UUID: %s)" %
14153 (self.op.group_name, existing_uuid),
14154 errors.ECODE_EXISTS)
14156 if self.op.ndparams:
14157 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14159 if self.op.hv_state:
14160 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14161 else:
14162 self.new_hv_state = None
14164 if self.op.disk_state:
14165 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14166 else:
14167 self.new_disk_state = None
14169 if self.op.diskparams:
14170 for templ in constants.DISK_TEMPLATES:
14171 if templ in self.op.diskparams:
14172 utils.ForceDictType(self.op.diskparams[templ],
14173 constants.DISK_DT_TYPES)
14174 self.new_diskparams = self.op.diskparams
14175 try:
14176 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14177 except errors.OpPrereqError, err:
14178 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14179 errors.ECODE_INVAL)
14180 else:
14181 self.new_diskparams = {}
14183 if self.op.ipolicy:
14184 cluster = self.cfg.GetClusterInfo()
14185 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14186 try:
14187 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14188 except errors.ConfigurationError, err:
14189 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14190 errors.ECODE_INVAL)
14192 def BuildHooksEnv(self):
14193 """Build hooks env.
14195 """
14196 return {
14197 "GROUP_NAME": self.op.group_name,
14198 }
14200 def BuildHooksNodes(self):
14201 """Build hooks nodes.
14204 mn = self.cfg.GetMasterNode()
14205 return ([mn], [mn])
14207 def Exec(self, feedback_fn):
14208 """Add the node group to the cluster.
14211 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14212 uuid=self.group_uuid,
14213 alloc_policy=self.op.alloc_policy,
14214 ndparams=self.op.ndparams,
14215 diskparams=self.new_diskparams,
14216 ipolicy=self.op.ipolicy,
14217 hv_state_static=self.new_hv_state,
14218 disk_state_static=self.new_disk_state)
14220 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14221 del self.remove_locks[locking.LEVEL_NODEGROUP]
14224 class LUGroupAssignNodes(NoHooksLU):
14225 """Logical unit for assigning nodes to groups.
14230 def ExpandNames(self):
14231 # These raise errors.OpPrereqError on their own:
14232 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14233 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14235 # We want to lock all the affected nodes and groups. We have readily
14236 # available the list of nodes, and the *destination* group. To gather the
14237 # list of "source" groups, we need to fetch node information later on.
14238 self.needed_locks = {
14239 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14240 locking.LEVEL_NODE: self.op.nodes,
14243 def DeclareLocks(self, level):
14244 if level == locking.LEVEL_NODEGROUP:
14245 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14247 # Try to get all affected nodes' groups without having the group or node
14248 # lock yet. Needs verification later in the code flow.
14249 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14251 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14253 def CheckPrereq(self):
14254 """Check prerequisites.
14257 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14258 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14259 frozenset(self.op.nodes))
14261 expected_locks = (set([self.group_uuid]) |
14262 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14263 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14264 if actual_locks != expected_locks:
14265 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14266 " current groups are '%s', used to be '%s'" %
14267 (utils.CommaJoin(expected_locks),
14268 utils.CommaJoin(actual_locks)))
14270 self.node_data = self.cfg.GetAllNodesInfo()
14271 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14272 instance_data = self.cfg.GetAllInstancesInfo()
14274 if self.group is None:
14275 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14276 (self.op.group_name, self.group_uuid))
14278 (new_splits, previous_splits) = \
14279 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14280 for node in self.op.nodes],
14281 self.node_data, instance_data)
14283 if new_splits:
14284 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14286 if not self.op.force:
14287 raise errors.OpExecError("The following instances get split by this"
14288 " change and --force was not given: %s" %
14289 fmt_new_splits)
14290 else:
14291 self.LogWarning("This operation will split the following instances: %s",
14292 fmt_new_splits)
14294 if previous_splits:
14295 self.LogWarning("In addition, these already-split instances continue"
14296 " to be split across groups: %s",
14297 utils.CommaJoin(utils.NiceSort(previous_splits)))
14299 def Exec(self, feedback_fn):
14300 """Assign nodes to a new group.
14302 """
14303 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14305 self.cfg.AssignGroupNodes(mods)
14307 @staticmethod
14308 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14309 """Check for split instances after a node assignment.
14311 This method considers a series of node assignments as an atomic operation,
14312 and returns information about split instances after applying the set of
14315 In particular, it returns information about newly split instances, and
14316 instances that were already split, and remain so after the change.
14318 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14321 @type changes: list of (node_name, new_group_uuid) pairs.
14322 @param changes: list of node assignments to consider.
14323 @param node_data: a dict with data for all nodes
14324 @param instance_data: a dict with all instances to consider
14325 @rtype: a two-tuple
14326 @return: a list of instances that were previously okay and result split as a
14327 consequence of this change, and a list of instances that were previously
14328 split and this change does not fix.
14330 """
14331 changed_nodes = dict((node, group) for node, group in changes
14332 if node_data[node].group != group)
14334 all_split_instances = set()
14335 previously_split_instances = set()
14337 def InstanceNodes(instance):
14338 return [instance.primary_node] + list(instance.secondary_nodes)
14340 for inst in instance_data.values():
14341 if inst.disk_template not in constants.DTS_INT_MIRROR:
14342 continue
14344 instance_nodes = InstanceNodes(inst)
14346 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14347 previously_split_instances.add(inst.name)
14349 if len(set(changed_nodes.get(node, node_data[node].group)
14350 for node in instance_nodes)) > 1:
14351 all_split_instances.add(inst.name)
14353 return (list(all_split_instances - previously_split_instances),
14354 list(previously_split_instances & all_split_instances))
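# Minimal usage sketch (names below are hypothetical, not from this module):
#   changes = [("node1.example.com", "uuid-of-target-group")]
#   (new, previous) = \
#     LUGroupAssignNodes.CheckAssignmentForSplitInstances(changes,
#                                                         node_data,
#                                                         instance_data)
# "new" lists instances this assignment would newly split across groups,
# "previous" those that were already split and remain split.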
14357 class _GroupQuery(_QueryBase):
14358 FIELDS = query.GROUP_FIELDS
14360 def ExpandNames(self, lu):
14361 lu.needed_locks = {}
14363 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14364 self._cluster = lu.cfg.GetClusterInfo()
14365 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14367 if not self.names:
14368 self.wanted = [name_to_uuid[name]
14369 for name in utils.NiceSort(name_to_uuid.keys())]
14370 else:
14371 # Accept names to be either names or UUIDs.
14372 missing = []
14373 self.wanted = []
14374 all_uuid = frozenset(self._all_groups.keys())
14376 for name in self.names:
14377 if name in all_uuid:
14378 self.wanted.append(name)
14379 elif name in name_to_uuid:
14380 self.wanted.append(name_to_uuid[name])
14381 else:
14382 missing.append(name)
14384 if missing:
14385 raise errors.OpPrereqError("Some groups do not exist: %s" %
14386 utils.CommaJoin(missing),
14387 errors.ECODE_NOENT)
14389 def DeclareLocks(self, lu, level):
14390 pass
14392 def _GetQueryData(self, lu):
14393 """Computes the list of node groups and their attributes.
14396 do_nodes = query.GQ_NODE in self.requested_data
14397 do_instances = query.GQ_INST in self.requested_data
14399 group_to_nodes = None
14400 group_to_instances = None
14402 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14403 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14404 # latter GetAllInstancesInfo() is not enough, for we have to go through
14405 # instance->node. Hence, we will need to process nodes even if we only need
14406 # instance information.
14407 if do_nodes or do_instances:
14408 all_nodes = lu.cfg.GetAllNodesInfo()
14409 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14410 node_to_group = {}
14412 for node in all_nodes.values():
14413 if node.group in group_to_nodes:
14414 group_to_nodes[node.group].append(node.name)
14415 node_to_group[node.name] = node.group
14417 if do_instances:
14418 all_instances = lu.cfg.GetAllInstancesInfo()
14419 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14421 for instance in all_instances.values():
14422 node = instance.primary_node
14423 if node in node_to_group:
14424 group_to_instances[node_to_group[node]].append(instance.name)
14427 # Do not pass on node information if it was not requested.
14428 group_to_nodes = None
14430 return query.GroupQueryData(self._cluster,
14431 [self._all_groups[uuid]
14432 for uuid in self.wanted],
14433 group_to_nodes, group_to_instances,
14434 query.GQ_DISKPARAMS in self.requested_data)
14437 class LUGroupQuery(NoHooksLU):
14438 """Logical unit for querying node groups.
14443 def CheckArguments(self):
14444 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14445 self.op.output_fields, False)
14447 def ExpandNames(self):
14448 self.gq.ExpandNames(self)
14450 def DeclareLocks(self, level):
14451 self.gq.DeclareLocks(self, level)
14453 def Exec(self, feedback_fn):
14454 return self.gq.OldStyleQuery(self)
14457 class LUGroupSetParams(LogicalUnit):
14458 """Modifies the parameters of a node group.
14460 """
14461 HPATH = "group-modify"
14462 HTYPE = constants.HTYPE_GROUP
14463 REQ_BGL = False
14465 def CheckArguments(self):
14466 all_changes = [
14467 self.op.ndparams,
14468 self.op.diskparams,
14469 self.op.alloc_policy,
14470 self.op.hv_state,
14471 self.op.disk_state,
14472 self.op.ipolicy,
14473 ]
14475 if all_changes.count(None) == len(all_changes):
14476 raise errors.OpPrereqError("Please pass at least one modification",
14477 errors.ECODE_INVAL)
14479 def ExpandNames(self):
14480 # This raises errors.OpPrereqError on its own:
14481 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14483 self.needed_locks = {
14484 locking.LEVEL_INSTANCE: [],
14485 locking.LEVEL_NODEGROUP: [self.group_uuid],
14486 }
14488 self.share_locks[locking.LEVEL_INSTANCE] = 1
14490 def DeclareLocks(self, level):
14491 if level == locking.LEVEL_INSTANCE:
14492 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14494 # Lock instances optimistically, needs verification once group lock has
14495 # been acquired
14496 self.needed_locks[locking.LEVEL_INSTANCE] = \
14497 self.cfg.GetNodeGroupInstances(self.group_uuid)
14499 @staticmethod
14500 def _UpdateAndVerifyDiskParams(old, new):
14501 """Updates and verifies disk parameters.
14503 """
14504 new_params = _GetUpdatedParams(old, new)
14505 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14506 return new_params
14508 def CheckPrereq(self):
14509 """Check prerequisites.
14512 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14514 # Check if locked instances are still correct
14515 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14517 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14518 cluster = self.cfg.GetClusterInfo()
14520 if self.group is None:
14521 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14522 (self.op.group_name, self.group_uuid))
14524 if self.op.ndparams:
14525 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14526 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14527 self.new_ndparams = new_ndparams
14529 if self.op.diskparams:
14530 diskparams = self.group.diskparams
14531 uavdp = self._UpdateAndVerifyDiskParams
14532 # For each disktemplate subdict update and verify the values
14533 new_diskparams = dict((dt,
14534 uavdp(diskparams.get(dt, {}),
14535 self.op.diskparams[dt]))
14536 for dt in constants.DISK_TEMPLATES
14537 if dt in self.op.diskparams)
14538 # As we've all subdicts of diskparams ready, lets merge the actual
14539 # dict with all updated subdicts
14540 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
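      # Illustrative sketch (hypothetical values, not taken from a real
      # cluster): if the group currently has
      #   diskparams = {"drbd": {"resync-rate": 1024}}
      # and the opcode passes {"drbd": {"metavg": "othervg"}}, the merged
      # self.new_diskparams would be
      #   {"drbd": {"resync-rate": 1024, "metavg": "othervg"}}
      # with all other disk templates left untouched.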
14542 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14543 except errors.OpPrereqError, err:
14544         raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14545 errors.ECODE_INVAL)
14547 if self.op.hv_state:
14548 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14549 self.group.hv_state_static)
14551 if self.op.disk_state:
14552 self.new_disk_state = \
14553 _MergeAndVerifyDiskState(self.op.disk_state,
14554 self.group.disk_state_static)
14556 if self.op.ipolicy:
14557 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14561 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14562 inst_filter = lambda inst: inst.name in owned_instances
14563 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14565 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14567 new_ipolicy, instances)
14570 self.LogWarning("After the ipolicy change the following instances"
14571                       " violate it: %s",
14572 utils.CommaJoin(violations))
14574 def BuildHooksEnv(self):
14575 """Build hooks env.
14579 "GROUP_NAME": self.op.group_name,
14580 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14583 def BuildHooksNodes(self):
14584 """Build hooks nodes.
14587 mn = self.cfg.GetMasterNode()
14588 return ([mn], [mn])
14590 def Exec(self, feedback_fn):
14591 """Modifies the node group.
14596 if self.op.ndparams:
14597 self.group.ndparams = self.new_ndparams
14598 result.append(("ndparams", str(self.group.ndparams)))
14600 if self.op.diskparams:
14601 self.group.diskparams = self.new_diskparams
14602 result.append(("diskparams", str(self.group.diskparams)))
14604 if self.op.alloc_policy:
14605 self.group.alloc_policy = self.op.alloc_policy
14607 if self.op.hv_state:
14608 self.group.hv_state_static = self.new_hv_state
14610 if self.op.disk_state:
14611 self.group.disk_state_static = self.new_disk_state
14613 if self.op.ipolicy:
14614 self.group.ipolicy = self.new_ipolicy
14616 self.cfg.Update(self.group, feedback_fn)
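    # Descriptive note (not part of the original source): the "result" list of
    # (parameter name, new value) pairs accumulated above is what this LU hands
    # back to the caller, e.g. something roughly like
    #   [("ndparams", "{'oob_program': None}"), ("diskparams", "{...}")]
    # (values here are purely illustrative).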
14620 class LUGroupRemove(LogicalUnit):
14621 HPATH = "group-remove"
14622 HTYPE = constants.HTYPE_GROUP
14625 def ExpandNames(self):
14626     # This raises errors.OpPrereqError on its own:
14627 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14628 self.needed_locks = {
14629 locking.LEVEL_NODEGROUP: [self.group_uuid],
14632 def CheckPrereq(self):
14633 """Check prerequisites.
14635     This checks that the given group name exists as a node group, that it is
14636     empty (i.e., contains no nodes), and that it is not the last group of the cluster.
14640 # Verify that the group is empty.
14641 group_nodes = [node.name
14642 for node in self.cfg.GetAllNodesInfo().values()
14643 if node.group == self.group_uuid]
14646 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14648 (self.op.group_name,
14649 utils.CommaJoin(utils.NiceSort(group_nodes))),
14650 errors.ECODE_STATE)
14652 # Verify the cluster would not be left group-less.
14653 if len(self.cfg.GetNodeGroupList()) == 1:
14654 raise errors.OpPrereqError("Group '%s' is the only group,"
14655 " cannot be removed" %
14656 self.op.group_name,
14657 errors.ECODE_STATE)
14659 def BuildHooksEnv(self):
14660 """Build hooks env.
14664 "GROUP_NAME": self.op.group_name,
14667 def BuildHooksNodes(self):
14668 """Build hooks nodes.
14671 mn = self.cfg.GetMasterNode()
14672 return ([mn], [mn])
14674 def Exec(self, feedback_fn):
14675 """Remove the node group.
14679 self.cfg.RemoveNodeGroup(self.group_uuid)
14680 except errors.ConfigurationError:
14681 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14682 (self.op.group_name, self.group_uuid))
14684 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14687 class LUGroupRename(LogicalUnit):
14688 HPATH = "group-rename"
14689 HTYPE = constants.HTYPE_GROUP
14692 def ExpandNames(self):
14693 # This raises errors.OpPrereqError on its own:
14694 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14696 self.needed_locks = {
14697 locking.LEVEL_NODEGROUP: [self.group_uuid],
14700 def CheckPrereq(self):
14701 """Check prerequisites.
14703 Ensures requested new name is not yet used.
14707 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14708 except errors.OpPrereqError:
14711 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14712 " node group (UUID: %s)" %
14713 (self.op.new_name, new_name_uuid),
14714 errors.ECODE_EXISTS)
14716 def BuildHooksEnv(self):
14717 """Build hooks env.
14721 "OLD_NAME": self.op.group_name,
14722 "NEW_NAME": self.op.new_name,
14725 def BuildHooksNodes(self):
14726 """Build hooks nodes.
14729 mn = self.cfg.GetMasterNode()
14731 all_nodes = self.cfg.GetAllNodesInfo()
14732 all_nodes.pop(mn, None)
14735 run_nodes.extend(node.name for node in all_nodes.values()
14736 if node.group == self.group_uuid)
14738 return (run_nodes, run_nodes)
14740 def Exec(self, feedback_fn):
14741 """Rename the node group.
14744 group = self.cfg.GetNodeGroup(self.group_uuid)
14747 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14748 (self.op.group_name, self.group_uuid))
14750 group.name = self.op.new_name
14751 self.cfg.Update(group, feedback_fn)
14753 return self.op.new_name
14756 class LUGroupEvacuate(LogicalUnit):
14757 HPATH = "group-evacuate"
14758 HTYPE = constants.HTYPE_GROUP
14761 def ExpandNames(self):
14762 # This raises errors.OpPrereqError on its own:
14763 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14765 if self.op.target_groups:
14766 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14767 self.op.target_groups)
14769 self.req_target_uuids = []
14771 if self.group_uuid in self.req_target_uuids:
14772 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14773 " as a target group (targets are %s)" %
14775 utils.CommaJoin(self.req_target_uuids)),
14776 errors.ECODE_INVAL)
14778 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14780 self.share_locks = _ShareAll()
14781 self.needed_locks = {
14782 locking.LEVEL_INSTANCE: [],
14783 locking.LEVEL_NODEGROUP: [],
14784 locking.LEVEL_NODE: [],
14787 def DeclareLocks(self, level):
14788 if level == locking.LEVEL_INSTANCE:
14789 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14791 # Lock instances optimistically, needs verification once node and group
14792 # locks have been acquired
14793 self.needed_locks[locking.LEVEL_INSTANCE] = \
14794 self.cfg.GetNodeGroupInstances(self.group_uuid)
14796 elif level == locking.LEVEL_NODEGROUP:
14797 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14799 if self.req_target_uuids:
14800 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14802 # Lock all groups used by instances optimistically; this requires going
14803 # via the node before it's locked, requiring verification later on
14804 lock_groups.update(group_uuid
14805 for instance_name in
14806 self.owned_locks(locking.LEVEL_INSTANCE)
14808 self.cfg.GetInstanceNodeGroups(instance_name))
14810 # No target groups, need to lock all of them
14811 lock_groups = locking.ALL_SET
14813 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14815 elif level == locking.LEVEL_NODE:
14816 # This will only lock the nodes in the group to be evacuated which
14817 # contain actual instances
14818 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14819 self._LockInstancesNodes()
14821 # Lock all nodes in group to be evacuated and target groups
14822 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14823 assert self.group_uuid in owned_groups
14824 member_nodes = [node_name
14825 for group in owned_groups
14826 for node_name in self.cfg.GetNodeGroup(group).members]
14827 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
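  # Descriptive note (not part of the original source): the instance and
  # node-group locks above are acquired "optimistically", i.e. based on
  # configuration data read before the locks were actually held.  CheckPrereq
  # below therefore re-validates them via _CheckNodeGroupInstances and
  # _CheckInstancesNodeGroups once everything is locked.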
14829 def CheckPrereq(self):
14830 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14831 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14832 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14834 assert owned_groups.issuperset(self.req_target_uuids)
14835 assert self.group_uuid in owned_groups
14837 # Check if locked instances are still correct
14838 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14840 # Get instance information
14841 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14843 # Check if node groups for locked instances are still correct
14844 _CheckInstancesNodeGroups(self.cfg, self.instances,
14845 owned_groups, owned_nodes, self.group_uuid)
14847 if self.req_target_uuids:
14848 # User requested specific target groups
14849 self.target_uuids = self.req_target_uuids
14851 # All groups except the one to be evacuated are potential targets
14852 self.target_uuids = [group_uuid for group_uuid in owned_groups
14853 if group_uuid != self.group_uuid]
14855 if not self.target_uuids:
14856 raise errors.OpPrereqError("There are no possible target groups",
14857 errors.ECODE_INVAL)
14859 def BuildHooksEnv(self):
14860 """Build hooks env.
14864 "GROUP_NAME": self.op.group_name,
14865 "TARGET_GROUPS": " ".join(self.target_uuids),
14868 def BuildHooksNodes(self):
14869 """Build hooks nodes.
14872 mn = self.cfg.GetMasterNode()
14874 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14876 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14878 return (run_nodes, run_nodes)
14880 def Exec(self, feedback_fn):
14881 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14883 assert self.group_uuid not in self.target_uuids
14885 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14886 instances=instances, target_groups=self.target_uuids)
14888 ial.Run(self.op.iallocator)
14890 if not ial.success:
14891 raise errors.OpPrereqError("Can't compute group evacuation using"
14892 " iallocator '%s': %s" %
14893 (self.op.iallocator, ial.info),
14894 errors.ECODE_NORES)
14896 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14898 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14899 len(jobs), self.op.group_name)
14901 return ResultWithJobs(jobs)
14904 class TagsLU(NoHooksLU): # pylint: disable=W0223
14905 """Generic tags LU.
14907 This is an abstract class which is the parent of all the other tags LUs.
14910 def ExpandNames(self):
14911 self.group_uuid = None
14912 self.needed_locks = {}
14914 if self.op.kind == constants.TAG_NODE:
14915 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14916 lock_level = locking.LEVEL_NODE
14917 lock_name = self.op.name
14918 elif self.op.kind == constants.TAG_INSTANCE:
14919 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14920 lock_level = locking.LEVEL_INSTANCE
14921 lock_name = self.op.name
14922 elif self.op.kind == constants.TAG_NODEGROUP:
14923 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14924 lock_level = locking.LEVEL_NODEGROUP
14925 lock_name = self.group_uuid
14926 elif self.op.kind == constants.TAG_NETWORK:
14927 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
14928 lock_level = locking.LEVEL_NETWORK
14929 lock_name = self.network_uuid
14934 if lock_level and getattr(self.op, "use_locking", True):
14935 self.needed_locks[lock_level] = lock_name
14937 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14938 # not possible to acquire the BGL based on opcode parameters)
14940 def CheckPrereq(self):
14941 """Check prerequisites.
14944 if self.op.kind == constants.TAG_CLUSTER:
14945 self.target = self.cfg.GetClusterInfo()
14946 elif self.op.kind == constants.TAG_NODE:
14947 self.target = self.cfg.GetNodeInfo(self.op.name)
14948 elif self.op.kind == constants.TAG_INSTANCE:
14949 self.target = self.cfg.GetInstanceInfo(self.op.name)
14950 elif self.op.kind == constants.TAG_NODEGROUP:
14951 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14952 elif self.op.kind == constants.TAG_NETWORK:
14953 self.target = self.cfg.GetNetwork(self.network_uuid)
14955 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14956 str(self.op.kind), errors.ECODE_INVAL)
14959 class LUTagsGet(TagsLU):
14960 """Returns the tags of a given object.
14965 def ExpandNames(self):
14966 TagsLU.ExpandNames(self)
14968 # Share locks as this is only a read operation
14969 self.share_locks = _ShareAll()
14971 def Exec(self, feedback_fn):
14972 """Returns the tag list.
14975 return list(self.target.GetTags())
14978 class LUTagsSearch(NoHooksLU):
14979 """Searches the tags for a given pattern.
14984 def ExpandNames(self):
14985 self.needed_locks = {}
14987 def CheckPrereq(self):
14988 """Check prerequisites.
14990 This checks the pattern passed for validity by compiling it.
14994 self.re = re.compile(self.op.pattern)
14995 except re.error, err:
14996 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14997 (self.op.pattern, err), errors.ECODE_INVAL)
14999 def Exec(self, feedback_fn):
15000     """Returns the matching (path, tag) pairs.
15004 tgts = [("/cluster", cfg.GetClusterInfo())]
15005 ilist = cfg.GetAllInstancesInfo().values()
15006 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15007 nlist = cfg.GetAllNodesInfo().values()
15008 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15009 tgts.extend(("/nodegroup/%s" % n.name, n)
15010 for n in cfg.GetAllNodeGroupsInfo().values())
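    # Sketch of the data being searched (hypothetical names): tgts is a list of
    # (path, taggable object) pairs such as
    #   [("/cluster", <Cluster>), ("/instances/web1", <Instance web1>),
    #    ("/nodes/node1", <Node node1>), ("/nodegroup/default", <NodeGroup>)]
    # and every tag of every target is matched against self.re below.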
15012 for path, target in tgts:
15013 for tag in target.GetTags():
15014 if self.re.search(tag):
15015 results.append((path, tag))
15019 class LUTagsSet(TagsLU):
15020 """Sets a tag on a given object.
15025 def CheckPrereq(self):
15026 """Check prerequisites.
15028 This checks the type and length of the tag name and value.
15031 TagsLU.CheckPrereq(self)
15032 for tag in self.op.tags:
15033 objects.TaggableObject.ValidateTag(tag)
15035 def Exec(self, feedback_fn):
15040 for tag in self.op.tags:
15041 self.target.AddTag(tag)
15042 except errors.TagError, err:
15043 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15044 self.cfg.Update(self.target, feedback_fn)
15047 class LUTagsDel(TagsLU):
15048 """Delete a list of tags from a given object.
15053 def CheckPrereq(self):
15054 """Check prerequisites.
15056 This checks that we have the given tag.
15059 TagsLU.CheckPrereq(self)
15060 for tag in self.op.tags:
15061 objects.TaggableObject.ValidateTag(tag)
15062 del_tags = frozenset(self.op.tags)
15063 cur_tags = self.target.GetTags()
15065 diff_tags = del_tags - cur_tags
15067 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15068 raise errors.OpPrereqError("Tag(s) %s not found" %
15069 (utils.CommaJoin(diff_names), ),
15070 errors.ECODE_NOENT)
15072 def Exec(self, feedback_fn):
15073 """Remove the tag from the object.
15076 for tag in self.op.tags:
15077 self.target.RemoveTag(tag)
15078 self.cfg.Update(self.target, feedback_fn)
15081 class LUTestDelay(NoHooksLU):
15082 """Sleep for a specified amount of time.
15084   This LU sleeps on the master and/or nodes for a specified amount of time.
15090 def ExpandNames(self):
15091 """Expand names and set required locks.
15093 This expands the node list, if any.
15096 self.needed_locks = {}
15097 if self.op.on_nodes:
15098 # _GetWantedNodes can be used here, but is not always appropriate to use
15099 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15100 # more information.
15101 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15102 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15104 def _TestDelay(self):
15105 """Do the actual sleep.
15108 if self.op.on_master:
15109 if not utils.TestDelay(self.op.duration):
15110 raise errors.OpExecError("Error during master delay test")
15111 if self.op.on_nodes:
15112 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15113 for node, node_result in result.items():
15114 node_result.Raise("Failure during rpc call to node %s" % node)
15116 def Exec(self, feedback_fn):
15117 """Execute the test delay opcode, with the wanted repetitions.
15120 if self.op.repeat == 0:
15123 top_value = self.op.repeat - 1
15124 for i in range(self.op.repeat):
15125 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15129 class LUTestJqueue(NoHooksLU):
15130 """Utility LU to test some aspects of the job queue.
15135 # Must be lower than default timeout for WaitForJobChange to see whether it
15136 # notices changed jobs
15137 _CLIENT_CONNECT_TIMEOUT = 20.0
15138 _CLIENT_CONFIRM_TIMEOUT = 60.0
15141 def _NotifyUsingSocket(cls, cb, errcls):
15142 """Opens a Unix socket and waits for another program to connect.
15145 @param cb: Callback to send socket name to client
15146 @type errcls: class
15147 @param errcls: Exception class to use for errors
15150 # Using a temporary directory as there's no easy way to create temporary
15151     # sockets without writing a custom loop around tempfile.mktemp and socket.bind.
15153 tmpdir = tempfile.mkdtemp()
15155 tmpsock = utils.PathJoin(tmpdir, "sock")
15157 logging.debug("Creating temporary socket at %s", tmpsock)
15158 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15163 # Send details to client
15166 # Wait for client to connect before continuing
15167 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15169 (conn, _) = sock.accept()
15170 except socket.error, err:
15171 raise errcls("Client didn't connect in time (%s)" % err)
15175 # Remove as soon as client is connected
15176 shutil.rmtree(tmpdir)
15178 # Wait for client to close
15181 # pylint: disable=E1101
15182 # Instance of '_socketobject' has no ... member
15183 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15185 except socket.error, err:
15186 raise errcls("Client failed to confirm notification (%s)" % err)
15190 def _SendNotification(self, test, arg, sockname):
15191 """Sends a notification to the client.
15194 @param test: Test name
15195 @param arg: Test argument (depends on test)
15196 @type sockname: string
15197 @param sockname: Socket path
15200 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15202 def _Notify(self, prereq, test, arg):
15203 """Notifies the client of a test.
15206 @param prereq: Whether this is a prereq-phase test
15208 @param test: Test name
15209 @param arg: Test argument (depends on test)
15213 errcls = errors.OpPrereqError
15215 errcls = errors.OpExecError
15217 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15221 def CheckArguments(self):
15222 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15223 self.expandnames_calls = 0
15225 def ExpandNames(self):
15226 checkargs_calls = getattr(self, "checkargs_calls", 0)
15227 if checkargs_calls < 1:
15228 raise errors.ProgrammerError("CheckArguments was not called")
15230 self.expandnames_calls += 1
15232 if self.op.notify_waitlock:
15233 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15235 self.LogInfo("Expanding names")
15237 # Get lock on master node (just to get a lock, not for a particular reason)
15238 self.needed_locks = {
15239 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15242 def Exec(self, feedback_fn):
15243 if self.expandnames_calls < 1:
15244 raise errors.ProgrammerError("ExpandNames was not called")
15246 if self.op.notify_exec:
15247 self._Notify(False, constants.JQT_EXEC, None)
15249 self.LogInfo("Executing")
15251 if self.op.log_messages:
15252 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15253 for idx, msg in enumerate(self.op.log_messages):
15254 self.LogInfo("Sending log message %s", idx + 1)
15255 feedback_fn(constants.JQT_MSGPREFIX + msg)
15256 # Report how many test messages have been sent
15257 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15260 raise errors.OpExecError("Opcode failure was requested")
15265 class IAllocator(object):
15266 """IAllocator framework.
15268   An IAllocator instance has four sets of attributes:
15269 - cfg that is needed to query the cluster
15270 - input data (all members of the _KEYS class attribute are required)
15271 - four buffer attributes (in|out_data|text), that represent the
15272 input (to the external script) in text and data structure format,
15273 and the output from it, again in two formats
15274     - the result variables from the script (success, info, nodes) for easy usage
15278 # pylint: disable=R0902
15279 # lots of instance attributes
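  # Typical usage, as seen e.g. in LUGroupEvacuate.Exec above (sketch only,
  # "allocator_name" is a placeholder):
  #   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=instances, target_groups=target_uuids)
  #   ial.Run(allocator_name)
  #   if not ial.success:
  #     ...  # ial.info carries the error message
  #   ...    # ial.result holds the validated allocator output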
15281 def __init__(self, cfg, rpc_runner, mode, **kwargs):
15283 self.rpc = rpc_runner
15284 # init buffer variables
15285 self.in_text = self.out_text = self.in_data = self.out_data = None
15286 # init all input fields so that pylint is happy
15288 self.memory = self.disks = self.disk_template = self.spindle_use = None
15289 self.os = self.tags = self.nics = self.vcpus = None
15290 self.hypervisor = None
15291 self.relocate_from = None
15293 self.instances = None
15294 self.evac_mode = None
15295 self.target_groups = []
15297 self.required_nodes = None
15298 # init result fields
15299 self.success = self.info = self.result = None
15302 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
15304 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
15305 " IAllocator" % self.mode)
15307 keyset = [n for (n, _) in keydata]
15310 if key not in keyset:
15311 raise errors.ProgrammerError("Invalid input parameter '%s' to"
15312 " IAllocator" % key)
15313 setattr(self, key, kwargs[key])
15316 if key not in kwargs:
15317 raise errors.ProgrammerError("Missing input parameter '%s' to"
15318 " IAllocator" % key)
15319 self._BuildInputData(compat.partial(fn, self), keydata)
15321 def _ComputeClusterData(self):
15322 """Compute the generic allocator input data.
15324 This is the data that is independent of the actual operation.
15328 cluster_info = cfg.GetClusterInfo()
15331 "version": constants.IALLOCATOR_VERSION,
15332 "cluster_name": cfg.GetClusterName(),
15333 "cluster_tags": list(cluster_info.GetTags()),
15334 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
15335 "ipolicy": cluster_info.ipolicy,
15337 ninfo = cfg.GetAllNodesInfo()
15338 iinfo = cfg.GetAllInstancesInfo().values()
15339 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
15342 node_list = [n.name for n in ninfo.values() if n.vm_capable]
15344 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
15345 hypervisor_name = self.hypervisor
15346 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
15347 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
15349 hypervisor_name = cluster_info.primary_hypervisor
15351 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
15354 self.rpc.call_all_instances_info(node_list,
15355 cluster_info.enabled_hypervisors)
15357 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
15359 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
15360 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
15361 i_list, config_ndata)
15362 assert len(data["nodes"]) == len(ninfo), \
15363 "Incomplete node data computed"
15365 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
15367 self.in_data = data
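    # Rough shape of the computed structure (a sketch of the keys built above):
    #   {
    #     "version": ..., "cluster_name": ..., "cluster_tags": [...],
    #     "enabled_hypervisors": [...], "ipolicy": {...},
    #     "nodegroups": {uuid: {...}}, "nodes": {name: {...}},
    #     "instances": {name: {...}},
    #   }
    # The mode-specific "request" key is added later by _BuildInputData.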
15370 def _ComputeNodeGroupData(cfg):
15371 """Compute node groups data.
15374 cluster = cfg.GetClusterInfo()
15375 ng = dict((guuid, {
15376 "name": gdata.name,
15377 "alloc_policy": gdata.alloc_policy,
15378 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
15380 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
15385 def _ComputeBasicNodeData(cfg, node_cfg):
15386 """Compute global node data.
15389 @returns: a dict of name: (node dict, node config)
15392 # fill in static (config-based) values
15393 node_results = dict((ninfo.name, {
15394 "tags": list(ninfo.GetTags()),
15395 "primary_ip": ninfo.primary_ip,
15396 "secondary_ip": ninfo.secondary_ip,
15397 "offline": ninfo.offline,
15398 "drained": ninfo.drained,
15399 "master_candidate": ninfo.master_candidate,
15400 "group": ninfo.group,
15401 "master_capable": ninfo.master_capable,
15402 "vm_capable": ninfo.vm_capable,
15403 "ndparams": cfg.GetNdParams(ninfo),
15405 for ninfo in node_cfg.values())
15407 return node_results
15410 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
15412 """Compute global node data.
15414 @param node_results: the basic node structures as filled from the config
15417 #TODO(dynmem): compute the right data on MAX and MIN memory
15418 # make a copy of the current dict
15419 node_results = dict(node_results)
15420 for nname, nresult in node_data.items():
15421 assert nname in node_results, "Missing basic data for node %s" % nname
15422 ninfo = node_cfg[nname]
15424 if not (ninfo.offline or ninfo.drained):
15425 nresult.Raise("Can't get data for node %s" % nname)
15426 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
15428 remote_info = _MakeLegacyNodeInfo(nresult.payload)
15430 for attr in ["memory_total", "memory_free", "memory_dom0",
15431 "vg_size", "vg_free", "cpu_total"]:
15432 if attr not in remote_info:
15433 raise errors.OpExecError("Node '%s' didn't return attribute"
15434 " '%s'" % (nname, attr))
15435 if not isinstance(remote_info[attr], int):
15436 raise errors.OpExecError("Node '%s' returned invalid value"
15438 (nname, attr, remote_info[attr]))
15439 # compute memory used by primary instances
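        # Worked example of the adjustment below (hypothetical numbers): if a
        # running primary instance has BE_MAXMEM = 1024 MiB but the hypervisor
        # reports it currently using 768 MiB, memory_free is reduced by
        # max(0, 1024 - 768) = 256 MiB, so the allocator also sees the memory
        # the instance could still grow into as unavailable.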
15440 i_p_mem = i_p_up_mem = 0
15441 for iinfo, beinfo in i_list:
15442 if iinfo.primary_node == nname:
15443 i_p_mem += beinfo[constants.BE_MAXMEM]
15444 if iinfo.name not in node_iinfo[nname].payload:
15447 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
15448 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
15449 remote_info["memory_free"] -= max(0, i_mem_diff)
15451 if iinfo.admin_state == constants.ADMINST_UP:
15452 i_p_up_mem += beinfo[constants.BE_MAXMEM]
15454 # compute memory used by instances
15456 "total_memory": remote_info["memory_total"],
15457 "reserved_memory": remote_info["memory_dom0"],
15458 "free_memory": remote_info["memory_free"],
15459 "total_disk": remote_info["vg_size"],
15460 "free_disk": remote_info["vg_free"],
15461 "total_cpus": remote_info["cpu_total"],
15462 "i_pri_memory": i_p_mem,
15463 "i_pri_up_memory": i_p_up_mem,
15465 pnr_dyn.update(node_results[nname])
15466 node_results[nname] = pnr_dyn
15468 return node_results
15471 def _ComputeInstanceData(cluster_info, i_list):
15472 """Compute global instance data.
15476 for iinfo, beinfo in i_list:
15478 for nic in iinfo.nics:
15479 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15483 "mode": filled_params[constants.NIC_MODE],
15484 "link": filled_params[constants.NIC_LINK],
15485 "network": nic.network,
15487 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15488 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15489 nic_data.append(nic_dict)
15491 "tags": list(iinfo.GetTags()),
15492 "admin_state": iinfo.admin_state,
15493 "vcpus": beinfo[constants.BE_VCPUS],
15494 "memory": beinfo[constants.BE_MAXMEM],
15495 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15497 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15499 "disks": [{constants.IDISK_SIZE: dsk.size,
15500 constants.IDISK_MODE: dsk.mode}
15501 for dsk in iinfo.disks],
15502 "disk_template": iinfo.disk_template,
15503 "hypervisor": iinfo.hypervisor,
15505 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15507 instance_data[iinfo.name] = pir
15509 return instance_data
15511 def _AddNewInstance(self):
15512 """Add new instance data to allocator structure.
15514     This in combination with _ComputeClusterData will create the
15515     correct structure needed as input for the allocator.
15517     The checks for the completeness of the opcode must have already been done.
15521 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15523 if self.disk_template in constants.DTS_INT_MIRROR:
15524 self.required_nodes = 2
15526 self.required_nodes = 1
15530 "disk_template": self.disk_template,
15533 "vcpus": self.vcpus,
15534 "memory": self.memory,
15535 "spindle_use": self.spindle_use,
15536 "disks": self.disks,
15537 "disk_space_total": disk_space,
15539 "required_nodes": self.required_nodes,
15540 "hypervisor": self.hypervisor,
15545 def _AddRelocateInstance(self):
15546 """Add relocate instance data to allocator structure.
15548     This in combination with _ComputeClusterData will create the
15549     correct structure needed as input for the allocator.
15551     The checks for the completeness of the opcode must have already been done.
15555 instance = self.cfg.GetInstanceInfo(self.name)
15556 if instance is None:
15557 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15558 " IAllocator" % self.name)
15560 if instance.disk_template not in constants.DTS_MIRRORED:
15561 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15562 errors.ECODE_INVAL)
15564 if instance.disk_template in constants.DTS_INT_MIRROR and \
15565 len(instance.secondary_nodes) != 1:
15566       raise errors.OpPrereqError("Instance does not have exactly one secondary node",
15567 errors.ECODE_STATE)
15569 self.required_nodes = 1
15570 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15571 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15575 "disk_space_total": disk_space,
15576 "required_nodes": self.required_nodes,
15577 "relocate_from": self.relocate_from,
15581 def _AddNodeEvacuate(self):
15582 """Get data for node-evacuate requests.
15586 "instances": self.instances,
15587 "evac_mode": self.evac_mode,
15590 def _AddChangeGroup(self):
15591     """Get data for change-group requests.
15595 "instances": self.instances,
15596 "target_groups": self.target_groups,
15599 def _BuildInputData(self, fn, keydata):
15600 """Build input data structures.
15603 self._ComputeClusterData()
15606 request["type"] = self.mode
15607 for keyname, keytype in keydata:
15608 if keyname not in request:
15609 raise errors.ProgrammerError("Request parameter %s is missing" %
15611 val = request[keyname]
15612 if not keytype(val):
15613 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15614 " validation, value %s, expected"
15615 " type %s" % (keyname, val, keytype))
15616 self.in_data["request"] = request
15618 self.in_text = serializer.Dump(self.in_data)
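    # For example, an IALLOCATOR_MODE_ALLOC request ends up with a "request"
    # sub-dict roughly like the following (purely illustrative values):
    #   {"type": "allocate", "name": "inst1.example.com", "memory": 512,
    #    "vcpus": 1, "disks": [{"size": 1024, "mode": "rw"}],
    #    "disk_space_total": ..., "required_nodes": ..., ...}
    # which is serialized into self.in_text for the external allocator script.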
15620 _STRING_LIST = ht.TListOf(ht.TString)
15621 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15622 # pylint: disable=E1101
15623 # Class '...' has no 'OP_ID' member
15624 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15625 opcodes.OpInstanceMigrate.OP_ID,
15626 opcodes.OpInstanceReplaceDisks.OP_ID])
15630 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15631 ht.TItems([ht.TNonEmptyString,
15632 ht.TNonEmptyString,
15633 ht.TListOf(ht.TNonEmptyString),
15636 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15637 ht.TItems([ht.TNonEmptyString,
15640 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15641 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
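  # A valid node-evacuation / change-group result therefore looks roughly like
  # (hypothetical names):
  #   [
  #     [["inst1", "group2", ["node3", "node4"]]],   # moved instances
  #     [["inst2", "some failure reason"]],          # failed instances
  #     [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]],   # jobs (opcode dicts)
  #   ]
  # i.e. a 3-element list matching _NEVAC_RESULT above.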
15644 constants.IALLOCATOR_MODE_ALLOC:
15647 ("name", ht.TString),
15648 ("memory", ht.TInt),
15649 ("spindle_use", ht.TInt),
15650 ("disks", ht.TListOf(ht.TDict)),
15651 ("disk_template", ht.TString),
15652 ("os", ht.TString),
15653 ("tags", _STRING_LIST),
15654 ("nics", ht.TListOf(ht.TDict)),
15655 ("vcpus", ht.TInt),
15656 ("hypervisor", ht.TString),
15658 constants.IALLOCATOR_MODE_RELOC:
15659 (_AddRelocateInstance,
15660 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15662 constants.IALLOCATOR_MODE_NODE_EVAC:
15663 (_AddNodeEvacuate, [
15664 ("instances", _STRING_LIST),
15665 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15667 constants.IALLOCATOR_MODE_CHG_GROUP:
15668 (_AddChangeGroup, [
15669 ("instances", _STRING_LIST),
15670 ("target_groups", _STRING_LIST),
15674 def Run(self, name, validate=True, call_fn=None):
15675 """Run an instance allocator and return the results.
15678 if call_fn is None:
15679 call_fn = self.rpc.call_iallocator_runner
15681 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15682 result.Raise("Failure while running the iallocator script")
15684 self.out_text = result.payload
15686 self._ValidateResult()
15688 def _ValidateResult(self):
15689 """Process the allocator results.
15691     This will process the allocator results and, if successful, save them in
15692     self.out_data and the other result attributes (success, info, result).
15696 rdict = serializer.Load(self.out_text)
15697 except Exception, err:
15698 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15700 if not isinstance(rdict, dict):
15701 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15703   # TODO: remove backwards compatibility in later versions
15704 if "nodes" in rdict and "result" not in rdict:
15705 rdict["result"] = rdict["nodes"]
15708 for key in "success", "info", "result":
15709 if key not in rdict:
15710 raise errors.OpExecError("Can't parse iallocator results:"
15711 " missing key '%s'" % key)
15712 setattr(self, key, rdict[key])
15714 if not self._result_check(self.result):
15715 raise errors.OpExecError("Iallocator returned invalid result,"
15716 " expected %s, got %s" %
15717 (self._result_check, self.result),
15718 errors.ECODE_INVAL)
15720 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15721 assert self.relocate_from is not None
15722 assert self.required_nodes == 1
15724 node2group = dict((name, ndata["group"])
15725 for (name, ndata) in self.in_data["nodes"].items())
15727 fn = compat.partial(self._NodesToGroups, node2group,
15728 self.in_data["nodegroups"])
15730 instance = self.cfg.GetInstanceInfo(self.name)
15731 request_groups = fn(self.relocate_from + [instance.primary_node])
15732 result_groups = fn(rdict["result"] + [instance.primary_node])
15734 if self.success and not set(result_groups).issubset(request_groups):
15735 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15736 " differ from original groups (%s)" %
15737 (utils.CommaJoin(result_groups),
15738 utils.CommaJoin(request_groups)))
15740 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15741 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15743 self.out_data = rdict
15746 def _NodesToGroups(node2group, groups, nodes):
15747 """Returns a list of unique group names for a list of nodes.
15749 @type node2group: dict
15750 @param node2group: Map from node name to group UUID
15752 @param groups: Group information
15754 @param nodes: Node names
15761 group_uuid = node2group[node]
15763 # Ignore unknown node
15767 group = groups[group_uuid]
15769 # Can't find group, let's use UUID
15770 group_name = group_uuid
15772 group_name = group["name"]
15774 result.add(group_name)
15776 return sorted(result)
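    # Tiny example (hypothetical data):
    #   node2group = {"node1": "uuid-a", "node2": "uuid-a", "node9": "uuid-b"}
    #   groups     = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "ssd"}}
    #   _NodesToGroups(node2group, groups, ["node1", "node2", "unknown"])
    #     -> ["default"]
    # Unknown nodes are skipped; group names are de-duplicated and sorted.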
15779 class LUTestAllocator(NoHooksLU):
15780 """Run allocator tests.
15782 This LU runs the allocator tests
15785 def CheckPrereq(self):
15786 """Check prerequisites.
15788     This checks the opcode parameters depending on the direction and mode of the test.
15791 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15792 for attr in ["memory", "disks", "disk_template",
15793 "os", "tags", "nics", "vcpus"]:
15794 if not hasattr(self.op, attr):
15795 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15796 attr, errors.ECODE_INVAL)
15797 iname = self.cfg.ExpandInstanceName(self.op.name)
15798 if iname is not None:
15799 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15800 iname, errors.ECODE_EXISTS)
15801 if not isinstance(self.op.nics, list):
15802 raise errors.OpPrereqError("Invalid parameter 'nics'",
15803 errors.ECODE_INVAL)
15804 if not isinstance(self.op.disks, list):
15805 raise errors.OpPrereqError("Invalid parameter 'disks'",
15806 errors.ECODE_INVAL)
15807 for row in self.op.disks:
15808 if (not isinstance(row, dict) or
15809 constants.IDISK_SIZE not in row or
15810 not isinstance(row[constants.IDISK_SIZE], int) or
15811 constants.IDISK_MODE not in row or
15812 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15813 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15814 " parameter", errors.ECODE_INVAL)
15815 if self.op.hypervisor is None:
15816 self.op.hypervisor = self.cfg.GetHypervisorType()
15817 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15818 fname = _ExpandInstanceName(self.cfg, self.op.name)
15819 self.op.name = fname
15820 self.relocate_from = \
15821 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15822 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15823 constants.IALLOCATOR_MODE_NODE_EVAC):
15824 if not self.op.instances:
15825 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15826 self.op.instances = _GetWantedInstances(self, self.op.instances)
15828 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15829 self.op.mode, errors.ECODE_INVAL)
15831 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15832 if self.op.allocator is None:
15833 raise errors.OpPrereqError("Missing allocator name",
15834 errors.ECODE_INVAL)
15835 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15836 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15837 self.op.direction, errors.ECODE_INVAL)
15839 def Exec(self, feedback_fn):
15840 """Run the allocator test.
15843 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15844 ial = IAllocator(self.cfg, self.rpc,
15847 memory=self.op.memory,
15848 disks=self.op.disks,
15849 disk_template=self.op.disk_template,
15853 vcpus=self.op.vcpus,
15854 hypervisor=self.op.hypervisor,
15856 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15857 ial = IAllocator(self.cfg, self.rpc,
15860 relocate_from=list(self.relocate_from),
15862 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15863 ial = IAllocator(self.cfg, self.rpc,
15865 instances=self.op.instances,
15866 target_groups=self.op.target_groups)
15867 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15868 ial = IAllocator(self.cfg, self.rpc,
15870 instances=self.op.instances,
15871 evac_mode=self.op.evac_mode)
15873       raise errors.ProgrammerError("Unhandled mode %s in"
15874 " LUTestAllocator.Exec", self.op.mode)
15876 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15877 result = ial.in_text
15879 ial.Run(self.op.allocator, validate=False)
15880 result = ial.out_text
15884 class LUNetworkAdd(LogicalUnit):
15885 """Logical unit for creating networks.
15888 HPATH = "network-add"
15889 HTYPE = constants.HTYPE_NETWORK
15892 def BuildHooksNodes(self):
15893 """Build hooks nodes.
15896 mn = self.cfg.GetMasterNode()
15897 return ([mn], [mn])
15899 def ExpandNames(self):
15900 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15901 self.needed_locks = {}
15902 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15904 def CheckPrereq(self):
15905 """Check prerequisites.
15907     This checks that the network is given and is not already defined.
15911 if self.op.network is None:
15912 raise errors.OpPrereqError("Network must be given",
15913 errors.ECODE_INVAL)
15915 uuid = self.cfg.LookupNetwork(self.op.network_name)
15918 raise errors.OpPrereqError("Network '%s' already defined" %
15919 self.op.network, errors.ECODE_EXISTS)
15921 if self.op.mac_prefix:
15922 utils.NormalizeAndValidateMac(self.op.mac_prefix+":00:00:00")
15924 # Check tag validity
15925 for tag in self.op.tags:
15926 objects.TaggableObject.ValidateTag(tag)
15929 def BuildHooksEnv(self):
15930 """Build hooks env.
15934 "name": self.op.network_name,
15935 "network": self.op.network,
15936 "gateway": self.op.gateway,
15937 "network6": self.op.network6,
15938 "gateway6": self.op.gateway6,
15939 "mac_prefix": self.op.mac_prefix,
15940 "network_type": self.op.network_type,
15941 "tags": self.op.tags,
15943 return _BuildNetworkHookEnv(**args)
15945 def Exec(self, feedback_fn):
15946 """Add the ip pool to the cluster.
15949 nobj = objects.Network(name=self.op.network_name,
15950 network=self.op.network,
15951 gateway=self.op.gateway,
15952 network6=self.op.network6,
15953 gateway6=self.op.gateway6,
15954 mac_prefix=self.op.mac_prefix,
15955 network_type=self.op.network_type,
15956 uuid=self.network_uuid,
15958 # Initialize the associated address pool
15960 pool = network.AddressPool.InitializeNetwork(nobj)
15961 except errors.AddressPoolError, e:
15962 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15964 # Check if we need to reserve the nodes and the cluster master IP
15965 # These may not be allocated to any instances in routed mode, as
15966 # they wouldn't function anyway.
15967 for node in self.cfg.GetAllNodesInfo().values():
15968 for ip in [node.primary_ip, node.secondary_ip]:
15971 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15973 except errors.AddressPoolError:
15976 master_ip = self.cfg.GetClusterInfo().master_ip
15978 pool.Reserve(master_ip)
15979 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15980 except errors.AddressPoolError:
15983 if self.op.add_reserved_ips:
15984 for ip in self.op.add_reserved_ips:
15986 pool.Reserve(ip, external=True)
15987 except errors.AddressPoolError, e:
15988 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15991 for tag in self.op.tags:
15994 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15995 del self.remove_locks[locking.LEVEL_NETWORK]
15998 class LUNetworkRemove(LogicalUnit):
15999 HPATH = "network-remove"
16000 HTYPE = constants.HTYPE_NETWORK
16003 def ExpandNames(self):
16004 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16006 self.needed_locks = {
16007 locking.LEVEL_NETWORK: [self.network_uuid],
16011 def CheckPrereq(self):
16012 """Check prerequisites.
16014     This checks that the given network name exists as a network and that it
16015     is not connected to any node group.
16019 if not self.network_uuid:
16020 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
16021 errors.ECODE_INVAL)
16023     # Verify that the network is not connected to any node group.
16024 node_groups = [group.name
16025 for group in self.cfg.GetAllNodeGroupsInfo().values()
16026 for network in group.networks.keys()
16027 if network == self.network_uuid]
16030       self.LogWarning("Network '%s' is connected to the following"
16031 " node groups: %s" % (self.op.network_name,
16032 utils.CommaJoin(utils.NiceSort(node_groups))))
16033 raise errors.OpPrereqError("Network still connected",
16034 errors.ECODE_STATE)
16036 def BuildHooksEnv(self):
16037 """Build hooks env.
16041 "NETWORK_NAME": self.op.network_name,
16044 def BuildHooksNodes(self):
16045 """Build hooks nodes.
16048 mn = self.cfg.GetMasterNode()
16049 return ([mn], [mn])
16051 def Exec(self, feedback_fn):
16052 """Remove the network.
16056 self.cfg.RemoveNetwork(self.network_uuid)
16057 except errors.ConfigurationError:
16058 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16059 (self.op.network_name, self.network_uuid))
16062 class LUNetworkSetParams(LogicalUnit):
16063 """Modifies the parameters of a network.
16066 HPATH = "network-modify"
16067 HTYPE = constants.HTYPE_NETWORK
16070 def CheckArguments(self):
16071 if (self.op.gateway and
16072 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16073 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16074 " at once", errors.ECODE_INVAL)
16077 def ExpandNames(self):
16078 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16079 self.network = self.cfg.GetNetwork(self.network_uuid)
16080 self.needed_locks = {
16081 locking.LEVEL_NETWORK: [self.network_uuid],
16085 if self.network is None:
16086 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
16087 (self.op.network_name, self.network_uuid),
16088 errors.ECODE_INVAL)
16090 def CheckPrereq(self):
16091 """Check prerequisites.
16094 self.gateway = self.network.gateway
16095 self.network_type = self.network.network_type
16096 self.mac_prefix = self.network.mac_prefix
16097 self.network6 = self.network.network6
16098 self.gateway6 = self.network.gateway6
16099 self.tags = self.network.tags
16101 self.pool = network.AddressPool(self.network)
16103 if self.op.gateway:
16104 if self.op.gateway == constants.VALUE_NONE:
16105 self.gateway = None
16107 self.gateway = self.op.gateway
16108 if self.pool.IsReserved(self.gateway):
16109 raise errors.OpPrereqError("%s is already reserved" %
16110 self.gateway, errors.ECODE_INVAL)
16112 if self.op.network_type:
16113 if self.op.network_type == constants.VALUE_NONE:
16114 self.network_type = None
16116 self.network_type = self.op.network_type
16118 if self.op.mac_prefix:
16119 if self.op.mac_prefix == constants.VALUE_NONE:
16120 self.mac_prefix = None
16122 utils.NormalizeAndValidateMac(self.op.mac_prefix+":00:00:00")
16123 self.mac_prefix = self.op.mac_prefix
16125 if self.op.gateway6:
16126 if self.op.gateway6 == constants.VALUE_NONE:
16127 self.gateway6 = None
16129 self.gateway6 = self.op.gateway6
16131 if self.op.network6:
16132 if self.op.network6 == constants.VALUE_NONE:
16133 self.network6 = None
16135 self.network6 = self.op.network6
16139 def BuildHooksEnv(self):
16140 """Build hooks env.
16144 "name": self.op.network_name,
16145 "network": self.network.network,
16146 "gateway": self.gateway,
16147 "network6": self.network6,
16148 "gateway6": self.gateway6,
16149 "mac_prefix": self.mac_prefix,
16150 "network_type": self.network_type,
16153 return _BuildNetworkHookEnv(**args)
16155 def BuildHooksNodes(self):
16156 """Build hooks nodes.
16159 mn = self.cfg.GetMasterNode()
16160 return ([mn], [mn])
16162 def Exec(self, feedback_fn):
16163 """Modifies the network.
16166 #TODO: reserve/release via temporary reservation manager
16167 # extend cfg.ReserveIp/ReleaseIp with the external flag
16168 if self.op.gateway:
16169 if self.gateway == self.network.gateway:
16170 self.LogWarning("Gateway is already %s" % self.gateway)
16173 self.pool.Reserve(self.gateway, external=True)
16174 if self.network.gateway:
16175 self.pool.Release(self.network.gateway, external=True)
16176 self.network.gateway = self.gateway
16178 if self.op.add_reserved_ips:
16179 for ip in self.op.add_reserved_ips:
16181 if self.pool.IsReserved(ip):
16182 self.LogWarning("IP %s is already reserved" % ip)
16184 self.pool.Reserve(ip, external=True)
16185 except errors.AddressPoolError, e:
16186 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
16188 if self.op.remove_reserved_ips:
16189 for ip in self.op.remove_reserved_ips:
16190 if ip == self.network.gateway:
16191 self.LogWarning("Cannot unreserve Gateway's IP")
16194 if not self.pool.IsReserved(ip):
16195 self.LogWarning("IP %s is already unreserved" % ip)
16197 self.pool.Release(ip, external=True)
16198 except errors.AddressPoolError, e:
16199 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
16201 if self.op.mac_prefix:
16202 self.network.mac_prefix = self.mac_prefix
16204 if self.op.network6:
16205 self.network.network6 = self.network6
16207 if self.op.gateway6:
16208 self.network.gateway6 = self.gateway6
16210 if self.op.network_type:
16211 self.network.network_type = self.network_type
16213 self.pool.Validate()
16215 self.cfg.Update(self.network, feedback_fn)
16218 class _NetworkQuery(_QueryBase):
16219 FIELDS = query.NETWORK_FIELDS
16221 def ExpandNames(self, lu):
16222 lu.needed_locks = {}
16224 self._all_networks = lu.cfg.GetAllNetworksInfo()
16225 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
16228 self.wanted = [name_to_uuid[name]
16229 for name in utils.NiceSort(name_to_uuid.keys())]
16231 # Accept names to be either names or UUIDs.
16234 all_uuid = frozenset(self._all_networks.keys())
16236 for name in self.names:
16237 if name in all_uuid:
16238 self.wanted.append(name)
16239 elif name in name_to_uuid:
16240 self.wanted.append(name_to_uuid[name])
16242 missing.append(name)
16245 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16246 errors.ECODE_NOENT)
16248 def DeclareLocks(self, lu, level):
16251 def _GetQueryData(self, lu):
16252 """Computes the list of networks and their attributes.
16255 do_instances = query.NETQ_INST in self.requested_data
16256 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
16257 do_stats = query.NETQ_STATS in self.requested_data
16258 cluster = lu.cfg.GetClusterInfo()
16260 network_to_groups = None
16261 network_to_instances = None
16264 # For NETQ_GROUP, we need to map network->[groups]
16266 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16267 network_to_groups = dict((uuid, []) for uuid in self.wanted)
16268 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
16271 all_instances = lu.cfg.GetAllInstancesInfo()
16272 all_nodes = lu.cfg.GetAllNodesInfo()
16273 network_to_instances = dict((uuid, []) for uuid in self.wanted)
16276 for group in all_groups.values():
16278 group_nodes = [node.name for node in all_nodes.values() if
16279 node.group == group.uuid]
16280 group_instances = [instance for instance in all_instances.values()
16281 if instance.primary_node in group_nodes]
16283 for net_uuid in group.networks.keys():
16284 if net_uuid in network_to_groups:
16285 netparams = group.networks[net_uuid]
16286 mode = netparams[constants.NIC_MODE]
16287 link = netparams[constants.NIC_LINK]
16288 info = group.name + '(' + mode + ', ' + link + ')'
16289 network_to_groups[net_uuid].append(info)
16292 for instance in group_instances:
16293 for nic in instance.nics:
16294 if nic.network == self._all_networks[net_uuid].name:
16295 network_to_instances[net_uuid].append(instance.name)
16300 for uuid, net in self._all_networks.items():
16301 if uuid in self.wanted:
16302 pool = network.AddressPool(net)
16304 "free_count": pool.GetFreeCount(),
16305 "reserved_count": pool.GetReservedCount(),
16306 "map": pool.GetMap(),
16307 "external_reservations": ", ".join(pool.GetExternalReservations()),
16310 return query.NetworkQueryData([self._all_networks[uuid]
16311 for uuid in self.wanted],
16313 network_to_instances,
16317 class LUNetworkQuery(NoHooksLU):
16318 """Logical unit for querying networks.
16323 def CheckArguments(self):
16324 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16325 self.op.output_fields, False)
16327 def ExpandNames(self):
16328 self.nq.ExpandNames(self)
16330 def Exec(self, feedback_fn):
16331 return self.nq.OldStyleQuery(self)
16335 class LUNetworkConnect(LogicalUnit):
16336 """Connect a network to a nodegroup
16339 HPATH = "network-connect"
16340 HTYPE = constants.HTYPE_NETWORK
16343 def ExpandNames(self):
16344 self.network_name = self.op.network_name
16345 self.group_name = self.op.group_name
16346 self.network_mode = self.op.network_mode
16347 self.network_link = self.op.network_link
16349 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16350 self.network = self.cfg.GetNetwork(self.network_uuid)
16351 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16352 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16354 self.needed_locks = {
16355 locking.LEVEL_INSTANCE: [],
16356 locking.LEVEL_NODEGROUP: [self.group_uuid],
16358 self.share_locks[locking.LEVEL_INSTANCE] = 1
16360 def DeclareLocks(self, level):
16361 if level == locking.LEVEL_INSTANCE:
16362 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16364 # Lock instances optimistically, needs verification once group lock has
16366 self.needed_locks[locking.LEVEL_INSTANCE] = \
16367 self.cfg.GetNodeGroupInstances(self.group_uuid)
16369 def BuildHooksEnv(self):
16371 ret["GROUP_NAME"] = self.group_name
16372 ret["GROUP_NETWORK_MODE"] = self.network_mode
16373 ret["GROUP_NETWORK_LINK"] = self.network_link
16374 ret.update(_BuildNetworkHookEnvByObject(self, self.network))
16377 def BuildHooksNodes(self):
16378 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16379 return (nodes, nodes)
16382 def CheckPrereq(self):
16383 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16386 if self.network is None:
16387 raise errors.OpPrereqError("Network %s does not exist" %
16388 self.network_name, errors.ECODE_INVAL)
16390 self.netparams = dict()
16391 self.netparams[constants.NIC_MODE] = self.network_mode
16392 self.netparams[constants.NIC_LINK] = self.network_link
16393 objects.NIC.CheckParameterSyntax(self.netparams)
16395 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16396 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16397 self.connected = False
16398 if self.network_uuid in self.group.networks:
16399 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16400 (self.network_name, self.group.name))
16401 self.connected = True
16404 pool = network.AddressPool(self.network)
16405 if self.op.conflicts_check:
16406 groupinstances = []
16407 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16408 groupinstances.append(self.cfg.GetInstanceInfo(n))
16409 instances = [(instance.name, idx, nic.ip)
16410 for instance in groupinstances
16411 for idx, nic in enumerate(instance.nics)
16412 if (not nic.network and pool._Contains(nic.ip))]
16414         self.LogWarning("The following occurrences use IPs from network %s"
16415                         " that is about to be connected to nodegroup %s: %s" %
16416 (self.network_name, self.group.name,
16418 raise errors.OpPrereqError("Conflicting IPs found."
16419 " Please remove/modify"
16420 " corresponding NICs",
16421 errors.ECODE_INVAL)
16423 def Exec(self, feedback_fn):
16427 self.group.networks[self.network_uuid] = self.netparams
16428 self.cfg.Update(self.group, feedback_fn)
16431 class LUNetworkDisconnect(LogicalUnit):
16432   """Disconnect a network from a nodegroup
16435 HPATH = "network-disconnect"
16436 HTYPE = constants.HTYPE_NETWORK
16439 def ExpandNames(self):
16440 self.network_name = self.op.network_name
16441 self.group_name = self.op.group_name
16443 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16444 self.network = self.cfg.GetNetwork(self.network_uuid)
16445 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16446 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16448 self.needed_locks = {
16449 locking.LEVEL_INSTANCE: [],
16450 locking.LEVEL_NODEGROUP: [self.group_uuid],
16452 self.share_locks[locking.LEVEL_INSTANCE] = 1
16454 def DeclareLocks(self, level):
16455 if level == locking.LEVEL_INSTANCE:
16456 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16458 # Lock instances optimistically, needs verification once group lock has
16460 self.needed_locks[locking.LEVEL_INSTANCE] = \
16461 self.cfg.GetNodeGroupInstances(self.group_uuid)
16463 def BuildHooksEnv(self):
16465 ret["GROUP_NAME"] = self.group_name
16466 ret.update(_BuildNetworkHookEnvByObject(self, self.network))
16469 def BuildHooksNodes(self):
16470 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16471 return (nodes, nodes)
16474 def CheckPrereq(self):
16475 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16478 self.connected = True
16479 if self.network_uuid not in self.group.networks:
16480 self.LogWarning("Network '%s' is"
16481 " not mapped to group '%s'" %
16482 (self.network_name, self.group.name))
16483 self.connected = False
16486 if self.op.conflicts_check:
16487 groupinstances = []
16488 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16489 groupinstances.append(self.cfg.GetInstanceInfo(n))
16490 instances = [(instance.name, idx, nic.ip)
16491 for instance in groupinstances
16492 for idx, nic in enumerate(instance.nics)
16493 if nic.network == self.network_name]
16495         self.LogWarning("The following occurrences use IPs from network %s"
16496                         " that is about to be disconnected from the nodegroup"
16498 (self.network_name, self.group.name,
16500 raise errors.OpPrereqError("Conflicting IPs."
16501 " Please remove/modify"
16502                                    " corresponding NICs",
16503 errors.ECODE_INVAL)
16505 def Exec(self, feedback_fn):
16506 if not self.connected:
16509 del self.group.networks[self.network_uuid]
16510 self.cfg.Update(self.group, feedback_fn)
16513 #: Query type implementations
16515 constants.QR_CLUSTER: _ClusterQuery,
16516 constants.QR_INSTANCE: _InstanceQuery,
16517 constants.QR_NODE: _NodeQuery,
16518 constants.QR_GROUP: _GroupQuery,
16519 constants.QR_NETWORK: _NetworkQuery,
16520 constants.QR_OS: _OsQuery,
16521 constants.QR_EXTSTORAGE: _ExtStorageQuery,
16522 constants.QR_EXPORT: _ExportQuery,
16525 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
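# Descriptive note (not part of the original source): the assertion above
# guarantees that every query resource listed in constants.QR_VIA_OP has a
# matching implementation in _QUERY_IMPL, so _GetQueryImplementation below can
# only fail for names outside that set.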
16528 def _GetQueryImplementation(name):
16529   """Returns the implementation for a query type.
16531 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16535 return _QUERY_IMPL[name]
16537 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16538 errors.ECODE_INVAL)
16540 def _CheckForConflictingIp(lu, ip, node):
16541 """In case of conflicting ip raise error.
16544 @param ip: ip address
16546 @param node: node name
16549 (conf_net, conf_netparams) = lu.cfg.CheckIPInNodeGroup(ip, node)
16550 if conf_net is not None:
16551 raise errors.OpPrereqError("Conflicting IP found:"
16552 " %s <> %s." % (ip, conf_net),
16553 errors.ECODE_INVAL)
16555 return (None, None)