4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
63 from ganeti import runtime
64 from ganeti import network
66 import ganeti.masterd.instance # pylint: disable=W0611
69 #: Size of DRBD meta block device
73 INSTANCE_DOWN = [constants.ADMINST_DOWN]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
77 #: Instance status in which an instance can be marked as offline/online
78 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
79 constants.ADMINST_OFFLINE,
84 """Data container for LU results with jobs.
86 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
87 by L{mcpu._ProcessResult}. The latter will then submit the jobs
88 contained in the C{jobs} attribute and include the job IDs in the opcode
92 def __init__(self, jobs, **kwargs):
93 """Initializes this class.
95 Additional return values can be specified as keyword arguments.
97 @type jobs: list of lists of L{opcodes.OpCode}
98 @param jobs: A list of lists of opcode objects
105 class LogicalUnit(object):
106 """Logical Unit base class.
108 Subclasses must follow these rules:
109 - implement ExpandNames
110 - implement CheckPrereq (except when tasklets are used)
111 - implement Exec (except when tasklets are used)
112 - implement BuildHooksEnv
113 - implement BuildHooksNodes
114 - redefine HPATH and HTYPE
115 - optionally redefine their run requirements:
116 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
118 Note that all commands require root permissions.
120 @ivar dry_run_result: the value (if any) that will be returned to the caller
121 in dry-run mode (signalled by opcode dry_run parameter)
128 def __init__(self, processor, op, context, rpc_runner):
129 """Constructor for LogicalUnit.
131 This needs to be overridden in derived classes in order to check op
135 self.proc = processor
137 self.cfg = context.cfg
138 self.glm = context.glm
140 self.owned_locks = context.glm.list_owned
141 self.context = context
142 self.rpc = rpc_runner
143 # Dicts used to declare locking needs to mcpu
144 self.needed_locks = None
145 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
147 self.remove_locks = {}
148 # Used to force good behavior when calling helper functions
149 self.recalculate_locks = {}
151 self.Log = processor.Log # pylint: disable=C0103
152 self.LogWarning = processor.LogWarning # pylint: disable=C0103
153 self.LogInfo = processor.LogInfo # pylint: disable=C0103
154 self.LogStep = processor.LogStep # pylint: disable=C0103
155 # support for dry-run
156 self.dry_run_result = None
157 # support for generic debug attribute
158 if (not hasattr(self.op, "debug_level") or
159 not isinstance(self.op.debug_level, int)):
160 self.op.debug_level = 0
165 # Validate opcode parameters and set defaults
166 self.op.Validate(True)
168 self.CheckArguments()
170 def CheckArguments(self):
171 """Check syntactic validity for the opcode arguments.
173 This method is for doing a simple syntactic check and ensuring
174 validity of opcode parameters, without any cluster-related
175 checks. While the same can be accomplished in ExpandNames and/or
176 CheckPrereq, doing these separately is better because:
178 - ExpandNames is left as purely a lock-related function
179 - CheckPrereq is run after we have acquired locks (and possible
182 The function is allowed to change the self.op attribute so that
183 later methods can no longer worry about missing parameters.
188 def ExpandNames(self):
189 """Expand names for this LU.
191 This method is called before starting to execute the opcode, and it should
192 update all the parameters of the opcode to their canonical form (e.g. a
193 short node name must be fully expanded after this method has successfully
194 completed). This way locking, hooks, logging, etc. can work correctly.
196 LUs which implement this method must also populate the self.needed_locks
197 member, as a dict with lock levels as keys, and a list of needed lock names
200 - use an empty dict if you don't need any lock
201 - if you don't need any lock at a particular level omit that
202 level (note that in this case C{DeclareLocks} won't be called
203 at all for that level)
204 - if you need locks at a level, but you can't calculate it in
205 this function, initialise that level with an empty list and do
206 further processing in L{LogicalUnit.DeclareLocks} (see that
207 function's docstring)
208 - don't put anything for the BGL level
209 - if you want all locks at a level use L{locking.ALL_SET} as a value
211 If you need to share locks (rather than acquire them exclusively) at one
212 level you can modify self.share_locks, setting a true value (usually 1) for
213 that level. By default locks are not shared.
215 This function can also define a list of tasklets, which then will be
216 executed in order instead of the usual LU-level CheckPrereq and Exec
217 functions, if those are not defined by the LU.
221 # Acquire all nodes and one instance
222 self.needed_locks = {
223 locking.LEVEL_NODE: locking.ALL_SET,
224 locking.LEVEL_INSTANCE: ['instance1.example.com'],
226 # Acquire just two nodes
227 self.needed_locks = {
228 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
231 self.needed_locks = {} # No, you can't leave it to the default value None
234 # The implementation of this method is mandatory only if the new LU is
235 # concurrent, so that old LUs don't need to be changed all at the same
238 self.needed_locks = {} # Exclusive LUs don't need locks.
240 raise NotImplementedError
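# Illustrative sketch (not from the original module): a possible ExpandNames
# for a hypothetical LU that wants all node locks shared and one instance lock
# held exclusively; the instance name is only an example value.
#
#   def ExpandNames(self):
#     self.needed_locks = {
#       locking.LEVEL_INSTANCE: ["instance1.example.com"],
#       locking.LEVEL_NODE: locking.ALL_SET,
#       }
#     # a true value (usually 1) marks the level as shared
#     self.share_locks[locking.LEVEL_NODE] = 1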
242 def DeclareLocks(self, level):
243 """Declare LU locking needs for a level
245 While most LUs can just declare their locking needs at ExpandNames time,
246 sometimes there's the need to calculate some locks after having acquired
247 the ones before. This function is called just before acquiring locks at a
248 particular level, but after acquiring the ones at lower levels, and permits
249 such calculations. It can be used to modify self.needed_locks, and by
250 default it does nothing.
252 This function is only called if you have something already set in
253 self.needed_locks for the level.
255 @param level: Locking level which is going to be locked
256 @type level: member of L{ganeti.locking.LEVELS}
260 def CheckPrereq(self):
261 """Check prerequisites for this LU.
263 This method should check that the prerequisites for the execution
264 of this LU are fulfilled. It can do internode communication, but
265 it should be idempotent - no cluster or system changes are
268 The method should raise errors.OpPrereqError in case something is
269 not fulfilled. Its return value is ignored.
271 This method should also update all the parameters of the opcode to
272 their canonical form if it hasn't been done by ExpandNames before.
275 if self.tasklets is not None:
276 for (idx, tl) in enumerate(self.tasklets):
277 logging.debug("Checking prerequisites for tasklet %s/%s",
278 idx + 1, len(self.tasklets))
283 def Exec(self, feedback_fn):
286 This method should implement the actual work. It should raise
287 errors.OpExecError for failures that are somewhat dealt with in
291 if self.tasklets is not None:
292 for (idx, tl) in enumerate(self.tasklets):
293 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
296 raise NotImplementedError
298 def BuildHooksEnv(self):
299 """Build hooks environment for this LU.
302 @return: Dictionary containing the environment that will be used for
303 running the hooks for this LU. The keys of the dict must not be prefixed
304 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
305 will extend the environment with additional variables. If no environment
306 should be defined, an empty dictionary should be returned (not C{None}).
307 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311 raise NotImplementedError
313 def BuildHooksNodes(self):
314 """Build list of nodes to run LU's hooks.
316 @rtype: tuple; (list, list)
317 @return: Tuple containing a list of node names on which the hook
318 should run before the execution and a list of node names on which the
319 hook should run after the execution. If there are no nodes, an empty
320 list should be returned (and not None).
321 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
325 raise NotImplementedError
327 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
328 """Notify the LU about the results of its hooks.
330 This method is called every time a hooks phase is executed, and notifies
331 the Logical Unit about the hooks' result. The LU can then use it to alter
332 its result based on the hooks. By default the method does nothing and the
333 previous result is passed back unchanged but any LU can define it if it
334 wants to use the local cluster hook-scripts somehow.
336 @param phase: one of L{constants.HOOKS_PHASE_POST} or
337 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
338 @param hook_results: the results of the multi-node hooks rpc call
339 @param feedback_fn: function used to send feedback back to the caller
340 @param lu_result: the previous Exec result this LU had, or None
342 @return: the new Exec result, based on the previous result
346 # API must be kept, thus we ignore the unused-argument and
347 # could-be-a-function warnings
348 # pylint: disable=W0613,R0201
351 def _ExpandAndLockInstance(self):
352 """Helper function to expand and lock an instance.
354 Many LUs that work on an instance take its name in self.op.instance_name
355 and need to expand it and then declare the expanded name for locking. This
356 function does it, and then updates self.op.instance_name to the expanded
357 name. It also initializes needed_locks as a dict, if this hasn't been done
361 if self.needed_locks is None:
362 self.needed_locks = {}
364 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
365 "_ExpandAndLockInstance called with instance-level locks set"
366 self.op.instance_name = _ExpandInstanceName(self.cfg,
367 self.op.instance_name)
368 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
370 def _LockInstancesNodes(self, primary_only=False,
371 level=locking.LEVEL_NODE):
372 """Helper function to declare instances' nodes for locking.
374 This function should be called after locking one or more instances to lock
375 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
376 with all primary or secondary nodes for instances already locked and
377 present in self.needed_locks[locking.LEVEL_INSTANCE].
379 It should be called from DeclareLocks, and for safety only works if
380 self.recalculate_locks[locking.LEVEL_NODE] is set.
382 In the future it may grow parameters to just lock some instance's nodes, or
383 to just lock primaries or secondary nodes, if needed.
385 It should be called in DeclareLocks in a way similar to::
387 if level == locking.LEVEL_NODE:
388 self._LockInstancesNodes()
390 @type primary_only: boolean
391 @param primary_only: only lock primary nodes of locked instances
392 @param level: Which lock level to use for locking nodes
395 assert level in self.recalculate_locks, \
396 "_LockInstancesNodes helper function called with no nodes to recalculate"
398 # TODO: check if we've really been called with the instance locks held
400 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
401 # future we might want to have different behaviors depending on the value
402 # of self.recalculate_locks[locking.LEVEL_NODE]
404 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
405 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
406 wanted_nodes.append(instance.primary_node)
408 wanted_nodes.extend(instance.secondary_nodes)
410 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
411 self.needed_locks[level] = wanted_nodes
412 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
413 self.needed_locks[level].extend(wanted_nodes)
415 raise errors.ProgrammerError("Unknown recalculation mode")
417 del self.recalculate_locks[level]
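# Illustrative sketch (not from the original module): how a hypothetical LU can
# combine _ExpandAndLockInstance with deferred node locking, following the
# pattern described in the docstrings above.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()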
420 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
421 """Simple LU which runs no hooks.
423 This LU is intended as a parent for other LogicalUnits which will
424 run no hooks, in order to reduce duplicate code.
430 def BuildHooksEnv(self):
431 """Empty BuildHooksEnv for NoHooksLu.
433 This just raises an error.
436 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
438 def BuildHooksNodes(self):
439 """Empty BuildHooksNodes for NoHooksLU.
442 raise AssertionError("BuildHooksNodes called for NoHooksLU")
446 """Tasklet base class.
448 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
449 they can mix legacy code with tasklets. Locking needs to be done in the LU,
450 tasklets know nothing about locks.
452 Subclasses must follow these rules:
453 - Implement CheckPrereq
457 def __init__(self, lu):
464 def CheckPrereq(self):
465 """Check prerequisites for this tasklets.
467 This method should check whether the prerequisites for the execution of
468 this tasklet are fulfilled. It can do internode communication, but it
469 should be idempotent - no cluster or system changes are allowed.
471 The method should raise errors.OpPrereqError in case something is not
472 fulfilled. Its return value is ignored.
474 This method should also update all parameters to their canonical form if it
475 hasn't been done before.
480 def Exec(self, feedback_fn):
481 """Execute the tasklet.
483 This method should implement the actual work. It should raise
484 errors.OpExecError for failures that are somewhat dealt with in code, or
488 raise NotImplementedError
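# Illustrative sketch (not from the original module): the minimal shape of a
# tasklet subclass; the class and attribute names are made up for the example.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       # idempotent checks only, no cluster changes
#       self.instance = self.lu.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Working on %s" % self.instance_name)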
492 """Base for query utility classes.
495 #: Attribute holding field definitions
501 def __init__(self, qfilter, fields, use_locking):
502 """Initializes this class.
505 self.use_locking = use_locking
507 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
508 namefield=self.SORT_FIELD)
509 self.requested_data = self.query.RequestedData()
510 self.names = self.query.RequestedNames()
512 # Sort only if no names were requested
513 self.sort_by_name = not self.names
515 self.do_locking = None
518 def _GetNames(self, lu, all_names, lock_level):
519 """Helper function to determine names asked for in the query.
523 names = lu.owned_locks(lock_level)
527 if self.wanted == locking.ALL_SET:
528 assert not self.names
529 # caller didn't specify names, so ordering is not important
530 return utils.NiceSort(names)
532 # caller specified names and we must keep the same order
534 assert not self.do_locking or lu.glm.is_owned(lock_level)
536 missing = set(self.wanted).difference(names)
538 raise errors.OpExecError("Some items were removed before retrieving"
539 " their data: %s" % missing)
541 # Return expanded names
544 def ExpandNames(self, lu):
545 """Expand names for this query.
547 See L{LogicalUnit.ExpandNames}.
550 raise NotImplementedError()
552 def DeclareLocks(self, lu, level):
553 """Declare locks for this query.
555 See L{LogicalUnit.DeclareLocks}.
558 raise NotImplementedError()
560 def _GetQueryData(self, lu):
561 """Collects all data for this query.
563 @return: Query data object
566 raise NotImplementedError()
568 def NewStyleQuery(self, lu):
569 """Collect data and execute query.
572 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
573 sort_by_name=self.sort_by_name)
575 def OldStyleQuery(self, lu):
576 """Collect data and execute query.
579 return self.query.OldStyleQuery(self._GetQueryData(lu),
580 sort_by_name=self.sort_by_name)
584 """Returns a dict declaring all lock levels shared.
587 return dict.fromkeys(locking.LEVELS, 1)
590 def _MakeLegacyNodeInfo(data):
591 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
593 Converts the data into a single dictionary. This is fine for most use cases,
594 but some require information from more than one volume group or hypervisor.
597 (bootid, (vg_info, ), (hv_info, )) = data
599 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
604 def _AnnotateDiskParams(instance, devs, cfg):
605 """Little helper wrapper to the rpc annotation method.
607 @param instance: The instance object
608 @type devs: List of L{objects.Disk}
609 @param devs: The root devices (not any of its children!)
610 @param cfg: The config object
611 @returns: The annotated disk copies
612 @see L{rpc.AnnotateDiskParams}
615 return rpc.AnnotateDiskParams(instance.disk_template, devs,
616 cfg.GetInstanceDiskParams(instance))
619 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
621 """Checks if node groups for locked instances are still correct.
623 @type cfg: L{config.ConfigWriter}
624 @param cfg: Cluster configuration
625 @type instances: dict; string as key, L{objects.Instance} as value
626 @param instances: Dictionary, instance name as key, instance object as value
627 @type owned_groups: iterable of string
628 @param owned_groups: List of owned groups
629 @type owned_nodes: iterable of string
630 @param owned_nodes: List of owned nodes
631 @type cur_group_uuid: string or None
632 @param cur_group_uuid: Optional group UUID to check against instance's groups
635 for (name, inst) in instances.items():
636 assert owned_nodes.issuperset(inst.all_nodes), \
637 "Instance %s's nodes changed while we kept the lock" % name
639 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
641 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
642 "Instance %s has no node in group %s" % (name, cur_group_uuid)
645 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
646 """Checks if the owned node groups are still correct for an instance.
648 @type cfg: L{config.ConfigWriter}
649 @param cfg: The cluster configuration
650 @type instance_name: string
651 @param instance_name: Instance name
652 @type owned_groups: set or frozenset
653 @param owned_groups: List of currently owned node groups
656 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
658 if not owned_groups.issuperset(inst_groups):
659 raise errors.OpPrereqError("Instance %s's node groups changed since"
660 " locks were acquired, current groups are"
661 " are '%s', owning groups '%s'; retry the"
664 utils.CommaJoin(inst_groups),
665 utils.CommaJoin(owned_groups)),
671 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
672 """Checks if the instances in a node group are still correct.
674 @type cfg: L{config.ConfigWriter}
675 @param cfg: The cluster configuration
676 @type group_uuid: string
677 @param group_uuid: Node group UUID
678 @type owned_instances: set or frozenset
679 @param owned_instances: List of currently owned instances
682 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
683 if owned_instances != wanted_instances:
684 raise errors.OpPrereqError("Instances in node group '%s' changed since"
685 " locks were acquired, wanted '%s', have '%s';"
686 " retry the operation" %
688 utils.CommaJoin(wanted_instances),
689 utils.CommaJoin(owned_instances)),
692 return wanted_instances
695 def _SupportsOob(cfg, node):
696 """Tells if node supports OOB.
698 @type cfg: L{config.ConfigWriter}
699 @param cfg: The cluster configuration
700 @type node: L{objects.Node}
701 @param node: The node
702 @return: The OOB script if supported or an empty string otherwise
705 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
708 def _GetWantedNodes(lu, nodes):
709 """Returns list of checked and expanded node names.
711 @type lu: L{LogicalUnit}
712 @param lu: the logical unit on whose behalf we execute
714 @param nodes: list of node names or None for all nodes
716 @return: the list of nodes, sorted
717 @raise errors.ProgrammerError: if the nodes parameter is wrong type
721 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
723 return utils.NiceSort(lu.cfg.GetNodeList())
726 def _GetWantedInstances(lu, instances):
727 """Returns list of checked and expanded instance names.
729 @type lu: L{LogicalUnit}
730 @param lu: the logical unit on whose behalf we execute
731 @type instances: list
732 @param instances: list of instance names or None for all instances
734 @return: the list of instances, sorted
735 @raise errors.OpPrereqError: if the instances parameter is wrong type
736 @raise errors.OpPrereqError: if any of the passed instances is not found
740 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
742 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
746 def _GetUpdatedParams(old_params, update_dict,
747 use_default=True, use_none=False):
748 """Return the new version of a parameter dictionary.
750 @type old_params: dict
751 @param old_params: old parameters
752 @type update_dict: dict
753 @param update_dict: dict containing new parameter values, or
754 constants.VALUE_DEFAULT to reset the parameter to its default
756 @type use_default: boolean
757 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
758 values as 'to be deleted' values
759 @type use_none: boolean
760 @param use_none: whether to recognise C{None} values as 'to be
763 @return: the new parameter dictionary
766 params_copy = copy.deepcopy(old_params)
767 for key, val in update_dict.iteritems():
768 if ((use_default and val == constants.VALUE_DEFAULT) or
769 (use_none and val is None)):
775 params_copy[key] = val
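# Illustrative sketch (not from the original module): expected behaviour of
# _GetUpdatedParams with the default flags; the parameter names and values are
# made-up examples.
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
#   new = _GetUpdatedParams(old, upd)
#   # with use_default=True the VALUE_DEFAULT entry is dropped, so new would be
#   # {"kernel_path": "/boot/vmlinuz", "serial_console": True}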
779 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
780 """Return the new version of a instance policy.
782 @param group_policy: whether this policy applies to a group and thus
783 we should support removal of policy entries
786 use_none = use_default = group_policy
787 ipolicy = copy.deepcopy(old_ipolicy)
788 for key, value in new_ipolicy.items():
789 if key not in constants.IPOLICY_ALL_KEYS:
790 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
792 if key in constants.IPOLICY_ISPECS:
793 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
794 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
796 use_default=use_default)
798 if (not value or value == [constants.VALUE_DEFAULT] or
799 value == constants.VALUE_DEFAULT):
803 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
804 " on the cluster'" % key,
807 if key in constants.IPOLICY_PARAMETERS:
808 # FIXME: we assume all such values are float
810 ipolicy[key] = float(value)
811 except (TypeError, ValueError), err:
812 raise errors.OpPrereqError("Invalid value for attribute"
813 " '%s': '%s', error: %s" %
814 (key, value, err), errors.ECODE_INVAL)
816 # FIXME: we assume all others are lists; this should be redone
818 ipolicy[key] = list(value)
820 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
821 except errors.ConfigurationError, err:
822 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
827 def _UpdateAndVerifySubDict(base, updates, type_check):
828 """Updates and verifies a dict with sub dicts of the same type.
830 @param base: The dict with the old data
831 @param updates: The dict with the new data
832 @param type_check: Dict suitable to ForceDictType to verify correct types
833 @returns: A new dict with updated and verified values
837 new = _GetUpdatedParams(old, value)
838 utils.ForceDictType(new, type_check)
841 ret = copy.deepcopy(base)
842 ret.update(dict((key, fn(base.get(key, {}), value))
843 for key, value in updates.items()))
847 def _MergeAndVerifyHvState(op_input, obj_input):
848 """Combines the hv state from an opcode with the one of the object
850 @param op_input: The input dict from the opcode
851 @param obj_input: The input dict from the objects
852 @return: The verified and updated dict
856 invalid_hvs = set(op_input) - constants.HYPER_TYPES
858 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
859 " %s" % utils.CommaJoin(invalid_hvs),
861 if obj_input is None:
863 type_check = constants.HVSTS_PARAMETER_TYPES
864 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
869 def _MergeAndVerifyDiskState(op_input, obj_input):
870 """Combines the disk state from an opcode with the one of the object
872 @param op_input: The input dict from the opcode
873 @param obj_input: The input dict from the objects
874 @return: The verified and updated dict
877 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
879 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
880 utils.CommaJoin(invalid_dst),
882 type_check = constants.DSS_PARAMETER_TYPES
883 if obj_input is None:
885 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
887 for key, value in op_input.items())
892 def _ReleaseLocks(lu, level, names=None, keep=None):
893 """Releases locks owned by an LU.
895 @type lu: L{LogicalUnit}
896 @param level: Lock level
897 @type names: list or None
898 @param names: Names of locks to release
899 @type keep: list or None
900 @param keep: Names of locks to retain
903 assert not (keep is not None and names is not None), \
904 "Only one of the 'names' and the 'keep' parameters can be given"
906 if names is not None:
907 should_release = names.__contains__
909 should_release = lambda name: name not in keep
911 should_release = None
913 owned = lu.owned_locks(level)
915 # Not owning any lock at this level, do nothing
922 # Determine which locks to release
924 if should_release(name):
929 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
931 # Release just some locks
932 lu.glm.release(level, names=release)
934 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
937 lu.glm.release(level)
939 assert not lu.glm.is_owned(level), "No locks should be owned"
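# Illustrative sketch (not from the original module): typical calls to
# _ReleaseLocks from within an LU; the node names are example values only.
#
#   # keep only the locks still needed at the node level
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=["node1.example.com", "node2.example.com"])
#   # or release everything owned at that level
#   _ReleaseLocks(self, locking.LEVEL_NODE)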
942 def _MapInstanceDisksToNodes(instances):
943 """Creates a map from (node, volume) to instance name.
945 @type instances: list of L{objects.Instance}
946 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
949 return dict(((node, vol), inst.name)
950 for inst in instances
951 for (node, vols) in inst.MapLVsByNode().items()
955 def _RunPostHook(lu, node_name):
956 """Runs the post-hook for an opcode on a single node.
959 hm = lu.proc.BuildHooksManager(lu)
961 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
963 # pylint: disable=W0702
964 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
967 def _CheckOutputFields(static, dynamic, selected):
968 """Checks whether all selected fields are valid.
970 @type static: L{utils.FieldSet}
971 @param static: static fields set
972 @type dynamic: L{utils.FieldSet}
973 @param dynamic: dynamic fields set
980 delta = f.NonMatching(selected)
982 raise errors.OpPrereqError("Unknown output fields selected: %s"
983 % ",".join(delta), errors.ECODE_INVAL)
986 def _CheckGlobalHvParams(params):
987 """Validates that given hypervisor params are not global ones.
989 This will ensure that instances don't get customised versions of
993 used_globals = constants.HVC_GLOBALS.intersection(params)
995 msg = ("The following hypervisor parameters are global and cannot"
996 " be customized at instance level, please modify them at"
997 " cluster level: %s" % utils.CommaJoin(used_globals))
998 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1001 def _CheckNodeOnline(lu, node, msg=None):
1002 """Ensure that a given node is online.
1004 @param lu: the LU on behalf of which we make the check
1005 @param node: the node to check
1006 @param msg: if passed, should be a message to replace the default one
1007 @raise errors.OpPrereqError: if the node is offline
1011 msg = "Can't use offline node"
1012 if lu.cfg.GetNodeInfo(node).offline:
1013 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1016 def _CheckNodeNotDrained(lu, node):
1017 """Ensure that a given node is not drained.
1019 @param lu: the LU on behalf of which we make the check
1020 @param node: the node to check
1021 @raise errors.OpPrereqError: if the node is drained
1024 if lu.cfg.GetNodeInfo(node).drained:
1025 raise errors.OpPrereqError("Can't use drained node %s" % node,
1029 def _CheckNodeVmCapable(lu, node):
1030 """Ensure that a given node is vm capable.
1032 @param lu: the LU on behalf of which we make the check
1033 @param node: the node to check
1034 @raise errors.OpPrereqError: if the node is not vm capable
1037 if not lu.cfg.GetNodeInfo(node).vm_capable:
1038 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1042 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1043 """Ensure that a node supports a given OS.
1045 @param lu: the LU on behalf of which we make the check
1046 @param node: the node to check
1047 @param os_name: the OS to query about
1048 @param force_variant: whether to ignore variant errors
1049 @raise errors.OpPrereqError: if the node does not support the OS
1052 result = lu.rpc.call_os_get(node, os_name)
1053 result.Raise("OS '%s' not in supported OS list for node %s" %
1055 prereq=True, ecode=errors.ECODE_INVAL)
1056 if not force_variant:
1057 _CheckOSVariant(result.payload, os_name)
1060 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1061 """Ensure that a node has the given secondary ip.
1063 @type lu: L{LogicalUnit}
1064 @param lu: the LU on behalf of which we make the check
1066 @param node: the node to check
1067 @type secondary_ip: string
1068 @param secondary_ip: the ip to check
1069 @type prereq: boolean
1070 @param prereq: whether to throw a prerequisite or an execute error
1071 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1072 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1075 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1076 result.Raise("Failure checking secondary ip on node %s" % node,
1077 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1078 if not result.payload:
1079 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1080 " please fix and re-run this command" % secondary_ip)
1082 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1084 raise errors.OpExecError(msg)
1087 def _GetClusterDomainSecret():
1088 """Reads the cluster domain secret.
1091 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1095 def _CheckInstanceState(lu, instance, req_states, msg=None):
1096 """Ensure that an instance is in one of the required states.
1098 @param lu: the LU on behalf of which we make the check
1099 @param instance: the instance to check
1100 @param msg: if passed, should be a message to replace the default one
1101 @raise errors.OpPrereqError: if the instance is not in the required state
1105 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1106 if instance.admin_state not in req_states:
1107 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1108 (instance.name, instance.admin_state, msg),
1111 if constants.ADMINST_UP not in req_states:
1112 pnode = instance.primary_node
1113 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1114 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1115 prereq=True, ecode=errors.ECODE_ENVIRON)
1117 if instance.name in ins_l.payload:
1118 raise errors.OpPrereqError("Instance %s is running, %s" %
1119 (instance.name, msg), errors.ECODE_STATE)
1122 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1123 """Computes if value is in the desired range.
1125 @param name: name of the parameter for which we perform the check
1126 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1128 @param ipolicy: dictionary containing min, max and std values
1129 @param value: actual value that we want to use
1130 @return: None or element not meeting the criteria
1134 if value in [None, constants.VALUE_AUTO]:
1136 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1137 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1138 if value > max_v or min_v > value:
1140 fqn = "%s/%s" % (name, qualifier)
1143 return ("%s value %s is not in range [%s, %s]" %
1144 (fqn, value, min_v, max_v))
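# Illustrative sketch (not from the original module): assuming an ipolicy whose
# minimum and maximum disk size are 1024 and 4096, a call such as
#
#   _ComputeMinMaxSpec(constants.ISPEC_DISK_SIZE, "1", ipolicy, 8192)
#
# would return a message like "disk-size/1 value 8192 is not in range
# [1024, 4096]", while a value inside the range (or None/VALUE_AUTO) yields
# None.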
1148 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1149 nic_count, disk_sizes, spindle_use,
1150 _compute_fn=_ComputeMinMaxSpec):
1151 """Verifies ipolicy against provided specs.
1154 @param ipolicy: The ipolicy
1156 @param mem_size: The memory size
1157 @type cpu_count: int
1158 @param cpu_count: Used cpu cores
1159 @type disk_count: int
1160 @param disk_count: Number of disks used
1161 @type nic_count: int
1162 @param nic_count: Number of nics used
1163 @type disk_sizes: list of ints
1164 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1165 @type spindle_use: int
1166 @param spindle_use: The number of spindles this instance uses
1167 @param _compute_fn: The compute function (unittest only)
1168 @return: A list of violations, or an empty list if no violations are found
1171 assert disk_count == len(disk_sizes)
1174 (constants.ISPEC_MEM_SIZE, "", mem_size),
1175 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1176 (constants.ISPEC_DISK_COUNT, "", disk_count),
1177 (constants.ISPEC_NIC_COUNT, "", nic_count),
1178 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1179 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1180 for idx, d in enumerate(disk_sizes)]
1183 (_compute_fn(name, qualifier, ipolicy, value)
1184 for (name, qualifier, value) in test_settings))
1187 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1188 _compute_fn=_ComputeIPolicySpecViolation):
1189 """Compute if instance meets the specs of ipolicy.
1192 @param ipolicy: The ipolicy to verify against
1193 @type instance: L{objects.Instance}
1194 @param instance: The instance to verify
1195 @param _compute_fn: The function to verify ipolicy (unittest only)
1196 @see: L{_ComputeIPolicySpecViolation}
1199 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1200 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1201 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1202 disk_count = len(instance.disks)
1203 disk_sizes = [disk.size for disk in instance.disks]
1204 nic_count = len(instance.nics)
1206 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1207 disk_sizes, spindle_use)
1210 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1211 _compute_fn=_ComputeIPolicySpecViolation):
1212 """Compute if instance specs meets the specs of ipolicy.
1215 @param ipolicy: The ipolicy to verify against
1216 @type instance_spec: dict
1217 @param instance_spec: The instance spec to verify
1218 @param _compute_fn: The function to verify ipolicy (unittest only)
1219 @see: L{_ComputeIPolicySpecViolation}
1222 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1223 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1224 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1225 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1226 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1227 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1229 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1230 disk_sizes, spindle_use)
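# Illustrative sketch (not from the original module): the shape of an
# instance_spec dict as consumed above; the numbers are arbitrary examples and
# "ipolicy" is assumed to be a filled instance policy.
#
#   instance_spec = {
#     constants.ISPEC_MEM_SIZE: 512,
#     constants.ISPEC_CPU_COUNT: 1,
#     constants.ISPEC_DISK_COUNT: 1,
#     constants.ISPEC_DISK_SIZE: [1024],
#     constants.ISPEC_NIC_COUNT: 1,
#     constants.ISPEC_SPINDLE_USE: 1,
#     }
#   violations = _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec)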
1233 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235 _compute_fn=_ComputeIPolicyInstanceViolation):
1236 """Compute if instance meets the specs of the new target group.
1238 @param ipolicy: The ipolicy to verify
1239 @param instance: The instance object to verify
1240 @param current_group: The current group of the instance
1241 @param target_group: The new group of the instance
1242 @param _compute_fn: The function to verify ipolicy (unittest only)
1243 @see: L{_ComputeIPolicySpecViolation}
1246 if current_group == target_group:
1249 return _compute_fn(ipolicy, instance)
1252 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1253 _compute_fn=_ComputeIPolicyNodeViolation):
1254 """Checks that the target node is correct in terms of instance policy.
1256 @param ipolicy: The ipolicy to verify
1257 @param instance: The instance object to verify
1258 @param node: The new node to relocate
1259 @param ignore: Ignore violations of the ipolicy
1260 @param _compute_fn: The function to verify ipolicy (unittest only)
1261 @see: L{_ComputeIPolicySpecViolation}
1264 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1265 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1268 msg = ("Instance does not meet target node group's (%s) instance"
1269 " policy: %s") % (node.group, utils.CommaJoin(res))
1273 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1276 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1277 """Computes a set of any instances that would violate the new ipolicy.
1279 @param old_ipolicy: The current (still in-place) ipolicy
1280 @param new_ipolicy: The new (to become) ipolicy
1281 @param instances: List of instances to verify
1282 @return: A list of instances which violate the new ipolicy but
1286 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1287 _ComputeViolatingInstances(old_ipolicy, instances))
1290 def _ExpandItemName(fn, name, kind):
1291 """Expand an item name.
1293 @param fn: the function to use for expansion
1294 @param name: requested item name
1295 @param kind: text description ('Node' or 'Instance')
1296 @return: the resolved (full) name
1297 @raise errors.OpPrereqError: if the item is not found
1300 full_name = fn(name)
1301 if full_name is None:
1302 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1307 def _ExpandNodeName(cfg, name):
1308 """Wrapper over L{_ExpandItemName} for nodes."""
1309 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1312 def _ExpandInstanceName(cfg, name):
1313 """Wrapper over L{_ExpandItemName} for instance."""
1314 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1316 def _BuildNetworkHookEnv(name, network, gateway, network6, gateway6,
1317 network_type, mac_prefix, tags):
1320 env["NETWORK_NAME"] = name
1322 env["NETWORK_SUBNET"] = network
1324 env["NETWORK_GATEWAY"] = gateway
1326 env["NETWORK_SUBNET6"] = network6
1328 env["NETWORK_GATEWAY6"] = gateway6
1330 env["NETWORK_MAC_PREFIX"] = mac_prefix
1332 env["NETWORK_TYPE"] = network_type
1334 env["NETWORK_TAGS"] = " ".join(tags)
1339 def _BuildNetworkHookEnvByObject(lu, network):
1341 "name": network.name,
1342 "network": network.network,
1343 "gateway": network.gateway,
1344 "network6": network.network6,
1345 "gateway6": network.gateway6,
1346 "network_type": network.network_type,
1347 "mac_prefix": network.mac_prefix,
1348 "tags" : network.tags,
1350 return _BuildNetworkHookEnv(**args)
1353 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1354 minmem, maxmem, vcpus, nics, disk_template, disks,
1355 bep, hvp, hypervisor_name, tags):
1356 """Builds instance related env variables for hooks
1358 This builds the hook environment from individual variables.
1361 @param name: the name of the instance
1362 @type primary_node: string
1363 @param primary_node: the name of the instance's primary node
1364 @type secondary_nodes: list
1365 @param secondary_nodes: list of secondary nodes as strings
1366 @type os_type: string
1367 @param os_type: the name of the instance's OS
1368 @type status: string
1369 @param status: the desired status of the instance
1370 @type minmem: string
1371 @param minmem: the minimum memory size of the instance
1372 @type maxmem: string
1373 @param maxmem: the maximum memory size of the instance
1375 @param vcpus: the count of VCPUs the instance has
1377 @param nics: list of tuples (ip, mac, mode, link, network, netinfo) representing
1378 the NICs the instance has
1379 @type disk_template: string
1380 @param disk_template: the disk template of the instance
1382 @param disks: the list of (size, mode) pairs
1384 @param bep: the backend parameters for the instance
1386 @param hvp: the hypervisor parameters for the instance
1387 @type hypervisor_name: string
1388 @param hypervisor_name: the hypervisor for the instance
1390 @param tags: list of instance tags as strings
1392 @return: the hook environment for this instance
1397 "INSTANCE_NAME": name,
1398 "INSTANCE_PRIMARY": primary_node,
1399 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1400 "INSTANCE_OS_TYPE": os_type,
1401 "INSTANCE_STATUS": status,
1402 "INSTANCE_MINMEM": minmem,
1403 "INSTANCE_MAXMEM": maxmem,
1404 # TODO(2.7) remove deprecated "memory" value
1405 "INSTANCE_MEMORY": maxmem,
1406 "INSTANCE_VCPUS": vcpus,
1407 "INSTANCE_DISK_TEMPLATE": disk_template,
1408 "INSTANCE_HYPERVISOR": hypervisor_name,
1411 nic_count = len(nics)
1412 for idx, (ip, mac, mode, link, network, netinfo) in enumerate(nics):
1415 env["INSTANCE_NIC%d_IP" % idx] = ip
1416 env["INSTANCE_NIC%d_MAC" % idx] = mac
1417 env["INSTANCE_NIC%d_MODE" % idx] = mode
1418 env["INSTANCE_NIC%d_LINK" % idx] = link
1420 env["INSTANCE_NIC%d_NETWORK" % idx] = network
1422 nobj = objects.Network.FromDict(netinfo)
1424 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1426 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1428 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1430 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1432 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1433 if nobj.network_type:
1434 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1436 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1437 if mode == constants.NIC_MODE_BRIDGED:
1438 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1442 env["INSTANCE_NIC_COUNT"] = nic_count
1445 disk_count = len(disks)
1446 for idx, (size, mode) in enumerate(disks):
1447 env["INSTANCE_DISK%d_SIZE" % idx] = size
1448 env["INSTANCE_DISK%d_MODE" % idx] = mode
1452 env["INSTANCE_DISK_COUNT"] = disk_count
1457 env["INSTANCE_TAGS"] = " ".join(tags)
1459 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1460 for key, value in source.items():
1461 env["INSTANCE_%s_%s" % (kind, key)] = value
1465 def _NICToTuple(lu, nic):
1466 """Build a tupple of nic information.
1468 @type lu: L{LogicalUnit}
1469 @param lu: the logical unit on whose behalf we execute
1470 @type nic: L{objects.NIC}
1471 @param nic: nic to convert to hooks tuple
1474 cluster = lu.cfg.GetClusterInfo()
1477 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1478 mode = filled_params[constants.NIC_MODE]
1479 link = filled_params[constants.NIC_LINK]
1480 network = nic.network
1483 net_uuid = lu.cfg.LookupNetwork(network)
1485 nobj = lu.cfg.GetNetwork(net_uuid)
1486 netinfo = objects.Network.ToDict(nobj)
1487 return (ip, mac, mode, link, network, netinfo)
1489 def _NICListToTuple(lu, nics):
1490 """Build a list of nic information tuples.
1492 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1493 value in LUInstanceQueryData.
1495 @type lu: L{LogicalUnit}
1496 @param lu: the logical unit on whose behalf we execute
1497 @type nics: list of L{objects.NIC}
1498 @param nics: list of nics to convert to hooks tuples
1502 cluster = lu.cfg.GetClusterInfo()
1504 hooks_nics.append(_NICToTuple(lu, nic))
1507 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1508 """Builds instance related env variables for hooks from an object.
1510 @type lu: L{LogicalUnit}
1511 @param lu: the logical unit on whose behalf we execute
1512 @type instance: L{objects.Instance}
1513 @param instance: the instance for which we should build the
1515 @type override: dict
1516 @param override: dictionary with key/values that will override
1519 @return: the hook environment dictionary
1522 cluster = lu.cfg.GetClusterInfo()
1523 bep = cluster.FillBE(instance)
1524 hvp = cluster.FillHV(instance)
1526 "name": instance.name,
1527 "primary_node": instance.primary_node,
1528 "secondary_nodes": instance.secondary_nodes,
1529 "os_type": instance.os,
1530 "status": instance.admin_state,
1531 "maxmem": bep[constants.BE_MAXMEM],
1532 "minmem": bep[constants.BE_MINMEM],
1533 "vcpus": bep[constants.BE_VCPUS],
1534 "nics": _NICListToTuple(lu, instance.nics),
1535 "disk_template": instance.disk_template,
1536 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1539 "hypervisor_name": instance.hypervisor,
1540 "tags": instance.tags,
1543 args.update(override)
1544 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1547 def _AdjustCandidatePool(lu, exceptions):
1548 """Adjust the candidate pool after node operations.
1551 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1553 lu.LogInfo("Promoted nodes to master candidate role: %s",
1554 utils.CommaJoin(node.name for node in mod_list))
1555 for name in mod_list:
1556 lu.context.ReaddNode(name)
1557 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1559 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1563 def _DecideSelfPromotion(lu, exceptions=None):
1564 """Decide whether I should promote myself as a master candidate.
1567 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1568 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1569 # the new node will increase mc_max with one, so:
1570 mc_should = min(mc_should + 1, cp_size)
1571 return mc_now < mc_should
1574 def _CalculateGroupIPolicy(cluster, group):
1575 """Calculate instance policy for group.
1578 return cluster.SimpleFillIPolicy(group.ipolicy)
1581 def _ComputeViolatingInstances(ipolicy, instances):
1582 """Computes a set of instances who violates given ipolicy.
1584 @param ipolicy: The ipolicy to verify
1585 @type instances: list of L{objects.Instance}
1586 @param instances: List of instances to verify
1587 @return: A frozenset of instance names violating the ipolicy
1590 return frozenset([inst.name for inst in instances
1591 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1594 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1595 """Check that the brigdes needed by a list of nics exist.
1598 cluster = lu.cfg.GetClusterInfo()
1599 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1600 brlist = [params[constants.NIC_LINK] for params in paramslist
1601 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1603 result = lu.rpc.call_bridges_exist(target_node, brlist)
1604 result.Raise("Error checking bridges on destination node '%s'" %
1605 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1608 def _CheckInstanceBridgesExist(lu, instance, node=None):
1609 """Check that the brigdes needed by an instance exist.
1613 node = instance.primary_node
1614 _CheckNicsBridgesExist(lu, instance.nics, node)
1617 def _CheckOSVariant(os_obj, name):
1618 """Check whether an OS name conforms to the os variants specification.
1620 @type os_obj: L{objects.OS}
1621 @param os_obj: OS object to check
1623 @param name: OS name passed by the user, to check for validity
1626 variant = objects.OS.GetVariant(name)
1627 if not os_obj.supported_variants:
1629 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1630 " passed)" % (os_obj.name, variant),
1634 raise errors.OpPrereqError("OS name must include a variant",
1637 if variant not in os_obj.supported_variants:
1638 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1641 def _GetNodeInstancesInner(cfg, fn):
1642 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1645 def _GetNodeInstances(cfg, node_name):
1646 """Returns a list of all primary and secondary instances on a node.
1650 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1653 def _GetNodePrimaryInstances(cfg, node_name):
1654 """Returns primary instances on a node.
1657 return _GetNodeInstancesInner(cfg,
1658 lambda inst: node_name == inst.primary_node)
1661 def _GetNodeSecondaryInstances(cfg, node_name):
1662 """Returns secondary instances on a node.
1665 return _GetNodeInstancesInner(cfg,
1666 lambda inst: node_name in inst.secondary_nodes)
1669 def _GetStorageTypeArgs(cfg, storage_type):
1670 """Returns the arguments for a storage type.
1673 # Special case for file storage
1674 if storage_type == constants.ST_FILE:
1675 # storage.FileStorage wants a list of storage directories
1676 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1681 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1684 for dev in instance.disks:
1685 cfg.SetDiskID(dev, node_name)
1687 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1689 result.Raise("Failed to get disk status from node %s" % node_name,
1690 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1692 for idx, bdev_status in enumerate(result.payload):
1693 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1699 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1700 """Check the sanity of iallocator and node arguments and use the
1701 cluster-wide iallocator if appropriate.
1703 Check that at most one of (iallocator, node) is specified. If none is
1704 specified, then the LU's opcode's iallocator slot is filled with the
1705 cluster-wide default iallocator.
1707 @type iallocator_slot: string
1708 @param iallocator_slot: the name of the opcode iallocator slot
1709 @type node_slot: string
1710 @param node_slot: the name of the opcode target node slot
1713 node = getattr(lu.op, node_slot, None)
1714 iallocator = getattr(lu.op, iallocator_slot, None)
1716 if node is not None and iallocator is not None:
1717 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1719 elif node is None and iallocator is None:
1720 default_iallocator = lu.cfg.GetDefaultIAllocator()
1721 if default_iallocator:
1722 setattr(lu.op, iallocator_slot, default_iallocator)
1724 raise errors.OpPrereqError("No iallocator or node given and no"
1725 " cluster-wide default iallocator found;"
1726 " please specify either an iallocator or a"
1727 " node, or set a cluster-wide default"
1731 def _GetDefaultIAllocator(cfg, iallocator):
1732 """Decides on which iallocator to use.
1734 @type cfg: L{config.ConfigWriter}
1735 @param cfg: Cluster configuration object
1736 @type iallocator: string or None
1737 @param iallocator: Iallocator specified in opcode
1739 @return: Iallocator name
1743 # Use default iallocator
1744 iallocator = cfg.GetDefaultIAllocator()
1747 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1748 " opcode nor as a cluster-wide default",
1754 class LUClusterPostInit(LogicalUnit):
1755 """Logical unit for running hooks after cluster initialization.
1758 HPATH = "cluster-init"
1759 HTYPE = constants.HTYPE_CLUSTER
1761 def BuildHooksEnv(self):
1766 "OP_TARGET": self.cfg.GetClusterName(),
1769 def BuildHooksNodes(self):
1770 """Build hooks nodes.
1773 return ([], [self.cfg.GetMasterNode()])
1775 def Exec(self, feedback_fn):
1782 class LUClusterDestroy(LogicalUnit):
1783 """Logical unit for destroying the cluster.
1786 HPATH = "cluster-destroy"
1787 HTYPE = constants.HTYPE_CLUSTER
1789 def BuildHooksEnv(self):
1794 "OP_TARGET": self.cfg.GetClusterName(),
1797 def BuildHooksNodes(self):
1798 """Build hooks nodes.
1803 def CheckPrereq(self):
1804 """Check prerequisites.
1806 This checks whether the cluster is empty.
1808 Any errors are signaled by raising errors.OpPrereqError.
1811 master = self.cfg.GetMasterNode()
1813 nodelist = self.cfg.GetNodeList()
1814 if len(nodelist) != 1 or nodelist[0] != master:
1815 raise errors.OpPrereqError("There are still %d node(s) in"
1816 " this cluster." % (len(nodelist) - 1),
1818 instancelist = self.cfg.GetInstanceList()
1820 raise errors.OpPrereqError("There are still %d instance(s) in"
1821 " this cluster." % len(instancelist),
1824 def Exec(self, feedback_fn):
1825 """Destroys the cluster.
1828 master_params = self.cfg.GetMasterNetworkParameters()
1830 # Run post hooks on master node before it's removed
1831 _RunPostHook(self, master_params.name)
1833 ems = self.cfg.GetUseExternalMipScript()
1834 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1837 self.LogWarning("Error disabling the master IP address: %s",
1840 return master_params.name
1843 def _VerifyCertificate(filename):
1844 """Verifies a certificate for L{LUClusterVerifyConfig}.
1846 @type filename: string
1847 @param filename: Path to PEM file
1851 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1852 utils.ReadFile(filename))
1853 except Exception, err: # pylint: disable=W0703
1854 return (LUClusterVerifyConfig.ETYPE_ERROR,
1855 "Failed to load X509 certificate %s: %s" % (filename, err))
1858 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1859 constants.SSL_CERT_EXPIRATION_ERROR)
1862 fnamemsg = "While verifying %s: %s" % (filename, msg)
1867 return (None, fnamemsg)
1868 elif errcode == utils.CERT_WARNING:
1869 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1870 elif errcode == utils.CERT_ERROR:
1871 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1873 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1876 def _GetAllHypervisorParameters(cluster, instances):
1877 """Compute the set of all hypervisor parameters.
1879 @type cluster: L{objects.Cluster}
1880 @param cluster: the cluster object
1881 @type instances: list of L{objects.Instance}
1882 @param instances: additional instances from which to obtain parameters
1883 @rtype: list of (origin, hypervisor, parameters)
1884 @return: a list with all parameters found, indicating the hypervisor they
1885 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1890 for hv_name in cluster.enabled_hypervisors:
1891 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1893 for os_name, os_hvp in cluster.os_hvp.items():
1894 for hv_name, hv_params in os_hvp.items():
1896 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1897 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1899 # TODO: collapse identical parameter values in a single one
1900 for instance in instances:
1901 if instance.hvparams:
1902 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1903 cluster.FillHV(instance)))
1908 class _VerifyErrors(object):
1909 """Mix-in for cluster/group verify LUs.
1911 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1912 self.op and self._feedback_fn to be available.)
1916 ETYPE_FIELD = "code"
1917 ETYPE_ERROR = "ERROR"
1918 ETYPE_WARNING = "WARNING"
1920 def _Error(self, ecode, item, msg, *args, **kwargs):
1921 """Format an error message.
1923 Based on the opcode's error_codes parameter, either format a
1924 parseable error code, or a simpler error string.
1926 This must be called only from Exec and functions called from Exec.
1929 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1930 itype, etxt, _ = ecode
1931 # first complete the msg
1934 # then format the whole message
1935 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1936 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1942 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1943 # and finally report it via the feedback_fn
1944 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1946 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1947 """Log an error message if the passed condition is True.
1951 or self.op.debug_simulate_errors) # pylint: disable=E1101
1953 # If the error code is in the list of ignored errors, demote the error to a
1955 (_, etxt, _) = ecode
1956 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1957 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1960 self._Error(ecode, *args, **kwargs)
1962 # do not mark the operation as failed for WARN cases only
1963 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1964 self.bad = self.bad or cond
1967 class LUClusterVerify(NoHooksLU):
1968 """Submits all jobs necessary to verify the cluster.
1973 def ExpandNames(self):
1974 self.needed_locks = {}
1976 def Exec(self, feedback_fn):
1979 if self.op.group_name:
1980 groups = [self.op.group_name]
1981 depends_fn = lambda: None
1983 groups = self.cfg.GetNodeGroupList()
1985 # Verify global configuration
1987 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1990 # Always depend on global verification
1991 depends_fn = lambda: [(-len(jobs), [])]
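# Negative job IDs in "depends" are relative to the job being submitted, so
# -len(jobs) points back at the OpClusterVerifyConfig job added above; the
# second tuple element is the list of job statuses required of that dependency.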
1993 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1994 ignore_errors=self.op.ignore_errors,
1995 depends=depends_fn())]
1996 for group in groups)
1998 # Fix up all parameters
1999 for op in itertools.chain(*jobs): # pylint: disable=W0142
2000 op.debug_simulate_errors = self.op.debug_simulate_errors
2001 op.verbose = self.op.verbose
2002 op.error_codes = self.op.error_codes
2004 op.skip_checks = self.op.skip_checks
2005 except AttributeError:
2006 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2008 return ResultWithJobs(jobs)
2011 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2012 """Verifies the cluster config.
2017 def _VerifyHVP(self, hvp_data):
2018 """Verifies locally the syntax of the hypervisor parameters.
2021 for item, hv_name, hv_params in hvp_data:
2022 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2025 hv_class = hypervisor.GetHypervisor(hv_name)
2026 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2027 hv_class.CheckParameterSyntax(hv_params)
2028 except errors.GenericError, err:
2029 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2031 def ExpandNames(self):
2032 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2033 self.share_locks = _ShareAll()
2035 def CheckPrereq(self):
2036 """Check prerequisites.
2039 # Retrieve all information
2040 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2041 self.all_node_info = self.cfg.GetAllNodesInfo()
2042 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2044 def Exec(self, feedback_fn):
2045 """Verify integrity of cluster, performing various test on nodes.
2049 self._feedback_fn = feedback_fn
2051 feedback_fn("* Verifying cluster config")
2053 for msg in self.cfg.VerifyConfig():
2054 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2056 feedback_fn("* Verifying cluster certificate files")
2058 for cert_filename in constants.ALL_CERT_FILES:
2059 (errcode, msg) = _VerifyCertificate(cert_filename)
2060 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2062 feedback_fn("* Verifying hypervisor parameters")
2064 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2065 self.all_inst_info.values()))
2067 feedback_fn("* Verifying all nodes belong to an existing group")
2069 # We do this verification here because, should this bogus circumstance
2070 # occur, it would never be caught by VerifyGroup, which only acts on
2071 # nodes/instances reachable from existing node groups.
2073 dangling_nodes = set(node.name for node in self.all_node_info.values()
2074 if node.group not in self.all_group_info)
2076 dangling_instances = {}
2077 no_node_instances = []
2079 for inst in self.all_inst_info.values():
2080 if inst.primary_node in dangling_nodes:
2081 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2082 elif inst.primary_node not in self.all_node_info:
2083 no_node_instances.append(inst.name)
2088 utils.CommaJoin(dangling_instances.get(node.name,
2090 for node in dangling_nodes]
2092 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2094 "the following nodes (and their instances) belong to a non"
2095 " existing group: %s", utils.CommaJoin(pretty_dangling))
2097 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2099 "the following instances have a non-existing primary-node:"
2100 " %s", utils.CommaJoin(no_node_instances))
2105 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2106 """Verifies the status of a node group.
2109 HPATH = "cluster-verify"
2110 HTYPE = constants.HTYPE_CLUSTER
2113 _HOOKS_INDENT_RE = re.compile("^", re.M)
2115 class NodeImage(object):
2116 """A class representing the logical and physical status of a node.
2119 @ivar name: the node name to which this object refers
2120 @ivar volumes: a structure as returned from
2121 L{ganeti.backend.GetVolumeList} (runtime)
2122 @ivar instances: a list of running instances (runtime)
2123 @ivar pinst: list of configured primary instances (config)
2124 @ivar sinst: list of configured secondary instances (config)
2125 @ivar sbp: dictionary of {primary-node: list of instances} for all
2126 instances for which this node is secondary (config)
2127 @ivar mfree: free memory, as reported by hypervisor (runtime)
2128 @ivar dfree: free disk, as reported by the node (runtime)
2129 @ivar offline: the offline status (config)
2130 @type rpc_fail: boolean
2131 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2132 not whether the individual keys were correct) (runtime)
2133 @type lvm_fail: boolean
2134 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2135 @type hyp_fail: boolean
2136 @ivar hyp_fail: whether the RPC call didn't return the instance list
2137 @type ghost: boolean
2138 @ivar ghost: whether this is a known node or not (config)
2139 @type os_fail: boolean
2140 @ivar os_fail: whether the RPC call didn't return valid OS data
2142 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2143 @type vm_capable: boolean
2144 @ivar vm_capable: whether the node can host instances
2147 def __init__(self, offline=False, name=None, vm_capable=True):
2156 self.offline = offline
2157 self.vm_capable = vm_capable
2158 self.rpc_fail = False
2159 self.lvm_fail = False
2160 self.hyp_fail = False
2162 self.os_fail = False
2165 def ExpandNames(self):
2166 # This raises errors.OpPrereqError on its own:
2167 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2169 # Get instances in node group; this is unsafe and needs verification later
2171 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2173 self.needed_locks = {
2174 locking.LEVEL_INSTANCE: inst_names,
2175 locking.LEVEL_NODEGROUP: [self.group_uuid],
2176 locking.LEVEL_NODE: [],
2179 self.share_locks = _ShareAll()
2181 def DeclareLocks(self, level):
2182 if level == locking.LEVEL_NODE:
2183 # Get members of node group; this is unsafe and needs verification later
2184 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2186 all_inst_info = self.cfg.GetAllInstancesInfo()
2188 # In Exec(), we warn about mirrored instances that have primary and
2189 # secondary living in separate node groups. To fully verify that
2190 # volumes for these instances are healthy, we will need to do an
2191 # extra call to their secondaries. We ensure here those nodes will
2193 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2194 # Important: access only the instances whose lock is owned
2195 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2196 nodes.update(all_inst_info[inst].secondary_nodes)
2198 self.needed_locks[locking.LEVEL_NODE] = nodes
2200 def CheckPrereq(self):
2201 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2202 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2204 group_nodes = set(self.group_info.members)
2206 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2209 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2211 unlocked_instances = \
2212 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2215 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2216 utils.CommaJoin(unlocked_nodes),
2219 if unlocked_instances:
2220 raise errors.OpPrereqError("Missing lock for instances: %s" %
2221 utils.CommaJoin(unlocked_instances),
2224 self.all_node_info = self.cfg.GetAllNodesInfo()
2225 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2227 self.my_node_names = utils.NiceSort(group_nodes)
2228 self.my_inst_names = utils.NiceSort(group_instances)
2230 self.my_node_info = dict((name, self.all_node_info[name])
2231 for name in self.my_node_names)
2233 self.my_inst_info = dict((name, self.all_inst_info[name])
2234 for name in self.my_inst_names)
2236 # We detect here the nodes that will need the extra RPC calls for verifying
2237 # split LV volumes; they should be locked.
2238 extra_lv_nodes = set()
2240 for inst in self.my_inst_info.values():
2241 if inst.disk_template in constants.DTS_INT_MIRROR:
2242 for nname in inst.all_nodes:
2243 if self.all_node_info[nname].group != self.group_uuid:
2244 extra_lv_nodes.add(nname)
2246 unlocked_lv_nodes = \
2247 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2249 if unlocked_lv_nodes:
2250 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2251 utils.CommaJoin(unlocked_lv_nodes),
2253 self.extra_lv_nodes = list(extra_lv_nodes)
2255 def _VerifyNode(self, ninfo, nresult):
2256 """Perform some basic validation on data returned from a node.
2258 - check the result data structure is well formed and has all the
2260 - check ganeti version
2262 @type ninfo: L{objects.Node}
2263 @param ninfo: the node to check
2264 @param nresult: the results from the node
2266 @return: whether overall this call was successful (and we can expect
2267 reasonable values in the response)
2271 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2273 # main result, nresult should be a non-empty dict
2274 test = not nresult or not isinstance(nresult, dict)
2275 _ErrorIf(test, constants.CV_ENODERPC, node,
2276 "unable to verify node: no data returned")
2280 # compares ganeti version
2281 local_version = constants.PROTOCOL_VERSION
2282 remote_version = nresult.get("version", None)
2283 test = not (remote_version and
2284 isinstance(remote_version, (list, tuple)) and
2285 len(remote_version) == 2)
2286 _ErrorIf(test, constants.CV_ENODERPC, node,
2287 "connection to node returned invalid data")
2291 test = local_version != remote_version[0]
2292 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2293 "incompatible protocol versions: master %s,"
2294 " node %s", local_version, remote_version[0])
2298 # node seems compatible, we can actually try to look into its results
2300 # full package version
2301 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2302 constants.CV_ENODEVERSION, node,
2303 "software version mismatch: master %s, node %s",
2304 constants.RELEASE_VERSION, remote_version[1],
2305 code=self.ETYPE_WARNING)
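# The protocol-version mismatch above is reported as a hard error, whereas a
# differing package (release) version is deliberately downgraded to a warning.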
2307 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2308 if ninfo.vm_capable and isinstance(hyp_result, dict):
2309 for hv_name, hv_result in hyp_result.iteritems():
2310 test = hv_result is not None
2311 _ErrorIf(test, constants.CV_ENODEHV, node,
2312 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2314 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2315 if ninfo.vm_capable and isinstance(hvp_result, list):
2316 for item, hv_name, hv_result in hvp_result:
2317 _ErrorIf(True, constants.CV_ENODEHV, node,
2318 "hypervisor %s parameter verify failure (source %s): %s",
2319 hv_name, item, hv_result)
2321 test = nresult.get(constants.NV_NODESETUP,
2322 ["Missing NODESETUP results"])
2323 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2328 def _VerifyNodeTime(self, ninfo, nresult,
2329 nvinfo_starttime, nvinfo_endtime):
2330 """Check the node time.
2332 @type ninfo: L{objects.Node}
2333 @param ninfo: the node to check
2334 @param nresult: the remote results for the node
2335 @param nvinfo_starttime: the start time of the RPC call
2336 @param nvinfo_endtime: the end time of the RPC call
2340 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2342 ntime = nresult.get(constants.NV_TIME, None)
2344 ntime_merged = utils.MergeTime(ntime)
2345 except (ValueError, TypeError):
2346 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2349 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2350 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2351 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2352 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2356 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2357 "Node time diverges by at least %s from master node time",
2360 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2361 """Check the node LVM results.
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the remote results for the node
2366 @param vg_name: the configured VG name
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 # checks vg existence and size > 20G
2376 vglist = nresult.get(constants.NV_VGLIST, None)
2378 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2380 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2381 constants.MIN_VG_SIZE)
2382 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2385 pvlist = nresult.get(constants.NV_PVLIST, None)
2386 test = pvlist is None
2387 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2389 # check that ':' is not present in PV names, since it's a
2390 # special character for lvcreate (denotes the range of PEs to
2392 for _, pvname, owner_vg in pvlist:
2393 test = ":" in pvname
2394 _ErrorIf(test, constants.CV_ENODELVM, node,
2395 "Invalid character ':' in PV '%s' of VG '%s'",
2398 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2399 """Check the node bridges.
2401 @type ninfo: L{objects.Node}
2402 @param ninfo: the node to check
2403 @param nresult: the remote results for the node
2404 @param bridges: the expected list of bridges
2411 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2413 missing = nresult.get(constants.NV_BRIDGES, None)
2414 test = not isinstance(missing, list)
2415 _ErrorIf(test, constants.CV_ENODENET, node,
2416 "did not return valid bridge information")
2418 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2419 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2421 def _VerifyNodeUserScripts(self, ninfo, nresult):
2422 """Check the results of user scripts presence and executability on the node
2424 @type ninfo: L{objects.Node}
2425 @param ninfo: the node to check
2426 @param nresult: the remote results for the node
2431 test = constants.NV_USERSCRIPTS not in nresult
2432 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2433 "did not return user scripts information")
2435 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2437 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2438 "user scripts not present or not executable: %s" %
2439 utils.CommaJoin(sorted(broken_scripts)))
2441 def _VerifyNodeNetwork(self, ninfo, nresult):
2442 """Check the node network connectivity results.
2444 @type ninfo: L{objects.Node}
2445 @param ninfo: the node to check
2446 @param nresult: the remote results for the node
2450 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2452 test = constants.NV_NODELIST not in nresult
2453 _ErrorIf(test, constants.CV_ENODESSH, node,
2454 "node hasn't returned node ssh connectivity data")
2456 if nresult[constants.NV_NODELIST]:
2457 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2458 _ErrorIf(True, constants.CV_ENODESSH, node,
2459 "ssh communication with node '%s': %s", a_node, a_msg)
2461 test = constants.NV_NODENETTEST not in nresult
2462 _ErrorIf(test, constants.CV_ENODENET, node,
2463 "node hasn't returned node tcp connectivity data")
2465 if nresult[constants.NV_NODENETTEST]:
2466 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2468 _ErrorIf(True, constants.CV_ENODENET, node,
2469 "tcp communication with node '%s': %s",
2470 anode, nresult[constants.NV_NODENETTEST][anode])
2472 test = constants.NV_MASTERIP not in nresult
2473 _ErrorIf(test, constants.CV_ENODENET, node,
2474 "node hasn't returned node master IP reachability data")
2476 if not nresult[constants.NV_MASTERIP]:
2477 if node == self.master_node:
2478 msg = "the master node cannot reach the master IP (not configured?)"
2480 msg = "cannot reach the master IP"
2481 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2483 def _VerifyInstance(self, instance, instanceconfig, node_image,
2485 """Verify an instance.
2487 This function checks to see if the required block devices are
2488 available on the instance's node.
2491 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2492 node_current = instanceconfig.primary_node
2494 node_vol_should = {}
2495 instanceconfig.MapLVsByNode(node_vol_should)
2497 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2498 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2499 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2501 for node in node_vol_should:
2502 n_img = node_image[node]
2503 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2504 # ignore missing volumes on offline or broken nodes
2506 for volume in node_vol_should[node]:
2507 test = volume not in n_img.volumes
2508 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2509 "volume %s missing on node %s", volume, node)
2511 if instanceconfig.admin_state == constants.ADMINST_UP:
2512 pri_img = node_image[node_current]
2513 test = instance not in pri_img.instances and not pri_img.offline
2514 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2515 "instance not running on its primary node %s",
2518 diskdata = [(nname, success, status, idx)
2519 for (nname, disks) in diskstatus.items()
2520 for idx, (success, status) in enumerate(disks)]
2522 for nname, success, bdev_status, idx in diskdata:
2523 # the 'ghost node' construction in Exec() ensures that we have a
2525 snode = node_image[nname]
2526 bad_snode = snode.ghost or snode.offline
2527 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2528 not success and not bad_snode,
2529 constants.CV_EINSTANCEFAULTYDISK, instance,
2530 "couldn't retrieve status for disk/%s on %s: %s",
2531 idx, nname, bdev_status)
2532 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2533 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2534 constants.CV_EINSTANCEFAULTYDISK, instance,
2535 "disk/%s on %s is faulty", idx, nname)
2537 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2538 """Verify if there are any unknown volumes in the cluster.
2540 The .os, .swap and backup volumes are ignored. All other volumes are
2541 reported as unknown.
2543 @type reserved: L{ganeti.utils.FieldSet}
2544 @param reserved: a FieldSet of reserved volume names
2547 for node, n_img in node_image.items():
2548 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2549 self.all_node_info[node].group != self.group_uuid):
2550 # skip non-healthy nodes
2552 for volume in n_img.volumes:
2553 test = ((node not in node_vol_should or
2554 volume not in node_vol_should[node]) and
2555 not reserved.Matches(volume))
2556 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2557 "volume %s is unknown", volume)
2559 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2560 """Verify N+1 Memory Resilience.
2562 Check that if one single node dies we can still start all the
2563 instances it was primary for.
2566 cluster_info = self.cfg.GetClusterInfo()
2567 for node, n_img in node_image.items():
2568 # This code checks that every node which is now listed as
2569 # secondary has enough memory to host all instances it is
2570 # supposed to, should a single other node in the cluster fail.
2571 # FIXME: not ready for failover to an arbitrary node
2572 # FIXME: does not support file-backed instances
2573 # WARNING: we currently take into account down instances as well
2574 # as up ones, considering that even if they're down someone
2575 # might want to start them even in the event of a node failure.
2576 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2577 # we're skipping nodes marked offline and nodes in other groups from
2578 # the N+1 warning, since most likely we don't have good memory
2579 # information from them; we already list instances living on such
2580 # nodes, and that's enough warning
2582 #TODO(dynmem): also consider ballooning out other instances
2583 for prinode, instances in n_img.sbp.items():
2585 for instance in instances:
2586 bep = cluster_info.FillBE(instance_cfg[instance])
2587 if bep[constants.BE_AUTO_BALANCE]:
2588 needed_mem += bep[constants.BE_MINMEM]
2589 test = n_img.mfree < needed_mem
2590 self._ErrorIf(test, constants.CV_ENODEN1, node,
2591 "not enough memory to accomodate instance failovers"
2592 " should node %s fail (%dMiB needed, %dMiB available)",
2593 prinode, needed_mem, n_img.mfree)
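# Illustrative example: if this node is secondary for inst1 (minmem 512 MiB)
# and inst2 (minmem 1024 MiB), both auto-balanced and with primary node nodeA,
# then at least 1536 MiB must be free here to absorb a failover of nodeA.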
2596 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2597 (files_all, files_opt, files_mc, files_vm)):
2598 """Verifies file checksums collected from all nodes.
2600 @param errorif: Callback for reporting errors
2601 @param nodeinfo: List of L{objects.Node} objects
2602 @param master_node: Name of master node
2603 @param all_nvinfo: RPC results
2606 # Define functions determining which nodes to consider for a file
2609 (files_mc, lambda node: (node.master_candidate or
2610 node.name == master_node)),
2611 (files_vm, lambda node: node.vm_capable),
2614 # Build mapping from filename to list of nodes which should have the file
2616 for (files, fn) in files2nodefn:
2618 filenodes = nodeinfo
2620 filenodes = filter(fn, nodeinfo)
2621 nodefiles.update((filename,
2622 frozenset(map(operator.attrgetter("name"), filenodes)))
2623 for filename in files)
2625 assert set(nodefiles) == (files_all | files_mc | files_vm)
2627 fileinfo = dict((filename, {}) for filename in nodefiles)
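# fileinfo maps each filename to {checksum: set of node names reporting that
# checksum}; more than one checksum key per file means diverging copies.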
2628 ignore_nodes = set()
2630 for node in nodeinfo:
2632 ignore_nodes.add(node.name)
2635 nresult = all_nvinfo[node.name]
2637 if nresult.fail_msg or not nresult.payload:
2640 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2642 test = not (node_files and isinstance(node_files, dict))
2643 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2644 "Node did not return file checksum data")
2646 ignore_nodes.add(node.name)
2649 # Build per-checksum mapping from filename to nodes having it
2650 for (filename, checksum) in node_files.items():
2651 assert filename in nodefiles
2652 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2654 for (filename, checksums) in fileinfo.items():
2655 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2657 # Nodes having the file
2658 with_file = frozenset(node_name
2659 for nodes in fileinfo[filename].values()
2660 for node_name in nodes) - ignore_nodes
2662 expected_nodes = nodefiles[filename] - ignore_nodes
2664 # Nodes missing file
2665 missing_file = expected_nodes - with_file
2667 if filename in files_opt:
2669 errorif(missing_file and missing_file != expected_nodes,
2670 constants.CV_ECLUSTERFILECHECK, None,
2671 "File %s is optional, but it must exist on all or no"
2672 " nodes (not found on %s)",
2673 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2675 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2676 "File %s is missing from node(s) %s", filename,
2677 utils.CommaJoin(utils.NiceSort(missing_file)))
2679 # Warn if a node has a file it shouldn't
2680 unexpected = with_file - expected_nodes
2682 constants.CV_ECLUSTERFILECHECK, None,
2683 "File %s should not exist on node(s) %s",
2684 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2686 # See if there are multiple versions of the file
2687 test = len(checksums) > 1
2689 variants = ["variant %s on %s" %
2690 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2691 for (idx, (checksum, nodes)) in
2692 enumerate(sorted(checksums.items()))]
2696 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2697 "File %s found with %s different checksums (%s)",
2698 filename, len(checksums), "; ".join(variants))
2700 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2702 """Verifies and the node DRBD status.
2704 @type ninfo: L{objects.Node}
2705 @param ninfo: the node to check
2706 @param nresult: the remote results for the node
2707 @param instanceinfo: the dict of instances
2708 @param drbd_helper: the configured DRBD usermode helper
2709 @param drbd_map: the DRBD map as returned by
2710 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2714 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2717 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2718 test = (helper_result is None)
2719 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2720 "no drbd usermode helper returned")
2722 status, payload = helper_result
2724 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2725 "drbd usermode helper check unsuccessful: %s", payload)
2726 test = status and (payload != drbd_helper)
2727 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2728 "wrong drbd usermode helper: %s", payload)
2730 # compute the DRBD minors
2732 for minor, instance in drbd_map[node].items():
2733 test = instance not in instanceinfo
2734 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2735 "ghost instance '%s' in temporary DRBD map", instance)
2736 # ghost instance should not be running, but otherwise we
2737 # don't give double warnings (both ghost instance and
2738 # unallocated minor in use)
2740 node_drbd[minor] = (instance, False)
2742 instance = instanceinfo[instance]
2743 node_drbd[minor] = (instance.name,
2744 instance.admin_state == constants.ADMINST_UP)
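# node_drbd now maps each configured minor to (instance name, whether the
# minor must be active because the instance is marked up).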
2746 # and now check them
2747 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2748 test = not isinstance(used_minors, (tuple, list))
2749 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2750 "cannot parse drbd status file: %s", str(used_minors))
2752 # we cannot check drbd status
2755 for minor, (iname, must_exist) in node_drbd.items():
2756 test = minor not in used_minors and must_exist
2757 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2758 "drbd minor %d of instance %s is not active", minor, iname)
2759 for minor in used_minors:
2760 test = minor not in node_drbd
2761 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2762 "unallocated drbd minor %d is in use", minor)
2764 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2765 """Builds the node OS structures.
2767 @type ninfo: L{objects.Node}
2768 @param ninfo: the node to check
2769 @param nresult: the remote results for the node
2770 @param nimg: the node image object
2774 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2776 remote_os = nresult.get(constants.NV_OSLIST, None)
2777 test = (not isinstance(remote_os, list) or
2778 not compat.all(isinstance(v, list) and len(v) == 7
2779 for v in remote_os))
2781 _ErrorIf(test, constants.CV_ENODEOS, node,
2782 "node hasn't returned valid OS data")
2791 for (name, os_path, status, diagnose,
2792 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2794 if name not in os_dict:
2797 # parameters is a list of lists instead of list of tuples due to
2798 # JSON lacking a real tuple type, fix it:
2799 parameters = [tuple(v) for v in parameters]
2800 os_dict[name].append((os_path, status, diagnose,
2801 set(variants), set(parameters), set(api_ver)))
2803 nimg.oslist = os_dict
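# oslist maps each OS name to a list of (path, status, diagnose message,
# variants, parameters, api_versions) tuples, one per location it was found in.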
2805 def _VerifyNodeOS(self, ninfo, nimg, base):
2806 """Verifies the node OS list.
2808 @type ninfo: L{objects.Node}
2809 @param ninfo: the node to check
2810 @param nimg: the node image object
2811 @param base: the 'template' node we match against (e.g. from the master)
2815 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2817 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2819 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2820 for os_name, os_data in nimg.oslist.items():
2821 assert os_data, "Empty OS status for OS %s?!" % os_name
2822 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2823 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2824 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2825 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2826 "OS '%s' has multiple entries (first one shadows the rest): %s",
2827 os_name, utils.CommaJoin([v[0] for v in os_data]))
2828 # comparisons with the 'base' image
2829 test = os_name not in base.oslist
2830 _ErrorIf(test, constants.CV_ENODEOS, node,
2831 "Extra OS %s not present on reference node (%s)",
2835 assert base.oslist[os_name], "Base node has empty OS status?"
2836 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2838 # base OS is invalid, skipping
2840 for kind, a, b in [("API version", f_api, b_api),
2841 ("variants list", f_var, b_var),
2842 ("parameters", beautify_params(f_param),
2843 beautify_params(b_param))]:
2844 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2845 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2846 kind, os_name, base.name,
2847 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2849 # check any missing OSes
2850 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2851 _ErrorIf(missing, constants.CV_ENODEOS, node,
2852 "OSes present on reference node %s but missing on this node: %s",
2853 base.name, utils.CommaJoin(missing))
2855 def _VerifyOob(self, ninfo, nresult):
2856 """Verifies out of band functionality of a node.
2858 @type ninfo: L{objects.Node}
2859 @param ninfo: the node to check
2860 @param nresult: the remote results for the node
2864 # We just have to verify the paths on master and/or master candidates
2865 # as the oob helper is invoked on the master
2866 if ((ninfo.master_candidate or ninfo.master_capable) and
2867 constants.NV_OOB_PATHS in nresult):
2868 for path_result in nresult[constants.NV_OOB_PATHS]:
2869 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2871 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2872 """Verifies and updates the node volume data.
2874 This function will update a L{NodeImage}'s internal structures
2875 with data from the remote call.
2877 @type ninfo: L{objects.Node}
2878 @param ninfo: the node to check
2879 @param nresult: the remote results for the node
2880 @param nimg: the node image object
2881 @param vg_name: the configured VG name
2885 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2887 nimg.lvm_fail = True
2888 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2891 elif isinstance(lvdata, basestring):
2892 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2893 utils.SafeEncode(lvdata))
2894 elif not isinstance(lvdata, dict):
2895 _ErrorIf(True, constants.CV_ENODELVM, node,
2896 "rpc call to node failed (lvlist)")
2898 nimg.volumes = lvdata
2899 nimg.lvm_fail = False
2901 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2902 """Verifies and updates the node instance list.
2904 If the listing was successful, then updates this node's instance
2905 list. Otherwise, it marks the RPC call as failed for the instance
2908 @type ninfo: L{objects.Node}
2909 @param ninfo: the node to check
2910 @param nresult: the remote results for the node
2911 @param nimg: the node image object
2914 idata = nresult.get(constants.NV_INSTANCELIST, None)
2915 test = not isinstance(idata, list)
2916 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2917 "rpc call to node failed (instancelist): %s",
2918 utils.SafeEncode(str(idata)))
2920 nimg.hyp_fail = True
2922 nimg.instances = idata
2924 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2925 """Verifies and computes a node information map
2927 @type ninfo: L{objects.Node}
2928 @param ninfo: the node to check
2929 @param nresult: the remote results for the node
2930 @param nimg: the node image object
2931 @param vg_name: the configured VG name
2935 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2937 # try to read free memory (from the hypervisor)
2938 hv_info = nresult.get(constants.NV_HVINFO, None)
2939 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2940 _ErrorIf(test, constants.CV_ENODEHV, node,
2941 "rpc call to node failed (hvinfo)")
2944 nimg.mfree = int(hv_info["memory_free"])
2945 except (ValueError, TypeError):
2946 _ErrorIf(True, constants.CV_ENODERPC, node,
2947 "node returned invalid nodeinfo, check hypervisor")
2949 # FIXME: devise a free space model for file based instances as well
2950 if vg_name is not None:
2951 test = (constants.NV_VGLIST not in nresult or
2952 vg_name not in nresult[constants.NV_VGLIST])
2953 _ErrorIf(test, constants.CV_ENODELVM, node,
2954 "node didn't return data for the volume group '%s'"
2955 " - it is either missing or broken", vg_name)
2958 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2959 except (ValueError, TypeError):
2960 _ErrorIf(True, constants.CV_ENODERPC, node,
2961 "node returned invalid LVM info, check LVM status")
2963 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2964 """Gets per-disk status information for all instances.
2966 @type nodelist: list of strings
2967 @param nodelist: Node names
2968 @type node_image: dict of (name, L{objects.Node})
2969 @param node_image: Node objects
2970 @type instanceinfo: dict of (name, L{objects.Instance})
2971 @param instanceinfo: Instance objects
2972 @rtype: {instance: {node: [(success, payload)]}}
2973 @return: a dictionary of per-instance dictionaries with nodes as
2974 keys and disk information as values; the disk information is a
2975 list of tuples (success, payload)
2978 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2981 node_disks_devonly = {}
2982 diskless_instances = set()
2983 diskless = constants.DT_DISKLESS
2985 for nname in nodelist:
2986 node_instances = list(itertools.chain(node_image[nname].pinst,
2987 node_image[nname].sinst))
2988 diskless_instances.update(inst for inst in node_instances
2989 if instanceinfo[inst].disk_template == diskless)
2990 disks = [(inst, disk)
2991 for inst in node_instances
2992 for disk in instanceinfo[inst].disks]
2995 # No need to collect data
2998 node_disks[nname] = disks
3000 # _AnnotateDiskParams already makes copies of the disks
3002 for (inst, dev) in disks:
3003 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3004 self.cfg.SetDiskID(anno_disk, nname)
3005 devonly.append(anno_disk)
3007 node_disks_devonly[nname] = devonly
3009 assert len(node_disks) == len(node_disks_devonly)
3011 # Collect data from all nodes with disks
3012 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3015 assert len(result) == len(node_disks)
3019 for (nname, nres) in result.items():
3020 disks = node_disks[nname]
3023 # No data from this node
3024 data = len(disks) * [(False, "node offline")]
3027 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3028 "while getting disk information: %s", msg)
3030 # No data from this node
3031 data = len(disks) * [(False, msg)]
3034 for idx, i in enumerate(nres.payload):
3035 if isinstance(i, (tuple, list)) and len(i) == 2:
3038 logging.warning("Invalid result from node %s, entry %d: %s",
3040 data.append((False, "Invalid result from the remote node"))
3042 for ((inst, _), status) in zip(disks, data):
3043 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3045 # Add empty entries for diskless instances.
3046 for inst in diskless_instances:
3047 assert inst not in instdisk
3050 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3051 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3052 compat.all(isinstance(s, (tuple, list)) and
3053 len(s) == 2 for s in statuses)
3054 for inst, nnames in instdisk.items()
3055 for nname, statuses in nnames.items())
3056 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
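# Illustrative shape of instdisk: {"inst1": {"node1": [(True, status_disk0),
# (False, "rpc error")]}} -- one (success, payload) entry per disk per node.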
3061 def _SshNodeSelector(group_uuid, all_nodes):
3062 """Create endless iterators for all potential SSH check hosts.
3065 nodes = [node for node in all_nodes
3066 if (node.group != group_uuid and
3068 keyfunc = operator.attrgetter("group")
3070 return map(itertools.cycle,
3071 [sorted(map(operator.attrgetter("name"), names))
3072 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3076 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3077 """Choose which nodes should talk to which other nodes.
3079 We will make nodes contact all nodes in their group, and one node from
3082 @warning: This algorithm has a known issue if one node group is much
3083 smaller than others (e.g. just one node). In such a case all other
3084 nodes will talk to the single node.
3087 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3088 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3090 return (online_nodes,
3091 dict((name, sorted([i.next() for i in sel]))
3092 for name in online_nodes))
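# Each online node of this group is thus paired with one node from every other
# group (drawn round-robin from the per-group cycles above), spreading the
# cross-group SSH checks over the group members.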
3094 def BuildHooksEnv(self):
3097 Cluster-Verify hooks run only in the post phase; their output is logged
3098 in the verify output and their failure makes the verification fail.
3102 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3105 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3106 for node in self.my_node_info.values())
3110 def BuildHooksNodes(self):
3111 """Build hooks nodes.
3114 return ([], self.my_node_names)
3116 def Exec(self, feedback_fn):
3117 """Verify integrity of the node group, performing various test on nodes.
3120 # This method has too many local variables. pylint: disable=R0914
3121 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3123 if not self.my_node_names:
3125 feedback_fn("* Empty node group, skipping verification")
3129 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3130 verbose = self.op.verbose
3131 self._feedback_fn = feedback_fn
3133 vg_name = self.cfg.GetVGName()
3134 drbd_helper = self.cfg.GetDRBDHelper()
3135 cluster = self.cfg.GetClusterInfo()
3136 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3137 hypervisors = cluster.enabled_hypervisors
3138 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3140 i_non_redundant = [] # Non redundant instances
3141 i_non_a_balanced = [] # Non auto-balanced instances
3142 i_offline = 0 # Count of offline instances
3143 n_offline = 0 # Count of offline nodes
3144 n_drained = 0 # Count of nodes being drained
3145 node_vol_should = {}
3147 # FIXME: verify OS list
3150 filemap = _ComputeAncillaryFiles(cluster, False)
3152 # do local checksums
3153 master_node = self.master_node = self.cfg.GetMasterNode()
3154 master_ip = self.cfg.GetMasterIP()
3156 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3159 if self.cfg.GetUseExternalMipScript():
3160 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3162 node_verify_param = {
3163 constants.NV_FILELIST:
3164 utils.UniqueSequence(filename
3165 for files in filemap
3166 for filename in files),
3167 constants.NV_NODELIST:
3168 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3169 self.all_node_info.values()),
3170 constants.NV_HYPERVISOR: hypervisors,
3171 constants.NV_HVPARAMS:
3172 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3173 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3174 for node in node_data_list
3175 if not node.offline],
3176 constants.NV_INSTANCELIST: hypervisors,
3177 constants.NV_VERSION: None,
3178 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3179 constants.NV_NODESETUP: None,
3180 constants.NV_TIME: None,
3181 constants.NV_MASTERIP: (master_node, master_ip),
3182 constants.NV_OSLIST: None,
3183 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3184 constants.NV_USERSCRIPTS: user_scripts,
3187 if vg_name is not None:
3188 node_verify_param[constants.NV_VGLIST] = None
3189 node_verify_param[constants.NV_LVLIST] = vg_name
3190 node_verify_param[constants.NV_PVLIST] = [vg_name]
3191 node_verify_param[constants.NV_DRBDLIST] = None
3194 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3197 # FIXME: this needs to be changed per node-group, not cluster-wide
3199 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3200 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3201 bridges.add(default_nicpp[constants.NIC_LINK])
3202 for instance in self.my_inst_info.values():
3203 for nic in instance.nics:
3204 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3205 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3206 bridges.add(full_nic[constants.NIC_LINK])
3209 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3211 # Build our expected cluster state
3212 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3214 vm_capable=node.vm_capable))
3215 for node in node_data_list)
3219 for node in self.all_node_info.values():
3220 path = _SupportsOob(self.cfg, node)
3221 if path and path not in oob_paths:
3222 oob_paths.append(path)
3225 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3227 for instance in self.my_inst_names:
3228 inst_config = self.my_inst_info[instance]
3229 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3232 for nname in inst_config.all_nodes:
3233 if nname not in node_image:
3234 gnode = self.NodeImage(name=nname)
3235 gnode.ghost = (nname not in self.all_node_info)
3236 node_image[nname] = gnode
3238 inst_config.MapLVsByNode(node_vol_should)
3240 pnode = inst_config.primary_node
3241 node_image[pnode].pinst.append(instance)
3243 for snode in inst_config.secondary_nodes:
3244 nimg = node_image[snode]
3245 nimg.sinst.append(instance)
3246 if pnode not in nimg.sbp:
3247 nimg.sbp[pnode] = []
3248 nimg.sbp[pnode].append(instance)
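# sbp on a secondary node maps a primary node name to the instances whose
# secondary copy lives here; _VerifyNPlusOneMemory later uses this to compute
# the memory needed should that primary node fail.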
3250 # At this point, we have the in-memory data structures complete,
3251 # except for the runtime information, which we'll gather next
3253 # Due to the way our RPC system works, exact response times cannot be
3254 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3255 # time before and after executing the request, we can at least have a time
3257 nvinfo_starttime = time.time()
3258 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3260 self.cfg.GetClusterName())
3261 nvinfo_endtime = time.time()
3263 if self.extra_lv_nodes and vg_name is not None:
3265 self.rpc.call_node_verify(self.extra_lv_nodes,
3266 {constants.NV_LVLIST: vg_name},
3267 self.cfg.GetClusterName())
3269 extra_lv_nvinfo = {}
3271 all_drbd_map = self.cfg.ComputeDRBDMap()
3273 feedback_fn("* Gathering disk information (%s nodes)" %
3274 len(self.my_node_names))
3275 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3278 feedback_fn("* Verifying configuration file consistency")
3280 # If not all nodes are being checked, we need to make sure the master node
3281 # and a non-checked vm_capable node are in the list.
3282 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3284 vf_nvinfo = all_nvinfo.copy()
3285 vf_node_info = list(self.my_node_info.values())
3286 additional_nodes = []
3287 if master_node not in self.my_node_info:
3288 additional_nodes.append(master_node)
3289 vf_node_info.append(self.all_node_info[master_node])
3290 # Add the first vm_capable node we find which is not included,
3291 # excluding the master node (which we already have)
3292 for node in absent_nodes:
3293 nodeinfo = self.all_node_info[node]
3294 if (nodeinfo.vm_capable and not nodeinfo.offline and
3295 node != master_node):
3296 additional_nodes.append(node)
3297 vf_node_info.append(self.all_node_info[node])
3299 key = constants.NV_FILELIST
3300 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3301 {key: node_verify_param[key]},
3302 self.cfg.GetClusterName()))
3304 vf_nvinfo = all_nvinfo
3305 vf_node_info = self.my_node_info.values()
3307 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3309 feedback_fn("* Verifying node status")
3313 for node_i in node_data_list:
3315 nimg = node_image[node]
3319 feedback_fn("* Skipping offline node %s" % (node,))
3323 if node == master_node:
3325 elif node_i.master_candidate:
3326 ntype = "master candidate"
3327 elif node_i.drained:
3333 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3335 msg = all_nvinfo[node].fail_msg
3336 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3339 nimg.rpc_fail = True
3342 nresult = all_nvinfo[node].payload
3344 nimg.call_ok = self._VerifyNode(node_i, nresult)
3345 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3346 self._VerifyNodeNetwork(node_i, nresult)
3347 self._VerifyNodeUserScripts(node_i, nresult)
3348 self._VerifyOob(node_i, nresult)
3351 self._VerifyNodeLVM(node_i, nresult, vg_name)
3352 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3355 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3356 self._UpdateNodeInstances(node_i, nresult, nimg)
3357 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3358 self._UpdateNodeOS(node_i, nresult, nimg)
3360 if not nimg.os_fail:
3361 if refos_img is None:
3363 self._VerifyNodeOS(node_i, nimg, refos_img)
3364 self._VerifyNodeBridges(node_i, nresult, bridges)
3366 # Check whether all running instances are primary for the node. (This
3367 # can no longer be done from _VerifyInstance below, since some of the
3368 # wrong instances could be from other node groups.)
3369 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3371 for inst in non_primary_inst:
3372 test = inst in self.all_inst_info
3373 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3374 "instance should not run on node %s", node_i.name)
3375 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3376 "node is running unknown instance %s", inst)
3378 for node, result in extra_lv_nvinfo.items():
3379 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3380 node_image[node], vg_name)
3382 feedback_fn("* Verifying instance status")
3383 for instance in self.my_inst_names:
3385 feedback_fn("* Verifying instance %s" % instance)
3386 inst_config = self.my_inst_info[instance]
3387 self._VerifyInstance(instance, inst_config, node_image,
3389 inst_nodes_offline = []
3391 pnode = inst_config.primary_node
3392 pnode_img = node_image[pnode]
3393 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3394 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3395 " primary node failed", instance)
3397 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3399 constants.CV_EINSTANCEBADNODE, instance,
3400 "instance is marked as running and lives on offline node %s",
3401 inst_config.primary_node)
3403 # If the instance is non-redundant we cannot survive losing its primary
3404 # node, so we are not N+1 compliant. On the other hand we have no disk
3405 # templates with more than one secondary so that situation is not well
3407 # FIXME: does not support file-backed instances
3408 if not inst_config.secondary_nodes:
3409 i_non_redundant.append(instance)
3411 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3412 constants.CV_EINSTANCELAYOUT,
3413 instance, "instance has multiple secondary nodes: %s",
3414 utils.CommaJoin(inst_config.secondary_nodes),
3415 code=self.ETYPE_WARNING)
3417 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3418 pnode = inst_config.primary_node
3419 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3420 instance_groups = {}
3422 for node in instance_nodes:
3423 instance_groups.setdefault(self.all_node_info[node].group,
3427 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3428 # Sort so that we always list the primary node first.
3429 for group, nodes in sorted(instance_groups.items(),
3430 key=lambda (_, nodes): pnode in nodes,
3433 self._ErrorIf(len(instance_groups) > 1,
3434 constants.CV_EINSTANCESPLITGROUPS,
3435 instance, "instance has primary and secondary nodes in"
3436 " different groups: %s", utils.CommaJoin(pretty_list),
3437 code=self.ETYPE_WARNING)
3439 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3440 i_non_a_balanced.append(instance)
3442 for snode in inst_config.secondary_nodes:
3443 s_img = node_image[snode]
3444 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3445 snode, "instance %s, connection to secondary node failed",
3449 inst_nodes_offline.append(snode)
3451 # warn that the instance lives on offline nodes
3452 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3453 "instance has offline secondary node(s) %s",
3454 utils.CommaJoin(inst_nodes_offline))
3455 # ... or ghost/non-vm_capable nodes
3456 for node in inst_config.all_nodes:
3457 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3458 instance, "instance lives on ghost node %s", node)
3459 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3460 instance, "instance lives on non-vm_capable node %s", node)
3462 feedback_fn("* Verifying orphan volumes")
3463 reserved = utils.FieldSet(*cluster.reserved_lvs)
3465 # We will get spurious "unknown volume" warnings if any node of this group
3466 # is secondary for an instance whose primary is in another group. To avoid
3467 # them, we find these instances and add their volumes to node_vol_should.
3468 for inst in self.all_inst_info.values():
3469 for secondary in inst.secondary_nodes:
3470 if (secondary in self.my_node_info
3471 and inst.name not in self.my_inst_info):
3472 inst.MapLVsByNode(node_vol_should)
3475 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3477 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3478 feedback_fn("* Verifying N+1 Memory redundancy")
3479 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3481 feedback_fn("* Other Notes")
3483 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3484 % len(i_non_redundant))
3486 if i_non_a_balanced:
3487 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3488 % len(i_non_a_balanced))
3491 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3494 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3497 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3501 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3502 """Analyze the post-hooks' result
3504 This method analyses the hook result, handles it, and sends some
3505 nicely-formatted feedback back to the user.
3507 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3508 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3509 @param hooks_results: the results of the multi-node hooks rpc call
3510 @param feedback_fn: function used to send feedback back to the caller
3511 @param lu_result: previous Exec result
3512 @return: the new Exec result, based on the previous result
3516 # We only really run POST phase hooks, only for non-empty groups,
3517 # and are only interested in their results
3518 if not self.my_node_names:
3521 elif phase == constants.HOOKS_PHASE_POST:
3522 # Used to change hooks' output to proper indentation
3523 feedback_fn("* Hooks Results")
3524 assert hooks_results, "invalid result from hooks"
3526 for node_name in hooks_results:
3527 res = hooks_results[node_name]
3529 test = msg and not res.offline
3530 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3531 "Communication failure in hooks execution: %s", msg)
3532 if res.offline or msg:
3533 # No need to investigate payload if node is offline or gave
3536 for script, hkr, output in res.payload:
3537 test = hkr == constants.HKR_FAIL
3538 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3539 "Script %s failed, output:", script)
3541 output = self._HOOKS_INDENT_RE.sub(" ", output)
3542 feedback_fn("%s" % output)
3548 class LUClusterVerifyDisks(NoHooksLU):
3549 """Verifies the cluster disks status.
3554 def ExpandNames(self):
3555 self.share_locks = _ShareAll()
3556 self.needed_locks = {
3557 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3560 def Exec(self, feedback_fn):
3561 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3563 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3564 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3565 for group in group_names])
3568 class LUGroupVerifyDisks(NoHooksLU):
3569 """Verifies the status of all disks in a node group.
3574 def ExpandNames(self):
3575 # Raises errors.OpPrereqError on its own if group can't be found
3576 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3578 self.share_locks = _ShareAll()
3579 self.needed_locks = {
3580 locking.LEVEL_INSTANCE: [],
3581 locking.LEVEL_NODEGROUP: [],
3582 locking.LEVEL_NODE: [],
3585 def DeclareLocks(self, level):
3586 if level == locking.LEVEL_INSTANCE:
3587 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3589 # Lock instances optimistically, needs verification once node and group
3590 # locks have been acquired
3591 self.needed_locks[locking.LEVEL_INSTANCE] = \
3592 self.cfg.GetNodeGroupInstances(self.group_uuid)
3594 elif level == locking.LEVEL_NODEGROUP:
3595 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3597 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3598 set([self.group_uuid] +
3599 # Lock all groups used by instances optimistically; this requires
3600 # going via the node before it's locked, requiring verification
3603 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3604 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3606 elif level == locking.LEVEL_NODE:
3607 # This will only lock the nodes in the group to be verified which contain
3609 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3610 self._LockInstancesNodes()
3612 # Lock all nodes in group to be verified
3613 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3614 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3615 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3617 def CheckPrereq(self):
3618 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3619 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3620 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3622 assert self.group_uuid in owned_groups
3624 # Check if locked instances are still correct
3625 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3627 # Get instance information
3628 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3630 # Check if node groups for locked instances are still correct
3631 _CheckInstancesNodeGroups(self.cfg, self.instances,
3632 owned_groups, owned_nodes, self.group_uuid)
3634 def Exec(self, feedback_fn):
3635 """Verify integrity of cluster disks.
3637 @rtype: tuple of three items
3638 @return: a tuple of (dict of node-to-node_error, list of instances
3639 which need activate-disks, dict of instance: (node, volume) for
3644 res_instances = set()
3647 nv_dict = _MapInstanceDisksToNodes([inst
3648 for inst in self.instances.values()
3649 if inst.admin_state == constants.ADMINST_UP])
3652 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3653 set(self.cfg.GetVmCapableNodeList()))
3655 node_lvs = self.rpc.call_lv_list(nodes, [])
3657 for (node, node_res) in node_lvs.items():
3658 if node_res.offline:
3661 msg = node_res.fail_msg
3663 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3664 res_nodes[node] = msg
3667 for lv_name, (_, _, lv_online) in node_res.payload.items():
3668 inst = nv_dict.pop((node, lv_name), None)
3669 if not (lv_online or inst is None):
3670 res_instances.add(inst)
3672 # any leftover items in nv_dict are missing LVs, let's arrange the data
3674 for key, inst in nv_dict.iteritems():
3675 res_missing.setdefault(inst, []).append(list(key))
3677 return (res_nodes, list(res_instances), res_missing)
3680 class LUClusterRepairDiskSizes(NoHooksLU):
3681 """Verifies the cluster disks sizes.
3686 def ExpandNames(self):
3687 if self.op.instances:
3688 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3689 self.needed_locks = {
3690 locking.LEVEL_NODE_RES: [],
3691 locking.LEVEL_INSTANCE: self.wanted_names,
3693 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3695 self.wanted_names = None
3696 self.needed_locks = {
3697 locking.LEVEL_NODE_RES: locking.ALL_SET,
3698 locking.LEVEL_INSTANCE: locking.ALL_SET,
3700 self.share_locks = {
3701 locking.LEVEL_NODE_RES: 1,
3702 locking.LEVEL_INSTANCE: 0,
3705 def DeclareLocks(self, level):
3706 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3707 self._LockInstancesNodes(primary_only=True, level=level)
3709 def CheckPrereq(self):
3710 """Check prerequisites.
3712 This only checks the optional instance list against the existing names.
3715 if self.wanted_names is None:
3716 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3718 self.wanted_instances = \
3719 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3721 def _EnsureChildSizes(self, disk):
3722 """Ensure children of the disk have the needed disk size.
3724 This is valid mainly for DRBD8 and fixes an issue where the
3725 children have a smaller disk size than the parent.
3727 @param disk: an L{ganeti.objects.Disk} object
3730 if disk.dev_type == constants.LD_DRBD8:
3731 assert disk.children, "Empty children for DRBD8?"
3732 fchild = disk.children[0]
3733 mismatch = fchild.size < disk.size
3735 self.LogInfo("Child disk has size %d, parent %d, fixing",
3736 fchild.size, disk.size)
3737 fchild.size = disk.size
3739 # and we recurse on this child only, not on the metadev
3740 return self._EnsureChildSizes(fchild) or mismatch
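# Note: a DRBD8 disk's children are, by convention, (data device, metadata
# device); only the data child has to match the parent's size, which is why
# the code above fixes and recurses on children[0] only ("not on the
# metadev").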
3744 def Exec(self, feedback_fn):
3745 """Verify the size of cluster disks.
3748 # TODO: check child disks too
3749 # TODO: check differences in size between primary/secondary nodes
3751 for instance in self.wanted_instances:
3752 pnode = instance.primary_node
3753 if pnode not in per_node_disks:
3754 per_node_disks[pnode] = []
3755 for idx, disk in enumerate(instance.disks):
3756 per_node_disks[pnode].append((instance, idx, disk))
3758 assert not (frozenset(per_node_disks.keys()) -
3759 self.owned_locks(locking.LEVEL_NODE_RES)), \
3760 "Not owning correct locks"
3761 assert not self.owned_locks(locking.LEVEL_NODE)
3764 for node, dskl in per_node_disks.items():
3765 newl = [v[2].Copy() for v in dskl]
3767 self.cfg.SetDiskID(dsk, node)
3768 result = self.rpc.call_blockdev_getsize(node, newl)
3770 self.LogWarning("Failure in blockdev_getsize call to node"
3771 " %s, ignoring", node)
3773 if len(result.payload) != len(dskl):
3774 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3775 " result.payload=%s", node, len(dskl), result.payload)
3776 self.LogWarning("Invalid result from node %s, ignoring node results",
3779 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3781 self.LogWarning("Disk %d of instance %s did not return size"
3782 " information, ignoring", idx, instance.name)
3784 if not isinstance(size, (int, long)):
3785 self.LogWarning("Disk %d of instance %s did not return valid"
3786 " size information, ignoring", idx, instance.name)
3789 if size != disk.size:
3790 self.LogInfo("Disk %d of instance %s has mismatched size,"
3791 " correcting: recorded %d, actual %d", idx,
3792 instance.name, disk.size, size)
3794 self.cfg.Update(instance, feedback_fn)
3795 changed.append((instance.name, idx, size))
3796 if self._EnsureChildSizes(disk):
3797 self.cfg.Update(instance, feedback_fn)
3798 changed.append((instance.name, idx, disk.size))
3802 class LUClusterRename(LogicalUnit):
3803 """Rename the cluster.
3806 HPATH = "cluster-rename"
3807 HTYPE = constants.HTYPE_CLUSTER
3809 def BuildHooksEnv(self):
3814 "OP_TARGET": self.cfg.GetClusterName(),
3815 "NEW_NAME": self.op.name,
3818 def BuildHooksNodes(self):
3819 """Build hooks nodes.
3822 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3824 def CheckPrereq(self):
3825 """Verify that the passed name is a valid one.
3828 hostname = netutils.GetHostname(name=self.op.name,
3829 family=self.cfg.GetPrimaryIPFamily())
3831 new_name = hostname.name
3832 self.ip = new_ip = hostname.ip
3833 old_name = self.cfg.GetClusterName()
3834 old_ip = self.cfg.GetMasterIP()
3835 if new_name == old_name and new_ip == old_ip:
3836 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3837 " cluster has changed",
3839 if new_ip != old_ip:
3840 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3841 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3842 " reachable on the network" %
3843 new_ip, errors.ECODE_NOTUNIQUE)
3845 self.op.name = new_name
3847 def Exec(self, feedback_fn):
3848 """Rename the cluster.
3851 clustername = self.op.name
3854 # shutdown the master IP
3855 master_params = self.cfg.GetMasterNetworkParameters()
3856 ems = self.cfg.GetUseExternalMipScript()
3857 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3859 result.Raise("Could not disable the master role")
3862 cluster = self.cfg.GetClusterInfo()
3863 cluster.cluster_name = clustername
3864 cluster.master_ip = new_ip
3865 self.cfg.Update(cluster, feedback_fn)
3867 # update the known hosts file
3868 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3869 node_list = self.cfg.GetOnlineNodeList()
3871 node_list.remove(master_params.name)
3874 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3876 master_params.ip = new_ip
3877 result = self.rpc.call_node_activate_master_ip(master_params.name,
3879 msg = result.fail_msg
3881 self.LogWarning("Could not re-enable the master role on"
3882 " the master, please restart manually: %s", msg)
3887 def _ValidateNetmask(cfg, netmask):
3888 """Checks if a netmask is valid.
3890 @type cfg: L{config.ConfigWriter}
3891 @param cfg: The cluster configuration
3893 @param netmask: the netmask to be verified
3894 @raise errors.OpPrereqError: if the validation fails
3897 ip_family = cfg.GetPrimaryIPFamily()
3899 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3900 except errors.ProgrammerError:
3901 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3903 if not ipcls.ValidateNetmask(netmask):
3904 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3908 class LUClusterSetParams(LogicalUnit):
3909 """Change the parameters of the cluster.
3912 HPATH = "cluster-modify"
3913 HTYPE = constants.HTYPE_CLUSTER
3916 def CheckArguments(self):
3920 if self.op.uid_pool:
3921 uidpool.CheckUidPool(self.op.uid_pool)
3923 if self.op.add_uids:
3924 uidpool.CheckUidPool(self.op.add_uids)
3926 if self.op.remove_uids:
3927 uidpool.CheckUidPool(self.op.remove_uids)
3929 if self.op.master_netmask is not None:
3930 _ValidateNetmask(self.cfg, self.op.master_netmask)
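# Note (illustrative assumption): master_netmask is given as a CIDR prefix
# length (e.g. 24 for an IPv4 /24 network); _ValidateNetmask (defined above)
# only checks that the value is acceptable for the cluster's primary IP
# family.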
3932 if self.op.diskparams:
3933 for dt_params in self.op.diskparams.values():
3934 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3936 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3937 except errors.OpPrereqError, err:
3938 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3941 def ExpandNames(self):
3942 # FIXME: in the future maybe other cluster params won't require checking on
3943 # all nodes to be modified.
3944 self.needed_locks = {
3945 locking.LEVEL_NODE: locking.ALL_SET,
3946 locking.LEVEL_INSTANCE: locking.ALL_SET,
3947 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3949 self.share_locks = {
3950 locking.LEVEL_NODE: 1,
3951 locking.LEVEL_INSTANCE: 1,
3952 locking.LEVEL_NODEGROUP: 1,
3955 def BuildHooksEnv(self):
3960 "OP_TARGET": self.cfg.GetClusterName(),
3961 "NEW_VG_NAME": self.op.vg_name,
3964 def BuildHooksNodes(self):
3965 """Build hooks nodes.
3968 mn = self.cfg.GetMasterNode()
3971 def CheckPrereq(self):
3972 """Check prerequisites.
3974 This checks whether the given params don't conflict and
3975 if the given volume group is valid.
3978 if self.op.vg_name is not None and not self.op.vg_name:
3979 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3980 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3981 " instances exist", errors.ECODE_INVAL)
3983 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3984 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3985 raise errors.OpPrereqError("Cannot disable drbd helper while"
3986 " drbd-based instances exist",
3989 node_list = self.owned_locks(locking.LEVEL_NODE)
3991 # if vg_name is not None, check the given volume group on all nodes
3993 vglist = self.rpc.call_vg_list(node_list)
3994 for node in node_list:
3995 msg = vglist[node].fail_msg
3997 # ignoring down node
3998 self.LogWarning("Error while gathering data on node %s"
3999 " (ignoring node): %s", node, msg)
4001 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4003 constants.MIN_VG_SIZE)
4005 raise errors.OpPrereqError("Error on node '%s': %s" %
4006 (node, vgstatus), errors.ECODE_ENVIRON)
4008 if self.op.drbd_helper:
4009 # check the given drbd helper on all nodes
4010 helpers = self.rpc.call_drbd_helper(node_list)
4011 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4013 self.LogInfo("Not checking drbd helper on offline node %s", node)
4015 msg = helpers[node].fail_msg
4017 raise errors.OpPrereqError("Error checking drbd helper on node"
4018 " '%s': %s" % (node, msg),
4019 errors.ECODE_ENVIRON)
4020 node_helper = helpers[node].payload
4021 if node_helper != self.op.drbd_helper:
4022 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4023 (node, node_helper), errors.ECODE_ENVIRON)
4025 self.cluster = cluster = self.cfg.GetClusterInfo()
4026 # validate params changes
4027 if self.op.beparams:
4028 objects.UpgradeBeParams(self.op.beparams)
4029 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4030 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4032 if self.op.ndparams:
4033 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4034 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4036 # TODO: we need a more general way to handle resetting
4037 # cluster-level parameters to default values
4038 if self.new_ndparams["oob_program"] == "":
4039 self.new_ndparams["oob_program"] = \
4040 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4042 if self.op.hv_state:
4043 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4044 self.cluster.hv_state_static)
4045 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4046 for hv, values in new_hv_state.items())
4048 if self.op.disk_state:
4049 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4050 self.cluster.disk_state_static)
4051 self.new_disk_state = \
4052 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4053 for name, values in svalues.items()))
4054 for storage, svalues in new_disk_state.items())
4057 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4060 all_instances = self.cfg.GetAllInstancesInfo().values()
4062 for group in self.cfg.GetAllNodeGroupsInfo().values():
4063 instances = frozenset([inst for inst in all_instances
4064 if compat.any(node in group.members
4065 for node in inst.all_nodes)])
4066 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4067 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4069 new_ipolicy, instances)
4071 violations.update(new)
4074 self.LogWarning("After the ipolicy change the following instances"
4075 " violate them: %s",
4076 utils.CommaJoin(utils.NiceSort(violations)))
4078 if self.op.nicparams:
4079 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4080 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4081 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4084 # check all instances for consistency
4085 for instance in self.cfg.GetAllInstancesInfo().values():
4086 for nic_idx, nic in enumerate(instance.nics):
4087 params_copy = copy.deepcopy(nic.nicparams)
4088 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4090 # check parameter syntax
4092 objects.NIC.CheckParameterSyntax(params_filled)
4093 except errors.ConfigurationError, err:
4094 nic_errors.append("Instance %s, nic/%d: %s" %
4095 (instance.name, nic_idx, err))
4097 # if we're moving instances to routed, check that they have an ip
4098 target_mode = params_filled[constants.NIC_MODE]
4099 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4100 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4101 " address" % (instance.name, nic_idx))
4103 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4104 "\n".join(nic_errors))
4106 # hypervisor list/parameters
4107 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4108 if self.op.hvparams:
4109 for hv_name, hv_dict in self.op.hvparams.items():
4110 if hv_name not in self.new_hvparams:
4111 self.new_hvparams[hv_name] = hv_dict
4113 self.new_hvparams[hv_name].update(hv_dict)
4115 # disk template parameters
4116 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4117 if self.op.diskparams:
4118 for dt_name, dt_params in self.op.diskparams.items():
4119 if dt_name not in self.new_diskparams:
4120 self.new_diskparams[dt_name] = dt_params
4122 self.new_diskparams[dt_name].update(dt_params)
4124 # os hypervisor parameters
4125 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4127 for os_name, hvs in self.op.os_hvp.items():
4128 if os_name not in self.new_os_hvp:
4129 self.new_os_hvp[os_name] = hvs
4131 for hv_name, hv_dict in hvs.items():
4132 if hv_name not in self.new_os_hvp[os_name]:
4133 self.new_os_hvp[os_name][hv_name] = hv_dict
4135 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4138 self.new_osp = objects.FillDict(cluster.osparams, {})
4139 if self.op.osparams:
4140 for os_name, osp in self.op.osparams.items():
4141 if os_name not in self.new_osp:
4142 self.new_osp[os_name] = {}
4144 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4147 if not self.new_osp[os_name]:
4148 # we removed all parameters
4149 del self.new_osp[os_name]
4151 # check the parameter validity (remote check)
4152 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4153 os_name, self.new_osp[os_name])
4155 # changes to the hypervisor list
4156 if self.op.enabled_hypervisors is not None:
4157 self.hv_list = self.op.enabled_hypervisors
4158 for hv in self.hv_list:
4159 # if the hypervisor doesn't already exist in the cluster
4160 # hvparams, we initialize it to empty, and then (in both
4161 # cases) we make sure to fill the defaults, as we might not
4162 # have a complete defaults list if the hypervisor wasn't enabled before
4164 if hv not in new_hvp:
4166 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4167 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4169 self.hv_list = cluster.enabled_hypervisors
4171 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4172 # either the enabled list has changed, or the parameters have, validate
4173 for hv_name, hv_params in self.new_hvparams.items():
4174 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4175 (self.op.enabled_hypervisors and
4176 hv_name in self.op.enabled_hypervisors)):
4177 # either this is a new hypervisor, or its parameters have changed
4178 hv_class = hypervisor.GetHypervisor(hv_name)
4179 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4180 hv_class.CheckParameterSyntax(hv_params)
4181 _CheckHVParams(self, node_list, hv_name, hv_params)
4184 # no need to check any newly-enabled hypervisors, since the
4185 # defaults have already been checked in the above code-block
4186 for os_name, os_hvp in self.new_os_hvp.items():
4187 for hv_name, hv_params in os_hvp.items():
4188 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4189 # we need to fill in the new os_hvp on top of the actual hv_p
4190 cluster_defaults = self.new_hvparams.get(hv_name, {})
4191 new_osp = objects.FillDict(cluster_defaults, hv_params)
4192 hv_class = hypervisor.GetHypervisor(hv_name)
4193 hv_class.CheckParameterSyntax(new_osp)
4194 _CheckHVParams(self, node_list, hv_name, new_osp)
4196 if self.op.default_iallocator:
4197 alloc_script = utils.FindFile(self.op.default_iallocator,
4198 constants.IALLOCATOR_SEARCH_PATH,
4200 if alloc_script is None:
4201 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4202 " specified" % self.op.default_iallocator,
4205 def Exec(self, feedback_fn):
4206 """Change the parameters of the cluster.
4209 if self.op.vg_name is not None:
4210 new_volume = self.op.vg_name
4213 if new_volume != self.cfg.GetVGName():
4214 self.cfg.SetVGName(new_volume)
4216 feedback_fn("Cluster LVM configuration already in desired"
4217 " state, not changing")
4218 if self.op.drbd_helper is not None:
4219 new_helper = self.op.drbd_helper
4222 if new_helper != self.cfg.GetDRBDHelper():
4223 self.cfg.SetDRBDHelper(new_helper)
4225 feedback_fn("Cluster DRBD helper already in desired state,"
4227 if self.op.hvparams:
4228 self.cluster.hvparams = self.new_hvparams
4230 self.cluster.os_hvp = self.new_os_hvp
4231 if self.op.enabled_hypervisors is not None:
4232 self.cluster.hvparams = self.new_hvparams
4233 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4234 if self.op.beparams:
4235 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4236 if self.op.nicparams:
4237 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4239 self.cluster.ipolicy = self.new_ipolicy
4240 if self.op.osparams:
4241 self.cluster.osparams = self.new_osp
4242 if self.op.ndparams:
4243 self.cluster.ndparams = self.new_ndparams
4244 if self.op.diskparams:
4245 self.cluster.diskparams = self.new_diskparams
4246 if self.op.hv_state:
4247 self.cluster.hv_state_static = self.new_hv_state
4248 if self.op.disk_state:
4249 self.cluster.disk_state_static = self.new_disk_state
4251 if self.op.candidate_pool_size is not None:
4252 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4253 # we need to update the pool size here, otherwise the save will fail
4254 _AdjustCandidatePool(self, [])
4256 if self.op.maintain_node_health is not None:
4257 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4258 feedback_fn("Note: CONFD was disabled at build time, node health"
4259 " maintenance is not useful (still enabling it)")
4260 self.cluster.maintain_node_health = self.op.maintain_node_health
4262 if self.op.prealloc_wipe_disks is not None:
4263 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4265 if self.op.add_uids is not None:
4266 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4268 if self.op.remove_uids is not None:
4269 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4271 if self.op.uid_pool is not None:
4272 self.cluster.uid_pool = self.op.uid_pool
4274 if self.op.default_iallocator is not None:
4275 self.cluster.default_iallocator = self.op.default_iallocator
4277 if self.op.reserved_lvs is not None:
4278 self.cluster.reserved_lvs = self.op.reserved_lvs
4280 if self.op.use_external_mip_script is not None:
4281 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4283 def helper_os(aname, mods, desc):
4285 lst = getattr(self.cluster, aname)
4286 for key, val in mods:
4287 if key == constants.DDM_ADD:
4289 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4292 elif key == constants.DDM_REMOVE:
4296 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4298 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4300 if self.op.hidden_os:
4301 helper_os("hidden_os", self.op.hidden_os, "hidden")
4303 if self.op.blacklisted_os:
4304 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4306 if self.op.master_netdev:
4307 master_params = self.cfg.GetMasterNetworkParameters()
4308 ems = self.cfg.GetUseExternalMipScript()
4309 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4310 self.cluster.master_netdev)
4311 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4313 result.Raise("Could not disable the master ip")
4314 feedback_fn("Changing master_netdev from %s to %s" %
4315 (master_params.netdev, self.op.master_netdev))
4316 self.cluster.master_netdev = self.op.master_netdev
4318 if self.op.master_netmask:
4319 master_params = self.cfg.GetMasterNetworkParameters()
4320 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4321 result = self.rpc.call_node_change_master_netmask(master_params.name,
4322 master_params.netmask,
4323 self.op.master_netmask,
4325 master_params.netdev)
4327 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4330 self.cluster.master_netmask = self.op.master_netmask
4332 self.cfg.Update(self.cluster, feedback_fn)
4334 if self.op.master_netdev:
4335 master_params = self.cfg.GetMasterNetworkParameters()
4336 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4337 self.op.master_netdev)
4338 ems = self.cfg.GetUseExternalMipScript()
4339 result = self.rpc.call_node_activate_master_ip(master_params.name,
4342 self.LogWarning("Could not re-enable the master ip on"
4343 " the master, please restart manually: %s",
4347 def _UploadHelper(lu, nodes, fname):
4348 """Helper for uploading a file and showing warnings.
4351 if os.path.exists(fname):
4352 result = lu.rpc.call_upload_file(nodes, fname)
4353 for to_node, to_result in result.items():
4354 msg = to_result.fail_msg
4356 msg = ("Copy of file %s to node %s failed: %s" %
4357 (fname, to_node, msg))
4358 lu.proc.LogWarning(msg)
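# Minimal usage sketch (hypothetical call site): after changing a file that
# lives on every node, e.g.
#   _UploadHelper(self, self.cfg.GetOnlineNodeList(), constants.ETC_HOSTS)
# per-node failures only produce warnings, so a single unreachable node does
# not abort the whole operation.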
4361 def _ComputeAncillaryFiles(cluster, redist):
4362 """Compute files external to Ganeti which need to be consistent.
4364 @type redist: boolean
4365 @param redist: Whether to include files which need to be redistributed
4368 # Compute files for all nodes
4370 constants.SSH_KNOWN_HOSTS_FILE,
4371 constants.CONFD_HMAC_KEY,
4372 constants.CLUSTER_DOMAIN_SECRET_FILE,
4373 constants.SPICE_CERT_FILE,
4374 constants.SPICE_CACERT_FILE,
4375 constants.RAPI_USERS_FILE,
4379 files_all.update(constants.ALL_CERT_FILES)
4380 files_all.update(ssconf.SimpleStore().GetFileList())
4382 # we need to ship at least the RAPI certificate
4383 files_all.add(constants.RAPI_CERT_FILE)
4385 if cluster.modify_etc_hosts:
4386 files_all.add(constants.ETC_HOSTS)
4388 if cluster.use_external_mip_script:
4389 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4391 # Files which are optional, these must:
4392 # - be present in one other category as well
4393 # - either exist or not exist on all nodes of that category (mc, vm all)
4395 constants.RAPI_USERS_FILE,
4398 # Files which should only be on master candidates
4402 files_mc.add(constants.CLUSTER_CONF_FILE)
4404 # Files which should only be on VM-capable nodes
4405 files_vm = set(filename
4406 for hv_name in cluster.enabled_hypervisors
4407 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4409 files_opt |= set(filename
4410 for hv_name in cluster.enabled_hypervisors
4411 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4413 # Filenames in each category must be unique
4414 all_files_set = files_all | files_mc | files_vm
4415 assert (len(all_files_set) ==
4416 sum(map(len, [files_all, files_mc, files_vm]))), \
4417 "Found file listed in more than one file list"
4419 # Optional files must be present in one other category
4420 assert all_files_set.issuperset(files_opt), \
4421 "Optional file not in a different required list"
4423 return (files_all, files_opt, files_mc, files_vm)
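# The four sets returned above drive file distribution: files_all is for
# every node, files_vm only for vm-capable nodes, files_mc only for master
# candidates (redistributed elsewhere, see the assert in
# _RedistributeAncillaryFiles), and files_opt marks entries of the other sets
# that may legitimately be missing on some nodes.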
4426 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4427 """Distribute additional files which are part of the cluster configuration.
4429 ConfigWriter takes care of distributing the config and ssconf files, but
4430 there are more files which should be distributed to all nodes. This function
4431 makes sure those are copied.
4433 @param lu: calling logical unit
4434 @param additional_nodes: list of nodes not in the config to distribute to
4435 @type additional_vm: boolean
4436 @param additional_vm: whether the additional nodes are vm-capable or not
4439 # Gather target nodes
4440 cluster = lu.cfg.GetClusterInfo()
4441 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4443 online_nodes = lu.cfg.GetOnlineNodeList()
4444 online_set = frozenset(online_nodes)
4445 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4447 if additional_nodes is not None:
4448 online_nodes.extend(additional_nodes)
4450 vm_nodes.extend(additional_nodes)
4452 # Never distribute to master node
4453 for nodelist in [online_nodes, vm_nodes]:
4454 if master_info.name in nodelist:
4455 nodelist.remove(master_info.name)
4458 (files_all, _, files_mc, files_vm) = \
4459 _ComputeAncillaryFiles(cluster, True)
4461 # Never re-distribute configuration file from here
4462 assert not (constants.CLUSTER_CONF_FILE in files_all or
4463 constants.CLUSTER_CONF_FILE in files_vm)
4464 assert not files_mc, "Master candidates not handled in this function"
4467 (online_nodes, files_all),
4468 (vm_nodes, files_vm),
4472 for (node_list, files) in filemap:
4474 _UploadHelper(lu, node_list, fname)
4477 class LUClusterRedistConf(NoHooksLU):
4478 """Force the redistribution of cluster configuration.
4480 This is a very simple LU.
4485 def ExpandNames(self):
4486 self.needed_locks = {
4487 locking.LEVEL_NODE: locking.ALL_SET,
4489 self.share_locks[locking.LEVEL_NODE] = 1
4491 def Exec(self, feedback_fn):
4492 """Redistribute the configuration.
4495 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4496 _RedistributeAncillaryFiles(self)
4499 class LUClusterActivateMasterIp(NoHooksLU):
4500 """Activate the master IP on the master node.
4503 def Exec(self, feedback_fn):
4504 """Activate the master IP.
4507 master_params = self.cfg.GetMasterNetworkParameters()
4508 ems = self.cfg.GetUseExternalMipScript()
4509 result = self.rpc.call_node_activate_master_ip(master_params.name,
4511 result.Raise("Could not activate the master IP")
4514 class LUClusterDeactivateMasterIp(NoHooksLU):
4515 """Deactivate the master IP on the master node.
4518 def Exec(self, feedback_fn):
4519 """Deactivate the master IP.
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 ems = self.cfg.GetUseExternalMipScript()
4524 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4526 result.Raise("Could not deactivate the master IP")
4529 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4530 """Sleep and poll for an instance's disk to sync.
4533 if not instance.disks or disks is not None and not disks:
4536 disks = _ExpandCheckDisks(instance, disks)
4539 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4541 node = instance.primary_node
4544 lu.cfg.SetDiskID(dev, node)
4546 # TODO: Convert to utils.Retry
4549 degr_retries = 10 # in seconds, as we sleep 1 second each time
4553 cumul_degraded = False
4554 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4555 msg = rstats.fail_msg
4557 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4560 raise errors.RemoteError("Can't contact node %s for mirror data,"
4561 " aborting." % node)
4564 rstats = rstats.payload
4566 for i, mstat in enumerate(rstats):
4568 lu.LogWarning("Can't compute data for node %s/%s",
4569 node, disks[i].iv_name)
4572 cumul_degraded = (cumul_degraded or
4573 (mstat.is_degraded and mstat.sync_percent is None))
4574 if mstat.sync_percent is not None:
4576 if mstat.estimated_time is not None:
4577 rem_time = ("%s remaining (estimated)" %
4578 utils.FormatSeconds(mstat.estimated_time))
4579 max_time = mstat.estimated_time
4581 rem_time = "no time estimate"
4582 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4583 (disks[i].iv_name, mstat.sync_percent, rem_time))
4585 # if we're done but degraded, let's do a few small retries, to
4586 # make sure we see a stable and not transient situation; therefore
4587 # we force restart of the loop
4588 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4589 logging.info("Degraded disks found, %d retries left", degr_retries)
4597 time.sleep(min(60, max_time))
4600 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4601 return not cumul_degraded
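# Hedged usage sketch: callers typically treat a False return value as an
# aborted sync, along the lines of (names are illustrative)
#   disk_abort = not _WaitForSync(lu, instance)
#   if disk_abort:
#     raise errors.OpExecError("Disk sync did not complete for instance %s" %
#                              instance.name)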
4604 def _BlockdevFind(lu, node, dev, instance):
4605 """Wrapper around call_blockdev_find to annotate diskparams.
4607 @param lu: A reference to the lu object
4608 @param node: The node to call out to
4609 @param dev: The device to find
4610 @param instance: The instance object the device belongs to
4611 @return: The result of the RPC call
4614 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4615 return lu.rpc.call_blockdev_find(node, disk)
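# This wrapper and _CheckDiskConsistency below exist for the same reason:
# disk parameters live in the cluster/node group configuration rather than on
# the Disk object itself, so they have to be annotated onto a copy of the
# disk (via _AnnotateDiskParams) before the blockdev RPCs can use them.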
4618 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4619 """Wrapper around L{_CheckDiskConsistencyInner}.
4622 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4623 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4627 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4629 """Check that mirrors are not degraded.
4631 @attention: The device has to be annotated already.
4633 The ldisk parameter, if True, will change the test from the
4634 is_degraded attribute (which represents overall non-ok status for
4635 the device(s)) to the ldisk (representing the local storage status).
4638 lu.cfg.SetDiskID(dev, node)
4642 if on_primary or dev.AssembleOnSecondary():
4643 rstats = lu.rpc.call_blockdev_find(node, dev)
4644 msg = rstats.fail_msg
4646 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4648 elif not rstats.payload:
4649 lu.LogWarning("Can't find disk on node %s", node)
4653 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4655 result = result and not rstats.payload.is_degraded
4658 for child in dev.children:
4659 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4665 class LUOobCommand(NoHooksLU):
4666 """Logical unit for OOB handling.
4670 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4672 def ExpandNames(self):
4673 """Gather locks we need.
4676 if self.op.node_names:
4677 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4678 lock_names = self.op.node_names
4680 lock_names = locking.ALL_SET
4682 self.needed_locks = {
4683 locking.LEVEL_NODE: lock_names,
4686 def CheckPrereq(self):
4687 """Check prerequisites.
4690 - the node exists in the configuration
4693 Any errors are signaled by raising errors.OpPrereqError.
4697 self.master_node = self.cfg.GetMasterNode()
4699 assert self.op.power_delay >= 0.0
4701 if self.op.node_names:
4702 if (self.op.command in self._SKIP_MASTER and
4703 self.master_node in self.op.node_names):
4704 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4705 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4707 if master_oob_handler:
4708 additional_text = ("run '%s %s %s' if you want to operate on the"
4709 " master regardless") % (master_oob_handler,
4713 additional_text = "it does not support out-of-band operations"
4715 raise errors.OpPrereqError(("Operating on the master node %s is not"
4716 " allowed for %s; %s") %
4717 (self.master_node, self.op.command,
4718 additional_text), errors.ECODE_INVAL)
4720 self.op.node_names = self.cfg.GetNodeList()
4721 if self.op.command in self._SKIP_MASTER:
4722 self.op.node_names.remove(self.master_node)
4724 if self.op.command in self._SKIP_MASTER:
4725 assert self.master_node not in self.op.node_names
4727 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4729 raise errors.OpPrereqError("Node %s not found" % node_name,
4732 self.nodes.append(node)
4734 if (not self.op.ignore_status and
4735 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4736 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4737 " not marked offline") % node_name,
4740 def Exec(self, feedback_fn):
4741 """Execute OOB and return result if we expect any.
4744 master_node = self.master_node
4747 for idx, node in enumerate(utils.NiceSort(self.nodes,
4748 key=lambda node: node.name)):
4749 node_entry = [(constants.RS_NORMAL, node.name)]
4750 ret.append(node_entry)
4752 oob_program = _SupportsOob(self.cfg, node)
4755 node_entry.append((constants.RS_UNAVAIL, None))
4758 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4759 self.op.command, oob_program, node.name)
4760 result = self.rpc.call_run_oob(master_node, oob_program,
4761 self.op.command, node.name,
4765 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4766 node.name, result.fail_msg)
4767 node_entry.append((constants.RS_NODATA, None))
4770 self._CheckPayload(result)
4771 except errors.OpExecError, err:
4772 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4774 node_entry.append((constants.RS_NODATA, None))
4776 if self.op.command == constants.OOB_HEALTH:
4777 # For health we should log important events
4778 for item, status in result.payload:
4779 if status in [constants.OOB_STATUS_WARNING,
4780 constants.OOB_STATUS_CRITICAL]:
4781 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4782 item, node.name, status)
4784 if self.op.command == constants.OOB_POWER_ON:
4786 elif self.op.command == constants.OOB_POWER_OFF:
4787 node.powered = False
4788 elif self.op.command == constants.OOB_POWER_STATUS:
4789 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4790 if powered != node.powered:
4791 logging.warning(("Recorded power state (%s) of node '%s' does not"
4792 " match actual power state (%s)"), node.powered,
4795 # For configuration changing commands we should update the node
4796 if self.op.command in (constants.OOB_POWER_ON,
4797 constants.OOB_POWER_OFF):
4798 self.cfg.Update(node, feedback_fn)
4800 node_entry.append((constants.RS_NORMAL, result.payload))
4802 if (self.op.command == constants.OOB_POWER_ON and
4803 idx < len(self.nodes) - 1):
4804 time.sleep(self.op.power_delay)
4808 def _CheckPayload(self, result):
4809 """Checks if the payload is valid.
4811 @param result: RPC result
4812 @raises errors.OpExecError: If payload is not valid
4816 if self.op.command == constants.OOB_HEALTH:
4817 if not isinstance(result.payload, list):
4818 errs.append("command 'health' is expected to return a list but got %s" %
4819 type(result.payload))
4821 for item, status in result.payload:
4822 if status not in constants.OOB_STATUSES:
4823 errs.append("health item '%s' has invalid status '%s'" %
4826 if self.op.command == constants.OOB_POWER_STATUS:
4827 if not isinstance(result.payload, dict):
4828 errs.append("power-status is expected to return a dict but got %s" %
4829 type(result.payload))
4831 if self.op.command in [
4832 constants.OOB_POWER_ON,
4833 constants.OOB_POWER_OFF,
4834 constants.OOB_POWER_CYCLE,
4836 if result.payload is not None:
4837 errs.append("%s is expected to not return payload but got '%s'" %
4838 (self.op.command, result.payload))
4841 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4842 utils.CommaJoin(errs))
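# Expected payload shapes enforced above, per OOB command (derived from the
# checks, not an exhaustive specification):
#   OOB_HEALTH                 -> list of (item, status) tuples
#   OOB_POWER_STATUS           -> dict containing OOB_POWER_STATUS_POWERED
#   OOB_POWER_ON/_OFF/_CYCLE   -> no payload (None)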
4845 class _OsQuery(_QueryBase):
4846 FIELDS = query.OS_FIELDS
4848 def ExpandNames(self, lu):
4849 # Lock all nodes in shared mode
4850 # Temporary removal of locks, should be reverted later
4851 # TODO: reintroduce locks when they are lighter-weight
4852 lu.needed_locks = {}
4853 #self.share_locks[locking.LEVEL_NODE] = 1
4854 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4856 # The following variables interact with _QueryBase._GetNames
4858 self.wanted = self.names
4860 self.wanted = locking.ALL_SET
4862 self.do_locking = self.use_locking
4864 def DeclareLocks(self, lu, level):
4868 def _DiagnoseByOS(rlist):
4869 """Remaps a per-node return list into an a per-os per-node dictionary
4871 @param rlist: a map with node names as keys and OS objects as values
4874 @return: a dictionary with osnames as keys and as value another
4875 map, with nodes as keys and tuples of (path, status, diagnose,
4876 variants, parameters, api_versions) as values, eg::
4878 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4879 (/srv/..., False, "invalid api")],
4880 "node2": [(/srv/..., True, "", [], [])]}
4885 # we build here the list of nodes that didn't fail the RPC (at RPC
4886 # level), so that nodes with a non-responding node daemon don't
4887 # make all OSes invalid
4888 good_nodes = [node_name for node_name in rlist
4889 if not rlist[node_name].fail_msg]
4890 for node_name, nr in rlist.items():
4891 if nr.fail_msg or not nr.payload:
4893 for (name, path, status, diagnose, variants,
4894 params, api_versions) in nr.payload:
4895 if name not in all_os:
4896 # build a list of nodes for this os containing empty lists
4897 # for each node in node_list
4899 for nname in good_nodes:
4900 all_os[name][nname] = []
4901 # convert params from [name, help] to (name, help)
4902 params = [tuple(v) for v in params]
4903 all_os[name][node_name].append((path, status, diagnose,
4904 variants, params, api_versions))
4907 def _GetQueryData(self, lu):
4908 """Computes the list of nodes and their attributes.
4911 # Locking is not used
4912 assert not (compat.any(lu.glm.is_owned(level)
4913 for level in locking.LEVELS
4914 if level != locking.LEVEL_CLUSTER) or
4915 self.do_locking or self.use_locking)
4917 valid_nodes = [node.name
4918 for node in lu.cfg.GetAllNodesInfo().values()
4919 if not node.offline and node.vm_capable]
4920 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4921 cluster = lu.cfg.GetClusterInfo()
4925 for (os_name, os_data) in pol.items():
4926 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4927 hidden=(os_name in cluster.hidden_os),
4928 blacklisted=(os_name in cluster.blacklisted_os))
4932 api_versions = set()
4934 for idx, osl in enumerate(os_data.values()):
4935 info.valid = bool(info.valid and osl and osl[0][1])
4939 (node_variants, node_params, node_api) = osl[0][3:6]
4942 variants.update(node_variants)
4943 parameters.update(node_params)
4944 api_versions.update(node_api)
4946 # Filter out inconsistent values
4947 variants.intersection_update(node_variants)
4948 parameters.intersection_update(node_params)
4949 api_versions.intersection_update(node_api)
4951 info.variants = list(variants)
4952 info.parameters = list(parameters)
4953 info.api_versions = list(api_versions)
4955 data[os_name] = info
4957 # Prepare data in requested order
4958 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4962 class LUOsDiagnose(NoHooksLU):
4963 """Logical unit for OS diagnose/query.
4969 def _BuildFilter(fields, names):
4970 """Builds a filter for querying OSes.
4973 name_filter = qlang.MakeSimpleFilter("name", names)
4975 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4976 # respective field is not requested
4977 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4978 for fname in ["hidden", "blacklisted"]
4979 if fname not in fields]
4980 if "valid" not in fields:
4981 status_filter.append([qlang.OP_TRUE, "valid"])
4984 status_filter.insert(0, qlang.OP_AND)
4986 status_filter = None
4988 if name_filter and status_filter:
4989 return [qlang.OP_AND, name_filter, status_filter]
4993 return status_filter
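# Illustrative example (assuming qlang.MakeSimpleFilter yields a plain
# name-equality filter): a query for fields ["name", "variants"] with
# names=["debian-8"] would roughly produce
#   [OP_AND, <name filter>,
#            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#                     [OP_NOT, [OP_TRUE, "blacklisted"]],
#                     [OP_TRUE, "valid"]]]
# i.e. hidden, blacklisted and invalid OSes are filtered out unless those
# fields were explicitly requested.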
4995 def CheckArguments(self):
4996 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4997 self.op.output_fields, False)
4999 def ExpandNames(self):
5000 self.oq.ExpandNames(self)
5002 def Exec(self, feedback_fn):
5003 return self.oq.OldStyleQuery(self)
5006 class LUNodeRemove(LogicalUnit):
5007 """Logical unit for removing a node.
5010 HPATH = "node-remove"
5011 HTYPE = constants.HTYPE_NODE
5013 def BuildHooksEnv(self):
5018 "OP_TARGET": self.op.node_name,
5019 "NODE_NAME": self.op.node_name,
5022 def BuildHooksNodes(self):
5023 """Build hooks nodes.
5025 This doesn't run on the target node in the pre phase as a failed
5026 node would then be impossible to remove.
5029 all_nodes = self.cfg.GetNodeList()
5031 all_nodes.remove(self.op.node_name)
5034 return (all_nodes, all_nodes)
5036 def CheckPrereq(self):
5037 """Check prerequisites.
5040 - the node exists in the configuration
5041 - it does not have primary or secondary instances
5042 - it's not the master
5044 Any errors are signaled by raising errors.OpPrereqError.
5047 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5048 node = self.cfg.GetNodeInfo(self.op.node_name)
5049 assert node is not None
5051 masternode = self.cfg.GetMasterNode()
5052 if node.name == masternode:
5053 raise errors.OpPrereqError("Node is the master node, failover to another"
5054 " node is required", errors.ECODE_INVAL)
5056 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5057 if node.name in instance.all_nodes:
5058 raise errors.OpPrereqError("Instance %s is still running on the node,"
5059 " please remove first" % instance_name,
5061 self.op.node_name = node.name
5064 def Exec(self, feedback_fn):
5065 """Removes the node from the cluster.
5069 logging.info("Stopping the node daemon and removing configs from node %s",
5072 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5074 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5077 # Promote nodes to master candidate as needed
5078 _AdjustCandidatePool(self, exceptions=[node.name])
5079 self.context.RemoveNode(node.name)
5081 # Run post hooks on the node before it's removed
5082 _RunPostHook(self, node.name)
5084 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5085 msg = result.fail_msg
5087 self.LogWarning("Errors encountered on the remote node while leaving"
5088 " the cluster: %s", msg)
5090 # Remove node from our /etc/hosts
5091 if self.cfg.GetClusterInfo().modify_etc_hosts:
5092 master_node = self.cfg.GetMasterNode()
5093 result = self.rpc.call_etc_hosts_modify(master_node,
5094 constants.ETC_HOSTS_REMOVE,
5096 result.Raise("Can't update hosts file with new host data")
5097 _RedistributeAncillaryFiles(self)
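# Summary of the removal path above: the candidate pool is adjusted, the node
# is dropped from the configuration and locking context, post hooks are run,
# then the node itself is told to leave the cluster (failures only warn,
# since the node may already be dead), and finally /etc/hosts and the
# ancillary files on the remaining nodes are brought up to date.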
5100 class _NodeQuery(_QueryBase):
5101 FIELDS = query.NODE_FIELDS
5103 def ExpandNames(self, lu):
5104 lu.needed_locks = {}
5105 lu.share_locks = _ShareAll()
5108 self.wanted = _GetWantedNodes(lu, self.names)
5110 self.wanted = locking.ALL_SET
5112 self.do_locking = (self.use_locking and
5113 query.NQ_LIVE in self.requested_data)
5116 # If any non-static field is requested we need to lock the nodes
5117 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5119 def DeclareLocks(self, lu, level):
5122 def _GetQueryData(self, lu):
5123 """Computes the list of nodes and their attributes.
5126 all_info = lu.cfg.GetAllNodesInfo()
5128 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5130 # Gather data as requested
5131 if query.NQ_LIVE in self.requested_data:
5132 # filter out non-vm_capable nodes
5133 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5135 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5136 [lu.cfg.GetHypervisorType()])
5137 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5138 for (name, nresult) in node_data.items()
5139 if not nresult.fail_msg and nresult.payload)
5143 if query.NQ_INST in self.requested_data:
5144 node_to_primary = dict([(name, set()) for name in nodenames])
5145 node_to_secondary = dict([(name, set()) for name in nodenames])
5147 inst_data = lu.cfg.GetAllInstancesInfo()
5149 for inst in inst_data.values():
5150 if inst.primary_node in node_to_primary:
5151 node_to_primary[inst.primary_node].add(inst.name)
5152 for secnode in inst.secondary_nodes:
5153 if secnode in node_to_secondary:
5154 node_to_secondary[secnode].add(inst.name)
5156 node_to_primary = None
5157 node_to_secondary = None
5159 if query.NQ_OOB in self.requested_data:
5160 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5161 for name, node in all_info.iteritems())
5165 if query.NQ_GROUP in self.requested_data:
5166 groups = lu.cfg.GetAllNodeGroupsInfo()
5170 return query.NodeQueryData([all_info[name] for name in nodenames],
5171 live_data, lu.cfg.GetMasterNode(),
5172 node_to_primary, node_to_secondary, groups,
5173 oob_support, lu.cfg.GetClusterInfo())
5176 class LUNodeQuery(NoHooksLU):
5177 """Logical unit for querying nodes.
5180 # pylint: disable=W0142
5183 def CheckArguments(self):
5184 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5185 self.op.output_fields, self.op.use_locking)
5187 def ExpandNames(self):
5188 self.nq.ExpandNames(self)
5190 def DeclareLocks(self, level):
5191 self.nq.DeclareLocks(self, level)
5193 def Exec(self, feedback_fn):
5194 return self.nq.OldStyleQuery(self)
5197 class LUNodeQueryvols(NoHooksLU):
5198 """Logical unit for getting volumes on node(s).
5202 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5203 _FIELDS_STATIC = utils.FieldSet("node")
5205 def CheckArguments(self):
5206 _CheckOutputFields(static=self._FIELDS_STATIC,
5207 dynamic=self._FIELDS_DYNAMIC,
5208 selected=self.op.output_fields)
5210 def ExpandNames(self):
5211 self.share_locks = _ShareAll()
5212 self.needed_locks = {}
5214 if not self.op.nodes:
5215 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5217 self.needed_locks[locking.LEVEL_NODE] = \
5218 _GetWantedNodes(self, self.op.nodes)
5220 def Exec(self, feedback_fn):
5221 """Computes the list of nodes and their attributes.
5224 nodenames = self.owned_locks(locking.LEVEL_NODE)
5225 volumes = self.rpc.call_node_volumes(nodenames)
5227 ilist = self.cfg.GetAllInstancesInfo()
5228 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5231 for node in nodenames:
5232 nresult = volumes[node]
5235 msg = nresult.fail_msg
5237 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5240 node_vols = sorted(nresult.payload,
5241 key=operator.itemgetter("dev"))
5243 for vol in node_vols:
5245 for field in self.op.output_fields:
5248 elif field == "phys":
5252 elif field == "name":
5254 elif field == "size":
5255 val = int(float(vol["size"]))
5256 elif field == "instance":
5257 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5259 raise errors.ParameterError(field)
5260 node_output.append(str(val))
5262 output.append(node_output)
5267 class LUNodeQueryStorage(NoHooksLU):
5268 """Logical unit for getting information on storage units on node(s).
5271 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5274 def CheckArguments(self):
5275 _CheckOutputFields(static=self._FIELDS_STATIC,
5276 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5277 selected=self.op.output_fields)
5279 def ExpandNames(self):
5280 self.share_locks = _ShareAll()
5281 self.needed_locks = {}
5284 self.needed_locks[locking.LEVEL_NODE] = \
5285 _GetWantedNodes(self, self.op.nodes)
5287 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5289 def Exec(self, feedback_fn):
5290 """Computes the list of nodes and their attributes.
5293 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5295 # Always get name to sort by
5296 if constants.SF_NAME in self.op.output_fields:
5297 fields = self.op.output_fields[:]
5299 fields = [constants.SF_NAME] + self.op.output_fields
5301 # Never ask for node or type as it's only known to the LU
5302 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5303 while extra in fields:
5304 fields.remove(extra)
5306 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5307 name_idx = field_idx[constants.SF_NAME]
5309 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5310 data = self.rpc.call_storage_list(self.nodes,
5311 self.op.storage_type, st_args,
5312 self.op.name, fields)
5316 for node in utils.NiceSort(self.nodes):
5317 nresult = data[node]
5321 msg = nresult.fail_msg
5323 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5326 rows = dict([(row[name_idx], row) for row in nresult.payload])
5328 for name in utils.NiceSort(rows.keys()):
5333 for field in self.op.output_fields:
5334 if field == constants.SF_NODE:
5336 elif field == constants.SF_TYPE:
5337 val = self.op.storage_type
5338 elif field in field_idx:
5339 val = row[field_idx[field]]
5341 raise errors.ParameterError(field)
5350 class _InstanceQuery(_QueryBase):
5351 FIELDS = query.INSTANCE_FIELDS
5353 def ExpandNames(self, lu):
5354 lu.needed_locks = {}
5355 lu.share_locks = _ShareAll()
5358 self.wanted = _GetWantedInstances(lu, self.names)
5360 self.wanted = locking.ALL_SET
5362 self.do_locking = (self.use_locking and
5363 query.IQ_LIVE in self.requested_data)
5365 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5366 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5367 lu.needed_locks[locking.LEVEL_NODE] = []
5368 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5370 self.do_grouplocks = (self.do_locking and
5371 query.IQ_NODES in self.requested_data)
5373 def DeclareLocks(self, lu, level):
5375 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5376 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5378 # Lock all groups used by instances optimistically; this requires going
5379 # via the node before it's locked, requiring verification later on
5380 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5382 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5383 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5384 elif level == locking.LEVEL_NODE:
5385 lu._LockInstancesNodes() # pylint: disable=W0212
5388 def _CheckGroupLocks(lu):
5389 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5390 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5392 # Check if node groups for locked instances are still correct
5393 for instance_name in owned_instances:
5394 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5396 def _GetQueryData(self, lu):
5397 """Computes the list of instances and their attributes.
5400 if self.do_grouplocks:
5401 self._CheckGroupLocks(lu)
5403 cluster = lu.cfg.GetClusterInfo()
5404 all_info = lu.cfg.GetAllInstancesInfo()
5406 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5408 instance_list = [all_info[name] for name in instance_names]
5409 nodes = frozenset(itertools.chain(*(inst.all_nodes
5410 for inst in instance_list)))
5411 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5414 wrongnode_inst = set()
5416 # Gather data as requested
5417 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5419 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5421 result = node_data[name]
5423 # offline nodes will be in both lists
5424 assert result.fail_msg
5425 offline_nodes.append(name)
5427 bad_nodes.append(name)
5428 elif result.payload:
5429 for inst in result.payload:
5430 if inst in all_info:
5431 if all_info[inst].primary_node == name:
5432 live_data.update(result.payload)
5434 wrongnode_inst.add(inst)
5436 # orphan instance; we don't list it here as we don't
5437 # handle this case yet in the output of instance listing
5438 logging.warning("Orphan instance '%s' found on node %s",
5440 # else no instance is alive
5444 if query.IQ_DISKUSAGE in self.requested_data:
5445 disk_usage = dict((inst.name,
5446 _ComputeDiskSize(inst.disk_template,
5447 [{constants.IDISK_SIZE: disk.size}
5448 for disk in inst.disks]))
5449 for inst in instance_list)
5453 if query.IQ_CONSOLE in self.requested_data:
5455 for inst in instance_list:
5456 if inst.name in live_data:
5457 # Instance is running
5458 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5460 consinfo[inst.name] = None
5461 assert set(consinfo.keys()) == set(instance_names)
5465 if query.IQ_NODES in self.requested_data:
5466 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5468 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5469 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5470 for uuid in set(map(operator.attrgetter("group"),
5476 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5477 disk_usage, offline_nodes, bad_nodes,
5478 live_data, wrongnode_inst, consinfo,
5482 class LUQuery(NoHooksLU):
5483 """Query for resources/items of a certain kind.
5486 # pylint: disable=W0142
5489 def CheckArguments(self):
5490 qcls = _GetQueryImplementation(self.op.what)
5492 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5494 def ExpandNames(self):
5495 self.impl.ExpandNames(self)
5497 def DeclareLocks(self, level):
5498 self.impl.DeclareLocks(self, level)
5500 def Exec(self, feedback_fn):
5501 return self.impl.NewStyleQuery(self)
5504 class LUQueryFields(NoHooksLU):
5505 """Query for resources/items of a certain kind.
5508 # pylint: disable=W0142
5511 def CheckArguments(self):
5512 self.qcls = _GetQueryImplementation(self.op.what)
5514 def ExpandNames(self):
5515 self.needed_locks = {}
5517 def Exec(self, feedback_fn):
5518 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5521 class LUNodeModifyStorage(NoHooksLU):
5522 """Logical unit for modifying a storage volume on a node.
5527 def CheckArguments(self):
5528 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5530 storage_type = self.op.storage_type
5533 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5535 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5536 " modified" % storage_type,
5539 diff = set(self.op.changes.keys()) - modifiable
5541 raise errors.OpPrereqError("The following fields can not be modified for"
5542 " storage units of type '%s': %r" %
5543 (storage_type, list(diff)),
5546 def ExpandNames(self):
5547 self.needed_locks = {
5548 locking.LEVEL_NODE: self.op.node_name,
5551 def Exec(self, feedback_fn):
5552 """Computes the list of nodes and their attributes.
5555 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5556 result = self.rpc.call_storage_modify(self.op.node_name,
5557 self.op.storage_type, st_args,
5558 self.op.name, self.op.changes)
5559 result.Raise("Failed to modify storage unit '%s' on %s" %
5560 (self.op.name, self.op.node_name))
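# Hedged example: for LVM physical volumes the only modifiable field is
# typically the "allocatable" flag, so a change dict such as
#   {constants.SF_ALLOCATABLE: False}
# would be accepted, while anything outside MODIFIABLE_STORAGE_FIELDS for the
# given storage type is rejected in CheckArguments above.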
5563 class LUNodeAdd(LogicalUnit):
5564 """Logical unit for adding node to the cluster.
5568 HTYPE = constants.HTYPE_NODE
5569 _NFLAGS = ["master_capable", "vm_capable"]
5571 def CheckArguments(self):
5572 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5573 # validate/normalize the node name
5574 self.hostname = netutils.GetHostname(name=self.op.node_name,
5575 family=self.primary_ip_family)
5576 self.op.node_name = self.hostname.name
5578 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5579 raise errors.OpPrereqError("Cannot readd the master node",
5582 if self.op.readd and self.op.group:
5583 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5584 " being readded", errors.ECODE_INVAL)
5586 def BuildHooksEnv(self):
5589 This will run on all nodes before, and on all nodes + the new node after.
5593 "OP_TARGET": self.op.node_name,
5594 "NODE_NAME": self.op.node_name,
5595 "NODE_PIP": self.op.primary_ip,
5596 "NODE_SIP": self.op.secondary_ip,
5597 "MASTER_CAPABLE": str(self.op.master_capable),
5598 "VM_CAPABLE": str(self.op.vm_capable),
5601 def BuildHooksNodes(self):
5602 """Build hooks nodes.
5605 # Exclude added node
5606 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5607 post_nodes = pre_nodes + [self.op.node_name, ]
5609 return (pre_nodes, post_nodes)
5611 def CheckPrereq(self):
5612 """Check prerequisites.
5615 - the new node is not already in the config
5617 - its parameters (single/dual homed) match the cluster
5619 Any errors are signaled by raising errors.OpPrereqError.
5623 hostname = self.hostname
5624 node = hostname.name
5625 primary_ip = self.op.primary_ip = hostname.ip
5626 if self.op.secondary_ip is None:
5627 if self.primary_ip_family == netutils.IP6Address.family:
5628 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5629 " IPv4 address must be given as secondary",
5631 self.op.secondary_ip = primary_ip
5633 secondary_ip = self.op.secondary_ip
5634 if not netutils.IP4Address.IsValid(secondary_ip):
5635 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5636 " address" % secondary_ip, errors.ECODE_INVAL)
5638 node_list = cfg.GetNodeList()
5639 if not self.op.readd and node in node_list:
5640 raise errors.OpPrereqError("Node %s is already in the configuration" %
5641 node, errors.ECODE_EXISTS)
5642 elif self.op.readd and node not in node_list:
5643 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5646 self.changed_primary_ip = False
5648 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5649 if self.op.readd and node == existing_node_name:
5650 if existing_node.secondary_ip != secondary_ip:
5651 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5652 " address configuration as before",
5654 if existing_node.primary_ip != primary_ip:
5655 self.changed_primary_ip = True
5659 if (existing_node.primary_ip == primary_ip or
5660 existing_node.secondary_ip == primary_ip or
5661 existing_node.primary_ip == secondary_ip or
5662 existing_node.secondary_ip == secondary_ip):
5663 raise errors.OpPrereqError("New node ip address(es) conflict with"
5664 " existing node %s" % existing_node.name,
5665 errors.ECODE_NOTUNIQUE)
5667 # After this 'if' block, None is no longer a valid value for the
5668 # _capable op attributes
5670 old_node = self.cfg.GetNodeInfo(node)
5671 assert old_node is not None, "Can't retrieve locked node %s" % node
5672 for attr in self._NFLAGS:
5673 if getattr(self.op, attr) is None:
5674 setattr(self.op, attr, getattr(old_node, attr))
5676 for attr in self._NFLAGS:
5677 if getattr(self.op, attr) is None:
5678 setattr(self.op, attr, True)
5680 if self.op.readd and not self.op.vm_capable:
5681 pri, sec = cfg.GetNodeInstances(node)
5683 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5684 " flag set to false, but it already holds"
5685 " instances" % node,
5688 # check that the type of the node (single versus dual homed) is the
5689 # same as for the master
5690 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5691 master_singlehomed = myself.secondary_ip == myself.primary_ip
5692 newbie_singlehomed = secondary_ip == primary_ip
5693 if master_singlehomed != newbie_singlehomed:
5694 if master_singlehomed:
5695 raise errors.OpPrereqError("The master has no secondary ip but the"
5696 " new node has one",
5699 raise errors.OpPrereqError("The master has a secondary ip but the"
5700 " new node doesn't have one",
5703 # checks reachability
5704 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5705 raise errors.OpPrereqError("Node not reachable by ping",
5706 errors.ECODE_ENVIRON)
5708 if not newbie_singlehomed:
5709 # check reachability from my secondary ip to newbie's secondary ip
5710 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5711 source=myself.secondary_ip):
5712 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5713 " based ping to node daemon port",
5714 errors.ECODE_ENVIRON)
5721 if self.op.master_capable:
5722 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5724 self.master_candidate = False
5727 self.new_node = old_node
5729 node_group = cfg.LookupNodeGroup(self.op.group)
5730 self.new_node = objects.Node(name=node,
5731 primary_ip=primary_ip,
5732 secondary_ip=secondary_ip,
5733 master_candidate=self.master_candidate,
5734 offline=False, drained=False,
5737 if self.op.ndparams:
5738 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5740 if self.op.hv_state:
5741 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5743 if self.op.disk_state:
5744 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5746 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5747 # it a property on the base class.
5748 result = rpc.DnsOnlyRunner().call_version([node])[node]
5749 result.Raise("Can't get version information from node %s" % node)
5750 if constants.PROTOCOL_VERSION == result.payload:
5751 logging.info("Communication to node %s fine, sw version %s match",
5752 node, result.payload)
5754 raise errors.OpPrereqError("Version mismatch master version %s,"
5755 " node version %s" %
5756 (constants.PROTOCOL_VERSION, result.payload),
5757 errors.ECODE_ENVIRON)
5759 def Exec(self, feedback_fn):
5760 """Adds the new node to the cluster.
5763 new_node = self.new_node
5764 node = new_node.name
5766 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5769 # We are adding a new node, so we assume it's powered
5770 new_node.powered = True
5772 # for re-adds, reset the offline/drained/master-candidate flags;
5773 # we need to reset here, otherwise offline would prevent RPC calls
5774 # later in the procedure; this also means that if the re-add
5775 # fails, we are left with a non-offlined, broken node
5777 new_node.drained = new_node.offline = False # pylint: disable=W0201
5778 self.LogInfo("Readding a node, the offline/drained flags were reset")
5779 # if we demote the node, we do cleanup later in the procedure
5780 new_node.master_candidate = self.master_candidate
5781 if self.changed_primary_ip:
5782 new_node.primary_ip = self.op.primary_ip
5784 # copy the master/vm_capable flags
5785 for attr in self._NFLAGS:
5786 setattr(new_node, attr, getattr(self.op, attr))
5788 # notify the user about any possible mc promotion
5789 if new_node.master_candidate:
5790 self.LogInfo("Node will be a master candidate")
5792 if self.op.ndparams:
5793 new_node.ndparams = self.op.ndparams
5795 new_node.ndparams = {}
5797 if self.op.hv_state:
5798 new_node.hv_state_static = self.new_hv_state
5800 if self.op.disk_state:
5801 new_node.disk_state_static = self.new_disk_state
5803 # Add node to our /etc/hosts, and add key to known_hosts
5804 if self.cfg.GetClusterInfo().modify_etc_hosts:
5805 master_node = self.cfg.GetMasterNode()
5806 result = self.rpc.call_etc_hosts_modify(master_node,
5807 constants.ETC_HOSTS_ADD,
5810 result.Raise("Can't update hosts file with new host data")
5812 if new_node.secondary_ip != new_node.primary_ip:
5813 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5816 node_verify_list = [self.cfg.GetMasterNode()]
5817 node_verify_param = {
5818 constants.NV_NODELIST: ([node], {}),
5819 # TODO: do a node-net-test as well?
5822 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5823 self.cfg.GetClusterName())
5824 for verifier in node_verify_list:
5825 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5826 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5828 for failed in nl_payload:
5829 feedback_fn("ssh/hostname verification failed"
5830 " (checking from %s): %s" %
5831 (verifier, nl_payload[failed]))
5832 raise errors.OpExecError("ssh/hostname verification failed")
5835 _RedistributeAncillaryFiles(self)
5836 self.context.ReaddNode(new_node)
5837 # make sure we redistribute the config
5838 self.cfg.Update(new_node, feedback_fn)
5839 # and make sure the new node will not have old files around
5840 if not new_node.master_candidate:
5841 result = self.rpc.call_node_demote_from_mc(new_node.name)
5842 msg = result.fail_msg
5844 self.LogWarning("Node failed to demote itself from master"
5845 " candidate status: %s" % msg)
5847 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5848 additional_vm=self.op.vm_capable)
5849 self.context.AddNode(new_node, self.proc.GetECId())
5852 class LUNodeSetParams(LogicalUnit):
5853 """Modifies the parameters of a node.
5855 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5856 to the node role (as _ROLE_*)
5857 @cvar _R2F: a dictionary from node role to tuples of flags
5858 @cvar _FLAGS: a list of attribute names corresponding to the flags
5861 HPATH = "node-modify"
5862 HTYPE = constants.HTYPE_NODE
5864 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5866 (True, False, False): _ROLE_CANDIDATE,
5867 (False, True, False): _ROLE_DRAINED,
5868 (False, False, True): _ROLE_OFFLINE,
5869 (False, False, False): _ROLE_REGULAR,
5871 _R2F = dict((v, k) for k, v in _F2R.items())
5872 _FLAGS = ["master_candidate", "drained", "offline"]
5874 def CheckArguments(self):
5875 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5876 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5877 self.op.master_capable, self.op.vm_capable,
5878 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5880 if all_mods.count(None) == len(all_mods):
5881 raise errors.OpPrereqError("Please pass at least one modification",
5883 if all_mods.count(True) > 1:
5884 raise errors.OpPrereqError("Can't set the node into more than one"
5885 " state at the same time",
5888 # Boolean value that tells us whether we might be demoting from MC
5889 self.might_demote = (self.op.master_candidate == False or
5890 self.op.offline == True or
5891 self.op.drained == True or
5892 self.op.master_capable == False)
5894 if self.op.secondary_ip:
5895 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5896 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5897 " address" % self.op.secondary_ip,
5900 self.lock_all = self.op.auto_promote and self.might_demote
5901 self.lock_instances = self.op.secondary_ip is not None
5903 def _InstanceFilter(self, instance):
5904 """Filter for getting affected instances.
5907 return (instance.disk_template in constants.DTS_INT_MIRROR and
5908 self.op.node_name in instance.all_nodes)
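# Usage sketch: the filter is passed to the config so that only instances
# that would be affected by a secondary-IP change on this node are locked,
# as done in ExpandNames/CheckPrereq below:
#
#   affected = self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
#   # -> dict of name -> objects.Instance, restricted to internally mirrored
#   #    (DRBD-like) instances that have self.op.node_name among their nodes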
5910 def ExpandNames(self):
5912 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5914 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5916 # Since modifying a node can have severe effects on currently running
5917 # operations, the resource lock is at least acquired in shared mode
5918 self.needed_locks[locking.LEVEL_NODE_RES] = \
5919 self.needed_locks[locking.LEVEL_NODE]
5921 # Get node resource and instance locks in shared mode; they are not used
5922 # for anything but read-only access
5923 self.share_locks[locking.LEVEL_NODE_RES] = 1
5924 self.share_locks[locking.LEVEL_INSTANCE] = 1
5926 if self.lock_instances:
5927 self.needed_locks[locking.LEVEL_INSTANCE] = \
5928 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5930 def BuildHooksEnv(self):
5933 This runs on the master node.
5937 "OP_TARGET": self.op.node_name,
5938 "MASTER_CANDIDATE": str(self.op.master_candidate),
5939 "OFFLINE": str(self.op.offline),
5940 "DRAINED": str(self.op.drained),
5941 "MASTER_CAPABLE": str(self.op.master_capable),
5942 "VM_CAPABLE": str(self.op.vm_capable),
5945 def BuildHooksNodes(self):
5946 """Build hooks nodes.
5949 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5952 def CheckPrereq(self):
5953 """Check prerequisites.
5955 This only checks the instance list against the existing names.
5958 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5960 if self.lock_instances:
5961 affected_instances = \
5962 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5964 # Verify instance locks
5965 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5966 wanted_instances = frozenset(affected_instances.keys())
5967 if wanted_instances - owned_instances:
5968 raise errors.OpPrereqError("Instances affected by changing node %s's"
5969 " secondary IP address have changed since"
5970 " locks were acquired, wanted '%s', have"
5971 " '%s'; retry the operation" %
5973 utils.CommaJoin(wanted_instances),
5974 utils.CommaJoin(owned_instances)),
5977 affected_instances = None
5979 if (self.op.master_candidate is not None or
5980 self.op.drained is not None or
5981 self.op.offline is not None):
5982 # we can't change the master's node flags
5983 if self.op.node_name == self.cfg.GetMasterNode():
5984 raise errors.OpPrereqError("The master role can be changed"
5985 " only via master-failover",
5988 if self.op.master_candidate and not node.master_capable:
5989 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5990 " it a master candidate" % node.name,
5993 if self.op.vm_capable == False:
5994 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5996 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5997 " the vm_capable flag" % node.name,
6000 if node.master_candidate and self.might_demote and not self.lock_all:
6001 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6002 # check if, after removing the current node, we'd be missing master candidates
6004 (mc_remaining, mc_should, _) = \
6005 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6006 if mc_remaining < mc_should:
6007 raise errors.OpPrereqError("Not enough master candidates, please"
6008 " pass auto promote option to allow"
6009 " promotion (--auto-promote or RAPI"
6010 " auto_promote=True)", errors.ECODE_STATE)
6012 self.old_flags = old_flags = (node.master_candidate,
6013 node.drained, node.offline)
6014 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6015 self.old_role = old_role = self._F2R[old_flags]
6017 # Check for ineffective changes
6018 for attr in self._FLAGS:
6019 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
6020 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6021 setattr(self.op, attr, None)
6023 # Past this point, any flag change to False means a transition
6024 # away from the respective state, as only real changes are kept
6026 # TODO: We might query the real power state if it supports OOB
6027 if _SupportsOob(self.cfg, node):
6028 if self.op.offline is False and not (node.powered or
6029 self.op.powered == True):
6030 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6031 " offline status can be reset") %
6033 elif self.op.powered is not None:
6034 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6035 " as it does not support out-of-band"
6036 " handling") % self.op.node_name)
6038 # If we're being de-offlined/drained, we'll promote ourselves to MC if needed
6039 if (self.op.drained == False or self.op.offline == False or
6040 (self.op.master_capable and not node.master_capable)):
6041 if _DecideSelfPromotion(self):
6042 self.op.master_candidate = True
6043 self.LogInfo("Auto-promoting node to master candidate")
6045 # If we're no longer master capable, we'll demote ourselves from MC
6046 if self.op.master_capable == False and node.master_candidate:
6047 self.LogInfo("Demoting from master candidate")
6048 self.op.master_candidate = False
6051 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6052 if self.op.master_candidate:
6053 new_role = self._ROLE_CANDIDATE
6054 elif self.op.drained:
6055 new_role = self._ROLE_DRAINED
6056 elif self.op.offline:
6057 new_role = self._ROLE_OFFLINE
6058 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6059 # False is still in new flags, which means we're un-setting (the
6061 new_role = self._ROLE_REGULAR
6062 else: # no new flags, nothing, keep old role
6065 self.new_role = new_role
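# Illustration of the precedence above (flags were already reduced to real
# changes earlier in CheckPrereq): master_candidate wins over drained, which
# wins over offline; explicitly un-setting a flag yields the regular role,
# and with no flag changes the old role is kept, e.g.
#
#   op.master_candidate=True, op.offline=True -> rejected in CheckArguments
#   op.offline=False on a currently offline node -> _ROLE_REGULAR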
6067 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6068 # Trying to transition out of offline status
6069 result = self.rpc.call_version([node.name])[node.name]
6071 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6072 " to report its version: %s" %
6073 (node.name, result.fail_msg),
6076 self.LogWarning("Transitioning node from offline to online state"
6077 " without using re-add. Please make sure the node"
6080 if self.op.secondary_ip:
6081 # Ok even without locking, because this can't be changed by any LU
6082 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6083 master_singlehomed = master.secondary_ip == master.primary_ip
6084 if master_singlehomed and self.op.secondary_ip:
6085 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6086 " homed cluster", errors.ECODE_INVAL)
6088 assert not (frozenset(affected_instances) -
6089 self.owned_locks(locking.LEVEL_INSTANCE))
6092 if affected_instances:
6093 raise errors.OpPrereqError("Cannot change secondary IP address:"
6094 " offline node has instances (%s)"
6095 " configured to use it" %
6096 utils.CommaJoin(affected_instances.keys()))
6098 # On online nodes, check that no instances are running, and that
6099 # the node has the new ip and we can reach it.
6100 for instance in affected_instances.values():
6101 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6102 msg="cannot change secondary ip")
6104 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6105 if master.name != node.name:
6106 # check reachability from master secondary ip to new secondary ip
6107 if not netutils.TcpPing(self.op.secondary_ip,
6108 constants.DEFAULT_NODED_PORT,
6109 source=master.secondary_ip):
6110 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6111 " based ping to node daemon port",
6112 errors.ECODE_ENVIRON)
6114 if self.op.ndparams:
6115 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6116 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6117 self.new_ndparams = new_ndparams
6119 if self.op.hv_state:
6120 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6121 self.node.hv_state_static)
6123 if self.op.disk_state:
6124 self.new_disk_state = \
6125 _MergeAndVerifyDiskState(self.op.disk_state,
6126 self.node.disk_state_static)
6128 def Exec(self, feedback_fn):
6133 old_role = self.old_role
6134 new_role = self.new_role
6138 if self.op.ndparams:
6139 node.ndparams = self.new_ndparams
6141 if self.op.powered is not None:
6142 node.powered = self.op.powered
6144 if self.op.hv_state:
6145 node.hv_state_static = self.new_hv_state
6147 if self.op.disk_state:
6148 node.disk_state_static = self.new_disk_state
6150 for attr in ["master_capable", "vm_capable"]:
6151 val = getattr(self.op, attr)
6153 setattr(node, attr, val)
6154 result.append((attr, str(val)))
6156 if new_role != old_role:
6157 # Tell the node to demote itself, if no longer MC and not offline
6158 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6159 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6161 self.LogWarning("Node failed to demote itself: %s", msg)
6163 new_flags = self._R2F[new_role]
6164 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6166 result.append((desc, str(nf)))
6167 (node.master_candidate, node.drained, node.offline) = new_flags
6169 # we locked all nodes, we adjust the CP before updating this node
6171 _AdjustCandidatePool(self, [node.name])
6173 if self.op.secondary_ip:
6174 node.secondary_ip = self.op.secondary_ip
6175 result.append(("secondary_ip", self.op.secondary_ip))
6177 # this will trigger configuration file update, if needed
6178 self.cfg.Update(node, feedback_fn)
6180 # this will trigger job queue propagation or cleanup if the mc
6182 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6183 self.context.ReaddNode(node)
6188 class LUNodePowercycle(NoHooksLU):
6189 """Powercycles a node.
6194 def CheckArguments(self):
6195 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6196 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6197 raise errors.OpPrereqError("The node is the master and the force"
6198 " parameter was not set",
6201 def ExpandNames(self):
6202 """Locking for PowercycleNode.
6204 This is a last-resort option and shouldn't block on other
6205 jobs. Therefore, we grab no locks.
6208 self.needed_locks = {}
6210 def Exec(self, feedback_fn):
6214 result = self.rpc.call_node_powercycle(self.op.node_name,
6215 self.cfg.GetHypervisorType())
6216 result.Raise("Failed to schedule the reboot")
6217 return result.payload
6220 class LUClusterQuery(NoHooksLU):
6221 """Query cluster configuration.
6226 def ExpandNames(self):
6227 self.needed_locks = {}
6229 def Exec(self, feedback_fn):
6230 """Return cluster config.
6233 cluster = self.cfg.GetClusterInfo()
6236 # Filter just for enabled hypervisors
6237 for os_name, hv_dict in cluster.os_hvp.items():
6238 os_hvp[os_name] = {}
6239 for hv_name, hv_params in hv_dict.items():
6240 if hv_name in cluster.enabled_hypervisors:
6241 os_hvp[os_name][hv_name] = hv_params
6243 # Convert ip_family to ip_version
6244 primary_ip_version = constants.IP4_VERSION
6245 if cluster.primary_ip_family == netutils.IP6Address.family:
6246 primary_ip_version = constants.IP6_VERSION
6249 "software_version": constants.RELEASE_VERSION,
6250 "protocol_version": constants.PROTOCOL_VERSION,
6251 "config_version": constants.CONFIG_VERSION,
6252 "os_api_version": max(constants.OS_API_VERSIONS),
6253 "export_version": constants.EXPORT_VERSION,
6254 "architecture": runtime.GetArchInfo(),
6255 "name": cluster.cluster_name,
6256 "master": cluster.master_node,
6257 "default_hypervisor": cluster.primary_hypervisor,
6258 "enabled_hypervisors": cluster.enabled_hypervisors,
6259 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6260 for hypervisor_name in cluster.enabled_hypervisors]),
6262 "beparams": cluster.beparams,
6263 "osparams": cluster.osparams,
6264 "ipolicy": cluster.ipolicy,
6265 "nicparams": cluster.nicparams,
6266 "ndparams": cluster.ndparams,
6267 "diskparams": cluster.diskparams,
6268 "candidate_pool_size": cluster.candidate_pool_size,
6269 "master_netdev": cluster.master_netdev,
6270 "master_netmask": cluster.master_netmask,
6271 "use_external_mip_script": cluster.use_external_mip_script,
6272 "volume_group_name": cluster.volume_group_name,
6273 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6274 "file_storage_dir": cluster.file_storage_dir,
6275 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6276 "maintain_node_health": cluster.maintain_node_health,
6277 "ctime": cluster.ctime,
6278 "mtime": cluster.mtime,
6279 "uuid": cluster.uuid,
6280 "tags": list(cluster.GetTags()),
6281 "uid_pool": cluster.uid_pool,
6282 "default_iallocator": cluster.default_iallocator,
6283 "reserved_lvs": cluster.reserved_lvs,
6284 "primary_ip_version": primary_ip_version,
6285 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6286 "hidden_os": cluster.hidden_os,
6287 "blacklisted_os": cluster.blacklisted_os,
6293 class LUClusterConfigQuery(NoHooksLU):
6294 """Return configuration values.
6299 def CheckArguments(self):
6300 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6302 def ExpandNames(self):
6303 self.cq.ExpandNames(self)
6305 def DeclareLocks(self, level):
6306 self.cq.DeclareLocks(self, level)
6308 def Exec(self, feedback_fn):
6309 result = self.cq.OldStyleQuery(self)
6311 assert len(result) == 1
6316 class _ClusterQuery(_QueryBase):
6317 FIELDS = query.CLUSTER_FIELDS
6319 #: Do not sort (there is only one item)
6322 def ExpandNames(self, lu):
6323 lu.needed_locks = {}
6325 # The following variables interact with _QueryBase._GetNames
6326 self.wanted = locking.ALL_SET
6327 self.do_locking = self.use_locking
6330 raise errors.OpPrereqError("Can not use locking for cluster queries",
6333 def DeclareLocks(self, lu, level):
6336 def _GetQueryData(self, lu):
6337 """Computes the list of nodes and their attributes.
6340 # Locking is not used
6341 assert not (compat.any(lu.glm.is_owned(level)
6342 for level in locking.LEVELS
6343 if level != locking.LEVEL_CLUSTER) or
6344 self.do_locking or self.use_locking)
6346 if query.CQ_CONFIG in self.requested_data:
6347 cluster = lu.cfg.GetClusterInfo()
6349 cluster = NotImplemented
6351 if query.CQ_QUEUE_DRAINED in self.requested_data:
6352 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6354 drain_flag = NotImplemented
6356 if query.CQ_WATCHER_PAUSE in self.requested_data:
6357 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6359 watcher_pause = NotImplemented
6361 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6364 class LUInstanceActivateDisks(NoHooksLU):
6365 """Bring up an instance's disks.
6370 def ExpandNames(self):
6371 self._ExpandAndLockInstance()
6372 self.needed_locks[locking.LEVEL_NODE] = []
6373 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6375 def DeclareLocks(self, level):
6376 if level == locking.LEVEL_NODE:
6377 self._LockInstancesNodes()
6379 def CheckPrereq(self):
6380 """Check prerequisites.
6382 This checks that the instance is in the cluster.
6385 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6386 assert self.instance is not None, \
6387 "Cannot retrieve locked instance %s" % self.op.instance_name
6388 _CheckNodeOnline(self, self.instance.primary_node)
6390 def Exec(self, feedback_fn):
6391 """Activate the disks.
6394 disks_ok, disks_info = \
6395 _AssembleInstanceDisks(self, self.instance,
6396 ignore_size=self.op.ignore_size)
6398 raise errors.OpExecError("Cannot activate block devices")
6403 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6405 """Prepare the block devices for an instance.
6407 This sets up the block devices on all nodes.
6409 @type lu: L{LogicalUnit}
6410 @param lu: the logical unit on whose behalf we execute
6411 @type instance: L{objects.Instance}
6412 @param instance: the instance for whose disks we assemble
6413 @type disks: list of L{objects.Disk} or None
6414 @param disks: which disks to assemble (or all, if None)
6415 @type ignore_secondaries: boolean
6416 @param ignore_secondaries: if true, errors on secondary nodes
6417 won't result in an error return from the function
6418 @type ignore_size: boolean
6419 @param ignore_size: if true, the current known size of the disk
6420 will not be used during the disk activation, useful for cases
6421 when the size is wrong
6422 @return: False if the operation failed, otherwise a list of
6423 (host, instance_visible_name, node_visible_name)
6424 with the mapping from node devices to instance devices
6429 iname = instance.name
6430 disks = _ExpandCheckDisks(instance, disks)
6432 # With the two-pass mechanism we try to reduce the window of
6433 # opportunity for the race condition of switching DRBD to primary
6434 # before handshaking has occurred, but we do not eliminate it
6436 # The proper fix would be to wait (with some limits) until the
6437 # connection has been made and drbd transitions from WFConnection
6438 # into any other network-connected state (Connected, SyncTarget,
6441 # 1st pass, assemble on all nodes in secondary mode
6442 for idx, inst_disk in enumerate(disks):
6443 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6445 node_disk = node_disk.Copy()
6446 node_disk.UnsetSize()
6447 lu.cfg.SetDiskID(node_disk, node)
6448 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6450 msg = result.fail_msg
6452 is_offline_secondary = (node in instance.secondary_nodes and
6454 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6455 " (is_primary=False, pass=1): %s",
6456 inst_disk.iv_name, node, msg)
6457 if not (ignore_secondaries or is_offline_secondary):
6460 # FIXME: race condition on drbd migration to primary
6462 # 2nd pass, do only the primary node
6463 for idx, inst_disk in enumerate(disks):
6466 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6467 if node != instance.primary_node:
6470 node_disk = node_disk.Copy()
6471 node_disk.UnsetSize()
6472 lu.cfg.SetDiskID(node_disk, node)
6473 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6475 msg = result.fail_msg
6477 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6478 " (is_primary=True, pass=2): %s",
6479 inst_disk.iv_name, node, msg)
6482 dev_path = result.payload
6484 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6486 # leave the disks configured for the primary node
6487 # this is a workaround that would be fixed better by
6488 # improving the logical/physical id handling
6490 lu.cfg.SetDiskID(disk, instance.primary_node)
6492 return disks_ok, device_info
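# Minimal usage sketch for the helper above (mirrors _StartInstanceDisks
# below): callers unpack a success flag plus, for the primary node, the
# visible device paths, e.g.
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   # device_info entries look like (primary_node, "disk/0", "/dev/drbd0")
#   # (the device name and path shown here are purely illustrative)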
6495 def _StartInstanceDisks(lu, instance, force):
6496 """Start the disks of an instance.
6499 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6500 ignore_secondaries=force)
6502 _ShutdownInstanceDisks(lu, instance)
6503 if force is not None and not force:
6504 lu.proc.LogWarning("", hint="If the message above refers to a"
6506 " you can retry the operation using '--force'.")
6507 raise errors.OpExecError("Disk consistency error")
6510 class LUInstanceDeactivateDisks(NoHooksLU):
6511 """Shutdown an instance's disks.
6516 def ExpandNames(self):
6517 self._ExpandAndLockInstance()
6518 self.needed_locks[locking.LEVEL_NODE] = []
6519 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6521 def DeclareLocks(self, level):
6522 if level == locking.LEVEL_NODE:
6523 self._LockInstancesNodes()
6525 def CheckPrereq(self):
6526 """Check prerequisites.
6528 This checks that the instance is in the cluster.
6531 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6532 assert self.instance is not None, \
6533 "Cannot retrieve locked instance %s" % self.op.instance_name
6535 def Exec(self, feedback_fn):
6536 """Deactivate the disks
6539 instance = self.instance
6541 _ShutdownInstanceDisks(self, instance)
6543 _SafeShutdownInstanceDisks(self, instance)
6546 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6547 """Shutdown block devices of an instance.
6549 This function checks if an instance is running before calling
6550 _ShutdownInstanceDisks.
6553 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6554 _ShutdownInstanceDisks(lu, instance, disks=disks)
6557 def _ExpandCheckDisks(instance, disks):
6558 """Return the instance disks selected by the disks list
6560 @type disks: list of L{objects.Disk} or None
6561 @param disks: selected disks
6562 @rtype: list of L{objects.Disk}
6563 @return: selected instance disks to act on
6567 return instance.disks
6569 if not set(disks).issubset(instance.disks):
6570 raise errors.ProgrammerError("Can only act on disks belonging to the"
6575 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6576 """Shutdown block devices of an instance.
6578 This does the shutdown on all nodes of the instance.
6580 If the ignore_primary is false, errors on the primary node are
6585 disks = _ExpandCheckDisks(instance, disks)
6588 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6589 lu.cfg.SetDiskID(top_disk, node)
6590 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6591 msg = result.fail_msg
6593 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6594 disk.iv_name, node, msg)
6595 if ((node == instance.primary_node and not ignore_primary) or
6596 (node != instance.primary_node and not result.offline)):
6601 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6602 """Checks if a node has enough free memory.
6604 This function checks if a given node has the needed amount of free
6605 memory. In case the node has less memory or we cannot get the
6606 information from the node, this function raises an OpPrereqError
6609 @type lu: C{LogicalUnit}
6610 @param lu: a logical unit from which we get configuration data
6612 @param node: the node to check
6613 @type reason: C{str}
6614 @param reason: string to use in the error message
6615 @type requested: C{int}
6616 @param requested: the amount of memory in MiB to check for
6617 @type hypervisor_name: C{str}
6618 @param hypervisor_name: the hypervisor to ask for memory stats
6620 @return: node current free memory
6621 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6622 we cannot check the node
6625 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6626 nodeinfo[node].Raise("Can't get data from node %s" % node,
6627 prereq=True, ecode=errors.ECODE_ENVIRON)
6628 (_, _, (hv_info, )) = nodeinfo[node].payload
6630 free_mem = hv_info.get("memory_free", None)
6631 if not isinstance(free_mem, int):
6632 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6633 " was '%s'" % (node, free_mem),
6634 errors.ECODE_ENVIRON)
6635 if requested > free_mem:
6636 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6637 " needed %s MiB, available %s MiB" %
6638 (node, reason, requested, free_mem),
6643 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6644 """Checks if nodes have enough free disk space in the all VGs.
6646 This function checks if all given nodes have the needed amount of
6647 free disk. In case any node has less disk or we cannot get the
6648 information from the node, this function raises an OpPrereqError
6651 @type lu: C{LogicalUnit}
6652 @param lu: a logical unit from which we get configuration data
6653 @type nodenames: C{list}
6654 @param nodenames: the list of node names to check
6655 @type req_sizes: C{dict}
6656 @param req_sizes: the hash of vg and corresponding amount of disk in
6658 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6659 or we cannot check the node
6662 for vg, req_size in req_sizes.items():
6663 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
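# A usage sketch (hypothetical VG names and sizes): req_sizes maps each
# volume group to the space required on it, in MiB, and the helper simply
# fans out one per-VG check per entry:
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "datavg": 2048})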
6666 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6667 """Checks if nodes have enough free disk space in the specified VG.
6669 This function checks if all given nodes have the needed amount of
6670 free disk. In case any node has less disk or we cannot get the
6671 information from the node, this function raises an OpPrereqError
6674 @type lu: C{LogicalUnit}
6675 @param lu: a logical unit from which we get configuration data
6676 @type nodenames: C{list}
6677 @param nodenames: the list of node names to check
6679 @param vg: the volume group to check
6680 @type requested: C{int}
6681 @param requested: the amount of disk in MiB to check for
6682 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6683 or we cannot check the node
6686 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6687 for node in nodenames:
6688 info = nodeinfo[node]
6689 info.Raise("Cannot get current information from node %s" % node,
6690 prereq=True, ecode=errors.ECODE_ENVIRON)
6691 (_, (vg_info, ), _) = info.payload
6692 vg_free = vg_info.get("vg_free", None)
6693 if not isinstance(vg_free, int):
6694 raise errors.OpPrereqError("Can't compute free disk space on node"
6695 " %s for vg %s, result was '%s'" %
6696 (node, vg, vg_free), errors.ECODE_ENVIRON)
6697 if requested > vg_free:
6698 raise errors.OpPrereqError("Not enough disk space on target node %s"
6699 " vg %s: required %d MiB, available %d MiB" %
6700 (node, vg, requested, vg_free),
6704 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6705 """Checks if nodes have enough physical CPUs
6707 This function checks if all given nodes have the needed number of
6708 physical CPUs. In case any node has fewer CPUs or we cannot get the
6709 information from the node, this function raises an OpPrereqError
6712 @type lu: C{LogicalUnit}
6713 @param lu: a logical unit from which we get configuration data
6714 @type nodenames: C{list}
6715 @param nodenames: the list of node names to check
6716 @type requested: C{int}
6717 @param requested: the minimum acceptable number of physical CPUs
6718 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6719 or we cannot check the node
6722 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6723 for node in nodenames:
6724 info = nodeinfo[node]
6725 info.Raise("Cannot get current information from node %s" % node,
6726 prereq=True, ecode=errors.ECODE_ENVIRON)
6727 (_, _, (hv_info, )) = info.payload
6728 num_cpus = hv_info.get("cpu_total", None)
6729 if not isinstance(num_cpus, int):
6730 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6731 " on node %s, result was '%s'" %
6732 (node, num_cpus), errors.ECODE_ENVIRON)
6733 if requested > num_cpus:
6734 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6735 "required" % (node, num_cpus, requested),
6739 class LUInstanceStartup(LogicalUnit):
6740 """Starts an instance.
6743 HPATH = "instance-start"
6744 HTYPE = constants.HTYPE_INSTANCE
6747 def CheckArguments(self):
6749 if self.op.beparams:
6750 # fill the beparams dict
6751 objects.UpgradeBeParams(self.op.beparams)
6752 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6754 def ExpandNames(self):
6755 self._ExpandAndLockInstance()
6756 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6758 def DeclareLocks(self, level):
6759 if level == locking.LEVEL_NODE_RES:
6760 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6762 def BuildHooksEnv(self):
6765 This runs on master, primary and secondary nodes of the instance.
6769 "FORCE": self.op.force,
6772 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6776 def BuildHooksNodes(self):
6777 """Build hooks nodes.
6780 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6783 def CheckPrereq(self):
6784 """Check prerequisites.
6786 This checks that the instance is in the cluster.
6789 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6790 assert self.instance is not None, \
6791 "Cannot retrieve locked instance %s" % self.op.instance_name
6794 if self.op.hvparams:
6795 # check hypervisor parameter syntax (locally)
6796 cluster = self.cfg.GetClusterInfo()
6797 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6798 filled_hvp = cluster.FillHV(instance)
6799 filled_hvp.update(self.op.hvparams)
6800 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6801 hv_type.CheckParameterSyntax(filled_hvp)
6802 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6804 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6806 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6808 if self.primary_offline and self.op.ignore_offline_nodes:
6809 self.proc.LogWarning("Ignoring offline primary node")
6811 if self.op.hvparams or self.op.beparams:
6812 self.proc.LogWarning("Overridden parameters are ignored")
6814 _CheckNodeOnline(self, instance.primary_node)
6816 bep = self.cfg.GetClusterInfo().FillBE(instance)
6817 bep.update(self.op.beparams)
6819 # check that the bridges exist
6820 _CheckInstanceBridgesExist(self, instance)
6822 remote_info = self.rpc.call_instance_info(instance.primary_node,
6824 instance.hypervisor)
6825 remote_info.Raise("Error checking node %s" % instance.primary_node,
6826 prereq=True, ecode=errors.ECODE_ENVIRON)
6827 if not remote_info.payload: # not running already
6828 _CheckNodeFreeMemory(self, instance.primary_node,
6829 "starting instance %s" % instance.name,
6830 bep[constants.BE_MINMEM], instance.hypervisor)
6832 def Exec(self, feedback_fn):
6833 """Start the instance.
6836 instance = self.instance
6837 force = self.op.force
6839 if not self.op.no_remember:
6840 self.cfg.MarkInstanceUp(instance.name)
6842 if self.primary_offline:
6843 assert self.op.ignore_offline_nodes
6844 self.proc.LogInfo("Primary node offline, marked instance as started")
6846 node_current = instance.primary_node
6848 _StartInstanceDisks(self, instance, force)
6851 self.rpc.call_instance_start(node_current,
6852 (instance, self.op.hvparams,
6854 self.op.startup_paused)
6855 msg = result.fail_msg
6857 _ShutdownInstanceDisks(self, instance)
6858 raise errors.OpExecError("Could not start instance: %s" % msg)
6861 class LUInstanceReboot(LogicalUnit):
6862 """Reboot an instance.
6865 HPATH = "instance-reboot"
6866 HTYPE = constants.HTYPE_INSTANCE
6869 def ExpandNames(self):
6870 self._ExpandAndLockInstance()
6872 def BuildHooksEnv(self):
6875 This runs on master, primary and secondary nodes of the instance.
6879 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6880 "REBOOT_TYPE": self.op.reboot_type,
6881 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6884 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6888 def BuildHooksNodes(self):
6889 """Build hooks nodes.
6892 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6895 def CheckPrereq(self):
6896 """Check prerequisites.
6898 This checks that the instance is in the cluster.
6901 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6902 assert self.instance is not None, \
6903 "Cannot retrieve locked instance %s" % self.op.instance_name
6904 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6905 _CheckNodeOnline(self, instance.primary_node)
6907 # check that the bridges exist
6908 _CheckInstanceBridgesExist(self, instance)
6910 def Exec(self, feedback_fn):
6911 """Reboot the instance.
6914 instance = self.instance
6915 ignore_secondaries = self.op.ignore_secondaries
6916 reboot_type = self.op.reboot_type
6918 remote_info = self.rpc.call_instance_info(instance.primary_node,
6920 instance.hypervisor)
6921 remote_info.Raise("Error checking node %s" % instance.primary_node)
6922 instance_running = bool(remote_info.payload)
6924 node_current = instance.primary_node
6926 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6927 constants.INSTANCE_REBOOT_HARD]:
6928 for disk in instance.disks:
6929 self.cfg.SetDiskID(disk, node_current)
6930 result = self.rpc.call_instance_reboot(node_current, instance,
6932 self.op.shutdown_timeout)
6933 result.Raise("Could not reboot instance")
6935 if instance_running:
6936 result = self.rpc.call_instance_shutdown(node_current, instance,
6937 self.op.shutdown_timeout)
6938 result.Raise("Could not shutdown instance for full reboot")
6939 _ShutdownInstanceDisks(self, instance)
6941 self.LogInfo("Instance %s was already stopped, starting now",
6943 _StartInstanceDisks(self, instance, ignore_secondaries)
6944 result = self.rpc.call_instance_start(node_current,
6945 (instance, None, None), False)
6946 msg = result.fail_msg
6948 _ShutdownInstanceDisks(self, instance)
6949 raise errors.OpExecError("Could not start instance for"
6950 " full reboot: %s" % msg)
6952 self.cfg.MarkInstanceUp(instance.name)
6955 class LUInstanceShutdown(LogicalUnit):
6956 """Shutdown an instance.
6959 HPATH = "instance-stop"
6960 HTYPE = constants.HTYPE_INSTANCE
6963 def ExpandNames(self):
6964 self._ExpandAndLockInstance()
6966 def BuildHooksEnv(self):
6969 This runs on master, primary and secondary nodes of the instance.
6972 env = _BuildInstanceHookEnvByObject(self, self.instance)
6973 env["TIMEOUT"] = self.op.timeout
6976 def BuildHooksNodes(self):
6977 """Build hooks nodes.
6980 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6983 def CheckPrereq(self):
6984 """Check prerequisites.
6986 This checks that the instance is in the cluster.
6989 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6990 assert self.instance is not None, \
6991 "Cannot retrieve locked instance %s" % self.op.instance_name
6993 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6995 self.primary_offline = \
6996 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6998 if self.primary_offline and self.op.ignore_offline_nodes:
6999 self.proc.LogWarning("Ignoring offline primary node")
7001 _CheckNodeOnline(self, self.instance.primary_node)
7003 def Exec(self, feedback_fn):
7004 """Shutdown the instance.
7007 instance = self.instance
7008 node_current = instance.primary_node
7009 timeout = self.op.timeout
7011 if not self.op.no_remember:
7012 self.cfg.MarkInstanceDown(instance.name)
7014 if self.primary_offline:
7015 assert self.op.ignore_offline_nodes
7016 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7018 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7019 msg = result.fail_msg
7021 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7023 _ShutdownInstanceDisks(self, instance)
7026 class LUInstanceReinstall(LogicalUnit):
7027 """Reinstall an instance.
7030 HPATH = "instance-reinstall"
7031 HTYPE = constants.HTYPE_INSTANCE
7034 def ExpandNames(self):
7035 self._ExpandAndLockInstance()
7037 def BuildHooksEnv(self):
7040 This runs on master, primary and secondary nodes of the instance.
7043 return _BuildInstanceHookEnvByObject(self, self.instance)
7045 def BuildHooksNodes(self):
7046 """Build hooks nodes.
7049 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7052 def CheckPrereq(self):
7053 """Check prerequisites.
7055 This checks that the instance is in the cluster and is not running.
7058 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7059 assert instance is not None, \
7060 "Cannot retrieve locked instance %s" % self.op.instance_name
7061 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7062 " offline, cannot reinstall")
7064 if instance.disk_template == constants.DT_DISKLESS:
7065 raise errors.OpPrereqError("Instance '%s' has no disks" %
7066 self.op.instance_name,
7068 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7070 if self.op.os_type is not None:
7072 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7073 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7074 instance_os = self.op.os_type
7076 instance_os = instance.os
7078 nodelist = list(instance.all_nodes)
7080 if self.op.osparams:
7081 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7082 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7083 self.os_inst = i_osdict # the new dict (without defaults)
7087 self.instance = instance
7089 def Exec(self, feedback_fn):
7090 """Reinstall the instance.
7093 inst = self.instance
7095 if self.op.os_type is not None:
7096 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7097 inst.os = self.op.os_type
7098 # Write to configuration
7099 self.cfg.Update(inst, feedback_fn)
7101 _StartInstanceDisks(self, inst, None)
7103 feedback_fn("Running the instance OS create scripts...")
7104 # FIXME: pass debug option from opcode to backend
7105 result = self.rpc.call_instance_os_add(inst.primary_node,
7106 (inst, self.os_inst), True,
7107 self.op.debug_level)
7108 result.Raise("Could not install OS for instance %s on node %s" %
7109 (inst.name, inst.primary_node))
7111 _ShutdownInstanceDisks(self, inst)
7114 class LUInstanceRecreateDisks(LogicalUnit):
7115 """Recreate an instance's missing disks.
7118 HPATH = "instance-recreate-disks"
7119 HTYPE = constants.HTYPE_INSTANCE
7122 _MODIFYABLE = frozenset([
7123 constants.IDISK_SIZE,
7124 constants.IDISK_MODE,
7127 # New or changed disk parameters may have different semantics
7128 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7129 constants.IDISK_ADOPT,
7131 # TODO: Implement support for changing the VG while recreating
7133 constants.IDISK_METAVG,
7136 def CheckArguments(self):
7137 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7138 # Normalize and convert deprecated list of disk indices
7139 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
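# For illustration: the deprecated form is a plain list of indices, which the
# line above converts into the (index, params) pairs used everywhere else:
#
#   [2, 0, 2]  ->  [(0, {}), (2, {})]   # sorted, de-duplicated, empty params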
7141 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7143 raise errors.OpPrereqError("Some disks have been specified more than"
7144 " once: %s" % utils.CommaJoin(duplicates),
7147 for (idx, params) in self.op.disks:
7148 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7149 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7151 raise errors.OpPrereqError("Parameters for disk %s try to change"
7152 " unmodifyable parameter(s): %s" %
7153 (idx, utils.CommaJoin(unsupported)),
7156 def ExpandNames(self):
7157 self._ExpandAndLockInstance()
7158 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7160 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7161 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7163 self.needed_locks[locking.LEVEL_NODE] = []
7164 self.needed_locks[locking.LEVEL_NODE_RES] = []
7166 def DeclareLocks(self, level):
7167 if level == locking.LEVEL_NODE:
7168 # if we replace the nodes, we only need to lock the old primary,
7169 # otherwise we need to lock all nodes for disk re-creation
7170 primary_only = bool(self.op.nodes)
7171 self._LockInstancesNodes(primary_only=primary_only)
7172 elif level == locking.LEVEL_NODE_RES:
7174 self.needed_locks[locking.LEVEL_NODE_RES] = \
7175 self.needed_locks[locking.LEVEL_NODE][:]
7177 def BuildHooksEnv(self):
7180 This runs on master, primary and secondary nodes of the instance.
7183 return _BuildInstanceHookEnvByObject(self, self.instance)
7185 def BuildHooksNodes(self):
7186 """Build hooks nodes.
7189 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7192 def CheckPrereq(self):
7193 """Check prerequisites.
7195 This checks that the instance is in the cluster and is not running.
7198 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7199 assert instance is not None, \
7200 "Cannot retrieve locked instance %s" % self.op.instance_name
7202 if len(self.op.nodes) != len(instance.all_nodes):
7203 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7204 " %d replacement nodes were specified" %
7205 (instance.name, len(instance.all_nodes),
7206 len(self.op.nodes)),
7208 assert instance.disk_template != constants.DT_DRBD8 or \
7209 len(self.op.nodes) == 2
7210 assert instance.disk_template != constants.DT_PLAIN or \
7211 len(self.op.nodes) == 1
7212 primary_node = self.op.nodes[0]
7214 primary_node = instance.primary_node
7215 _CheckNodeOnline(self, primary_node)
7217 if instance.disk_template == constants.DT_DISKLESS:
7218 raise errors.OpPrereqError("Instance '%s' has no disks" %
7219 self.op.instance_name, errors.ECODE_INVAL)
7221 # if we replace nodes *and* the old primary is offline, we don't
7223 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7224 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7225 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7226 if not (self.op.nodes and old_pnode.offline):
7227 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7228 msg="cannot recreate disks")
7231 self.disks = dict(self.op.disks)
7233 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
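# After this point self.disks maps disk index to the requested parameter
# overrides, e.g. (hypothetical values):
#
#   {0: {}, 2: {constants.IDISK_SIZE: 2048}}   # recreate disks 0 and 2,
#                                              # resizing disk 2 to 2048 MiB
#
# An empty op.disks selects every disk with no overrides.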
7235 maxidx = max(self.disks.keys())
7236 if maxidx >= len(instance.disks):
7237 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7240 if (self.op.nodes and
7241 sorted(self.disks.keys()) != range(len(instance.disks))):
7242 raise errors.OpPrereqError("Can't recreate disks partially and"
7243 " change the nodes at the same time",
7246 self.instance = instance
7248 def Exec(self, feedback_fn):
7249 """Recreate the disks.
7252 instance = self.instance
7254 assert (self.owned_locks(locking.LEVEL_NODE) ==
7255 self.owned_locks(locking.LEVEL_NODE_RES))
7258 mods = [] # keeps track of needed changes
7260 for idx, disk in enumerate(instance.disks):
7262 changes = self.disks[idx]
7264 # Disk should not be recreated
7268 # update secondaries for disks, if needed
7269 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7270 # need to update the nodes and minors
7271 assert len(self.op.nodes) == 2
7272 assert len(disk.logical_id) == 6 # otherwise disk internals
7274 (_, _, old_port, _, _, old_secret) = disk.logical_id
7275 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7276 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7277 new_minors[0], new_minors[1], old_secret)
7278 assert len(disk.logical_id) == len(new_id)
7282 mods.append((idx, new_id, changes))
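# For reference, the DRBD8 logical_id handled above is a 6-tuple of the form
# (node_A, node_B, port, minor_A, minor_B, secret); only the node names and
# the freshly allocated minors change here, e.g. (hypothetical values):
#
#   ("oldnode1", "oldnode2", 11000, 0, 1, "secret")
#     -> ("newnode1", "newnode2", 11000, 3, 4, "secret")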
7284 # now that we have passed all asserts above, we can apply the mods
7285 # in a single run (to avoid partial changes)
7286 for idx, new_id, changes in mods:
7287 disk = instance.disks[idx]
7288 if new_id is not None:
7289 assert disk.dev_type == constants.LD_DRBD8
7290 disk.logical_id = new_id
7292 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7293 mode=changes.get(constants.IDISK_MODE, None))
7295 # change primary node, if needed
7297 instance.primary_node = self.op.nodes[0]
7298 self.LogWarning("Changing the instance's nodes, you will have to"
7299 " remove any disks left on the older nodes manually")
7302 self.cfg.Update(instance, feedback_fn)
7304 _CreateDisks(self, instance, to_skip=to_skip)
7307 class LUInstanceRename(LogicalUnit):
7308 """Rename an instance.
7311 HPATH = "instance-rename"
7312 HTYPE = constants.HTYPE_INSTANCE
7314 def CheckArguments(self):
7318 if self.op.ip_check and not self.op.name_check:
7319 # TODO: make the ip check more flexible and not depend on the name check
7320 raise errors.OpPrereqError("IP address check requires a name check",
7323 def BuildHooksEnv(self):
7326 This runs on master, primary and secondary nodes of the instance.
7329 env = _BuildInstanceHookEnvByObject(self, self.instance)
7330 env["INSTANCE_NEW_NAME"] = self.op.new_name
7333 def BuildHooksNodes(self):
7334 """Build hooks nodes.
7337 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7340 def CheckPrereq(self):
7341 """Check prerequisites.
7343 This checks that the instance is in the cluster and is not running.
7346 self.op.instance_name = _ExpandInstanceName(self.cfg,
7347 self.op.instance_name)
7348 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7349 assert instance is not None
7350 _CheckNodeOnline(self, instance.primary_node)
7351 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7352 msg="cannot rename")
7353 self.instance = instance
7355 new_name = self.op.new_name
7356 if self.op.name_check:
7357 hostname = netutils.GetHostname(name=new_name)
7358 if hostname.name != new_name:
7359 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7361 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7362 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7363 " same as given hostname '%s'") %
7364 (hostname.name, self.op.new_name),
7366 new_name = self.op.new_name = hostname.name
7367 if (self.op.ip_check and
7368 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7369 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7370 (hostname.ip, new_name),
7371 errors.ECODE_NOTUNIQUE)
7373 instance_list = self.cfg.GetInstanceList()
7374 if new_name in instance_list and new_name != instance.name:
7375 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7376 new_name, errors.ECODE_EXISTS)
7378 def Exec(self, feedback_fn):
7379 """Rename the instance.
7382 inst = self.instance
7383 old_name = inst.name
7385 rename_file_storage = False
7386 if (inst.disk_template in constants.DTS_FILEBASED and
7387 self.op.new_name != inst.name):
7388 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7389 rename_file_storage = True
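# Note (assumption about file-based disk layout, kept hedged): for file and
# shared-file templates, logical_id[1] holds the path of the disk file, so
# its dirname is the per-instance storage directory that has to be renamed
# along with the instance, e.g. ".../instance1.example.com/disk0" lives in
# the directory ".../instance1.example.com".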
7391 self.cfg.RenameInstance(inst.name, self.op.new_name)
7392 # Change the instance lock. This is definitely safe while we hold the BGL.
7393 # Otherwise the new lock would have to be added in acquired mode.
7395 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7396 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7398 # re-read the instance from the configuration after rename
7399 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7401 if rename_file_storage:
7402 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7403 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7404 old_file_storage_dir,
7405 new_file_storage_dir)
7406 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7407 " (but the instance has been renamed in Ganeti)" %
7408 (inst.primary_node, old_file_storage_dir,
7409 new_file_storage_dir))
7411 _StartInstanceDisks(self, inst, None)
7413 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7414 old_name, self.op.debug_level)
7415 msg = result.fail_msg
7417 msg = ("Could not run OS rename script for instance %s on node %s"
7418 " (but the instance has been renamed in Ganeti): %s" %
7419 (inst.name, inst.primary_node, msg))
7420 self.proc.LogWarning(msg)
7422 _ShutdownInstanceDisks(self, inst)
7427 class LUInstanceRemove(LogicalUnit):
7428 """Remove an instance.
7431 HPATH = "instance-remove"
7432 HTYPE = constants.HTYPE_INSTANCE
7435 def ExpandNames(self):
7436 self._ExpandAndLockInstance()
7437 self.needed_locks[locking.LEVEL_NODE] = []
7438 self.needed_locks[locking.LEVEL_NODE_RES] = []
7439 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7441 def DeclareLocks(self, level):
7442 if level == locking.LEVEL_NODE:
7443 self._LockInstancesNodes()
7444 elif level == locking.LEVEL_NODE_RES:
7446 self.needed_locks[locking.LEVEL_NODE_RES] = \
7447 self.needed_locks[locking.LEVEL_NODE][:]
7449 def BuildHooksEnv(self):
7452 This runs on master, primary and secondary nodes of the instance.
7455 env = _BuildInstanceHookEnvByObject(self, self.instance)
7456 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7459 def BuildHooksNodes(self):
7460 """Build hooks nodes.
7463 nl = [self.cfg.GetMasterNode()]
7464 nl_post = list(self.instance.all_nodes) + nl
7465 return (nl, nl_post)
7467 def CheckPrereq(self):
7468 """Check prerequisites.
7470 This checks that the instance is in the cluster.
7473 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7474 assert self.instance is not None, \
7475 "Cannot retrieve locked instance %s" % self.op.instance_name
7477 def Exec(self, feedback_fn):
7478 """Remove the instance.
7481 instance = self.instance
7482 logging.info("Shutting down instance %s on node %s",
7483 instance.name, instance.primary_node)
7485 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7486 self.op.shutdown_timeout)
7487 msg = result.fail_msg
7489 if self.op.ignore_failures:
7490 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7492 raise errors.OpExecError("Could not shutdown instance %s on"
7494 (instance.name, instance.primary_node, msg))
7496 assert (self.owned_locks(locking.LEVEL_NODE) ==
7497 self.owned_locks(locking.LEVEL_NODE_RES))
7498 assert not (set(instance.all_nodes) -
7499 self.owned_locks(locking.LEVEL_NODE)), \
7500 "Not owning correct locks"
7502 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7505 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7506 """Utility function to remove an instance.
7509 logging.info("Removing block devices for instance %s", instance.name)
7511 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7512 if not ignore_failures:
7513 raise errors.OpExecError("Can't remove instance's disks")
7514 feedback_fn("Warning: can't remove instance's disks")
7516 logging.info("Removing instance %s out of cluster config", instance.name)
7518 lu.cfg.RemoveInstance(instance.name)
7520 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7521 "Instance lock removal conflict"
7523 # Remove lock for the instance
7524 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7527 class LUInstanceQuery(NoHooksLU):
7528 """Logical unit for querying instances.
7531 # pylint: disable=W0142
7534 def CheckArguments(self):
7535 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7536 self.op.output_fields, self.op.use_locking)
7538 def ExpandNames(self):
7539 self.iq.ExpandNames(self)
7541 def DeclareLocks(self, level):
7542 self.iq.DeclareLocks(self, level)
7544 def Exec(self, feedback_fn):
7545 return self.iq.OldStyleQuery(self)
7548 class LUInstanceFailover(LogicalUnit):
7549 """Failover an instance.
7552 HPATH = "instance-failover"
7553 HTYPE = constants.HTYPE_INSTANCE
7556 def CheckArguments(self):
7557 """Check the arguments.
7560 self.iallocator = getattr(self.op, "iallocator", None)
7561 self.target_node = getattr(self.op, "target_node", None)
7563 def ExpandNames(self):
7564 self._ExpandAndLockInstance()
7566 if self.op.target_node is not None:
7567 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7569 self.needed_locks[locking.LEVEL_NODE] = []
7570 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7572 self.needed_locks[locking.LEVEL_NODE_RES] = []
7573 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7575 ignore_consistency = self.op.ignore_consistency
7576 shutdown_timeout = self.op.shutdown_timeout
7577 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7580 ignore_consistency=ignore_consistency,
7581 shutdown_timeout=shutdown_timeout,
7582 ignore_ipolicy=self.op.ignore_ipolicy)
7583 self.tasklets = [self._migrater]
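# With self.tasklets set, the processor runs the tasklet's CheckPrereq and
# Exec instead of requiring this LU to implement them itself; that is why
# LUInstanceFailover defines no CheckPrereq/Exec of its own.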
7585 def DeclareLocks(self, level):
7586 if level == locking.LEVEL_NODE:
7587 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7588 if instance.disk_template in constants.DTS_EXT_MIRROR:
7589 if self.op.target_node is None:
7590 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7592 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7593 self.op.target_node]
7594 del self.recalculate_locks[locking.LEVEL_NODE]
7596 self._LockInstancesNodes()
7597 elif level == locking.LEVEL_NODE_RES:
7599 self.needed_locks[locking.LEVEL_NODE_RES] = \
7600 self.needed_locks[locking.LEVEL_NODE][:]
7602 def BuildHooksEnv(self):
7605 This runs on master, primary and secondary nodes of the instance.
7608 instance = self._migrater.instance
7609 source_node = instance.primary_node
7610 target_node = self.op.target_node
7612 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7613 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7614 "OLD_PRIMARY": source_node,
7615 "NEW_PRIMARY": target_node,
7618 if instance.disk_template in constants.DTS_INT_MIRROR:
7619 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7620 env["NEW_SECONDARY"] = source_node
7622 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7624 env.update(_BuildInstanceHookEnvByObject(self, instance))
7628 def BuildHooksNodes(self):
7629 """Build hooks nodes.
7632 instance = self._migrater.instance
7633 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7634 return (nl, nl + [instance.primary_node])
7637 class LUInstanceMigrate(LogicalUnit):
7638 """Migrate an instance.
7640 This is migration without shutting down, compared to the failover,
7641 which is done with shutdown.
7644 HPATH = "instance-migrate"
7645 HTYPE = constants.HTYPE_INSTANCE
7648 def ExpandNames(self):
7649 self._ExpandAndLockInstance()
7651 if self.op.target_node is not None:
7652 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7654 self.needed_locks[locking.LEVEL_NODE] = []
7655 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7657 self.needed_locks[locking.LEVEL_NODE] = []
7658 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7661 TLMigrateInstance(self, self.op.instance_name,
7662 cleanup=self.op.cleanup,
7664 fallback=self.op.allow_failover,
7665 allow_runtime_changes=self.op.allow_runtime_changes,
7666 ignore_ipolicy=self.op.ignore_ipolicy)
7667 self.tasklets = [self._migrater]
7669 def DeclareLocks(self, level):
7670 if level == locking.LEVEL_NODE:
7671 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7672 if instance.disk_template in constants.DTS_EXT_MIRROR:
7673 if self.op.target_node is None:
7674 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7676 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7677 self.op.target_node]
7678 del self.recalculate_locks[locking.LEVEL_NODE]
7680 self._LockInstancesNodes()
7681 elif level == locking.LEVEL_NODE_RES:
7683 self.needed_locks[locking.LEVEL_NODE_RES] = \
7684 self.needed_locks[locking.LEVEL_NODE][:]
7686 def BuildHooksEnv(self):
7689 This runs on master, primary and secondary nodes of the instance.
7692 instance = self._migrater.instance
7693 source_node = instance.primary_node
7694 target_node = self.op.target_node
7695 env = _BuildInstanceHookEnvByObject(self, instance)
7697 "MIGRATE_LIVE": self._migrater.live,
7698 "MIGRATE_CLEANUP": self.op.cleanup,
7699 "OLD_PRIMARY": source_node,
7700 "NEW_PRIMARY": target_node,
7701 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7704 if instance.disk_template in constants.DTS_INT_MIRROR:
7705 env["OLD_SECONDARY"] = target_node
7706 env["NEW_SECONDARY"] = source_node
7708 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7712 def BuildHooksNodes(self):
7713 """Build hooks nodes.
7716 instance = self._migrater.instance
7717 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7718 return (nl, nl + [instance.primary_node])
7721 class LUInstanceMove(LogicalUnit):
7722 """Move an instance by data-copying.
7725 HPATH = "instance-move"
7726 HTYPE = constants.HTYPE_INSTANCE
7729 def ExpandNames(self):
7730 self._ExpandAndLockInstance()
7731 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7732 self.op.target_node = target_node
7733 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7734 self.needed_locks[locking.LEVEL_NODE_RES] = []
7735 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7737 def DeclareLocks(self, level):
7738 if level == locking.LEVEL_NODE:
7739 self._LockInstancesNodes(primary_only=True)
7740 elif level == locking.LEVEL_NODE_RES:
7742 self.needed_locks[locking.LEVEL_NODE_RES] = \
7743 self.needed_locks[locking.LEVEL_NODE][:]
7745 def BuildHooksEnv(self):
7748 This runs on master, primary and secondary nodes of the instance.
7752 "TARGET_NODE": self.op.target_node,
7753 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7755 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7758 def BuildHooksNodes(self):
7759 """Build hooks nodes.
7763 self.cfg.GetMasterNode(),
7764 self.instance.primary_node,
7765 self.op.target_node,
7769 def CheckPrereq(self):
7770 """Check prerequisites.
7772 This checks that the instance is in the cluster.
7775 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7776 assert self.instance is not None, \
7777 "Cannot retrieve locked instance %s" % self.op.instance_name
7779 node = self.cfg.GetNodeInfo(self.op.target_node)
7780 assert node is not None, \
7781 "Cannot retrieve locked node %s" % self.op.target_node
7783 self.target_node = target_node = node.name
7785 if target_node == instance.primary_node:
7786 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7787 (instance.name, target_node),
7790 bep = self.cfg.GetClusterInfo().FillBE(instance)
7792 for idx, dsk in enumerate(instance.disks):
7793 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7794 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7795 " cannot copy" % idx, errors.ECODE_STATE)
7797 _CheckNodeOnline(self, target_node)
7798 _CheckNodeNotDrained(self, target_node)
7799 _CheckNodeVmCapable(self, target_node)
7800 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7801 self.cfg.GetNodeGroup(node.group))
7802 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7803 ignore=self.op.ignore_ipolicy)
7805 if instance.admin_state == constants.ADMINST_UP:
7806 # check memory requirements on the secondary node
7807 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7808 instance.name, bep[constants.BE_MAXMEM],
7809 instance.hypervisor)
7811 self.LogInfo("Not checking memory on the secondary node as"
7812 " instance will not be started")
7814 # check bridge existence
7815 _CheckInstanceBridgesExist(self, instance, node=target_node)
7817 def Exec(self, feedback_fn):
7818 """Move an instance.
7820 The move is done by shutting it down on its present node, copying
7821 the data over (slow) and starting it on the new node.
7824 instance = self.instance
7826 source_node = instance.primary_node
7827 target_node = self.target_node
7829 self.LogInfo("Shutting down instance %s on source node %s",
7830 instance.name, source_node)
7832 assert (self.owned_locks(locking.LEVEL_NODE) ==
7833 self.owned_locks(locking.LEVEL_NODE_RES))
7835 result = self.rpc.call_instance_shutdown(source_node, instance,
7836 self.op.shutdown_timeout)
7837 msg = result.fail_msg
7838 if msg:
7839 if self.op.ignore_consistency:
7840 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7841 " Proceeding anyway. Please make sure node"
7842 " %s is down. Error details: %s",
7843 instance.name, source_node, source_node, msg)
7844 else:
7845 raise errors.OpExecError("Could not shutdown instance %s on"
7846 " node %s: %s" %
7847 (instance.name, source_node, msg))
7849 # create the target disks
7851 _CreateDisks(self, instance, target_node=target_node)
7852 except errors.OpExecError:
7853 self.LogWarning("Device creation failed, reverting...")
7855 _RemoveDisks(self, instance, target_node=target_node)
7857 self.cfg.ReleaseDRBDMinors(instance.name)
7860 cluster_name = self.cfg.GetClusterInfo().cluster_name
7863 # activate, get path, copy the data over
7864 for idx, disk in enumerate(instance.disks):
7865 self.LogInfo("Copying data for disk %d", idx)
7866 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7867 instance.name, True, idx)
7869 self.LogWarning("Can't assemble newly created disk %d: %s",
7870 idx, result.fail_msg)
7871 errs.append(result.fail_msg)
7873 dev_path = result.payload
7874 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7875 target_node, dev_path,
7878 self.LogWarning("Can't copy data over for disk %d: %s",
7879 idx, result.fail_msg)
7880 errs.append(result.fail_msg)
7884 self.LogWarning("Some disks failed to copy, aborting")
7886 _RemoveDisks(self, instance, target_node=target_node)
7888 self.cfg.ReleaseDRBDMinors(instance.name)
7889 raise errors.OpExecError("Errors during disk copy: %s" %
7892 instance.primary_node = target_node
7893 self.cfg.Update(instance, feedback_fn)
7895 self.LogInfo("Removing the disks on the original node")
7896 _RemoveDisks(self, instance, target_node=source_node)
7898 # Only start the instance if it's marked as up
7899 if instance.admin_state == constants.ADMINST_UP:
7900 self.LogInfo("Starting instance %s on node %s",
7901 instance.name, target_node)
7903 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7904 ignore_secondaries=True)
7906 _ShutdownInstanceDisks(self, instance)
7907 raise errors.OpExecError("Can't activate the instance's disks")
7909 result = self.rpc.call_instance_start(target_node,
7910 (instance, None, None), False)
7911 msg = result.fail_msg
7913 _ShutdownInstanceDisks(self, instance)
7914 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7915 (instance.name, target_node, msg))
7918 class LUNodeMigrate(LogicalUnit):
7919 """Migrate all instances from a node.
7922 HPATH = "node-migrate"
7923 HTYPE = constants.HTYPE_NODE
7926 def CheckArguments(self):
7929 def ExpandNames(self):
7930 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7932 self.share_locks = _ShareAll()
7933 self.needed_locks = {
7934 locking.LEVEL_NODE: [self.op.node_name],
7937 def BuildHooksEnv(self):
7940 This runs on the master, the primary and all the secondaries.
7944 "NODE_NAME": self.op.node_name,
7945 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7948 def BuildHooksNodes(self):
7949 """Build hooks nodes.
7952 nl = [self.cfg.GetMasterNode()]
7955 def CheckPrereq(self):
7958 def Exec(self, feedback_fn):
7959 # Prepare jobs for migration instances
7960 allow_runtime_changes = self.op.allow_runtime_changes
7962 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7965 iallocator=self.op.iallocator,
7966 target_node=self.op.target_node,
7967 allow_runtime_changes=allow_runtime_changes,
7968 ignore_ipolicy=self.op.ignore_ipolicy)]
7969 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7972 # TODO: Run iallocator in this opcode and pass correct placement options to
7973 # OpInstanceMigrate. Since other jobs can modify the cluster between
7974 # running the iallocator and the actual migration, a good consistency model
7975 # will have to be found.
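# For illustration (hypothetical instance names): with primary instances
# "inst1" and "inst2" on the node, "jobs" is a list of single-opcode job
# definitions such as
#   [[OpInstanceMigrate(instance_name="inst1", ...)],
#    [OpInstanceMigrate(instance_name="inst2", ...)]]
# and each inner list is submitted as a separate job via ResultWithJobs below.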
7977 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7978 frozenset([self.op.node_name]))
7980 return ResultWithJobs(jobs)
7983 class TLMigrateInstance(Tasklet):
7984 """Tasklet class for instance migration.
7987 @ivar live: whether the migration will be done live or non-live;
7988 this variable is initialized only after CheckPrereq has run
7989 @type cleanup: boolean
7990 @ivar cleanup: Whether we clean up after a failed migration
7991 @type iallocator: string
7992 @ivar iallocator: The iallocator used to determine target_node
7993 @type target_node: string
7994 @ivar target_node: If given, the target_node to reallocate the instance to
7995 @type failover: boolean
7996 @ivar failover: Whether operation results in failover or migration
7997 @type fallback: boolean
7998 @ivar fallback: Whether fallback to failover is allowed if migration is not
7999 possible
8000 @type ignore_consistency: boolean
8001 @ivar ignore_consistency: Whether we should ignore consistency between source
8002 and target node
8003 @type shutdown_timeout: int
8004 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8005 @type ignore_ipolicy: bool
8006 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8011 _MIGRATION_POLL_INTERVAL = 1 # seconds
8012 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
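# These two constants drive the status loop in _ExecMigration: the migration
# status is polled roughly every _MIGRATION_POLL_INTERVAL seconds and a
# memory-transfer progress line is emitted about every
# _MIGRATION_FEEDBACK_INTERVAL seconds.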
8014 def __init__(self, lu, instance_name, cleanup=False,
8015 failover=False, fallback=False,
8016 ignore_consistency=False,
8017 allow_runtime_changes=True,
8018 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8019 ignore_ipolicy=False):
8020 """Initializes this class.
8023 Tasklet.__init__(self, lu)
8026 self.instance_name = instance_name
8027 self.cleanup = cleanup
8028 self.live = False # will be overridden later
8029 self.failover = failover
8030 self.fallback = fallback
8031 self.ignore_consistency = ignore_consistency
8032 self.shutdown_timeout = shutdown_timeout
8033 self.ignore_ipolicy = ignore_ipolicy
8034 self.allow_runtime_changes = allow_runtime_changes
8036 def CheckPrereq(self):
8037 """Check prerequisites.
8039 This checks that the instance is in the cluster.
8042 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8043 instance = self.cfg.GetInstanceInfo(instance_name)
8044 assert instance is not None
8045 self.instance = instance
8046 cluster = self.cfg.GetClusterInfo()
8048 if (not self.cleanup and
8049 not instance.admin_state == constants.ADMINST_UP and
8050 not self.failover and self.fallback):
8051 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8052 " switching to failover")
8053 self.failover = True
8055 if instance.disk_template not in constants.DTS_MIRRORED:
8060 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8061 " %s" % (instance.disk_template, text),
8064 if instance.disk_template in constants.DTS_EXT_MIRROR:
8065 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8067 if self.lu.op.iallocator:
8068 self._RunAllocator()
8070 # We set self.target_node as it is required by
8072 self.target_node = self.lu.op.target_node
8074 # Check that the target node is correct in terms of instance policy
8075 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8076 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8077 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8078 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8079 ignore=self.ignore_ipolicy)
8081 # self.target_node is already populated, either directly or by the
8083 target_node = self.target_node
8084 if self.target_node == instance.primary_node:
8085 raise errors.OpPrereqError("Cannot migrate instance %s"
8086 " to its primary (%s)" %
8087 (instance.name, instance.primary_node))
8089 if len(self.lu.tasklets) == 1:
8090 # It is safe to release locks only when we're the only tasklet
8092 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8093 keep=[instance.primary_node, self.target_node])
8096 secondary_nodes = instance.secondary_nodes
8097 if not secondary_nodes:
8098 raise errors.ConfigurationError("No secondary node but using"
8099 " %s disk template" %
8100 instance.disk_template)
8101 target_node = secondary_nodes[0]
8102 if self.lu.op.iallocator or (self.lu.op.target_node and
8103 self.lu.op.target_node != target_node):
8105 text = "failed over"
8108 raise errors.OpPrereqError("Instances with disk template %s cannot"
8109 " be %s to arbitrary nodes"
8110 " (neither an iallocator nor a target"
8111 " node can be passed)" %
8112 (instance.disk_template, text),
8114 nodeinfo = self.cfg.GetNodeInfo(target_node)
8115 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8116 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8117 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8118 ignore=self.ignore_ipolicy)
8120 i_be = cluster.FillBE(instance)
8122 # check memory requirements on the secondary node
8123 if (not self.cleanup and
8124 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8125 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8126 "migrating instance %s" %
8128 i_be[constants.BE_MINMEM],
8129 instance.hypervisor)
8131 self.lu.LogInfo("Not checking memory on the secondary node as"
8132 " instance will not be started")
8134 # check if failover must be forced instead of migration
8135 if (not self.cleanup and not self.failover and
8136 i_be[constants.BE_ALWAYS_FAILOVER]):
8138 self.lu.LogInfo("Instance configured to always failover; fallback"
8140 self.failover = True
8142 raise errors.OpPrereqError("This instance has been configured to"
8143 " always failover, please allow failover",
8146 # check bridge existence
8147 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8149 if not self.cleanup:
8150 _CheckNodeNotDrained(self.lu, target_node)
8151 if not self.failover:
8152 result = self.rpc.call_instance_migratable(instance.primary_node,
8154 if result.fail_msg and self.fallback:
8155 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8157 self.failover = True
8159 result.Raise("Can't migrate, please use failover",
8160 prereq=True, ecode=errors.ECODE_STATE)
8162 assert not (self.failover and self.cleanup)
8164 if not self.failover:
8165 if self.lu.op.live is not None and self.lu.op.mode is not None:
8166 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8167 " parameters are accepted",
8169 if self.lu.op.live is not None:
8171 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8173 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8174 # reset the 'live' parameter to None so that repeated
8175 # invocations of CheckPrereq do not raise an exception
8176 self.lu.op.live = None
8177 elif self.lu.op.mode is None:
8178 # read the default value from the hypervisor
8179 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8180 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8182 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8184 # Failover is never live
8187 if not (self.failover or self.cleanup):
8188 remote_info = self.rpc.call_instance_info(instance.primary_node,
8190 instance.hypervisor)
8191 remote_info.Raise("Error checking instance on node %s" %
8192 instance.primary_node)
8193 instance_running = bool(remote_info.payload)
8194 if instance_running:
8195 self.current_mem = int(remote_info.payload["memory"])
8197 def _RunAllocator(self):
8198 """Run the allocator based on input opcode.
8201 # FIXME: add a self.ignore_ipolicy option
8202 ial = IAllocator(self.cfg, self.rpc,
8203 mode=constants.IALLOCATOR_MODE_RELOC,
8204 name=self.instance_name,
8205 relocate_from=[self.instance.primary_node],
8208 ial.Run(self.lu.op.iallocator)
8211 raise errors.OpPrereqError("Can't compute nodes using"
8212 " iallocator '%s': %s" %
8213 (self.lu.op.iallocator, ial.info),
8215 if len(ial.result) != ial.required_nodes:
8216 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8217 " of nodes (%s), required %s" %
8218 (self.lu.op.iallocator, len(ial.result),
8219 ial.required_nodes), errors.ECODE_FAULT)
8220 self.target_node = ial.result[0]
8221 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8222 self.instance_name, self.lu.op.iallocator,
8223 utils.CommaJoin(ial.result))
8225 def _WaitUntilSync(self):
8226 """Poll with custom rpc for disk sync.
8228 This uses our own step-based rpc call.
8231 self.feedback_fn("* wait until resync is done")
8235 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8237 (self.instance.disks,
8240 for node, nres in result.items():
8241 nres.Raise("Cannot resync disks on node %s" % node)
8242 node_done, node_percent = nres.payload
8243 all_done = all_done and node_done
8244 if node_percent is not None:
8245 min_percent = min(min_percent, node_percent)
8247 if min_percent < 100:
8248 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8251 def _EnsureSecondary(self, node):
8252 """Demote a node to secondary.
8255 self.feedback_fn("* switching node %s to secondary mode" % node)
8257 for dev in self.instance.disks:
8258 self.cfg.SetDiskID(dev, node)
8260 result = self.rpc.call_blockdev_close(node, self.instance.name,
8261 self.instance.disks)
8262 result.Raise("Cannot change disk to secondary on node %s" % node)
8264 def _GoStandalone(self):
8265 """Disconnect from the network.
8268 self.feedback_fn("* changing into standalone mode")
8269 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8270 self.instance.disks)
8271 for node, nres in result.items():
8272 nres.Raise("Cannot disconnect disks node %s" % node)
8274 def _GoReconnect(self, multimaster):
8275 """Reconnect to the network.
8281 msg = "single-master"
8282 self.feedback_fn("* changing disks into %s mode" % msg)
8283 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8284 (self.instance.disks, self.instance),
8285 self.instance.name, multimaster)
8286 for node, nres in result.items():
8287 nres.Raise("Cannot change disks config on node %s" % node)
8289 def _ExecCleanup(self):
8290 """Try to cleanup after a failed migration.
8292 The cleanup is done by:
8293 - check that the instance is running only on one node
8294 (and update the config if needed)
8295 - change disks on its secondary node to secondary
8296 - wait until disks are fully synchronized
8297 - disconnect from the network
8298 - change disks into single-master mode
8299 - wait again until disks are fully synchronized
8302 instance = self.instance
8303 target_node = self.target_node
8304 source_node = self.source_node
8306 # check running on only one node
8307 self.feedback_fn("* checking where the instance actually runs"
8308 " (if this hangs, the hypervisor might be in"
8310 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8311 for node, result in ins_l.items():
8312 result.Raise("Can't contact node %s" % node)
8314 runningon_source = instance.name in ins_l[source_node].payload
8315 runningon_target = instance.name in ins_l[target_node].payload
8317 if runningon_source and runningon_target:
8318 raise errors.OpExecError("Instance seems to be running on two nodes,"
8319 " or the hypervisor is confused; you will have"
8320 " to ensure manually that it runs only on one"
8321 " and restart this operation")
8323 if not (runningon_source or runningon_target):
8324 raise errors.OpExecError("Instance does not seem to be running at all;"
8325 " in this case it's safer to repair by"
8326 " running 'gnt-instance stop' to ensure disk"
8327 " shutdown, and then restarting it")
8329 if runningon_target:
8330 # the migration has actually succeeded, we need to update the config
8331 self.feedback_fn("* instance running on secondary node (%s),"
8332 " updating config" % target_node)
8333 instance.primary_node = target_node
8334 self.cfg.Update(instance, self.feedback_fn)
8335 demoted_node = source_node
8337 self.feedback_fn("* instance confirmed to be running on its"
8338 " primary node (%s)" % source_node)
8339 demoted_node = target_node
8341 if instance.disk_template in constants.DTS_INT_MIRROR:
8342 self._EnsureSecondary(demoted_node)
8344 self._WaitUntilSync()
8345 except errors.OpExecError:
8346 # we ignore errors here, since if the device is standalone, it
8347 # won't be able to sync
8349 self._GoStandalone()
8350 self._GoReconnect(False)
8351 self._WaitUntilSync()
8353 self.feedback_fn("* done")
8355 def _RevertDiskStatus(self):
8356 """Try to revert the disk status after a failed migration.
8359 target_node = self.target_node
8360 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8364 self._EnsureSecondary(target_node)
8365 self._GoStandalone()
8366 self._GoReconnect(False)
8367 self._WaitUntilSync()
8368 except errors.OpExecError, err:
8369 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8370 " please try to recover the instance manually;"
8371 " error '%s'" % str(err))
8373 def _AbortMigration(self):
8374 """Call the hypervisor code to abort a started migration.
8377 instance = self.instance
8378 target_node = self.target_node
8379 source_node = self.source_node
8380 migration_info = self.migration_info
8382 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8386 abort_msg = abort_result.fail_msg
8388 logging.error("Aborting migration failed on target node %s: %s",
8389 target_node, abort_msg)
8390 # Don't raise an exception here, as we still have to try to revert the
8391 # disk status, even if this step failed.
8393 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8394 instance, False, self.live)
8395 abort_msg = abort_result.fail_msg
8397 logging.error("Aborting migration failed on source node %s: %s",
8398 source_node, abort_msg)
8400 def _ExecMigration(self):
8401 """Migrate an instance.
8403 The migrate is done by:
8404 - change the disks into dual-master mode
8405 - wait until disks are fully synchronized again
8406 - migrate the instance
8407 - change disks on the new secondary node (the old primary) to secondary
8408 - wait until disks are fully synchronized
8409 - change disks into single-master mode
8412 instance = self.instance
8413 target_node = self.target_node
8414 source_node = self.source_node
8416 # Check for hypervisor version mismatch and warn the user.
8417 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8418 None, [self.instance.hypervisor])
8419 for ninfo in nodeinfo.values():
8420 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8422 (_, _, (src_info, )) = nodeinfo[source_node].payload
8423 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8425 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8426 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8427 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8428 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8429 if src_version != dst_version:
8430 self.feedback_fn("* warning: hypervisor version mismatch between"
8431 " source (%s) and target (%s) node" %
8432 (src_version, dst_version))
8434 self.feedback_fn("* checking disk consistency between source and target")
8435 for (idx, dev) in enumerate(instance.disks):
8436 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8437 raise errors.OpExecError("Disk %s is degraded or not fully"
8438 " synchronized on target node,"
8439 " aborting migration" % idx)
8441 if self.current_mem > self.tgt_free_mem:
8442 if not self.allow_runtime_changes:
8443 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8444 " free memory to fit instance %s on target"
8445 " node %s (have %dMB, need %dMB)" %
8446 (instance.name, target_node,
8447 self.tgt_free_mem, self.current_mem))
8448 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8449 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8452 rpcres.Raise("Cannot modify instance runtime memory")
8454 # First get the migration information from the remote node
8455 result = self.rpc.call_migration_info(source_node, instance)
8456 msg = result.fail_msg
8458 log_err = ("Failed fetching source migration information from %s: %s" %
8460 logging.error(log_err)
8461 raise errors.OpExecError(log_err)
8463 self.migration_info = migration_info = result.payload
8465 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8466 # Then switch the disks to master/master mode
8467 self._EnsureSecondary(target_node)
8468 self._GoStandalone()
8469 self._GoReconnect(True)
8470 self._WaitUntilSync()
8472 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8473 result = self.rpc.call_accept_instance(target_node,
8476 self.nodes_ip[target_node])
8478 msg = result.fail_msg
8480 logging.error("Instance pre-migration failed, trying to revert"
8481 " disk status: %s", msg)
8482 self.feedback_fn("Pre-migration failed, aborting")
8483 self._AbortMigration()
8484 self._RevertDiskStatus()
8485 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8486 (instance.name, msg))
8488 self.feedback_fn("* migrating instance to %s" % target_node)
8489 result = self.rpc.call_instance_migrate(source_node, instance,
8490 self.nodes_ip[target_node],
8492 msg = result.fail_msg
8494 logging.error("Instance migration failed, trying to revert"
8495 " disk status: %s", msg)
8496 self.feedback_fn("Migration failed, aborting")
8497 self._AbortMigration()
8498 self._RevertDiskStatus()
8499 raise errors.OpExecError("Could not migrate instance %s: %s" %
8500 (instance.name, msg))
8502 self.feedback_fn("* starting memory transfer")
8503 last_feedback = time.time()
8505 result = self.rpc.call_instance_get_migration_status(source_node,
8507 msg = result.fail_msg
8508 ms = result.payload # MigrationStatus instance
8509 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8510 logging.error("Instance migration failed, trying to revert"
8511 " disk status: %s", msg)
8512 self.feedback_fn("Migration failed, aborting")
8513 self._AbortMigration()
8514 self._RevertDiskStatus()
8515 raise errors.OpExecError("Could not migrate instance %s: %s" %
8516 (instance.name, msg))
8518 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8519 self.feedback_fn("* memory transfer complete")
8522 if (utils.TimeoutExpired(last_feedback,
8523 self._MIGRATION_FEEDBACK_INTERVAL) and
8524 ms.transferred_ram is not None):
8525 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8526 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8527 last_feedback = time.time()
8529 time.sleep(self._MIGRATION_POLL_INTERVAL)
8531 result = self.rpc.call_instance_finalize_migration_src(source_node,
8535 msg = result.fail_msg
8537 logging.error("Instance migration succeeded, but finalization failed"
8538 " on the source node: %s", msg)
8539 raise errors.OpExecError("Could not finalize instance migration: %s" %
8542 instance.primary_node = target_node
8544 # distribute new instance config to the other nodes
8545 self.cfg.Update(instance, self.feedback_fn)
8547 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8551 msg = result.fail_msg
8553 logging.error("Instance migration succeeded, but finalization failed"
8554 " on the target node: %s", msg)
8555 raise errors.OpExecError("Could not finalize instance migration: %s" %
8558 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8559 self._EnsureSecondary(source_node)
8560 self._WaitUntilSync()
8561 self._GoStandalone()
8562 self._GoReconnect(False)
8563 self._WaitUntilSync()
8565 # If the instance's disk template is `rbd' and there was a successful
8566 # migration, unmap the device from the source node.
8567 if self.instance.disk_template == constants.DT_RBD:
8568 disks = _ExpandCheckDisks(instance, instance.disks)
8569 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8571 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8572 msg = result.fail_msg
8574 logging.error("Migration was successful, but couldn't unmap the"
8575 " block device %s on source node %s: %s",
8576 disk.iv_name, source_node, msg)
8577 logging.error("You need to unmap the device %s manually on %s",
8578 disk.iv_name, source_node)
8580 self.feedback_fn("* done")
8582 def _ExecFailover(self):
8583 """Failover an instance.
8585 The failover is done by shutting it down on its present node and
8586 starting it on the secondary.
8589 instance = self.instance
8590 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8592 source_node = instance.primary_node
8593 target_node = self.target_node
8595 if instance.admin_state == constants.ADMINST_UP:
8596 self.feedback_fn("* checking disk consistency between source and target")
8597 for (idx, dev) in enumerate(instance.disks):
8598 # for drbd, these are drbd over lvm
8599 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8601 if primary_node.offline:
8602 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8604 (primary_node.name, idx, target_node))
8605 elif not self.ignore_consistency:
8606 raise errors.OpExecError("Disk %s is degraded on target node,"
8607 " aborting failover" % idx)
8609 self.feedback_fn("* not checking disk consistency as instance is not"
8612 self.feedback_fn("* shutting down instance on source node")
8613 logging.info("Shutting down instance %s on node %s",
8614 instance.name, source_node)
8616 result = self.rpc.call_instance_shutdown(source_node, instance,
8617 self.shutdown_timeout)
8618 msg = result.fail_msg
8619 if msg:
8620 if self.ignore_consistency or primary_node.offline:
8621 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8622 " proceeding anyway; please make sure node"
8623 " %s is down; error details: %s",
8624 instance.name, source_node, source_node, msg)
8625 else:
8626 raise errors.OpExecError("Could not shutdown instance %s on"
8627 " node %s: %s" %
8628 (instance.name, source_node, msg))
8630 self.feedback_fn("* deactivating the instance's disks on source node")
8631 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8632 raise errors.OpExecError("Can't shut down the instance's disks")
8634 instance.primary_node = target_node
8635 # distribute new instance config to the other nodes
8636 self.cfg.Update(instance, self.feedback_fn)
8638 # Only start the instance if it's marked as up
8639 if instance.admin_state == constants.ADMINST_UP:
8640 self.feedback_fn("* activating the instance's disks on target node %s" %
8642 logging.info("Starting instance %s on node %s",
8643 instance.name, target_node)
8645 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8646 ignore_secondaries=True)
8648 _ShutdownInstanceDisks(self.lu, instance)
8649 raise errors.OpExecError("Can't activate the instance's disks")
8651 self.feedback_fn("* starting the instance on the target node %s" %
8653 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8655 msg = result.fail_msg
8657 _ShutdownInstanceDisks(self.lu, instance)
8658 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8659 (instance.name, target_node, msg))
8661 def Exec(self, feedback_fn):
8662 """Perform the migration.
8665 self.feedback_fn = feedback_fn
8666 self.source_node = self.instance.primary_node
8668 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8669 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8670 self.target_node = self.instance.secondary_nodes[0]
8671 # Otherwise self.target_node has been populated either
8672 # directly, or through an iallocator.
8674 self.all_nodes = [self.source_node, self.target_node]
8675 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8676 in self.cfg.GetMultiNodeInfo(self.all_nodes))
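# nodes_ip maps each involved node name to its secondary IP address, which is
# what the DRBD network-configuration and migration RPCs in this tasklet use
# (e.g. call_drbd_attach_net and call_instance_migrate).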
8679 feedback_fn("Failover instance %s" % self.instance.name)
8680 self._ExecFailover()
8682 feedback_fn("Migrating instance %s" % self.instance.name)
8685 return self._ExecCleanup()
8687 return self._ExecMigration()
8690 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8691 force_open):
8692 """Wrapper around L{_CreateBlockDevInner}.
8694 This method annotates the root device first.
8697 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8698 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8699 force_open)
8702 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8703 info, force_open):
8704 """Create a tree of block devices on a given node.
8706 If this device type has to be created on secondaries, create it and
8707 all its children.
8709 If not, just recurse to children keeping the same 'force' value.
8711 @attention: The device has to be annotated already.
8713 @param lu: the lu on whose behalf we execute
8714 @param node: the node on which to create the device
8715 @type instance: L{objects.Instance}
8716 @param instance: the instance which owns the device
8717 @type device: L{objects.Disk}
8718 @param device: the device to create
8719 @type force_create: boolean
8720 @param force_create: whether to force creation of this device; this
8721 will be changed to True whenever we find a device which has
8722 CreateOnSecondary() attribute
8723 @param info: the extra 'metadata' we should attach to the device
8724 (this will be represented as a LVM tag)
8725 @type force_open: boolean
8726 @param force_open: this parameter will be passed to the
8727 L{backend.BlockdevCreate} function where it specifies
8728 whether we run on primary or not, and it affects both
8729 the child assembly and the device's own Open() execution
8732 if device.CreateOnSecondary():
8733 force_create = True
8736 for child in device.children:
8737 _CreateBlockDevInner(lu, node, instance, child, force_create,
8738 info, force_open)
8740 if not force_create:
8741 return
8743 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
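# A hedged example of the recursion above: for a DRBD8 device, whose
# CreateOnSecondary() is expected to return True, force_create becomes True,
# so its LV children (data and metadata) are created on the node before the
# DRBD device itself; a plain LV with no children is only created if the
# caller already passed force_create=True.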
8746 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8747 """Create a single block device on a given node.
8749 This will not recurse over children of the device, so they must be
8752 @param lu: the lu on whose behalf we execute
8753 @param node: the node on which to create the device
8754 @type instance: L{objects.Instance}
8755 @param instance: the instance which owns the device
8756 @type device: L{objects.Disk}
8757 @param device: the device to create
8758 @param info: the extra 'metadata' we should attach to the device
8759 (this will be represented as a LVM tag)
8760 @type force_open: boolean
8761 @param force_open: this parameter will be passed to the
8762 L{backend.BlockdevCreate} function where it specifies
8763 whether we run on primary or not, and it affects both
8764 the child assembly and the device's own Open() execution
8767 lu.cfg.SetDiskID(device, node)
8768 result = lu.rpc.call_blockdev_create(node, device, device.size,
8769 instance.name, force_open, info)
8770 result.Raise("Can't create block device %s on"
8771 " node %s for instance %s" % (device, node, instance.name))
8772 if device.physical_id is None:
8773 device.physical_id = result.payload
8776 def _GenerateUniqueNames(lu, exts):
8777 """Generate a suitable LV name.
8779 This will generate a logical volume name for the given instance.
8782 results = []
8783 for val in exts:
8784 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8785 results.append("%s%s" % (new_id, val))
8786 return results
8789 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8790 iv_name, p_minor, s_minor):
8791 """Generate a drbd8 device complete with its children.
8794 assert len(vgnames) == len(names) == 2
8795 port = lu.cfg.AllocatePort()
8796 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8798 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8799 logical_id=(vgnames[0], names[0]),
8801 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8802 logical_id=(vgnames[1], names[1]),
8804 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8805 logical_id=(primary, secondary, port,
8808 children=[dev_data, dev_meta],
8809 iv_name=iv_name, params={})
8810 return drbd_dev
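# The resulting object tree, roughly: an LD_DRBD8 disk of the requested size
# with two LD_LV children, the data volume (names[0], same size) and the
# metadata volume (names[1], DRBD_META_SIZE), and a logical_id carrying the
# two nodes, the allocated port, the minors and the shared secret.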
8813 _DISK_TEMPLATE_NAME_PREFIX = {
8814 constants.DT_PLAIN: "",
8815 constants.DT_RBD: ".rbd",
8819 _DISK_TEMPLATE_DEVICE_TYPE = {
8820 constants.DT_PLAIN: constants.LD_LV,
8821 constants.DT_FILE: constants.LD_FILE,
8822 constants.DT_SHARED_FILE: constants.LD_FILE,
8823 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8824 constants.DT_RBD: constants.LD_RBD,
8828 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8829 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8830 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8831 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8832 """Generate the entire disk layout for a given template type.
8835 #TODO: compute space requirements
8837 vgname = lu.cfg.GetVGName()
8838 disk_count = len(disk_info)
8841 if template_name == constants.DT_DISKLESS:
8842 pass
8843 elif template_name == constants.DT_DRBD8:
8844 if len(secondary_nodes) != 1:
8845 raise errors.ProgrammerError("Wrong template configuration")
8846 remote_node = secondary_nodes[0]
8847 minors = lu.cfg.AllocateDRBDMinor(
8848 [primary_node, remote_node] * len(disk_info), instance_name)
8850 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8852 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8855 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8856 for i in range(disk_count)]):
8857 names.append(lv_prefix + "_data")
8858 names.append(lv_prefix + "_meta")
8859 for idx, disk in enumerate(disk_info):
8860 disk_index = idx + base_index
8861 data_vg = disk.get(constants.IDISK_VG, vgname)
8862 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8863 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8864 disk[constants.IDISK_SIZE],
8866 names[idx * 2:idx * 2 + 2],
8867 "disk/%d" % disk_index,
8868 minors[idx * 2], minors[idx * 2 + 1])
8869 disk_dev.mode = disk[constants.IDISK_MODE]
8870 disks.append(disk_dev)
8873 raise errors.ProgrammerError("Wrong template configuration")
8875 if template_name == constants.DT_FILE:
8876 _req_file_storage()
8877 elif template_name == constants.DT_SHARED_FILE:
8878 _req_shr_file_storage()
8880 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8881 if name_prefix is None:
8882 names = None
8883 else:
8884 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8885 (name_prefix, base_index + i)
8886 for i in range(disk_count)])
8888 if template_name == constants.DT_PLAIN:
8889 def logical_id_fn(idx, _, disk):
8890 vg = disk.get(constants.IDISK_VG, vgname)
8891 return (vg, names[idx])
8892 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8894 lambda _, disk_index, disk: (file_driver,
8895 "%s/disk%d" % (file_storage_dir,
8897 elif template_name == constants.DT_BLOCK:
8899 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8900 disk[constants.IDISK_ADOPT])
8901 elif template_name == constants.DT_RBD:
8902 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8904 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8906 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8908 for idx, disk in enumerate(disk_info):
8909 disk_index = idx + base_index
8910 size = disk[constants.IDISK_SIZE]
8911 feedback_fn("* disk %s, size %s" %
8912 (disk_index, utils.FormatUnit(size, "h")))
8913 disks.append(objects.Disk(dev_type=dev_type, size=size,
8914 logical_id=logical_id_fn(idx, disk_index, disk),
8915 iv_name="disk/%d" % disk_index,
8916 mode=disk[constants.IDISK_MODE],
8917 params={}))
8919 return disks
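# A minimal sketch of the outcome (hypothetical values): for DT_PLAIN with a
# single {size: 1024, mode: "rw"} disk and base_index 0, the returned list
# holds one LD_LV Disk with logical_id (<vg name>, "<unique id>.disk0"),
# iv_name "disk/0" and size 1024.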
8922 def _GetInstanceInfoText(instance):
8923 """Compute that text that should be added to the disk's metadata.
8926 return "originstname+%s" % instance.name
8929 def _CalcEta(time_taken, written, total_size):
8930 """Calculates the ETA based on size written and total size.
8932 @param time_taken: The time taken so far
8933 @param written: amount written so far
8934 @param total_size: The total size of data to be written
8935 @return: The remaining time in seconds
8938 avg_time = time_taken / float(written)
8939 return (total_size - written) * avg_time
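# Worked example (hypothetical figures): after 120s with 512 MiB written out
# of 2048 MiB, avg_time = 120 / 512 = 0.234 s per MiB, so the returned ETA is
# (2048 - 512) * 0.234 ~= 360 seconds.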
8942 def _WipeDisks(lu, instance):
8943 """Wipes instance disks.
8945 @type lu: L{LogicalUnit}
8946 @param lu: the logical unit on whose behalf we execute
8947 @type instance: L{objects.Instance}
8948 @param instance: the instance whose disks we should create
8949 @return: the success of the wipe
8952 node = instance.primary_node
8954 for device in instance.disks:
8955 lu.cfg.SetDiskID(device, node)
8957 logging.info("Pause sync of instance %s disks", instance.name)
8958 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8959 (instance.disks, instance),
8961 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8963 for idx, success in enumerate(result.payload):
8964 if not success:
8965 logging.warn("pause-sync of instance %s for disk %d failed",
8966 instance.name, idx)
8969 for idx, device in enumerate(instance.disks):
8970 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but at
8971 # most MAX_WIPE_CHUNK
8972 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8973 constants.MIN_WIPE_CHUNK_PERCENT)
8974 # we _must_ make this an int, otherwise rounding errors will
8975 # occur
8976 wipe_chunk_size = int(wipe_chunk_size)
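# For illustration (the constants' real values live in constants.py): if
# MIN_WIPE_CHUNK_PERCENT were 10 and MAX_WIPE_CHUNK were 1024, a 20480 MiB
# disk would be wiped in chunks of min(1024, 20480 * 0.10) = 1024 MiB.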
8978 lu.LogInfo("* Wiping disk %d", idx)
8979 logging.info("Wiping disk %d for instance %s, node %s using"
8980 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8985 start_time = time.time()
8987 while offset < size:
8988 wipe_size = min(wipe_chunk_size, size - offset)
8989 logging.debug("Wiping disk %d, offset %s, chunk %s",
8990 idx, offset, wipe_size)
8991 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8993 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8994 (idx, offset, wipe_size))
8997 if now - last_output >= 60:
8998 eta = _CalcEta(now - start_time, offset, size)
8999 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9000 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9003 logging.info("Resume sync of instance %s disks", instance.name)
9005 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9006 (instance.disks, instance),
9010 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9011 " please have a look at the status and troubleshoot"
9012 " the issue: %s", node, result.fail_msg)
9014 for idx, success in enumerate(result.payload):
9016 lu.LogWarning("Resume sync of disk %d failed, please have a"
9017 " look at the status and troubleshoot the issue", idx)
9018 logging.warn("resume-sync of instance %s for disks %d failed",
9022 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9023 """Create all disks for an instance.
9025 This abstracts away some work from AddInstance.
9027 @type lu: L{LogicalUnit}
9028 @param lu: the logical unit on whose behalf we execute
9029 @type instance: L{objects.Instance}
9030 @param instance: the instance whose disks we should create
9032 @param to_skip: list of indices to skip
9033 @type target_node: string
9034 @param target_node: if passed, overrides the target node for creation
9036 @return: the success of the creation
9039 info = _GetInstanceInfoText(instance)
9040 if target_node is None:
9041 pnode = instance.primary_node
9042 all_nodes = instance.all_nodes
9047 if instance.disk_template in constants.DTS_FILEBASED:
9048 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9049 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9051 result.Raise("Failed to create directory '%s' on"
9052 " node %s" % (file_storage_dir, pnode))
9054 # Note: this needs to be kept in sync with adding of disks in
9055 # LUInstanceSetParams
9056 for idx, device in enumerate(instance.disks):
9057 if to_skip and idx in to_skip:
9059 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9061 for node in all_nodes:
9062 f_create = node == pnode
9063 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9066 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9067 """Remove all disks for an instance.
9069 This abstracts away some work from `AddInstance()` and
9070 `RemoveInstance()`. Note that in case some of the devices couldn't
9071 be removed, the removal will continue with the other ones (compare
9072 with `_CreateDisks()`).
9074 @type lu: L{LogicalUnit}
9075 @param lu: the logical unit on whose behalf we execute
9076 @type instance: L{objects.Instance}
9077 @param instance: the instance whose disks we should remove
9078 @type target_node: string
9079 @param target_node: used to override the node on which to remove the disks
9081 @return: the success of the removal
9084 logging.info("Removing block devices for instance %s", instance.name)
9087 ports_to_release = set()
9088 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9089 for (idx, device) in enumerate(anno_disks):
9091 edata = [(target_node, device)]
9093 edata = device.ComputeNodeTree(instance.primary_node)
9094 for node, disk in edata:
9095 lu.cfg.SetDiskID(disk, node)
9096 result = lu.rpc.call_blockdev_remove(node, disk)
9098 lu.LogWarning("Could not remove disk %s on node %s,"
9099 " continuing anyway: %s", idx, node, result.fail_msg)
9100 if not (result.offline and node != instance.primary_node):
9103 # if this is a DRBD disk, return its port to the pool
9104 if device.dev_type in constants.LDS_DRBD:
9105 ports_to_release.add(device.logical_id[2])
9107 if all_result or ignore_failures:
9108 for port in ports_to_release:
9109 lu.cfg.AddTcpUdpPort(port)
9111 if instance.disk_template == constants.DT_FILE:
9112 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9116 tgt = instance.primary_node
9117 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9119 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9120 file_storage_dir, instance.primary_node, result.fail_msg)
9126 def _ComputeDiskSizePerVG(disk_template, disks):
9127 """Compute disk size requirements in the volume group
9130 def _compute(disks, payload):
9131 """Universal algorithm.
9136 vgs[disk[constants.IDISK_VG]] = \
9137 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9141 # Required free disk space as a function of disk and swap space
9143 constants.DT_DISKLESS: {},
9144 constants.DT_PLAIN: _compute(disks, 0),
9145 # 128 MB are added for drbd metadata for each disk
9146 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9147 constants.DT_FILE: {},
9148 constants.DT_SHARED_FILE: {},
9151 if disk_template not in req_size_dict:
9152 raise errors.ProgrammerError("Disk template '%s' size requirement"
9153 " is unknown" % disk_template)
9155 return req_size_dict[disk_template]
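# A sketch of the result (hypothetical VG name): for DT_PLAIN with disks
# [{vg: "xenvg", size: 1024}, {vg: "xenvg", size: 512}] this returns
# {"xenvg": 1536}; for DT_DRBD8 the per-disk DRBD_META_SIZE overhead is added
# on top of each size.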
9158 def _ComputeDiskSize(disk_template, disks):
9159 """Compute disk size requirements according to disk template
9162 # Required free disk space as a function of disk and swap space
9164 constants.DT_DISKLESS: None,
9165 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9166 # 128 MB are added for drbd metadata for each disk
9168 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9169 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9170 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9171 constants.DT_BLOCK: 0,
9172 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9175 if disk_template not in req_size_dict:
9176 raise errors.ProgrammerError("Disk template '%s' size requirement"
9177 " is unknown" % disk_template)
9179 return req_size_dict[disk_template]
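# Example, using the 128 MB per-disk DRBD metadata overhead mentioned above:
# for DT_DRBD8 with two disks of 1024 and 2048, the requirement is
# (1024 + 128) + (2048 + 128) = 3328.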
9182 def _FilterVmNodes(lu, nodenames):
9183 """Filters out non-vm_capable nodes from a list.
9185 @type lu: L{LogicalUnit}
9186 @param lu: the logical unit for which we check
9187 @type nodenames: list
9188 @param nodenames: the list of nodes on which we should check
9190 @return: the list of vm-capable nodes
9193 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9194 return [name for name in nodenames if name not in non_vm_nodes]
9197 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9198 """Hypervisor parameter validation.
9200 This function abstracts the hypervisor parameter validation to be
9201 used in both instance create and instance modify.
9203 @type lu: L{LogicalUnit}
9204 @param lu: the logical unit for which we check
9205 @type nodenames: list
9206 @param nodenames: the list of nodes on which we should check
9207 @type hvname: string
9208 @param hvname: the name of the hypervisor we should use
9209 @type hvparams: dict
9210 @param hvparams: the parameters which we need to check
9211 @raise errors.OpPrereqError: if the parameters are not valid
9214 nodenames = _FilterVmNodes(lu, nodenames)
9216 cluster = lu.cfg.GetClusterInfo()
9217 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9219 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9220 for node in nodenames:
9224 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9227 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9228 """OS parameters validation.
9230 @type lu: L{LogicalUnit}
9231 @param lu: the logical unit for which we check
9232 @type required: boolean
9233 @param required: whether the validation should fail if the OS is not
9235 @type nodenames: list
9236 @param nodenames: the list of nodes on which we should check
9237 @type osname: string
9238 @param osname: the name of the OS we should use
9239 @type osparams: dict
9240 @param osparams: the parameters which we need to check
9241 @raise errors.OpPrereqError: if the parameters are not valid
9244 nodenames = _FilterVmNodes(lu, nodenames)
9245 result = lu.rpc.call_os_validate(nodenames, required, osname,
9246 [constants.OS_VALIDATE_PARAMETERS],
9248 for node, nres in result.items():
9249 # we don't check for offline cases since this should be run only
9250 # against the master node and/or an instance's nodes
9251 nres.Raise("OS Parameters validation failed on node %s" % node)
9252 if not nres.payload:
9253 lu.LogInfo("OS %s not found on node %s, validation skipped",
9257 class LUInstanceCreate(LogicalUnit):
9258 """Create an instance.
9261 HPATH = "instance-add"
9262 HTYPE = constants.HTYPE_INSTANCE
9265 def CheckArguments(self):
9269 # do not require name_check to ease forward/backward compatibility
9271 if self.op.no_install and self.op.start:
9272 self.LogInfo("No-installation mode selected, disabling startup")
9273 self.op.start = False
9274 # validate/normalize the instance name
9275 self.op.instance_name = \
9276 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9278 if self.op.ip_check and not self.op.name_check:
9279 # TODO: make the ip check more flexible and not depend on the name check
9280 raise errors.OpPrereqError("Cannot do IP address check without a name"
9281 " check", errors.ECODE_INVAL)
9283 # check nics' parameter names
9284 for nic in self.op.nics:
9285 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9287 # check disks. parameter names and consistent adopt/no-adopt strategy
9288 has_adopt = has_no_adopt = False
9289 for disk in self.op.disks:
9290 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9291 if constants.IDISK_ADOPT in disk:
9295 if has_adopt and has_no_adopt:
9296 raise errors.OpPrereqError("Either all disks are adopted or none is",
9299 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9300 raise errors.OpPrereqError("Disk adoption is not supported for the"
9301 " '%s' disk template" %
9302 self.op.disk_template,
9304 if self.op.iallocator is not None:
9305 raise errors.OpPrereqError("Disk adoption not allowed with an"
9306 " iallocator script", errors.ECODE_INVAL)
9307 if self.op.mode == constants.INSTANCE_IMPORT:
9308 raise errors.OpPrereqError("Disk adoption not allowed for"
9309 " instance import", errors.ECODE_INVAL)
9311 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9312 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9313 " but no 'adopt' parameter given" %
9314 self.op.disk_template,
9317 self.adopt_disks = has_adopt
9319 # instance name verification
9320 if self.op.name_check:
9321 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9322 self.op.instance_name = self.hostname1.name
9323 # used in CheckPrereq for ip ping check
9324 self.check_ip = self.hostname1.ip
9325 else:
9326 self.check_ip = None
9328 # file storage checks
9329 if (self.op.file_driver and
9330 not self.op.file_driver in constants.FILE_DRIVER):
9331 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9332 self.op.file_driver, errors.ECODE_INVAL)
9334 if self.op.disk_template == constants.DT_FILE:
9335 opcodes.RequireFileStorage()
9336 elif self.op.disk_template == constants.DT_SHARED_FILE:
9337 opcodes.RequireSharedFileStorage()
9339 ### Node/iallocator related checks
9340 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9342 if self.op.pnode is not None:
9343 if self.op.disk_template in constants.DTS_INT_MIRROR:
9344 if self.op.snode is None:
9345 raise errors.OpPrereqError("The networked disk templates need"
9346 " a mirror node", errors.ECODE_INVAL)
9348 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9350 self.op.snode = None
9352 self._cds = _GetClusterDomainSecret()
9354 if self.op.mode == constants.INSTANCE_IMPORT:
9355 # On import force_variant must be True, because if we forced it at
9356 # initial install, our only chance when importing it back is that it
9358 self.op.force_variant = True
9360 if self.op.no_install:
9361 self.LogInfo("No-installation mode has no effect during import")
9363 elif self.op.mode == constants.INSTANCE_CREATE:
9364 if self.op.os_type is None:
9365 raise errors.OpPrereqError("No guest OS specified",
9367 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9368 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9369 " installation" % self.op.os_type,
9371 if self.op.disk_template is None:
9372 raise errors.OpPrereqError("No disk template specified",
9375 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9376 # Check handshake to ensure both clusters have the same domain secret
9377 src_handshake = self.op.source_handshake
9378 if not src_handshake:
9379 raise errors.OpPrereqError("Missing source handshake",
9382 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9383 src_handshake)
9384 if errmsg:
9385 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9388 # Load and check source CA
9389 self.source_x509_ca_pem = self.op.source_x509_ca
9390 if not self.source_x509_ca_pem:
9391 raise errors.OpPrereqError("Missing source X509 CA",
9394 try:
9395 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9396 self._cds)
9397 except OpenSSL.crypto.Error, err:
9398 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9399 (err, ), errors.ECODE_INVAL)
9401 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9402 if errcode is not None:
9403 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9406 self.source_x509_ca = cert
9408 src_instance_name = self.op.source_instance_name
9409 if not src_instance_name:
9410 raise errors.OpPrereqError("Missing source instance name",
9413 self.source_instance_name = \
9414 netutils.GetHostname(name=src_instance_name).name
9416 else:
9417 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9418 self.op.mode, errors.ECODE_INVAL)
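# CheckArguments dispatches on self.op.mode: plain creation requires an OS type
# and a disk template, import reuses most settings from the export
# (force_variant is implied), and remote import additionally verifies the
# inter-cluster handshake and the source X509 CA against the local cluster
# domain secret (self._cds).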
9420 def ExpandNames(self):
9421 """ExpandNames for CreateInstance.
9423 Figure out the right locks for instance creation.
9426 self.needed_locks = {}
9428 instance_name = self.op.instance_name
9429 # this is just a preventive check, but someone might still add this
9430 # instance in the meantime, and creation will fail at lock-add time
9431 if instance_name in self.cfg.GetInstanceList():
9432 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9433 instance_name, errors.ECODE_EXISTS)
9435 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9437 if self.op.iallocator:
9438 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9439 # specifying a group on instance creation and then selecting nodes from
9440 # that group
9441 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9442 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9443 else:
9444 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9445 nodelist = [self.op.pnode]
9446 if self.op.snode is not None:
9447 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9448 nodelist.append(self.op.snode)
9449 self.needed_locks[locking.LEVEL_NODE] = nodelist
9450 # Lock resources of instance's primary and secondary nodes (copy to
9451 # prevent accidential modification)
9452 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9454 # in case of import lock the source node too
9455 if self.op.mode == constants.INSTANCE_IMPORT:
9456 src_node = self.op.src_node
9457 src_path = self.op.src_path
9459 if src_path is None:
9460 self.op.src_path = src_path = self.op.instance_name
9462 if src_node is None:
9463 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9464 self.op.src_node = None
9465 if os.path.isabs(src_path):
9466 raise errors.OpPrereqError("Importing an instance from a path"
9467 " requires a source node option",
9470 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9471 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9472 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9473 if not os.path.isabs(src_path):
9474 self.op.src_path = src_path = \
9475 utils.PathJoin(constants.EXPORT_DIR, src_path)
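# Locking strategy: with an iallocator all node locks (and node-resource locks)
# are taken as ALL_SET because the target nodes are not known yet; with an
# explicit pnode/snode only those nodes are locked. For imports the source node
# is locked as well, unless it still has to be discovered, in which case
# ALL_SET is kept and _ReadExportInfo narrows it down later.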
9477 def _RunAllocator(self):
9478 """Run the allocator based on input opcode.
9481 #TODO Export network to iallocator so that it chooses a pnode
9482 # in a nodegroup that has the desired network connected to
9483 nics = [n.ToDict() for n in self.nics]
9484 ial = IAllocator(self.cfg, self.rpc,
9485 mode=constants.IALLOCATOR_MODE_ALLOC,
9486 name=self.op.instance_name,
9487 disk_template=self.op.disk_template,
9490 vcpus=self.be_full[constants.BE_VCPUS],
9491 memory=self.be_full[constants.BE_MAXMEM],
9492 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9495 hypervisor=self.op.hypervisor,
9498 ial.Run(self.op.iallocator)
9500 if not ial.success:
9501 raise errors.OpPrereqError("Can't compute nodes using"
9502 " iallocator '%s': %s" %
9503 (self.op.iallocator, ial.info),
9505 if len(ial.result) != ial.required_nodes:
9506 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9507 " of nodes (%s), required %s" %
9508 (self.op.iallocator, len(ial.result),
9509 ial.required_nodes), errors.ECODE_FAULT)
9510 self.op.pnode = ial.result[0]
9511 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9512 self.op.instance_name, self.op.iallocator,
9513 utils.CommaJoin(ial.result))
9514 if ial.required_nodes == 2:
9515 self.op.snode = ial.result[1]
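# The allocator result is copied back into the opcode (self.op.pnode and, for
# mirrored templates, self.op.snode), so the rest of CheckPrereq can treat
# iallocator-based and manually-placed instances identically.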
9517 def BuildHooksEnv(self):
9518 """Build hooks env.
9520 This runs on master, primary and secondary nodes of the instance.
9522 """
9523 env = {
9524 "ADD_MODE": self.op.mode,
9525 }
9526 if self.op.mode == constants.INSTANCE_IMPORT:
9527 env["SRC_NODE"] = self.op.src_node
9528 env["SRC_PATH"] = self.op.src_path
9529 env["SRC_IMAGES"] = self.src_images
9531 env.update(_BuildInstanceHookEnv(
9532 name=self.op.instance_name,
9533 primary_node=self.op.pnode,
9534 secondary_nodes=self.secondaries,
9535 status=self.op.start,
9536 os_type=self.op.os_type,
9537 minmem=self.be_full[constants.BE_MINMEM],
9538 maxmem=self.be_full[constants.BE_MAXMEM],
9539 vcpus=self.be_full[constants.BE_VCPUS],
9540 nics=_NICListToTuple(self, self.nics),
9541 disk_template=self.op.disk_template,
9542 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9543 for d in self.disks],
9544 bep=self.be_full,
9545 hvp=self.hv_full,
9546 hypervisor_name=self.op.hypervisor,
9547 tags=self.op.tags,
9548 ))
9550 return env
9552 def BuildHooksNodes(self):
9553 """Build hooks nodes.
9556 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9557 return nl, nl
9559 def _ReadExportInfo(self):
9560 """Reads the export information from disk.
9562 It will override the opcode source node and path with the actual
9563 information, if these two were not specified before.
9565 @return: the export information
9568 assert self.op.mode == constants.INSTANCE_IMPORT
9570 src_node = self.op.src_node
9571 src_path = self.op.src_path
9573 if src_node is None:
9574 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9575 exp_list = self.rpc.call_export_list(locked_nodes)
9576 found = False
9577 for node in exp_list:
9578 if exp_list[node].fail_msg:
9579 continue
9580 if src_path in exp_list[node].payload:
9581 found = True
9582 self.op.src_node = src_node = node
9583 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9584 src_path)
9585 break
9586 if not found:
9587 raise errors.OpPrereqError("No export found for relative path %s" %
9588 src_path, errors.ECODE_INVAL)
9590 _CheckNodeOnline(self, src_node)
9591 result = self.rpc.call_export_info(src_node, src_path)
9592 result.Raise("No export or invalid export found in dir %s" % src_path)
9594 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9595 if not export_info.has_section(constants.INISECT_EXP):
9596 raise errors.ProgrammerError("Corrupted export config",
9597 errors.ECODE_ENVIRON)
9599 ei_version = export_info.get(constants.INISECT_EXP, "version")
9600 if (int(ei_version) != constants.EXPORT_VERSION):
9601 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9602 (ei_version, constants.EXPORT_VERSION),
9603 errors.ECODE_ENVIRON)
9604 return export_info
9606 def _ReadExportParams(self, einfo):
9607 """Use export parameters as defaults.
9609 In case the opcode doesn't specify (as in override) some instance
9610 parameters, then try to use them from the export information, if
9614 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9616 if self.op.disk_template is None:
9617 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9618 self.op.disk_template = einfo.get(constants.INISECT_INS,
9619 "disk_template")
9620 if self.op.disk_template not in constants.DISK_TEMPLATES:
9621 raise errors.OpPrereqError("Disk template specified in configuration"
9622 " file is not one of the allowed values:"
9623 " %s" % " ".join(constants.DISK_TEMPLATES))
9624 else:
9625 raise errors.OpPrereqError("No disk template specified and the export"
9626 " is missing the disk_template information",
9629 if not self.op.disks:
9630 disks = []
9631 # TODO: import the disk iv_name too
9632 for idx in range(constants.MAX_DISKS):
9633 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9634 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9635 disks.append({constants.IDISK_SIZE: disk_sz})
9636 self.op.disks = disks
9637 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9638 raise errors.OpPrereqError("No disk info specified and the export"
9639 " is missing the disk information",
9642 if not self.op.nics:
9643 nics = []
9644 for idx in range(constants.MAX_NICS):
9645 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9646 ndict = {}
9647 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9648 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9649 ndict[name] = v
9650 nics.append(ndict)
9651 else:
9652 break
9653 self.op.nics = nics
9655 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9656 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9658 if (self.op.hypervisor is None and
9659 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9660 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9662 if einfo.has_section(constants.INISECT_HYP):
9663 # use the export parameters but do not override the ones
9664 # specified by the user
9665 for name, value in einfo.items(constants.INISECT_HYP):
9666 if name not in self.op.hvparams:
9667 self.op.hvparams[name] = value
9669 if einfo.has_section(constants.INISECT_BEP):
9670 # use the parameters, without overriding
9671 for name, value in einfo.items(constants.INISECT_BEP):
9672 if name not in self.op.beparams:
9673 self.op.beparams[name] = value
9674 # Compatibility for the old "memory" be param
9675 if name == constants.BE_MEMORY:
9676 if constants.BE_MAXMEM not in self.op.beparams:
9677 self.op.beparams[constants.BE_MAXMEM] = value
9678 if constants.BE_MINMEM not in self.op.beparams:
9679 self.op.beparams[constants.BE_MINMEM] = value
9680 else:
9681 # try to read the parameters old style, from the main section
9682 for name in constants.BES_PARAMETERS:
9683 if (name not in self.op.beparams and
9684 einfo.has_option(constants.INISECT_INS, name)):
9685 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9687 if einfo.has_section(constants.INISECT_OSP):
9688 # use the parameters, without overriding
9689 for name, value in einfo.items(constants.INISECT_OSP):
9690 if name not in self.op.osparams:
9691 self.op.osparams[name] = value
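# Precedence in _ReadExportParams is always "opcode wins": values from the
# export's hvparams/beparams/osparams sections are only applied when the
# corresponding key was not given in the opcode, and the legacy "memory"
# backend parameter is mapped onto maxmem/minmem for old exports.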
9693 def _RevertToDefaults(self, cluster):
9694 """Revert the instance parameters to the default values.
9698 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9699 for name in self.op.hvparams.keys():
9700 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9701 del self.op.hvparams[name]
9703 be_defs = cluster.SimpleFillBE({})
9704 for name in self.op.beparams.keys():
9705 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9706 del self.op.beparams[name]
9708 nic_defs = cluster.SimpleFillNIC({})
9709 for nic in self.op.nics:
9710 for name in constants.NICS_PARAMETERS:
9711 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9712 del nic[name]
9714 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9715 for name in self.op.osparams.keys():
9716 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9717 del self.op.osparams[name]
9719 def _CalculateFileStorageDir(self):
9720 """Calculate final instance file storage dir.
9723 # file storage dir calculation/check
9724 self.instance_file_storage_dir = None
9725 if self.op.disk_template in constants.DTS_FILEBASED:
9726 # build the full file storage dir path
9727 joinargs = []
9729 if self.op.disk_template == constants.DT_SHARED_FILE:
9730 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9731 else:
9732 get_fsd_fn = self.cfg.GetFileStorageDir
9734 cfg_storagedir = get_fsd_fn()
9735 if not cfg_storagedir:
9736 raise errors.OpPrereqError("Cluster file storage dir not defined")
9737 joinargs.append(cfg_storagedir)
9739 if self.op.file_storage_dir is not None:
9740 joinargs.append(self.op.file_storage_dir)
9742 joinargs.append(self.op.instance_name)
9744 # pylint: disable=W0142
9745 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9747 def CheckPrereq(self): # pylint: disable=R0914
9748 """Check prerequisites.
9751 self._CalculateFileStorageDir()
9753 if self.op.mode == constants.INSTANCE_IMPORT:
9754 export_info = self._ReadExportInfo()
9755 self._ReadExportParams(export_info)
9756 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9757 else:
9758 self._old_instance_name = None
9760 if (not self.cfg.GetVGName() and
9761 self.op.disk_template not in constants.DTS_NOT_LVM):
9762 raise errors.OpPrereqError("Cluster does not support lvm-based"
9763 " instances", errors.ECODE_STATE)
9765 if (self.op.hypervisor is None or
9766 self.op.hypervisor == constants.VALUE_AUTO):
9767 self.op.hypervisor = self.cfg.GetHypervisorType()
9769 cluster = self.cfg.GetClusterInfo()
9770 enabled_hvs = cluster.enabled_hypervisors
9771 if self.op.hypervisor not in enabled_hvs:
9772 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9773 " cluster (%s)" % (self.op.hypervisor,
9774 ",".join(enabled_hvs)),
9777 # Check tag validity
9778 for tag in self.op.tags:
9779 objects.TaggableObject.ValidateTag(tag)
9781 # check hypervisor parameter syntax (locally)
9782 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9783 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9784 self.op.hvparams)
9785 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9786 hv_type.CheckParameterSyntax(filled_hvp)
9787 self.hv_full = filled_hvp
9788 # check that we don't specify global parameters on an instance
9789 _CheckGlobalHvParams(self.op.hvparams)
9791 # fill and remember the beparams dict
9792 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9793 for param, value in self.op.beparams.iteritems():
9794 if value == constants.VALUE_AUTO:
9795 self.op.beparams[param] = default_beparams[param]
9796 objects.UpgradeBeParams(self.op.beparams)
9797 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9798 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9800 # build os parameters
9801 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9803 # now that hvp/bep are in final format, let's reset to defaults,
9804 # if requested
9805 if self.op.identify_defaults:
9806 self._RevertToDefaults(cluster)
9808 # NIC buildup
9809 self.nics = []
9810 for idx, nic in enumerate(self.op.nics):
9811 nic_mode_req = nic.get(constants.INIC_MODE, None)
9812 nic_mode = nic_mode_req
9813 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9814 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9816 net = nic.get(constants.INIC_NETWORK, None)
9817 link = nic.get(constants.NIC_LINK, None)
9818 ip = nic.get(constants.INIC_IP, None)
9820 if net is None or net.lower() == constants.VALUE_NONE:
9821 net = None
9822 else:
9823 if nic_mode_req is not None or link is not None:
9824 raise errors.OpPrereqError("If network is given, no mode or link"
9825 " is allowed to be passed",
9828 # ip validity checks
9829 if ip is None or ip.lower() == constants.VALUE_NONE:
9830 nic_ip = None
9831 elif ip.lower() == constants.VALUE_AUTO:
9832 if not self.op.name_check:
9833 raise errors.OpPrereqError("IP address set to auto but name checks"
9834 " have been skipped",
9835 errors.ECODE_INVAL)
9836 nic_ip = self.hostname1.ip
9837 else:
9838 # We defer pool operations until later, so that the iallocator has
9839 # filled in the instance's node(s)
9840 if ip.lower() == constants.NIC_IP_POOL:
9841 if not net:
9842 raise errors.OpPrereqError("if ip=pool, parameter network"
9843 " must be passed too",
9844 errors.ECODE_INVAL)
9846 elif not netutils.IPAddress.IsValid(ip):
9847 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9848 errors.ECODE_INVAL)
9850 nic_ip = ip
9852 # TODO: check the ip address for uniqueness
9853 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9854 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9857 # MAC address verification
9858 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9859 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9860 mac = utils.NormalizeAndValidateMac(mac)
9862 try:
9863 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9864 except errors.ReservationError:
9865 raise errors.OpPrereqError("MAC address %s already in use"
9866 " in cluster" % mac,
9867 errors.ECODE_NOTUNIQUE)
9869 # Build nic parameters
9870 nicparams = {}
9871 if nic_mode_req:
9872 nicparams[constants.NIC_MODE] = nic_mode
9873 if link:
9874 nicparams[constants.NIC_LINK] = link
9876 check_params = cluster.SimpleFillNIC(nicparams)
9877 objects.NIC.CheckParameterSyntax(check_params)
9878 self.nics.append(objects.NIC(mac=mac, ip=nic_ip,
9879 network=net, nicparams=check_params))
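# At this point self.nics holds fully-built objects.NIC instances with a
# validated MAC, an IP (possibly still the "pool" marker, resolved later once
# the primary node is known) and nicparams checked via
# objects.NIC.CheckParameterSyntax.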
9881 # disk checks/pre-build
9882 default_vg = self.cfg.GetVGName()
9883 self.disks = []
9884 for disk in self.op.disks:
9885 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9886 if mode not in constants.DISK_ACCESS_SET:
9887 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9888 mode, errors.ECODE_INVAL)
9889 size = disk.get(constants.IDISK_SIZE, None)
9890 if size is None:
9891 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9892 try:
9893 size = int(size)
9894 except (TypeError, ValueError):
9895 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9898 data_vg = disk.get(constants.IDISK_VG, default_vg)
9899 new_disk = {
9900 constants.IDISK_SIZE: size,
9901 constants.IDISK_MODE: mode,
9902 constants.IDISK_VG: data_vg,
9903 }
9904 if constants.IDISK_METAVG in disk:
9905 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9906 if constants.IDISK_ADOPT in disk:
9907 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9908 self.disks.append(new_disk)
9910 if self.op.mode == constants.INSTANCE_IMPORT:
9911 disk_images = []
9912 for idx in range(len(self.disks)):
9913 option = "disk%d_dump" % idx
9914 if export_info.has_option(constants.INISECT_INS, option):
9915 # FIXME: are the old os-es, disk sizes, etc. useful?
9916 export_name = export_info.get(constants.INISECT_INS, option)
9917 image = utils.PathJoin(self.op.src_path, export_name)
9918 disk_images.append(image)
9919 else:
9920 disk_images.append(False)
9922 self.src_images = disk_images
9924 if self.op.instance_name == self._old_instance_name:
9925 for idx, nic in enumerate(self.nics):
9926 if nic.mac == constants.VALUE_AUTO:
9927 nic_mac_ini = "nic%d_mac" % idx
9928 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9930 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9932 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9933 if self.op.ip_check:
9934 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9935 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9936 (self.check_ip, self.op.instance_name),
9937 errors.ECODE_NOTUNIQUE)
9939 #### mac address generation
9940 # By generating here the mac address both the allocator and the hooks get
9941 # the real final mac address rather than the 'auto' or 'generate' value.
9942 # There is a race condition between the generation and the instance object
9943 # creation, which means that we know the mac is valid now, but we're not
9944 # sure it will be when we actually add the instance. If things go bad
9945 # adding the instance will abort because of a duplicate mac, and the
9946 # creation job will fail.
9947 for nic in self.nics:
9948 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9949 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
9953 if self.op.iallocator is not None:
9954 self._RunAllocator()
9956 # Release all unneeded node locks
9957 _ReleaseLocks(self, locking.LEVEL_NODE,
9958 keep=filter(None, [self.op.pnode, self.op.snode,
9959 self.op.src_node]))
9960 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9961 keep=filter(None, [self.op.pnode, self.op.snode,
9962 self.op.src_node]))
9964 #### node related checks
9966 # check primary node
9967 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9968 assert self.pnode is not None, \
9969 "Cannot retrieve locked node %s" % self.op.pnode
9971 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9972 pnode.name, errors.ECODE_STATE)
9974 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9975 pnode.name, errors.ECODE_STATE)
9976 if not pnode.vm_capable:
9977 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9978 " '%s'" % pnode.name, errors.ECODE_STATE)
9980 self.secondaries = []
9982 # Fill in any IPs from IP pools. This must happen here, because we need to
9983 # know the nic's primary node, as specified by the iallocator
9984 for idx, nic in enumerate(self.nics):
9985 net = nic.network
9986 if net is not None:
9987 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
9988 if netparams is None:
9989 raise errors.OpPrereqError("No netparams found for network"
9990 " %s. Propably not connected to"
9991 " node's %s nodegroup" %
9992 (net, self.pnode.name),
9994 self.LogInfo("NIC/%d inherits netparams %s" %
9995 (idx, netparams.values()))
9996 nic.nicparams = dict(netparams)
9997 if nic.ip is not None:
9998 filled_params = cluster.SimpleFillNIC(nic.nicparams)
9999 if nic.ip.lower() == constants.NIC_IP_POOL:
10000 try:
10001 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10002 except errors.ReservationError:
10003 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10004 " from the address pool" % idx,
10005 errors.ECODE_STATE)
10006 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10009 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10010 except errors.ReservationError:
10011 raise errors.OpPrereqError("IP address %s already in use"
10012 " or does not belong to network %s" %
10014 errors.ECODE_NOTUNIQUE)
10016 # net is None, ip None or given
10017 if self.op.conflicts_check:
10018 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10021 # mirror node verification
10022 if self.op.disk_template in constants.DTS_INT_MIRROR:
10023 if self.op.snode == pnode.name:
10024 raise errors.OpPrereqError("The secondary node cannot be the"
10025 " primary node", errors.ECODE_INVAL)
10026 _CheckNodeOnline(self, self.op.snode)
10027 _CheckNodeNotDrained(self, self.op.snode)
10028 _CheckNodeVmCapable(self, self.op.snode)
10029 self.secondaries.append(self.op.snode)
10031 snode = self.cfg.GetNodeInfo(self.op.snode)
10032 if pnode.group != snode.group:
10033 self.LogWarning("The primary and secondary nodes are in two"
10034 " different node groups; the disk parameters"
10035 " from the first disk's node group will be"
10038 nodenames = [pnode.name] + self.secondaries
10040 # Verify instance specs
10041 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10042 ispec = {
10043 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10044 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10045 constants.ISPEC_DISK_COUNT: len(self.disks),
10046 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10047 constants.ISPEC_NIC_COUNT: len(self.nics),
10048 constants.ISPEC_SPINDLE_USE: spindle_use,
10049 }
10051 group_info = self.cfg.GetNodeGroup(pnode.group)
10052 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10053 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10054 if not self.op.ignore_ipolicy and res:
10055 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10056 " policy: %s") % (pnode.group,
10057 utils.CommaJoin(res)),
10058 errors.ECODE_INVAL)
10060 if not self.adopt_disks:
10061 if self.op.disk_template == constants.DT_RBD:
10062 # _CheckRADOSFreeSpace() is just a placeholder.
10063 # Any function that checks prerequisites can be placed here.
10064 # Check if there is enough space on the RADOS cluster.
10065 _CheckRADOSFreeSpace()
10067 # Check lv size requirements, if not adopting
10068 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10069 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10071 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10072 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10073 disk[constants.IDISK_ADOPT])
10074 for disk in self.disks])
10075 if len(all_lvs) != len(self.disks):
10076 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10077 errors.ECODE_INVAL)
10078 for lv_name in all_lvs:
10080 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10081 # to ReserveLV uses the same syntax
10082 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10083 except errors.ReservationError:
10084 raise errors.OpPrereqError("LV named %s used by another instance" %
10085 lv_name, errors.ECODE_NOTUNIQUE)
10087 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10088 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10090 node_lvs = self.rpc.call_lv_list([pnode.name],
10091 vg_names.payload.keys())[pnode.name]
10092 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10093 node_lvs = node_lvs.payload
10095 delta = all_lvs.difference(node_lvs.keys())
10096 if delta:
10097 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10098 utils.CommaJoin(delta),
10099 errors.ECODE_INVAL)
10100 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10101 if online_lvs:
10102 raise errors.OpPrereqError("Online logical volumes found, cannot"
10103 " adopt: %s" % utils.CommaJoin(online_lvs),
10104 errors.ECODE_STATE)
10105 # update the size of disk based on what is found
10106 for dsk in self.disks:
10107 dsk[constants.IDISK_SIZE] = \
10108 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10109 dsk[constants.IDISK_ADOPT])][0]))
10111 elif self.op.disk_template == constants.DT_BLOCK:
10112 # Normalize and de-duplicate device paths
10113 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10114 for disk in self.disks])
10115 if len(all_disks) != len(self.disks):
10116 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10117 errors.ECODE_INVAL)
10118 baddisks = [d for d in all_disks
10119 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10120 if baddisks:
10121 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10122 " cannot be adopted" %
10123 (", ".join(baddisks),
10124 constants.ADOPTABLE_BLOCKDEV_ROOT),
10125 errors.ECODE_INVAL)
10127 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10128 list(all_disks))[pnode.name]
10129 node_disks.Raise("Cannot get block device information from node %s" %
10131 node_disks = node_disks.payload
10132 delta = all_disks.difference(node_disks.keys())
10134 raise errors.OpPrereqError("Missing block device(s): %s" %
10135 utils.CommaJoin(delta),
10136 errors.ECODE_INVAL)
10137 for dsk in self.disks:
10138 dsk[constants.IDISK_SIZE] = \
10139 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10141 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10143 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10144 # check OS parameters (remotely)
10145 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10147 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10149 # memory check on primary node
10150 #TODO(dynmem): use MINMEM for checking
10151 if self.op.start:
10152 _CheckNodeFreeMemory(self, self.pnode.name,
10153 "creating instance %s" % self.op.instance_name,
10154 self.be_full[constants.BE_MAXMEM],
10155 self.op.hypervisor)
10157 self.dry_run_result = list(nodenames)
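# The final checks above are all remote: hypervisor and OS parameters are
# validated on the target nodes, NIC bridges are checked on the primary node,
# and free memory is verified against BE_MAXMEM (MINMEM-based checking is
# still a TODO, see above).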
10159 def Exec(self, feedback_fn):
10160 """Create and add the instance to the cluster.
10163 instance = self.op.instance_name
10164 pnode_name = self.pnode.name
10166 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10167 self.owned_locks(locking.LEVEL_NODE)), \
10168 "Node locks differ from node resource locks"
10170 ht_kind = self.op.hypervisor
10171 if ht_kind in constants.HTS_REQ_PORT:
10172 network_port = self.cfg.AllocatePort()
10173 else:
10174 network_port = None
10176 # This is ugly but we got a chicken-egg problem here
10177 # We can only take the group disk parameters, as the instance
10178 # has no disks yet (we are generating them right here).
10179 node = self.cfg.GetNodeInfo(pnode_name)
10180 nodegroup = self.cfg.GetNodeGroup(node.group)
10181 disks = _GenerateDiskTemplate(self,
10182 self.op.disk_template,
10183 instance, pnode_name,
10184 self.secondaries,
10185 self.disks,
10186 self.instance_file_storage_dir,
10187 self.op.file_driver,
10188 0,
10189 feedback_fn,
10190 self.cfg.GetGroupDiskParams(nodegroup))
10192 iobj = objects.Instance(name=instance, os=self.op.os_type,
10193 primary_node=pnode_name,
10194 nics=self.nics, disks=disks,
10195 disk_template=self.op.disk_template,
10196 admin_state=constants.ADMINST_DOWN,
10197 network_port=network_port,
10198 beparams=self.op.beparams,
10199 hvparams=self.op.hvparams,
10200 hypervisor=self.op.hypervisor,
10201 osparams=self.op.osparams,
10202 )
10204 if self.op.tags:
10205 for tag in self.op.tags:
10206 iobj.AddTag(tag)
10208 if self.adopt_disks:
10209 if self.op.disk_template == constants.DT_PLAIN:
10210 # rename LVs to the newly-generated names; we need to construct
10211 # 'fake' LV disks with the old data, plus the new unique_id
10212 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10213 rename_to = []
10214 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10215 rename_to.append(t_dsk.logical_id)
10216 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10217 self.cfg.SetDiskID(t_dsk, pnode_name)
10218 result = self.rpc.call_blockdev_rename(pnode_name,
10219 zip(tmp_disks, rename_to))
10220 result.Raise("Failed to rename adoped LVs")
10222 feedback_fn("* creating instance disks...")
10224 _CreateDisks(self, iobj)
10225 except errors.OpExecError:
10226 self.LogWarning("Device creation failed, reverting...")
10228 _RemoveDisks(self, iobj)
10230 self.cfg.ReleaseDRBDMinors(instance)
10233 feedback_fn("adding instance %s to cluster config" % instance)
10235 self.cfg.AddInstance(iobj, self.proc.GetECId())
10237 # Declare that we don't want to remove the instance lock anymore, as we've
10238 # added the instance to the config
10239 del self.remove_locks[locking.LEVEL_INSTANCE]
10241 if self.op.mode == constants.INSTANCE_IMPORT:
10242 # Release unused nodes
10243 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10244 else:
10245 # Release all nodes
10246 _ReleaseLocks(self, locking.LEVEL_NODE)
10248 disk_abort = False
10249 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10250 feedback_fn("* wiping instance disks...")
10252 _WipeDisks(self, iobj)
10253 except errors.OpExecError, err:
10254 logging.exception("Wiping disks failed")
10255 self.LogWarning("Wiping instance disks failed (%s)", err)
10259 # Something is already wrong with the disks, don't do anything else
10261 elif self.op.wait_for_sync:
10262 disk_abort = not _WaitForSync(self, iobj)
10263 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10264 # make sure the disks are not degraded (still sync-ing is ok)
10265 feedback_fn("* checking mirrors status")
10266 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10267 else:
10268 disk_abort = False
10270 if disk_abort:
10271 _RemoveDisks(self, iobj)
10272 self.cfg.RemoveInstance(iobj.name)
10273 # Make sure the instance lock gets removed
10274 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10275 raise errors.OpExecError("There are some degraded disks for"
10278 # Release all node resource locks
10279 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10281 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10282 # we need to set the disks ID to the primary node, since the
10283 # preceding code might or might have not done it, depending on
10284 # disk template and other options
10285 for disk in iobj.disks:
10286 self.cfg.SetDiskID(disk, pnode_name)
10287 if self.op.mode == constants.INSTANCE_CREATE:
10288 if not self.op.no_install:
10289 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10290 not self.op.wait_for_sync)
10291 if pause_sync:
10292 feedback_fn("* pausing disk sync to install instance OS")
10293 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10294 (iobj.disks,
10295 iobj), True)
10296 for idx, success in enumerate(result.payload):
10297 if not success:
10298 logging.warn("pause-sync of instance %s for disk %d failed",
10299 instance, idx)
10301 feedback_fn("* running the instance OS create scripts...")
10302 # FIXME: pass debug option from opcode to backend
10303 os_add_result = \
10304 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10305 self.op.debug_level)
10306 if pause_sync:
10307 feedback_fn("* resuming disk sync")
10308 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10309 (iobj.disks,
10310 iobj), False)
10311 for idx, success in enumerate(result.payload):
10312 if not success:
10313 logging.warn("resume-sync of instance %s for disk %d failed",
10314 instance, idx)
10316 os_add_result.Raise("Could not add os for instance %s"
10317 " on node %s" % (instance, pnode_name))
10320 elif self.op.mode == constants.INSTANCE_IMPORT:
10321 feedback_fn("* running the instance OS import scripts...")
10325 for idx, image in enumerate(self.src_images):
10329 # FIXME: pass debug option from opcode to backend
10330 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10331 constants.IEIO_FILE, (image, ),
10332 constants.IEIO_SCRIPT,
10333 (iobj.disks[idx], idx),
10335 transfers.append(dt)
10337 import_result = \
10338 masterd.instance.TransferInstanceData(self, feedback_fn,
10339 self.op.src_node, pnode_name,
10340 self.pnode.secondary_ip,
10341 iobj, transfers)
10342 if not compat.all(import_result):
10343 self.LogWarning("Some disks for instance %s on node %s were not"
10344 " imported successfully" % (instance, pnode_name))
10346 rename_from = self._old_instance_name
10348 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10349 feedback_fn("* preparing remote import...")
10350 # The source cluster will stop the instance before attempting to make
10351 # a connection. In some cases stopping an instance can take a long
10352 # time, hence the shutdown timeout is added to the connection
10354 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10355 self.op.source_shutdown_timeout)
10356 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10358 assert iobj.primary_node == self.pnode.name
10359 disk_results = \
10360 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10361 self.source_x509_ca,
10362 self._cds, timeouts)
10363 if not compat.all(disk_results):
10364 # TODO: Should the instance still be started, even if some disks
10365 # failed to import (valid for local imports, too)?
10366 self.LogWarning("Some disks for instance %s on node %s were not"
10367 " imported successfully" % (instance, pnode_name))
10369 rename_from = self.source_instance_name
10371 else:
10372 # also checked in the prereq part
10373 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10374 % self.op.mode)
10376 # Run rename script on newly imported instance
10377 assert iobj.name == instance
10378 feedback_fn("Running rename script for %s" % instance)
10379 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10380 rename_from,
10381 self.op.debug_level)
10382 if result.fail_msg:
10383 self.LogWarning("Failed to run rename script for %s on node"
10384 " %s: %s" % (instance, pnode_name, result.fail_msg))
10386 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10388 if self.op.start:
10389 iobj.admin_state = constants.ADMINST_UP
10390 self.cfg.Update(iobj, feedback_fn)
10391 logging.info("Starting instance %s on node %s", instance, pnode_name)
10392 feedback_fn("* starting instance...")
10393 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10394 False)
10395 result.Raise("Could not start instance")
10397 return list(iobj.all_nodes)
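# Exec order for instance creation: generate/adopt and create the disks, add
# the instance to the configuration, optionally wipe the disks and wait for
# them to sync, then either run the OS create scripts or import the disk data
# (local or remote import), run the rename script for imports, and finally
# start the instance if requested.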
10400 def _CheckRADOSFreeSpace():
10401 """Compute disk size requirements inside the RADOS cluster.
10404 # For the RADOS cluster we assume there is always enough space.
10405 pass
10408 class LUInstanceConsole(NoHooksLU):
10409 """Connect to an instance's console.
10411 This is somewhat special in that it returns the command line that
10412 you need to run on the master node in order to connect to the
10413 console.
10415 """
10416 REQ_BGL = False
10418 def ExpandNames(self):
10419 self.share_locks = _ShareAll()
10420 self._ExpandAndLockInstance()
10422 def CheckPrereq(self):
10423 """Check prerequisites.
10425 This checks that the instance is in the cluster.
10428 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10429 assert self.instance is not None, \
10430 "Cannot retrieve locked instance %s" % self.op.instance_name
10431 _CheckNodeOnline(self, self.instance.primary_node)
10433 def Exec(self, feedback_fn):
10434 """Connect to the console of an instance
10437 instance = self.instance
10438 node = instance.primary_node
10440 node_insts = self.rpc.call_instance_list([node],
10441 [instance.hypervisor])[node]
10442 node_insts.Raise("Can't get node information from %s" % node)
10444 if instance.name not in node_insts.payload:
10445 if instance.admin_state == constants.ADMINST_UP:
10446 state = constants.INSTST_ERRORDOWN
10447 elif instance.admin_state == constants.ADMINST_DOWN:
10448 state = constants.INSTST_ADMINDOWN
10449 else:
10450 state = constants.INSTST_ADMINOFFLINE
10451 raise errors.OpExecError("Instance %s is not running (state %s)" %
10452 (instance.name, state))
10454 logging.debug("Connecting to console of %s on %s", instance.name, node)
10456 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10459 def _GetInstanceConsole(cluster, instance):
10460 """Returns console information for an instance.
10462 @type cluster: L{objects.Cluster}
10463 @type instance: L{objects.Instance}
10467 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10468 # beparams and hvparams are passed separately, to avoid editing the
10469 # instance and then saving the defaults in the instance itself.
10470 hvparams = cluster.FillHV(instance)
10471 beparams = cluster.FillBE(instance)
10472 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10474 assert console.instance == instance.name
10475 assert console.Validate()
10477 return console.ToDict()
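# LUInstanceReplaceDisks below only validates arguments and sets up locking;
# the actual disk replacement logic lives in the TLReplaceDisks tasklet, which
# it registers in self.tasklets.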
10480 class LUInstanceReplaceDisks(LogicalUnit):
10481 """Replace the disks of an instance.
10484 HPATH = "mirrors-replace"
10485 HTYPE = constants.HTYPE_INSTANCE
10486 REQ_BGL = False
10488 def CheckArguments(self):
10489 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10490 self.op.iallocator)
10492 def ExpandNames(self):
10493 self._ExpandAndLockInstance()
10495 assert locking.LEVEL_NODE not in self.needed_locks
10496 assert locking.LEVEL_NODE_RES not in self.needed_locks
10497 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10499 assert self.op.iallocator is None or self.op.remote_node is None, \
10500 "Conflicting options"
10502 if self.op.remote_node is not None:
10503 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10505 # Warning: do not remove the locking of the new secondary here
10506 # unless DRBD8.AddChildren is changed to work in parallel;
10507 # currently it doesn't since parallel invocations of
10508 # FindUnusedMinor will conflict
10509 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10510 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10511 else:
10512 self.needed_locks[locking.LEVEL_NODE] = []
10513 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10515 if self.op.iallocator is not None:
10516 # iallocator will select a new node in the same group
10517 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10519 self.needed_locks[locking.LEVEL_NODE_RES] = []
10521 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10522 self.op.iallocator, self.op.remote_node,
10523 self.op.disks, False, self.op.early_release,
10524 self.op.ignore_ipolicy)
10526 self.tasklets = [self.replacer]
10528 def DeclareLocks(self, level):
10529 if level == locking.LEVEL_NODEGROUP:
10530 assert self.op.remote_node is None
10531 assert self.op.iallocator is not None
10532 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10534 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10535 # Lock all groups used by instance optimistically; this requires going
10536 # via the node before it's locked, requiring verification later on
10537 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10538 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10540 elif level == locking.LEVEL_NODE:
10541 if self.op.iallocator is not None:
10542 assert self.op.remote_node is None
10543 assert not self.needed_locks[locking.LEVEL_NODE]
10545 # Lock member nodes of all locked groups
10546 self.needed_locks[locking.LEVEL_NODE] = [node_name
10547 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10548 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10549 else:
10550 self._LockInstancesNodes()
10551 elif level == locking.LEVEL_NODE_RES:
10553 self.needed_locks[locking.LEVEL_NODE_RES] = \
10554 self.needed_locks[locking.LEVEL_NODE]
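# DeclareLocks refines the lock sets per level: with an iallocator the node
# groups of the instance are locked optimistically first and then all their
# member nodes, otherwise only the instance's own nodes are locked; node
# resource locks always mirror the node locks.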
10556 def BuildHooksEnv(self):
10557 """Build hooks env.
10559 This runs on the master, the primary and all the secondaries.
10562 instance = self.replacer.instance
10563 env = {
10564 "MODE": self.op.mode,
10565 "NEW_SECONDARY": self.op.remote_node,
10566 "OLD_SECONDARY": instance.secondary_nodes[0],
10567 }
10568 env.update(_BuildInstanceHookEnvByObject(self, instance))
10570 return env
10571 def BuildHooksNodes(self):
10572 """Build hooks nodes.
10575 instance = self.replacer.instance
10576 nl = [
10577 self.cfg.GetMasterNode(),
10578 instance.primary_node,
10579 ]
10580 if self.op.remote_node is not None:
10581 nl.append(self.op.remote_node)
10582 return nl, nl
10584 def CheckPrereq(self):
10585 """Check prerequisites.
10588 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10589 self.op.iallocator is None)
10591 # Verify if node group locks are still correct
10592 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10593 if owned_groups:
10594 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10596 return LogicalUnit.CheckPrereq(self)
10599 class TLReplaceDisks(Tasklet):
10600 """Replaces disks for an instance.
10602 Note: Locking is not within the scope of this class.
10605 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10606 disks, delay_iallocator, early_release, ignore_ipolicy):
10607 """Initializes this class.
10610 Tasklet.__init__(self, lu)
10612 # Parameters
10613 self.instance_name = instance_name
10614 self.mode = mode
10615 self.iallocator_name = iallocator_name
10616 self.remote_node = remote_node
10617 self.disks = disks
10618 self.delay_iallocator = delay_iallocator
10619 self.early_release = early_release
10620 self.ignore_ipolicy = ignore_ipolicy
10622 # Runtime data
10623 self.instance = None
10624 self.new_node = None
10625 self.target_node = None
10626 self.other_node = None
10627 self.remote_node_info = None
10628 self.node_secondary_ip = None
10630 @staticmethod
10631 def CheckArguments(mode, remote_node, iallocator):
10632 """Helper function for users of this class.
10635 # check for valid parameter combination
10636 if mode == constants.REPLACE_DISK_CHG:
10637 if remote_node is None and iallocator is None:
10638 raise errors.OpPrereqError("When changing the secondary either an"
10639 " iallocator script must be used or the"
10640 " new node given", errors.ECODE_INVAL)
10642 if remote_node is not None and iallocator is not None:
10643 raise errors.OpPrereqError("Give either the iallocator or the new"
10644 " secondary, not both", errors.ECODE_INVAL)
10646 elif remote_node is not None or iallocator is not None:
10647 # Not replacing the secondary
10648 raise errors.OpPrereqError("The iallocator and new node options can"
10649 " only be used when changing the"
10650 " secondary node", errors.ECODE_INVAL)
10652 @staticmethod
10653 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10654 """Compute a new secondary node using an IAllocator.
10657 ial = IAllocator(lu.cfg, lu.rpc,
10658 mode=constants.IALLOCATOR_MODE_RELOC,
10659 name=instance_name,
10660 relocate_from=list(relocate_from))
10662 ial.Run(iallocator_name)
10664 if not ial.success:
10665 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10666 " %s" % (iallocator_name, ial.info),
10667 errors.ECODE_NORES)
10669 if len(ial.result) != ial.required_nodes:
10670 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10671 " of nodes (%s), required %s" %
10673 len(ial.result), ial.required_nodes),
10674 errors.ECODE_FAULT)
10676 remote_node_name = ial.result[0]
10678 lu.LogInfo("Selected new secondary for instance '%s': %s",
10679 instance_name, remote_node_name)
10681 return remote_node_name
10683 def _FindFaultyDisks(self, node_name):
10684 """Wrapper for L{_FindFaultyInstanceDisks}.
10687 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10688 node_name, True)
10690 def _CheckDisksActivated(self, instance):
10691 """Checks if the instance disks are activated.
10693 @param instance: The instance to check disks
10694 @return: True if they are activated, False otherwise
10697 nodes = instance.all_nodes
10699 for idx, dev in enumerate(instance.disks):
10700 for node in nodes:
10701 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10702 self.cfg.SetDiskID(dev, node)
10704 result = _BlockdevFind(self, node, dev, instance)
10706 if result.offline:
10707 continue
10708 elif result.fail_msg or not result.payload:
10709 return False
10711 return True
10713 def CheckPrereq(self):
10714 """Check prerequisites.
10716 This checks that the instance is in the cluster.
10719 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10720 assert instance is not None, \
10721 "Cannot retrieve locked instance %s" % self.instance_name
10723 if instance.disk_template != constants.DT_DRBD8:
10724 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10725 " instances", errors.ECODE_INVAL)
10727 if len(instance.secondary_nodes) != 1:
10728 raise errors.OpPrereqError("The instance has a strange layout,"
10729 " expected one secondary but found %d" %
10730 len(instance.secondary_nodes),
10731 errors.ECODE_FAULT)
10733 if not self.delay_iallocator:
10734 self._CheckPrereq2()
10736 def _CheckPrereq2(self):
10737 """Check prerequisites, second part.
10739 This function should always be part of CheckPrereq. It was separated and is
10740 now called from Exec because during node evacuation iallocator was only
10741 called with an unmodified cluster model, not taking planned changes into
10745 instance = self.instance
10746 secondary_node = instance.secondary_nodes[0]
10748 if self.iallocator_name is None:
10749 remote_node = self.remote_node
10750 else:
10751 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10752 instance.name, instance.secondary_nodes)
10754 if remote_node is None:
10755 self.remote_node_info = None
10756 else:
10757 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10758 "Remote node '%s' is not locked" % remote_node
10760 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10761 assert self.remote_node_info is not None, \
10762 "Cannot retrieve locked node %s" % remote_node
10764 if remote_node == self.instance.primary_node:
10765 raise errors.OpPrereqError("The specified node is the primary node of"
10766 " the instance", errors.ECODE_INVAL)
10768 if remote_node == secondary_node:
10769 raise errors.OpPrereqError("The specified node is already the"
10770 " secondary node of the instance",
10771 errors.ECODE_INVAL)
10773 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10774 constants.REPLACE_DISK_CHG):
10775 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10776 errors.ECODE_INVAL)
10778 if self.mode == constants.REPLACE_DISK_AUTO:
10779 if not self._CheckDisksActivated(instance):
10780 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10781 " first" % self.instance_name,
10782 errors.ECODE_STATE)
10783 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10784 faulty_secondary = self._FindFaultyDisks(secondary_node)
10786 if faulty_primary and faulty_secondary:
10787 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10788 " one node and can not be repaired"
10789 " automatically" % self.instance_name,
10790 errors.ECODE_STATE)
10792 if faulty_primary:
10793 self.disks = faulty_primary
10794 self.target_node = instance.primary_node
10795 self.other_node = secondary_node
10796 check_nodes = [self.target_node, self.other_node]
10797 elif faulty_secondary:
10798 self.disks = faulty_secondary
10799 self.target_node = secondary_node
10800 self.other_node = instance.primary_node
10801 check_nodes = [self.target_node, self.other_node]
10802 else:
10803 self.disks = []
10804 check_nodes = []
10806 else:
10807 # Non-automatic modes
10808 if self.mode == constants.REPLACE_DISK_PRI:
10809 self.target_node = instance.primary_node
10810 self.other_node = secondary_node
10811 check_nodes = [self.target_node, self.other_node]
10813 elif self.mode == constants.REPLACE_DISK_SEC:
10814 self.target_node = secondary_node
10815 self.other_node = instance.primary_node
10816 check_nodes = [self.target_node, self.other_node]
10818 elif self.mode == constants.REPLACE_DISK_CHG:
10819 self.new_node = remote_node
10820 self.other_node = instance.primary_node
10821 self.target_node = secondary_node
10822 check_nodes = [self.new_node, self.other_node]
10824 _CheckNodeNotDrained(self.lu, remote_node)
10825 _CheckNodeVmCapable(self.lu, remote_node)
10827 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10828 assert old_node_info is not None
10829 if old_node_info.offline and not self.early_release:
10830 # doesn't make sense to delay the release
10831 self.early_release = True
10832 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10833 " early-release mode", secondary_node)
10836 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10839 # If not specified all disks should be replaced
10841 self.disks = range(len(self.instance.disks))
10843 # TODO: This is ugly, but right now we can't distinguish between internal
10844 # submitted opcode and external one. We should fix that.
10845 if self.remote_node_info:
10846 # We change the node, lets verify it still meets instance policy
10847 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10848 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10849 new_group_info)
10850 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10851 ignore=self.ignore_ipolicy)
10853 for node in check_nodes:
10854 _CheckNodeOnline(self.lu, node)
10856 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10857 self.other_node,
10858 self.target_node]
10859 if node_name is not None)
10861 # Release unneeded node and node resource locks
10862 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10863 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10865 # Release any owned node group
10866 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10867 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10869 # Check whether disks are valid
10870 for disk_idx in self.disks:
10871 instance.FindDisk(disk_idx)
10873 # Get secondary node IP addresses
10874 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10875 in self.cfg.GetMultiNodeInfo(touched_nodes))
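# By the end of _CheckPrereq2 the tasklet knows target_node/other_node (and
# new_node when the secondary is being changed), has narrowed the node and
# node-resource locks down to the touched nodes, and has cached their
# secondary IPs for the DRBD operations that follow.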
10877 def Exec(self, feedback_fn):
10878 """Execute disk replacement.
10880 This dispatches the disk replacement to the appropriate handler.
10883 if self.delay_iallocator:
10884 self._CheckPrereq2()
10887 # Verify owned locks before starting operation
10888 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10889 assert set(owned_nodes) == set(self.node_secondary_ip), \
10890 ("Incorrect node locks, owning %s, expected %s" %
10891 (owned_nodes, self.node_secondary_ip.keys()))
10892 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10893 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10895 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10896 assert list(owned_instances) == [self.instance_name], \
10897 "Instance '%s' not locked" % self.instance_name
10899 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10900 "Should not own any node group lock at this point"
10903 feedback_fn("No disks need replacement")
10906 feedback_fn("Replacing disk(s) %s for %s" %
10907 (utils.CommaJoin(self.disks), self.instance.name))
10909 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10911 # Activate the instance disks if we're replacing them on a down instance
10912 if activate_disks:
10913 _StartInstanceDisks(self.lu, self.instance, True)
10915 try:
10916 # Should we replace the secondary node?
10917 if self.new_node is not None:
10918 fn = self._ExecDrbd8Secondary
10920 fn = self._ExecDrbd8DiskOnly
10922 result = fn(feedback_fn)
10923 finally:
10924 # Deactivate the instance disks if we're replacing them on a
10925 # down instance
10926 if activate_disks:
10927 _SafeShutdownInstanceDisks(self.lu, self.instance)
10929 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10931 if __debug__:
10932 # Verify owned locks
10933 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10934 nodes = frozenset(self.node_secondary_ip)
10935 assert ((self.early_release and not owned_nodes) or
10936 (not self.early_release and not (set(owned_nodes) - nodes))), \
10937 ("Not owning the correct locks, early_release=%s, owned=%r,"
10938 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10942 def _CheckVolumeGroup(self, nodes):
10943 self.lu.LogInfo("Checking volume groups")
10945 vgname = self.cfg.GetVGName()
10947 # Make sure volume group exists on all involved nodes
10948 results = self.rpc.call_vg_list(nodes)
10949 if not results:
10950 raise errors.OpExecError("Can't list volume groups on the nodes")
10952 for node in nodes:
10953 res = results[node]
10954 res.Raise("Error checking node %s" % node)
10955 if vgname not in res.payload:
10956 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10959 def _CheckDisksExistence(self, nodes):
10960 # Check disk existence
10961 for idx, dev in enumerate(self.instance.disks):
10962 if idx not in self.disks:
10963 continue
10965 for node in nodes:
10966 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10967 self.cfg.SetDiskID(dev, node)
10969 result = _BlockdevFind(self, node, dev, self.instance)
10971 msg = result.fail_msg
10972 if msg or not result.payload:
10973 if not msg:
10974 msg = "disk not found"
10975 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10976 (idx, node, msg))
10978 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10979 for idx, dev in enumerate(self.instance.disks):
10980 if idx not in self.disks:
10981 continue
10983 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10984 (idx, node_name))
10986 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10987 on_primary, ldisk=ldisk):
10988 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10989 " replace disks for instance %s" %
10990 (node_name, self.instance.name))
10992 def _CreateNewStorage(self, node_name):
10993 """Create new storage on the primary or secondary node.
10995 This is only used for same-node replaces, not for changing the
10996 secondary node, hence we don't want to modify the existing disk.
10998 """
10999 iv_names = {}
11001 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11002 for idx, dev in enumerate(disks):
11003 if idx not in self.disks:
11004 continue
11006 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11008 self.cfg.SetDiskID(dev, node_name)
11010 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11011 names = _GenerateUniqueNames(self.lu, lv_names)
11013 (data_disk, meta_disk) = dev.children
11014 vg_data = data_disk.logical_id[0]
11015 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11016 logical_id=(vg_data, names[0]),
11017 params=data_disk.params)
11018 vg_meta = meta_disk.logical_id[0]
11019 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
11020 logical_id=(vg_meta, names[1]),
11021 params=meta_disk.params)
11023 new_lvs = [lv_data, lv_meta]
11024 old_lvs = [child.Copy() for child in dev.children]
11025 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11027 # we pass force_create=True to force the LVM creation
11028 for new_lv in new_lvs:
11029 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11030 _GetInstanceInfoText(self.instance), False)
11032 return iv_names
11034 def _CheckDevices(self, node_name, iv_names):
11035 for name, (dev, _, _) in iv_names.iteritems():
11036 self.cfg.SetDiskID(dev, node_name)
11038 result = _BlockdevFind(self, node_name, dev, self.instance)
11040 msg = result.fail_msg
11041 if msg or not result.payload:
11043 msg = "disk not found"
11044 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11047 if result.payload.is_degraded:
11048 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11050 def _RemoveOldStorage(self, node_name, iv_names):
11051 for name, (_, old_lvs, _) in iv_names.iteritems():
11052 self.lu.LogInfo("Remove logical volumes for %s" % name)
11055 self.cfg.SetDiskID(lv, node_name)
11057 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11059 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11060 hint="remove unused LVs manually")
11062 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11063 """Replace a disk on the primary or secondary for DRBD 8.
11065 The algorithm for replace is quite complicated:
11067 1. for each disk to be replaced:
11069 1. create new LVs on the target node with unique names
11070 1. detach old LVs from the drbd device
11071 1. rename old LVs to name_replaced.<time_t>
11072 1. rename new LVs to old LVs
11073 1. attach the new LVs (with the old names now) to the drbd device
11075 1. wait for sync across all devices
11077 1. for each modified disk:
11079 1. remove old LVs (which have the name name_replaced.<time_t>)
11081 Failures are not very well handled.
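# Illustrative sketch of the detach/rename/attach dance described above, for
# a single hypothetical disk (LV names are invented, helpers are pseudocode):
#
#   old_lv = "xenvg/uuid1.disk0_data"       # currently attached to the DRBD
#   new_lv = "xenvg/uuid2.disk0_data"       # freshly created in step 3
#   temp   = old_lv + "_replaced-<time_t>"
#
#   detach(drbd, old_lv)                    # call_blockdev_removechildren
#   rename(old_lv, temp)                    # park the old data
#   rename(new_lv, old_lv)                  # new LV takes over the old name
#   attach(drbd, old_lv)                    # call_blockdev_addchildren
#   wait_for_sync(); remove(temp)           # final steps of the algorithm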
11086 # Step: check device activation
11087 self.lu.LogStep(1, steps_total, "Check device existence")
11088 self._CheckDisksExistence([self.other_node, self.target_node])
11089 self._CheckVolumeGroup([self.target_node, self.other_node])
11091 # Step: check other node consistency
11092 self.lu.LogStep(2, steps_total, "Check peer consistency")
11093 self._CheckDisksConsistency(self.other_node,
11094 self.other_node == self.instance.primary_node,
11097 # Step: create new storage
11098 self.lu.LogStep(3, steps_total, "Allocate new storage")
11099 iv_names = self._CreateNewStorage(self.target_node)
11101 # Step: for each lv, detach+rename*2+attach
11102 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11103 for dev, old_lvs, new_lvs in iv_names.itervalues():
11104 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11106 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11108 result.Raise("Can't detach drbd from local storage on node"
11109 " %s for device %s" % (self.target_node, dev.iv_name))
11111 #cfg.Update(instance)
11113 # ok, we created the new LVs, so now we know we have the needed
11114 # storage; as such, we proceed on the target node to rename
11115 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11116 # using the assumption that logical_id == physical_id (which in
11117 # turn is the unique_id on that node)
11119 # FIXME(iustin): use a better name for the replaced LVs
11120 temp_suffix = int(time.time())
11121 ren_fn = lambda d, suff: (d.physical_id[0],
11122 d.physical_id[1] + "_replaced-%s" % suff)
11124 # Build the rename list based on what LVs exist on the node
11125 rename_old_to_new = []
11126 for to_ren in old_lvs:
11127 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11128 if not result.fail_msg and result.payload:
11130 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11132 self.lu.LogInfo("Renaming the old LVs on the target node")
11133 result = self.rpc.call_blockdev_rename(self.target_node,
11135 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11137 # Now we rename the new LVs to the old LVs
11138 self.lu.LogInfo("Renaming the new LVs on the target node")
11139 rename_new_to_old = [(new, old.physical_id)
11140 for old, new in zip(old_lvs, new_lvs)]
11141 result = self.rpc.call_blockdev_rename(self.target_node,
11143 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11145 # Intermediate steps of in memory modifications
11146 for old, new in zip(old_lvs, new_lvs):
11147 new.logical_id = old.logical_id
11148 self.cfg.SetDiskID(new, self.target_node)
11150 # We need to modify old_lvs so that removal later removes the
11151 # right LVs, not the newly added ones; note that old_lvs is a copy here
11153 for disk in old_lvs:
11154 disk.logical_id = ren_fn(disk, temp_suffix)
11155 self.cfg.SetDiskID(disk, self.target_node)
11157 # Now that the new lvs have the old name, we can add them to the device
11158 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11159 result = self.rpc.call_blockdev_addchildren(self.target_node,
11160 (dev, self.instance), new_lvs)
11161 msg = result.fail_msg
11163 for new_lv in new_lvs:
11164 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11167 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11168 hint=("cleanup manually the unused logical"
11170 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11172 cstep = itertools.count(5)
11174 if self.early_release:
11175 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11176 self._RemoveOldStorage(self.target_node, iv_names)
11177 # TODO: Check if releasing locks early still makes sense
11178 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11180 # Release all resource locks except those used by the instance
11181 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11182 keep=self.node_secondary_ip.keys())
11184 # Release all node locks while waiting for sync
11185 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11187 # TODO: Can the instance lock be downgraded here? Take the optional disk
11188 # shutdown in the caller into consideration.
11191 # This can fail as the old devices are degraded and _WaitForSync
11192 # does a combined result over all disks, so we don't check its return value
11193 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11194 _WaitForSync(self.lu, self.instance)
11196 # Check all devices manually
11197 self._CheckDevices(self.instance.primary_node, iv_names)
11199 # Step: remove old storage
11200 if not self.early_release:
11201 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11202 self._RemoveOldStorage(self.target_node, iv_names)
11204 def _ExecDrbd8Secondary(self, feedback_fn):
11205 """Replace the secondary node for DRBD 8.
11207 The algorithm for replace is quite complicated:
11208 - for all disks of the instance:
11209 - create new LVs on the new node with same names
11210 - shutdown the drbd device on the old secondary
11211 - disconnect the drbd network on the primary
11212 - create the drbd device on the new secondary
11213 - network attach the drbd on the primary, using an artifice:
11214 the drbd code for Attach() will connect to the network if it
11215 finds a device which is connected to the good local disks but
11216 not network enabled
11217 - wait for sync across all devices
11218 - remove all disks from the old secondary
11220 Failures are not very well handled.
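# Rough outline of the calls below, using hypothetical helper names purely
# for orientation (not part of the module):
#
#   create_lvs(new_node)                    # step 3: new local storage
#   create_drbd(new_node, alone_id)         # step 4: no network info yet
#   blockdev_shutdown(old_secondary)        # drop the old peer
#   drbd_disconnect_net(primary)            # primary goes standalone
#   cfg.Update(instance)                    # disks now point to new_node
#   drbd_attach_net(primary, new_node)      # standalone => connected
#   wait_for_sync(); remove_old_storage(old_secondary)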
11225 pnode = self.instance.primary_node
11227 # Step: check device activation
11228 self.lu.LogStep(1, steps_total, "Check device existence")
11229 self._CheckDisksExistence([self.instance.primary_node])
11230 self._CheckVolumeGroup([self.instance.primary_node])
11232 # Step: check other node consistency
11233 self.lu.LogStep(2, steps_total, "Check peer consistency")
11234 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11236 # Step: create new storage
11237 self.lu.LogStep(3, steps_total, "Allocate new storage")
11238 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11239 for idx, dev in enumerate(disks):
11240 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11241 (self.new_node, idx))
11242 # we pass force_create=True to force LVM creation
11243 for new_lv in dev.children:
11244 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11245 True, _GetInstanceInfoText(self.instance), False)
11247 # Step 4: drbd minors and drbd setup changes
11248 # after this, we must manually remove the drbd minors on both the
11249 # error and the success paths
11250 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11251 minors = self.cfg.AllocateDRBDMinor([self.new_node
11252 for dev in self.instance.disks],
11253 self.instance.name)
11254 logging.debug("Allocated minors %r", minors)
11257 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11258 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11259 (self.new_node, idx))
11260 # create new devices on new_node; note that we create two IDs:
11261 # one without port, so the drbd will be activated without
11262 # networking information on the new node at this stage, and one
11263 # with network, for the later activation in step 4
11264 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11265 if self.instance.primary_node == o_node1:
11268 assert self.instance.primary_node == o_node2, "Three-node instance?"
11271 new_alone_id = (self.instance.primary_node, self.new_node, None,
11272 p_minor, new_minor, o_secret)
11273 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11274 p_minor, new_minor, o_secret)
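# For illustration only: a DRBD8 logical_id is the 6-tuple
# (nodeA, nodeB, port, minorA, minorB, secret). With invented values:
#
#   dev.logical_id = ("node1", "old-node2", 11000, 0, 3, "secret")
#   new_alone_id   = ("node1", "new-node3", None,  0, 7, "secret")
#   new_net_id     = ("node1", "new-node3", 11000, 0, 7, "secret")
#
# The device is first created on the new secondary from new_alone_id (no
# port, hence standalone); new_net_id is remembered in iv_names and used
# when the network is attached further down.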
11276 iv_names[idx] = (dev, dev.children, new_net_id)
11277 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11279 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11280 logical_id=new_alone_id,
11281 children=dev.children,
11284 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11287 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11289 _GetInstanceInfoText(self.instance), False)
11290 except errors.GenericError:
11291 self.cfg.ReleaseDRBDMinors(self.instance.name)
11294 # We have new devices, shutdown the drbd on the old secondary
11295 for idx, dev in enumerate(self.instance.disks):
11296 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11297 self.cfg.SetDiskID(dev, self.target_node)
11298 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11299 (dev, self.instance)).fail_msg
11301 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11302 "node: %s" % (idx, msg),
11303 hint=("Please cleanup this device manually as"
11304 " soon as possible"))
11306 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11307 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11308 self.instance.disks)[pnode]
11310 msg = result.fail_msg
11312 # detaches didn't succeed (unlikely)
11313 self.cfg.ReleaseDRBDMinors(self.instance.name)
11314 raise errors.OpExecError("Can't detach the disks from the network on"
11315 " old node: %s" % (msg,))
11317 # if we managed to detach at least one, we update all the disks of
11318 # the instance to point to the new secondary
11319 self.lu.LogInfo("Updating instance configuration")
11320 for dev, _, new_logical_id in iv_names.itervalues():
11321 dev.logical_id = new_logical_id
11322 self.cfg.SetDiskID(dev, self.instance.primary_node)
11324 self.cfg.Update(self.instance, feedback_fn)
11326 # Release all node locks (the configuration has been updated)
11327 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11329 # and now perform the drbd attach
11330 self.lu.LogInfo("Attaching primary drbds to new secondary"
11331 " (standalone => connected)")
11332 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11334 self.node_secondary_ip,
11335 (self.instance.disks, self.instance),
11336 self.instance.name,
11338 for to_node, to_result in result.items():
11339 msg = to_result.fail_msg
11341 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11343 hint=("please do a gnt-instance info to see the"
11344 " status of disks"))
11346 cstep = itertools.count(5)
11348 if self.early_release:
11349 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11350 self._RemoveOldStorage(self.target_node, iv_names)
11351 # TODO: Check if releasing locks early still makes sense
11352 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11354 # Release all resource locks except those used by the instance
11355 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11356 keep=self.node_secondary_ip.keys())
11358 # TODO: Can the instance lock be downgraded here? Take the optional disk
11359 # shutdown in the caller into consideration.
11362 # This can fail as the old devices are degraded and _WaitForSync
11363 # does a combined result over all disks, so we don't check its return value
11364 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11365 _WaitForSync(self.lu, self.instance)
11367 # Check all devices manually
11368 self._CheckDevices(self.instance.primary_node, iv_names)
11370 # Step: remove old storage
11371 if not self.early_release:
11372 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11373 self._RemoveOldStorage(self.target_node, iv_names)
11376 class LURepairNodeStorage(NoHooksLU):
11377 """Repairs the volume group on a node.
11382 def CheckArguments(self):
11383 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11385 storage_type = self.op.storage_type
11387 if (constants.SO_FIX_CONSISTENCY not in
11388 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11389 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11390 " repaired" % storage_type,
11391 errors.ECODE_INVAL)
11393 def ExpandNames(self):
11394 self.needed_locks = {
11395 locking.LEVEL_NODE: [self.op.node_name],
11398 def _CheckFaultyDisks(self, instance, node_name):
11399 """Ensure faulty disks abort the opcode or at least warn."""
11401 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11403 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11404 " node '%s'" % (instance.name, node_name),
11405 errors.ECODE_STATE)
11406 except errors.OpPrereqError, err:
11407 if self.op.ignore_consistency:
11408 self.proc.LogWarning(str(err.args[0]))
11412 def CheckPrereq(self):
11413 """Check prerequisites.
11416 # Check whether any instance on this node has faulty disks
11417 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11418 if inst.admin_state != constants.ADMINST_UP:
11420 check_nodes = set(inst.all_nodes)
11421 check_nodes.discard(self.op.node_name)
11422 for inst_node_name in check_nodes:
11423 self._CheckFaultyDisks(inst, inst_node_name)
11425 def Exec(self, feedback_fn):
11426 feedback_fn("Repairing storage unit '%s' on %s ..." %
11427 (self.op.name, self.op.node_name))
11429 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11430 result = self.rpc.call_storage_execute(self.op.node_name,
11431 self.op.storage_type, st_args,
11433 constants.SO_FIX_CONSISTENCY)
11434 result.Raise("Failed to repair storage unit '%s' on %s" %
11435 (self.op.name, self.op.node_name))
11438 class LUNodeEvacuate(NoHooksLU):
11439 """Evacuates instances off a list of nodes.
11444 _MODE2IALLOCATOR = {
11445 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11446 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11447 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11449 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11450 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11451 constants.IALLOCATOR_NEVAC_MODES)
11453 def CheckArguments(self):
11454 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11456 def ExpandNames(self):
11457 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11459 if self.op.remote_node is not None:
11460 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11461 assert self.op.remote_node
11463 if self.op.remote_node == self.op.node_name:
11464 raise errors.OpPrereqError("Can not use evacuated node as a new"
11465 " secondary node", errors.ECODE_INVAL)
11467 if self.op.mode != constants.NODE_EVAC_SEC:
11468 raise errors.OpPrereqError("Without the use of an iallocator only"
11469 " secondary instances can be evacuated",
11470 errors.ECODE_INVAL)
11473 self.share_locks = _ShareAll()
11474 self.needed_locks = {
11475 locking.LEVEL_INSTANCE: [],
11476 locking.LEVEL_NODEGROUP: [],
11477 locking.LEVEL_NODE: [],
11480 # Determine nodes (via group) optimistically, needs verification once locks
11481 # have been acquired
11482 self.lock_nodes = self._DetermineNodes()
11484 def _DetermineNodes(self):
11485 """Gets the list of nodes to operate on.
11488 if self.op.remote_node is None:
11489 # Iallocator will choose any node(s) in the same group
11490 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11492 group_nodes = frozenset([self.op.remote_node])
11494 # Determine nodes to be locked
11495 return set([self.op.node_name]) | group_nodes
11497 def _DetermineInstances(self):
11498 """Builds list of instances to operate on.
11501 assert self.op.mode in constants.NODE_EVAC_MODES
11503 if self.op.mode == constants.NODE_EVAC_PRI:
11504 # Primary instances only
11505 inst_fn = _GetNodePrimaryInstances
11506 assert self.op.remote_node is None, \
11507 "Evacuating primary instances requires iallocator"
11508 elif self.op.mode == constants.NODE_EVAC_SEC:
11509 # Secondary instances only
11510 inst_fn = _GetNodeSecondaryInstances
11513 assert self.op.mode == constants.NODE_EVAC_ALL
11514 inst_fn = _GetNodeInstances
11515 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11517 raise errors.OpPrereqError("Due to an issue with the iallocator"
11518 " interface it is not possible to evacuate"
11519 " all instances at once; specify explicitly"
11520 " whether to evacuate primary or secondary"
11522 errors.ECODE_INVAL)
11524 return inst_fn(self.cfg, self.op.node_name)
11526 def DeclareLocks(self, level):
11527 if level == locking.LEVEL_INSTANCE:
11528 # Lock instances optimistically, needs verification once node and group
11529 # locks have been acquired
11530 self.needed_locks[locking.LEVEL_INSTANCE] = \
11531 set(i.name for i in self._DetermineInstances())
11533 elif level == locking.LEVEL_NODEGROUP:
11534 # Lock node groups for all potential target nodes optimistically, needs
11535 # verification once nodes have been acquired
11536 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11537 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11539 elif level == locking.LEVEL_NODE:
11540 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11542 def CheckPrereq(self):
11544 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11545 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11546 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11548 need_nodes = self._DetermineNodes()
11550 if not owned_nodes.issuperset(need_nodes):
11551 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11552 " locks were acquired, current nodes are"
11553 " are '%s', used to be '%s'; retry the"
11555 (self.op.node_name,
11556 utils.CommaJoin(need_nodes),
11557 utils.CommaJoin(owned_nodes)),
11558 errors.ECODE_STATE)
11560 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11561 if owned_groups != wanted_groups:
11562 raise errors.OpExecError("Node groups changed since locks were acquired,"
11563 " current groups are '%s', used to be '%s';"
11564 " retry the operation" %
11565 (utils.CommaJoin(wanted_groups),
11566 utils.CommaJoin(owned_groups)))
11568 # Determine affected instances
11569 self.instances = self._DetermineInstances()
11570 self.instance_names = [i.name for i in self.instances]
11572 if set(self.instance_names) != owned_instances:
11573 raise errors.OpExecError("Instances on node '%s' changed since locks"
11574 " were acquired, current instances are '%s',"
11575 " used to be '%s'; retry the operation" %
11576 (self.op.node_name,
11577 utils.CommaJoin(self.instance_names),
11578 utils.CommaJoin(owned_instances)))
11580 if self.instance_names:
11581 self.LogInfo("Evacuating instances from node '%s': %s",
11583 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11585 self.LogInfo("No instances to evacuate from node '%s'",
11588 if self.op.remote_node is not None:
11589 for i in self.instances:
11590 if i.primary_node == self.op.remote_node:
11591 raise errors.OpPrereqError("Node %s is the primary node of"
11592 " instance %s, cannot use it as"
11594 (self.op.remote_node, i.name),
11595 errors.ECODE_INVAL)
11597 def Exec(self, feedback_fn):
11598 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11600 if not self.instance_names:
11601 # No instances to evacuate
11604 elif self.op.iallocator is not None:
11605 # TODO: Implement relocation to other group
11606 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11607 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11608 instances=list(self.instance_names))
11610 ial.Run(self.op.iallocator)
11612 if not ial.success:
11613 raise errors.OpPrereqError("Can't compute node evacuation using"
11614 " iallocator '%s': %s" %
11615 (self.op.iallocator, ial.info),
11616 errors.ECODE_NORES)
11618 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11620 elif self.op.remote_node is not None:
11621 assert self.op.mode == constants.NODE_EVAC_SEC
11623 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11624 remote_node=self.op.remote_node,
11626 mode=constants.REPLACE_DISK_CHG,
11627 early_release=self.op.early_release)]
11628 for instance_name in self.instance_names
11632 raise errors.ProgrammerError("No iallocator or remote node")
11634 return ResultWithJobs(jobs)
11637 def _SetOpEarlyRelease(early_release, op):
11638 """Sets C{early_release} flag on opcodes if available.
11642 op.early_release = early_release
11643 except AttributeError:
11644 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11649 def _NodeEvacDest(use_nodes, group, nodes):
11650 """Returns group or nodes depending on caller's choice.
11654 return utils.CommaJoin(nodes)
11659 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11660 """Unpacks the result of change-group and node-evacuate iallocator requests.
11662 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11663 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11665 @type lu: L{LogicalUnit}
11666 @param lu: Logical unit instance
11667 @type alloc_result: tuple/list
11668 @param alloc_result: Result from iallocator
11669 @type early_release: bool
11670 @param early_release: Whether to release locks early if possible
11671 @type use_nodes: bool
11672 @param use_nodes: Whether to display node names instead of groups
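# Shape of alloc_result as consumed below (all values invented):
#
#   (moved, failed, jobs) = (
#     [("inst1", "group-uuid", ["nodeA", "nodeB"])],  # (name, group, nodes)
#     [("inst2", "disk template not supported")],     # (name, reason)
#     [[op1_dict, op2_dict], [op3_dict]],             # serialized opcodes,
#   )                                                 #  one list per job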
11675 (moved, failed, jobs) = alloc_result
11678 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11679 for (name, reason) in failed)
11680 lu.LogWarning("Unable to evacuate instances %s", failreason)
11681 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11684 lu.LogInfo("Instances to be moved: %s",
11685 utils.CommaJoin("%s (to %s)" %
11686 (name, _NodeEvacDest(use_nodes, group, nodes))
11687 for (name, group, nodes) in moved))
11689 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11690 map(opcodes.OpCode.LoadOpCode, ops))
11694 class LUInstanceGrowDisk(LogicalUnit):
11695 """Grow a disk of an instance.
11698 HPATH = "disk-grow"
11699 HTYPE = constants.HTYPE_INSTANCE
11702 def ExpandNames(self):
11703 self._ExpandAndLockInstance()
11704 self.needed_locks[locking.LEVEL_NODE] = []
11705 self.needed_locks[locking.LEVEL_NODE_RES] = []
11706 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11707 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11709 def DeclareLocks(self, level):
11710 if level == locking.LEVEL_NODE:
11711 self._LockInstancesNodes()
11712 elif level == locking.LEVEL_NODE_RES:
11714 self.needed_locks[locking.LEVEL_NODE_RES] = \
11715 self.needed_locks[locking.LEVEL_NODE][:]
11717 def BuildHooksEnv(self):
11718 """Build hooks env.
11720 This runs on the master, the primary and all the secondaries.
11724 "DISK": self.op.disk,
11725 "AMOUNT": self.op.amount,
11726 "ABSOLUTE": self.op.absolute,
11728 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11731 def BuildHooksNodes(self):
11732 """Build hooks nodes.
11735 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11738 def CheckPrereq(self):
11739 """Check prerequisites.
11741 This checks that the instance is in the cluster.
11744 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11745 assert instance is not None, \
11746 "Cannot retrieve locked instance %s" % self.op.instance_name
11747 nodenames = list(instance.all_nodes)
11748 for node in nodenames:
11749 _CheckNodeOnline(self, node)
11751 self.instance = instance
11753 if instance.disk_template not in constants.DTS_GROWABLE:
11754 raise errors.OpPrereqError("Instance's disk layout does not support"
11755 " growing", errors.ECODE_INVAL)
11757 self.disk = instance.FindDisk(self.op.disk)
11759 if self.op.absolute:
11760 self.target = self.op.amount
11761 self.delta = self.target - self.disk.size
11763 raise errors.OpPrereqError("Requested size (%s) is smaller than "
11764 "current disk size (%s)" %
11765 (utils.FormatUnit(self.target, "h"),
11766 utils.FormatUnit(self.disk.size, "h")),
11767 errors.ECODE_STATE)
11769 self.delta = self.op.amount
11770 self.target = self.disk.size + self.delta
11772 raise errors.OpPrereqError("Requested increment (%s) is negative" %
11773 utils.FormatUnit(self.delta, "h"),
11774 errors.ECODE_INVAL)
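# Worked example (numbers invented): with a current disk size of 10240 MB,
# an absolute request of 20480 MB gives delta = 10240 and target = 20480,
# while a relative request of 2048 MB gives delta = 2048 and target = 12288;
# an absolute request below 10240 MB is rejected above.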
11776 if instance.disk_template not in (constants.DT_FILE,
11777 constants.DT_SHARED_FILE,
11779 # TODO: check the free disk space for file, when that feature will be implemented
11781 _CheckNodesFreeDiskPerVG(self, nodenames,
11782 self.disk.ComputeGrowth(self.delta))
11784 def Exec(self, feedback_fn):
11785 """Execute disk grow.
11788 instance = self.instance
11791 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11792 assert (self.owned_locks(locking.LEVEL_NODE) ==
11793 self.owned_locks(locking.LEVEL_NODE_RES))
11795 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11797 raise errors.OpExecError("Cannot activate block device to grow")
11799 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
11800 (self.op.disk, instance.name,
11801 utils.FormatUnit(self.delta, "h"),
11802 utils.FormatUnit(self.target, "h")))
11804 # First run all grow ops in dry-run mode
11805 for node in instance.all_nodes:
11806 self.cfg.SetDiskID(disk, node)
11807 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11809 result.Raise("Grow request failed to node %s" % node)
11811 # We know that (as far as we can test) operations across different
11812 # nodes will succeed, time to run it for real
11813 for node in instance.all_nodes:
11814 self.cfg.SetDiskID(disk, node)
11815 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
11817 result.Raise("Grow request failed to node %s" % node)
11819 # TODO: Rewrite code to work properly
11820 # DRBD goes into sync mode for a short amount of time after executing the
11821 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11822 # calling "resize" in sync mode fails. Sleeping for a short amount of
11823 # time is a work-around.
11826 disk.RecordGrow(self.delta)
11827 self.cfg.Update(instance, feedback_fn)
11829 # Changes have been recorded, release node lock
11830 _ReleaseLocks(self, locking.LEVEL_NODE)
11832 # Downgrade lock while waiting for sync
11833 self.glm.downgrade(locking.LEVEL_INSTANCE)
11835 if self.op.wait_for_sync:
11836 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11838 self.proc.LogWarning("Disk sync-ing has not returned a good"
11839 " status; please check the instance")
11840 if instance.admin_state != constants.ADMINST_UP:
11841 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11842 elif instance.admin_state != constants.ADMINST_UP:
11843 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11844 " not supposed to be running because no wait for"
11845 " sync mode was requested")
11847 assert self.owned_locks(locking.LEVEL_NODE_RES)
11848 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11851 class LUInstanceQueryData(NoHooksLU):
11852 """Query runtime instance data.
11857 def ExpandNames(self):
11858 self.needed_locks = {}
11860 # Use locking if requested or when non-static information is wanted
11861 if not (self.op.static or self.op.use_locking):
11862 self.LogWarning("Non-static data requested, locks need to be acquired")
11863 self.op.use_locking = True
11865 if self.op.instances or not self.op.use_locking:
11866 # Expand instance names right here
11867 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11869 # Will use acquired locks
11870 self.wanted_names = None
11872 if self.op.use_locking:
11873 self.share_locks = _ShareAll()
11875 if self.wanted_names is None:
11876 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11878 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11880 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11881 self.needed_locks[locking.LEVEL_NODE] = []
11882 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11884 def DeclareLocks(self, level):
11885 if self.op.use_locking:
11886 if level == locking.LEVEL_NODEGROUP:
11887 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11889 # Lock all groups used by instances optimistically; this requires going
11890 # via the node before it's locked, requiring verification later on
11891 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11892 frozenset(group_uuid
11893 for instance_name in owned_instances
11895 self.cfg.GetInstanceNodeGroups(instance_name))
11897 elif level == locking.LEVEL_NODE:
11898 self._LockInstancesNodes()
11900 def CheckPrereq(self):
11901 """Check prerequisites.
11903 This only checks the optional instance list against the existing names.
11906 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11907 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11908 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11910 if self.wanted_names is None:
11911 assert self.op.use_locking, "Locking was not used"
11912 self.wanted_names = owned_instances
11914 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
11916 if self.op.use_locking:
11917 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
11920 assert not (owned_instances or owned_groups or owned_nodes)
11922 self.wanted_instances = instances.values()
11924 def _ComputeBlockdevStatus(self, node, instance, dev):
11925 """Returns the status of a block device
11928 if self.op.static or not node:
11931 self.cfg.SetDiskID(dev, node)
11933 result = self.rpc.call_blockdev_find(node, dev)
11937 result.Raise("Can't compute disk status for %s" % instance.name)
11939 status = result.payload
11943 return (status.dev_path, status.major, status.minor,
11944 status.sync_percent, status.estimated_time,
11945 status.is_degraded, status.ldisk_status)
11947 def _ComputeDiskStatus(self, instance, snode, dev):
11948 """Compute block device status.
11951 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
11953 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
11955 def _ComputeDiskStatusInner(self, instance, snode, dev):
11956 """Compute block device status.
11958 @attention: The device has to be annotated already.
11961 if dev.dev_type in constants.LDS_DRBD:
11962 # we change the snode then (otherwise we use the one passed in)
11963 if dev.logical_id[0] == instance.primary_node:
11964 snode = dev.logical_id[1]
11966 snode = dev.logical_id[0]
11968 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11970 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
11973 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
11980 "iv_name": dev.iv_name,
11981 "dev_type": dev.dev_type,
11982 "logical_id": dev.logical_id,
11983 "physical_id": dev.physical_id,
11984 "pstatus": dev_pstatus,
11985 "sstatus": dev_sstatus,
11986 "children": dev_children,
11991 def Exec(self, feedback_fn):
11992 """Gather and return data"""
11995 cluster = self.cfg.GetClusterInfo()
11997 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
11998 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12000 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12001 for node in nodes.values()))
12003 group2name_fn = lambda uuid: groups[uuid].name
12005 for instance in self.wanted_instances:
12006 pnode = nodes[instance.primary_node]
12008 if self.op.static or pnode.offline:
12009 remote_state = None
12011 self.LogWarning("Primary node %s is marked offline, returning static"
12012 " information only for instance %s" %
12013 (pnode.name, instance.name))
12015 remote_info = self.rpc.call_instance_info(instance.primary_node,
12017 instance.hypervisor)
12018 remote_info.Raise("Error checking node %s" % instance.primary_node)
12019 remote_info = remote_info.payload
12020 if remote_info and "state" in remote_info:
12021 remote_state = "up"
12023 if instance.admin_state == constants.ADMINST_UP:
12024 remote_state = "down"
12026 remote_state = instance.admin_state
12028 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12031 snodes_group_uuids = [nodes[snode_name].group
12032 for snode_name in instance.secondary_nodes]
12034 result[instance.name] = {
12035 "name": instance.name,
12036 "config_state": instance.admin_state,
12037 "run_state": remote_state,
12038 "pnode": instance.primary_node,
12039 "pnode_group_uuid": pnode.group,
12040 "pnode_group_name": group2name_fn(pnode.group),
12041 "snodes": instance.secondary_nodes,
12042 "snodes_group_uuids": snodes_group_uuids,
12043 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12045 # this happens to be the same format used for hooks
12046 "nics": _NICListToTuple(self, instance.nics),
12047 "disk_template": instance.disk_template,
12049 "hypervisor": instance.hypervisor,
12050 "network_port": instance.network_port,
12051 "hv_instance": instance.hvparams,
12052 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12053 "be_instance": instance.beparams,
12054 "be_actual": cluster.FillBE(instance),
12055 "os_instance": instance.osparams,
12056 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12057 "serial_no": instance.serial_no,
12058 "mtime": instance.mtime,
12059 "ctime": instance.ctime,
12060 "uuid": instance.uuid,
12066 def PrepareContainerMods(mods, private_fn):
12067 """Prepares a list of container modifications by adding a private data field.
12069 @type mods: list of tuples; (operation, index, parameters)
12070 @param mods: List of modifications
12071 @type private_fn: callable or None
12072 @param private_fn: Callable for constructing a private data field for a
12077 if private_fn is None:
12082 return [(op, idx, params, fn()) for (op, idx, params) in mods]
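# Example of the transformation performed here (values invented):
#
#   mods = [(constants.DDM_ADD, -1, {"size": 1024})]
#   PrepareContainerMods(mods, _InstNicModPrivate)
#     => [(constants.DDM_ADD, -1, {"size": 1024}, <_InstNicModPrivate>)]
#
# With private_fn=None the fourth element is None for every modification.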
12085 #: Type description for changes as returned by L{ApplyContainerMods}'s
12087 _TApplyContModsCbChanges = \
12088 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12089 ht.TNonEmptyString,
12094 def ApplyContainerMods(kind, container, chgdesc, mods,
12095 create_fn, modify_fn, remove_fn):
12096 """Applies descriptions in C{mods} to C{container}.
12099 @param kind: One-word item description
12100 @type container: list
12101 @param container: Container to modify
12102 @type chgdesc: None or list
12103 @param chgdesc: List of applied changes
12105 @param mods: Modifications as returned by L{PrepareContainerMods}
12106 @type create_fn: callable
12107 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12108 receives absolute item index, parameters and private data object as added
12109 by L{PrepareContainerMods}, returns tuple containing new item and changes
12111 @type modify_fn: callable
12112 @param modify_fn: Callback for modifying an existing item
12113 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12114 and private data object as added by L{PrepareContainerMods}, returns
12116 @type remove_fn: callable
12117 @param remove_fn: Callback on removing item; receives absolute item index,
12118 item and private data object as added by L{PrepareContainerMods}
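# Hypothetical usage sketch (callbacks and params invented), showing how the
# change description list is filled in:
#
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, -1, {})], None)
#   ApplyContainerMods("NIC", nics, chgdesc, mods, None, None, None)
#   # chgdesc now contains [("NIC/2", "remove")] if the last of three NICs
#   # was removed; index -1 always addresses the last item.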
12121 for (op, idx, params, private) in mods:
12124 absidx = len(container) - 1
12126 raise IndexError("Not accepting negative indices other than -1")
12127 elif idx > len(container):
12128 raise IndexError("Got %s index %s, but there are only %s" %
12129 (kind, idx, len(container)))
12135 if op == constants.DDM_ADD:
12136 # Calculate where item will be added
12138 addidx = len(container)
12142 if create_fn is None:
12145 (item, changes) = create_fn(addidx, params, private)
12148 container.append(item)
12151 assert idx <= len(container)
12152 # list.insert does so before the specified index
12153 container.insert(idx, item)
12155 # Retrieve existing item
12157 item = container[absidx]
12159 raise IndexError("Invalid %s index %s" % (kind, idx))
12161 if op == constants.DDM_REMOVE:
12164 if remove_fn is not None:
12165 remove_fn(absidx, item, private)
12167 changes = [("%s/%s" % (kind, absidx), "remove")]
12169 assert container[absidx] == item
12170 del container[absidx]
12171 elif op == constants.DDM_MODIFY:
12172 if modify_fn is not None:
12173 changes = modify_fn(absidx, item, params, private)
12175 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12177 assert _TApplyContModsCbChanges(changes)
12179 if not (chgdesc is None or changes is None):
12180 chgdesc.extend(changes)
12183 def _UpdateIvNames(base_index, disks):
12184 """Updates the C{iv_name} attribute of disks.
12186 @type disks: list of L{objects.Disk}
12189 for (idx, disk) in enumerate(disks):
12190 disk.iv_name = "disk/%s" % (base_index + idx, )
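# E.g. (illustrative): base_index=2 with a three-element disks list renames
# the entries to "disk/2", "disk/3" and "disk/4".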
12193 class _InstNicModPrivate:
12194 """Data structure for network interface modifications.
12196 Used by L{LUInstanceSetParams}.
12199 def __init__(self):
12204 class LUInstanceSetParams(LogicalUnit):
12205 """Modifies an instances's parameters.
12208 HPATH = "instance-modify"
12209 HTYPE = constants.HTYPE_INSTANCE
12213 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12214 assert ht.TList(mods)
12215 assert not mods or len(mods[0]) in (2, 3)
12217 if mods and len(mods[0]) == 2:
12221 for op, params in mods:
12222 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12223 result.append((op, -1, params))
12227 raise errors.OpPrereqError("Only one %s add or remove operation is"
12228 " supported at a time" % kind,
12229 errors.ECODE_INVAL)
12231 result.append((constants.DDM_MODIFY, op, params))
12233 assert verify_fn(result)
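# Illustrative example of the upgrade performed here: the legacy 2-tuple
# format
#   [(constants.DDM_ADD, {...}), (0, {"mode": "ro"})]
# becomes the indexed 3-tuple format
#   [(constants.DDM_ADD, -1, {...}), (constants.DDM_MODIFY, 0, {"mode": "ro"})]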
12240 def _CheckMods(kind, mods, key_types, item_fn):
12241 """Ensures requested disk/NIC modifications are valid.
12244 for (op, _, params) in mods:
12245 assert ht.TDict(params)
12247 utils.ForceDictType(params, key_types)
12249 if op == constants.DDM_REMOVE:
12251 raise errors.OpPrereqError("No settings should be passed when"
12252 " removing a %s" % kind,
12253 errors.ECODE_INVAL)
12254 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12255 item_fn(op, params)
12257 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12260 def _VerifyDiskModification(op, params):
12261 """Verifies a disk modification.
12264 if op == constants.DDM_ADD:
12265 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12266 if mode not in constants.DISK_ACCESS_SET:
12267 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12268 errors.ECODE_INVAL)
12270 size = params.get(constants.IDISK_SIZE, None)
12272 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12273 constants.IDISK_SIZE, errors.ECODE_INVAL)
12277 except (TypeError, ValueError), err:
12278 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12279 errors.ECODE_INVAL)
12281 params[constants.IDISK_SIZE] = size
12283 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12284 raise errors.OpPrereqError("Disk size change not possible, use"
12285 " grow-disk", errors.ECODE_INVAL)
12288 def _VerifyNicModification(op, params):
12289 """Verifies a network interface modification.
12292 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12293 ip = params.get(constants.INIC_IP, None)
12294 req_net = params.get(constants.INIC_NETWORK, None)
12295 link = params.get(constants.NIC_LINK, None)
12296 mode = params.get(constants.NIC_MODE, None)
12297 if req_net is not None:
12298 if req_net.lower() == constants.VALUE_NONE:
12299 params[constants.INIC_NETWORK] = None
12301 elif link is not None or mode is not None:
12302 raise errors.OpPrereqError("If network is given"
12303 " mode or link should not",
12304 errors.ECODE_INVAL)
12306 if op == constants.DDM_ADD:
12307 macaddr = params.get(constants.INIC_MAC, None)
12308 if macaddr is None:
12309 params[constants.INIC_MAC] = constants.VALUE_AUTO
12312 if ip.lower() == constants.VALUE_NONE:
12313 params[constants.INIC_IP] = None
12315 if ip.lower() == constants.NIC_IP_POOL:
12316 if op == constants.DDM_ADD and req_net is None:
12317 raise errors.OpPrereqError("If ip=pool, parameter network"
12319 errors.ECODE_INVAL)
12321 if not netutils.IPAddress.IsValid(ip):
12322 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12323 errors.ECODE_INVAL)
12325 if constants.INIC_MAC in params:
12326 macaddr = params[constants.INIC_MAC]
12327 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12328 macaddr = utils.NormalizeAndValidateMac(macaddr)
12330 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12331 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12332 " modifying an existing NIC",
12333 errors.ECODE_INVAL)
12335 def CheckArguments(self):
12336 if not (self.op.nics or self.op.disks or self.op.disk_template or
12337 self.op.hvparams or self.op.beparams or self.op.os_name or
12338 self.op.offline is not None or self.op.runtime_mem):
12339 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12341 if self.op.hvparams:
12342 _CheckGlobalHvParams(self.op.hvparams)
12345 self._UpgradeDiskNicMods("disk", self.op.disks,
12346 opcodes.OpInstanceSetParams.TestDiskModifications)
12348 self._UpgradeDiskNicMods("NIC", self.op.nics,
12349 opcodes.OpInstanceSetParams.TestNicModifications)
12351 # Check disk modifications
12352 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12353 self._VerifyDiskModification)
12355 if self.op.disks and self.op.disk_template is not None:
12356 raise errors.OpPrereqError("Disk template conversion and other disk"
12357 " changes not supported at the same time",
12358 errors.ECODE_INVAL)
12360 if (self.op.disk_template and
12361 self.op.disk_template in constants.DTS_INT_MIRROR and
12362 self.op.remote_node is None):
12363 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12364 " one requires specifying a secondary node",
12365 errors.ECODE_INVAL)
12367 # Check NIC modifications
12368 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12369 self._VerifyNicModification)
12371 def ExpandNames(self):
12372 self._ExpandAndLockInstance()
12373 # Can't even acquire node locks in shared mode as upcoming changes in
12374 # Ganeti 2.6 will start to modify the node object on disk conversion
12375 self.needed_locks[locking.LEVEL_NODE] = []
12376 self.needed_locks[locking.LEVEL_NODE_RES] = []
12377 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12379 def DeclareLocks(self, level):
12380 # TODO: Acquire group lock in shared mode (disk parameters)
12381 if level == locking.LEVEL_NODE:
12382 self._LockInstancesNodes()
12383 if self.op.disk_template and self.op.remote_node:
12384 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12385 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12386 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12388 self.needed_locks[locking.LEVEL_NODE_RES] = \
12389 self.needed_locks[locking.LEVEL_NODE][:]
12391 def BuildHooksEnv(self):
12392 """Build hooks env.
12394 This runs on the master, primary and secondaries.
12398 if constants.BE_MINMEM in self.be_new:
12399 args["minmem"] = self.be_new[constants.BE_MINMEM]
12400 if constants.BE_MAXMEM in self.be_new:
12401 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12402 if constants.BE_VCPUS in self.be_new:
12403 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12404 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12405 # information at all.
12407 if self._new_nics is not None:
12410 for nic in self._new_nics:
12411 n = copy.deepcopy(nic)
12412 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12413 n.nicparams = nicparams
12414 nics.append(_NICToTuple(self, n))
12416 args["nics"] = nics
12418 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12419 if self.op.disk_template:
12420 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12421 if self.op.runtime_mem:
12422 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12426 def BuildHooksNodes(self):
12427 """Build hooks nodes.
12430 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12433 def _PrepareNicModification(self, params, private, old_ip, old_net,
12434 old_params, cluster, pnode):
12436 update_params_dict = dict([(key, params[key])
12437 for key in constants.NICS_PARAMETERS
12440 req_link = update_params_dict.get(constants.NIC_LINK, None)
12441 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12443 new_net = params.get(constants.INIC_NETWORK, old_net)
12444 if new_net is not None:
12445 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12446 if netparams is None:
12447 raise errors.OpPrereqError("No netparams found for the network"
12448 " %s, propably not connected." % new_net,
12449 errors.ECODE_INVAL)
12450 new_params = dict(netparams)
12452 new_params = _GetUpdatedParams(old_params, update_params_dict)
12454 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12456 new_filled_params = cluster.SimpleFillNIC(new_params)
12457 objects.NIC.CheckParameterSyntax(new_filled_params)
12459 new_mode = new_filled_params[constants.NIC_MODE]
12460 if new_mode == constants.NIC_MODE_BRIDGED:
12461 bridge = new_filled_params[constants.NIC_LINK]
12462 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12464 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12466 self.warn.append(msg)
12468 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12470 elif new_mode == constants.NIC_MODE_ROUTED:
12471 ip = params.get(constants.INIC_IP, old_ip)
12473 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12474 " on a routed NIC", errors.ECODE_INVAL)
12476 if constants.INIC_MAC in params:
12477 mac = params[constants.INIC_MAC]
12479 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12480 errors.ECODE_INVAL)
12481 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12482 # otherwise generate the MAC address
12483 params[constants.INIC_MAC] = \
12484 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12486 # or validate/reserve the current one
12488 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12489 except errors.ReservationError:
12490 raise errors.OpPrereqError("MAC address '%s' already in use"
12491 " in cluster" % mac,
12492 errors.ECODE_NOTUNIQUE)
12493 elif new_net != old_net:
12494 def get_net_prefix(net):
12496 uuid = self.cfg.LookupNetwork(net)
12498 nobj = self.cfg.GetNetwork(uuid)
12499 return nobj.mac_prefix
12501 new_prefix = get_net_prefix(new_net)
12502 old_prefix = get_net_prefix(old_net)
12503 if old_prefix != new_prefix:
12504 params[constants.INIC_MAC] = \
12505 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12507 #if there is a change in nic-network configuration
12508 new_ip = params.get(constants.INIC_IP, old_ip)
12509 if (new_ip, new_net) != (old_ip, old_net):
12512 if new_ip.lower() == constants.NIC_IP_POOL:
12514 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12515 except errors.ReservationError:
12516 raise errors.OpPrereqError("Unable to get a free IP"
12517 " from the address pool",
12518 errors.ECODE_STATE)
12519 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12520 params[constants.INIC_IP] = new_ip
12521 elif new_ip != old_ip or new_net != old_net:
12523 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12524 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12525 except errors.ReservationError:
12526 raise errors.OpPrereqError("IP %s not available in network %s" %
12528 errors.ECODE_NOTUNIQUE)
12529 elif new_ip.lower() == constants.NIC_IP_POOL:
12530 raise errors.OpPrereqError("ip=pool, but no network found",
12534 if self.op.conflicts_check:
12535 _CheckForConflictingIp(self, new_ip, pnode)
12540 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
12541 except errors.AddressPoolError:
12542 logging.warning("Release IP %s not contained in network %s",
12545 # there are no changes in (net, ip) tuple
12546 elif (old_net is not None and
12547 (req_link is not None or req_mode is not None)):
12548 raise errors.OpPrereqError("Not allowed to change link or mode of"
12549 " a NIC that is connected to a network.",
12550 errors.ECODE_INVAL)
12552 private.params = new_params
12553 private.filled = new_filled_params
12555 def CheckPrereq(self):
12556 """Check prerequisites.
12558 This only checks the instance list against the existing names.
12561 # checking the new params on the primary/secondary nodes
12563 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12564 cluster = self.cluster = self.cfg.GetClusterInfo()
12565 assert self.instance is not None, \
12566 "Cannot retrieve locked instance %s" % self.op.instance_name
12567 pnode = instance.primary_node
12568 nodelist = list(instance.all_nodes)
12569 pnode_info = self.cfg.GetNodeInfo(pnode)
12570 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12572 # Prepare disk/NIC modifications
12573 self.diskmod = PrepareContainerMods(self.op.disks, None)
12574 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12577 if self.op.os_name and not self.op.force:
12578 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12579 self.op.force_variant)
12580 instance_os = self.op.os_name
12582 instance_os = instance.os
12584 assert not (self.op.disk_template and self.op.disks), \
12585 "Can't modify disk template and apply disk changes at the same time"
12587 if self.op.disk_template:
12588 if instance.disk_template == self.op.disk_template:
12589 raise errors.OpPrereqError("Instance already has disk template %s" %
12590 instance.disk_template, errors.ECODE_INVAL)
12592 if (instance.disk_template,
12593 self.op.disk_template) not in self._DISK_CONVERSIONS:
12594 raise errors.OpPrereqError("Unsupported disk template conversion from"
12595 " %s to %s" % (instance.disk_template,
12596 self.op.disk_template),
12597 errors.ECODE_INVAL)
12598 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12599 msg="cannot change disk template")
12600 if self.op.disk_template in constants.DTS_INT_MIRROR:
12601 if self.op.remote_node == pnode:
12602 raise errors.OpPrereqError("Given new secondary node %s is the same"
12603 " as the primary node of the instance" %
12604 self.op.remote_node, errors.ECODE_STATE)
12605 _CheckNodeOnline(self, self.op.remote_node)
12606 _CheckNodeNotDrained(self, self.op.remote_node)
12607 # FIXME: here we assume that the old instance type is DT_PLAIN
12608 assert instance.disk_template == constants.DT_PLAIN
12609 disks = [{constants.IDISK_SIZE: d.size,
12610 constants.IDISK_VG: d.logical_id[0]}
12611 for d in instance.disks]
12612 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12613 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12615 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12616 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12617 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12618 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12619 ignore=self.op.ignore_ipolicy)
12620 if pnode_info.group != snode_info.group:
12621 self.LogWarning("The primary and secondary nodes are in two"
12622 " different node groups; the disk parameters"
12623 " from the first disk's node group will be"
12626 # hvparams processing
12627 if self.op.hvparams:
12628 hv_type = instance.hypervisor
12629 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12630 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12631 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12634 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12635 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12636 self.hv_proposed = self.hv_new = hv_new # the new actual values
12637 self.hv_inst = i_hvdict # the new dict (without defaults)
12639 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12641 self.hv_new = self.hv_inst = {}
12643 # beparams processing
12644 if self.op.beparams:
12645 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12647 objects.UpgradeBeParams(i_bedict)
12648 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12649 be_new = cluster.SimpleFillBE(i_bedict)
12650 self.be_proposed = self.be_new = be_new # the new actual values
12651 self.be_inst = i_bedict # the new dict (without defaults)
12653 self.be_new = self.be_inst = {}
12654 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12655 be_old = cluster.FillBE(instance)
12657 # CPU param validation -- checking every time a parameter is
12658 # changed to cover all cases where either CPU mask or vcpus have been changed
12660 if (constants.BE_VCPUS in self.be_proposed and
12661 constants.HV_CPU_MASK in self.hv_proposed):
12663 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12664 # Verify mask is consistent with number of vCPUs. Can skip this
12665 # test if only 1 entry in the CPU mask, which means same mask
12666 # is applied to all vCPUs.
12667 if (len(cpu_list) > 1 and
12668 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12669 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12671 (self.be_proposed[constants.BE_VCPUS],
12672 self.hv_proposed[constants.HV_CPU_MASK]),
12673 errors.ECODE_INVAL)
12675 # Only perform this test if a new CPU mask is given
12676 if constants.HV_CPU_MASK in self.hv_new:
12677 # Calculate the largest CPU number requested
12678 max_requested_cpu = max(map(max, cpu_list))
12679 # Check that all of the instance's nodes have enough physical CPUs to
12680 # satisfy the requested CPU mask
12681 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12682 max_requested_cpu + 1, instance.hypervisor)
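# Example of the checks above (values invented): vcpus=4 with
# cpu_mask="1:2:5-6:8" parses into 4 per-vCPU entries, so the lengths match;
# the highest CPU requested is 8, hence every node must have at least 9
# physical CPUs available to the hypervisor.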
12684 # osparams processing
12685 if self.op.osparams:
12686 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12687 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12688 self.os_inst = i_osdict # the new dict (without defaults)
12694 #TODO(dynmem): do the appropriate check involving MINMEM
12695 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12696 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12697 mem_check_list = [pnode]
12698 if be_new[constants.BE_AUTO_BALANCE]:
12699 # either we changed auto_balance to yes or it was from before
12700 mem_check_list.extend(instance.secondary_nodes)
12701 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12702 instance.hypervisor)
12703 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12704 [instance.hypervisor])
12705 pninfo = nodeinfo[pnode]
12706 msg = pninfo.fail_msg
12708 # Assume the primary node is unreachable and go ahead
12709 self.warn.append("Can't get info from primary node %s: %s" %
12712 (_, _, (pnhvinfo, )) = pninfo.payload
12713 if not isinstance(pnhvinfo.get("memory_free", None), int):
12714 self.warn.append("Node data from primary node %s doesn't contain"
12715 " free memory information" % pnode)
12716 elif instance_info.fail_msg:
12717 self.warn.append("Can't get instance runtime information: %s" %
12718 instance_info.fail_msg)
12720 if instance_info.payload:
12721 current_mem = int(instance_info.payload["memory"])
12723 # Assume instance not running
12724 # (there is a slight race condition here, but it's not very
12725 # probable, and we have no other way to check)
12726 # TODO: Describe race condition
12728 #TODO(dynmem): do the appropriate check involving MINMEM
12729 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12730 pnhvinfo["memory_free"])
12732 raise errors.OpPrereqError("This change will prevent the instance"
12733 " from starting, due to %d MB of memory"
12734 " missing on its primary node" %
12736 errors.ECODE_NORES)
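# Numeric example (invented): raising maxmem from 2048 to 4096 MB while the
# instance currently uses 2048 MB and the primary node reports 1024 MB free
# gives miss_mem = 4096 - 2048 - 1024 = 1024 MB, so the change is refused
# (the whole check is skipped when force is set, per the condition above).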
12738 if be_new[constants.BE_AUTO_BALANCE]:
12739 for node, nres in nodeinfo.items():
12740 if node not in instance.secondary_nodes:
12742 nres.Raise("Can't get info from secondary node %s" % node,
12743 prereq=True, ecode=errors.ECODE_STATE)
12744 (_, _, (nhvinfo, )) = nres.payload
12745 if not isinstance(nhvinfo.get("memory_free", None), int):
12746 raise errors.OpPrereqError("Secondary node %s didn't return free"
12747 " memory information" % node,
12748 errors.ECODE_STATE)
12749 #TODO(dynmem): do the appropriate check involving MINMEM
12750 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12751 raise errors.OpPrereqError("This change will prevent the instance"
12752 " from failover to its secondary node"
12753 " %s, due to not enough memory" % node,
12754 errors.ECODE_STATE)
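# Illustrative sketch (assumed numbers): the memory check above works out how
# much memory would be missing on the primary node if MAXMEM were raised,
# crediting the instance with what it already uses:
#
#   maxmem_new, current_mem, node_free = 4096, 2048, 1024   # MB
#   miss_mem = maxmem_new - current_mem - node_free          # 1024 MB short
#   if miss_mem > 0:
#       raise RuntimeError("%d MB of memory missing on the primary node" %
#                          miss_mem)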
12756 if self.op.runtime_mem:
12757 remote_info = self.rpc.call_instance_info(instance.primary_node,
12759 instance.hypervisor)
12760 remote_info.Raise("Error checking node %s" % instance.primary_node)
12761 if not remote_info.payload: # not running already
12762 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12763 errors.ECODE_STATE)
12765 current_memory = remote_info.payload["memory"]
12766 if (not self.op.force and
12767 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12768 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12769 raise errors.OpPrereqError("Instance %s must have memory between %d"
12770 " and %d MB of memory unless --force is"
12771 " given" % (instance.name,
12772 self.be_proposed[constants.BE_MINMEM],
12773 self.be_proposed[constants.BE_MAXMEM]),
12774 errors.ECODE_INVAL)
12776 if self.op.runtime_mem > current_memory:
12777 _CheckNodeFreeMemory(self, instance.primary_node,
12778 "ballooning memory for instance %s" %
12780 self.op.runtime_mem - current_memory,
12781 instance.hypervisor)
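# Illustrative sketch: runtime memory changes are only accepted within the
# instance's [MINMEM, MAXMEM] range unless --force is given, and growing the
# instance only requires the delta to be free on the primary node:
#
#   minmem, maxmem, current, requested = 1024, 4096, 2048, 3072   # MB
#   assert minmem <= requested <= maxmem
#   extra_needed = max(0, requested - current)                     # 1024 MB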
12783 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12784 raise errors.OpPrereqError("Disk operations not supported for"
12785 " diskless instances",
12786 errors.ECODE_INVAL)
12788 def _PrepareNicCreate(_, params, private):
12789 self._PrepareNicModification(params, private, None, None,
12790 {}, cluster, pnode)
12791 return (None, None)
12793 def _PrepareNicMod(_, nic, params, private):
12794 self._PrepareNicModification(params, private, nic.ip, nic.network,
12795 nic.nicparams, cluster, pnode)
12798 def _PrepareNicRemove(_, params, private):
12800 net = params.network
12801 if net is not None and ip is not None:
12802 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
12804 # Verify NIC changes (operating on copy)
12805 nics = instance.nics[:]
12806 ApplyContainerMods("NIC", nics, None, self.nicmod,
12807 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
12808 if len(nics) > constants.MAX_NICS:
12809 raise errors.OpPrereqError("Instance has too many network interfaces"
12810 " (%d), cannot add more" % constants.MAX_NICS,
12811 errors.ECODE_STATE)
12813 # Verify disk changes (operating on a copy)
12814 disks = instance.disks[:]
12815 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12816 if len(disks) > constants.MAX_DISKS:
12817 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12818 " more" % constants.MAX_DISKS,
12819 errors.ECODE_STATE)
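# Illustrative sketch (placeholder names instance_nics, new_nics_to_add and
# MAX_NICS): both the NIC and disk checks above follow the same pattern --
# apply the requested modifications to a *copy* of the container while still
# in the prereq phase, then validate only the resulting size:
#
#   nics = list(instance_nics)            # copy of the current NICs
#   nics.extend(new_nics_to_add)          # simulate the "add" modifications
#   if len(nics) > MAX_NICS:
#       raise ValueError("too many NICs (%d), limit is %d" %
#                        (len(nics), MAX_NICS))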
12821 if self.op.offline is not None:
12822 if self.op.offline:
12823 msg = "can't change to offline"
12825 msg = "can't change to online"
12826 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12828 # Pre-compute NIC changes (necessary to use result in hooks)
12829 self._nic_chgdesc = []
12831 # Operate on copies as this is still in prereq
12832 nics = [nic.Copy() for nic in instance.nics]
12833 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12834 self._CreateNewNic, self._ApplyNicMods, None)
12835 self._new_nics = nics
12837 self._new_nics = None
12839 def _ConvertPlainToDrbd(self, feedback_fn):
12840 """Converts an instance from plain to drbd.
12843 feedback_fn("Converting template to drbd")
12844 instance = self.instance
12845 pnode = instance.primary_node
12846 snode = self.op.remote_node
12848 assert instance.disk_template == constants.DT_PLAIN
12850 # create a fake disk info for _GenerateDiskTemplate
12851 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12852 constants.IDISK_VG: d.logical_id[0]}
12853 for d in instance.disks]
12854 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12855 instance.name, pnode, [snode],
12856 disk_info, None, None, 0, feedback_fn,
12858 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12860 info = _GetInstanceInfoText(instance)
12861 feedback_fn("Creating additional volumes...")
12862 # first, create the missing data and meta devices
12863 for disk in anno_disks:
12864 # unfortunately this is... not too nice
12865 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12867 for child in disk.children:
12868 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12869 # at this stage, all new LVs have been created and we can rename the old ones
12871 feedback_fn("Renaming original volumes...")
12872 rename_list = [(o, n.children[0].logical_id)
12873 for (o, n) in zip(instance.disks, new_disks)]
12874 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12875 result.Raise("Failed to rename original LVs")
12877 feedback_fn("Initializing DRBD devices...")
12878 # all child devices are in place, we can now create the DRBD devices
12879 for disk in anno_disks:
12880 for node in [pnode, snode]:
12881 f_create = node == pnode
12882 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12884 # at this point, the instance has been modified
12885 instance.disk_template = constants.DT_DRBD8
12886 instance.disks = new_disks
12887 self.cfg.Update(instance, feedback_fn)
12889 # Release node locks while waiting for sync
12890 _ReleaseLocks(self, locking.LEVEL_NODE)
12892 # disks are created, waiting for sync
12893 disk_abort = not _WaitForSync(self, instance,
12894 oneshot=not self.op.wait_for_sync)
12896 raise errors.OpExecError("There are some degraded disks for"
12897 " this instance, please cleanup manually")
12899 # Node resource locks will be released by caller
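# Illustrative sketch (hypothetical values): the "fake" disk_info list built
# at the top of _ConvertPlainToDrbd simply re-describes each existing plain
# LV so that the generic disk-template generator can lay DRBD out on top:
#
#   existing = [{"size": 10240, "mode": "rw", "vg": "xenvg"}]
#   disk_info = [{"size": d["size"], "mode": d["mode"], "vg": d["vg"]}
#                for d in existing]
#
# The data and meta LVs are then created on both nodes, the original LVs are
# renamed into place, and the DRBD devices are assembled on top of them.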
12901 def _ConvertDrbdToPlain(self, feedback_fn):
12902 """Converts an instance from drbd to plain.
12905 instance = self.instance
12907 assert len(instance.secondary_nodes) == 1
12908 assert instance.disk_template == constants.DT_DRBD8
12910 pnode = instance.primary_node
12911 snode = instance.secondary_nodes[0]
12912 feedback_fn("Converting template to plain")
12914 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12915 new_disks = [d.children[0] for d in instance.disks]
12917 # copy over size and mode
12918 for parent, child in zip(old_disks, new_disks):
12919 child.size = parent.size
12920 child.mode = parent.mode
12922 # this is a DRBD disk, return its port to the pool
12923 # NOTE: this must be done right before the call to cfg.Update!
12924 for disk in old_disks:
12925 tcp_port = disk.logical_id[2]
12926 self.cfg.AddTcpUdpPort(tcp_port)
12928 # update instance structure
12929 instance.disks = new_disks
12930 instance.disk_template = constants.DT_PLAIN
12931 self.cfg.Update(instance, feedback_fn)
12933 # Release locks in case removing disks takes a while
12934 _ReleaseLocks(self, locking.LEVEL_NODE)
12936 feedback_fn("Removing volumes on the secondary node...")
12937 for disk in old_disks:
12938 self.cfg.SetDiskID(disk, snode)
12939 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12941 self.LogWarning("Could not remove block device %s on node %s,"
12942 " continuing anyway: %s", disk.iv_name, snode, msg)
12944 feedback_fn("Removing unneeded volumes on the primary node...")
12945 for idx, disk in enumerate(old_disks):
12946 meta = disk.children[1]
12947 self.cfg.SetDiskID(meta, pnode)
12948 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12950 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12951 " continuing anyway: %s", idx, pnode, msg)
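# Illustrative sketch: the plain conversion keeps only each DRBD disk's data
# child (children[0]) and copies size/mode over, roughly:
#
#   new_disks = [drbd.children[0] for drbd in old_disks]
#   for parent, child in zip(old_disks, new_disks):
#       child.size, child.mode = parent.size, parent.mode
#
# The meta LVs (children[1]) and the secondary node's copies are removed
# afterwards, and each DRBD TCP port is returned to the cluster-wide pool.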
12953 def _CreateNewDisk(self, idx, params, _):
12954 """Creates a new disk.
12957 instance = self.instance
12960 if instance.disk_template in constants.DTS_FILEBASED:
12961 (file_driver, file_path) = instance.disks[0].logical_id
12962 file_path = os.path.dirname(file_path)
12964 file_driver = file_path = None
12967 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12968 instance.primary_node, instance.secondary_nodes,
12969 [params], file_path, file_driver, idx,
12970 self.Log, self.diskparams)[0]
12972 info = _GetInstanceInfoText(instance)
12974 logging.info("Creating volume %s for instance %s",
12975 disk.iv_name, instance.name)
12976 # Note: this needs to be kept in sync with _CreateDisks
12978 for node in instance.all_nodes:
12979 f_create = (node == instance.primary_node)
12981 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12982 except errors.OpExecError, err:
12983 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12984 disk.iv_name, disk, node, err)
12987 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12991 def _ModifyDisk(idx, disk, params, _):
12992 """Modifies a disk.
12995 disk.mode = params[constants.IDISK_MODE]
12998 ("disk.mode/%d" % idx, disk.mode),
13001 def _RemoveDisk(self, idx, root, _):
13005 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13006 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13007 self.cfg.SetDiskID(disk, node)
13008 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13010 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13011 " continuing anyway", idx, node, msg)
13013 # if this is a DRBD disk, return its port to the pool
13014 if root.dev_type in constants.LDS_DRBD:
13015 self.cfg.AddTcpUdpPort(root.logical_id[2])
13018 def _CreateNewNic(idx, params, private):
13019 """Creates data structure for a new network interface.
13022 mac = params[constants.INIC_MAC]
13023 ip = params.get(constants.INIC_IP, None)
13024 network = params.get(constants.INIC_NETWORK, None)
13025 #TODO: not private.filled?? can a nic have no nicparams??
13026 nicparams = private.filled
13028 return (objects.NIC(mac=mac, ip=ip, network=network, nicparams=nicparams), [
13030 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13031 (mac, ip, private.filled[constants.NIC_MODE],
13032 private.filled[constants.NIC_LINK],
13037 def _ApplyNicMods(idx, nic, params, private):
13038 """Modifies a network interface.
13043 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13045 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13046 setattr(nic, key, params[key])
13049 nic.nicparams = private.filled
13051 for (key, val) in nic.nicparams.items():
13052 changes.append(("nic.%s/%d" % (key, idx), val))
13056 def Exec(self, feedback_fn):
13057 """Modifies an instance.
13059 All parameters take effect only at the next restart of the instance.
13062 # Process the warnings from CheckPrereq here, as we don't have a
13063 # feedback_fn there.
13064 # TODO: Replace with self.LogWarning
13065 for warn in self.warn:
13066 feedback_fn("WARNING: %s" % warn)
13068 assert ((self.op.disk_template is None) ^
13069 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13070 "Not owning any node resource locks"
13073 instance = self.instance
13076 if self.op.runtime_mem:
13077 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13079 self.op.runtime_mem)
13080 rpcres.Raise("Cannot modify instance runtime memory")
13081 result.append(("runtime_memory", self.op.runtime_mem))
13083 # Apply disk changes
13084 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13085 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13086 _UpdateIvNames(0, instance.disks)
13088 if self.op.disk_template:
13090 check_nodes = set(instance.all_nodes)
13091 if self.op.remote_node:
13092 check_nodes.add(self.op.remote_node)
13093 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13094 owned = self.owned_locks(level)
13095 assert not (check_nodes - owned), \
13096 ("Not owning the correct locks, owning %r, expected at least %r" %
13097 (owned, check_nodes))
13099 r_shut = _ShutdownInstanceDisks(self, instance)
13101 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13102 " proceed with disk template conversion")
13103 mode = (instance.disk_template, self.op.disk_template)
13105 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13107 self.cfg.ReleaseDRBDMinors(instance.name)
13109 result.append(("disk_template", self.op.disk_template))
13111 assert instance.disk_template == self.op.disk_template, \
13112 ("Expected disk template '%s', found '%s'" %
13113 (self.op.disk_template, instance.disk_template))
13115 # Release node and resource locks if there are any (they might already have
13116 # been released during disk conversion)
13117 _ReleaseLocks(self, locking.LEVEL_NODE)
13118 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13120 # Apply NIC changes
13121 if self._new_nics is not None:
13122 instance.nics = self._new_nics
13123 result.extend(self._nic_chgdesc)
13126 if self.op.hvparams:
13127 instance.hvparams = self.hv_inst
13128 for key, val in self.op.hvparams.iteritems():
13129 result.append(("hv/%s" % key, val))
13132 if self.op.beparams:
13133 instance.beparams = self.be_inst
13134 for key, val in self.op.beparams.iteritems():
13135 result.append(("be/%s" % key, val))
13138 if self.op.os_name:
13139 instance.os = self.op.os_name
13142 if self.op.osparams:
13143 instance.osparams = self.os_inst
13144 for key, val in self.op.osparams.iteritems():
13145 result.append(("os/%s" % key, val))
13147 if self.op.offline is None:
13150 elif self.op.offline:
13151 # Mark instance as offline
13152 self.cfg.MarkInstanceOffline(instance.name)
13153 result.append(("admin_state", constants.ADMINST_OFFLINE))
13155 # Mark instance as online, but stopped
13156 self.cfg.MarkInstanceDown(instance.name)
13157 result.append(("admin_state", constants.ADMINST_DOWN))
13159 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13161 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13162 self.owned_locks(locking.LEVEL_NODE)), \
13163 "All node locks should have been released by now"
13167 _DISK_CONVERSIONS = {
13168 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13169 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13173 class LUInstanceChangeGroup(LogicalUnit):
13174 HPATH = "instance-change-group"
13175 HTYPE = constants.HTYPE_INSTANCE
13178 def ExpandNames(self):
13179 self.share_locks = _ShareAll()
13180 self.needed_locks = {
13181 locking.LEVEL_NODEGROUP: [],
13182 locking.LEVEL_NODE: [],
13185 self._ExpandAndLockInstance()
13187 if self.op.target_groups:
13188 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13189 self.op.target_groups)
13191 self.req_target_uuids = None
13193 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13195 def DeclareLocks(self, level):
13196 if level == locking.LEVEL_NODEGROUP:
13197 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13199 if self.req_target_uuids:
13200 lock_groups = set(self.req_target_uuids)
13202 # Lock all groups used by instance optimistically; this requires going
13203 # via the node before it's locked, requiring verification later on
13204 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13205 lock_groups.update(instance_groups)
13207 # No target groups, need to lock all of them
13208 lock_groups = locking.ALL_SET
13210 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13212 elif level == locking.LEVEL_NODE:
13213 if self.req_target_uuids:
13214 # Lock all nodes used by instances
13215 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13216 self._LockInstancesNodes()
13218 # Lock all nodes in all potential target groups
13219 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13220 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13221 member_nodes = [node_name
13222 for group in lock_groups
13223 for node_name in self.cfg.GetNodeGroup(group).members]
13224 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13226 # Lock all nodes as all groups are potential targets
13227 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13229 def CheckPrereq(self):
13230 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13231 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13232 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13234 assert (self.req_target_uuids is None or
13235 owned_groups.issuperset(self.req_target_uuids))
13236 assert owned_instances == set([self.op.instance_name])
13238 # Get instance information
13239 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13241 # Check if node groups for locked instance are still correct
13242 assert owned_nodes.issuperset(self.instance.all_nodes), \
13243 ("Instance %s's nodes changed while we kept the lock" %
13244 self.op.instance_name)
13246 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13249 if self.req_target_uuids:
13250 # User requested specific target groups
13251 self.target_uuids = frozenset(self.req_target_uuids)
13253 # All groups except those used by the instance are potential targets
13254 self.target_uuids = owned_groups - inst_groups
13256 conflicting_groups = self.target_uuids & inst_groups
13257 if conflicting_groups:
13258 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13259 " used by the instance '%s'" %
13260 (utils.CommaJoin(conflicting_groups),
13261 self.op.instance_name),
13262 errors.ECODE_INVAL)
13264 if not self.target_uuids:
13265 raise errors.OpPrereqError("There are no possible target groups",
13266 errors.ECODE_INVAL)
13268 def BuildHooksEnv(self):
13269 """Build hooks env.
13272 assert self.target_uuids
13275 "TARGET_GROUPS": " ".join(self.target_uuids),
13278 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13282 def BuildHooksNodes(self):
13283 """Build hooks nodes.
13286 mn = self.cfg.GetMasterNode()
13287 return ([mn], [mn])
13289 def Exec(self, feedback_fn):
13290 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13292 assert instances == [self.op.instance_name], "Instance not locked"
13294 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13295 instances=instances, target_groups=list(self.target_uuids))
13297 ial.Run(self.op.iallocator)
13299 if not ial.success:
13300 raise errors.OpPrereqError("Can't compute solution for changing group of"
13301 " instance '%s' using iallocator '%s': %s" %
13302 (self.op.instance_name, self.op.iallocator,
13304 errors.ECODE_NORES)
13306 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13308 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13309 " instance '%s'", len(jobs), self.op.instance_name)
13311 return ResultWithJobs(jobs)
13314 class LUBackupQuery(NoHooksLU):
13315 """Query the exports list
13320 def CheckArguments(self):
13321 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13322 ["node", "export"], self.op.use_locking)
13324 def ExpandNames(self):
13325 self.expq.ExpandNames(self)
13327 def DeclareLocks(self, level):
13328 self.expq.DeclareLocks(self, level)
13330 def Exec(self, feedback_fn):
13333 for (node, expname) in self.expq.OldStyleQuery(self):
13334 if expname is None:
13335 result[node] = False
13337 result.setdefault(node, []).append(expname)
13342 class _ExportQuery(_QueryBase):
13343 FIELDS = query.EXPORT_FIELDS
13345 #: The node name is not a unique key for this query
13346 SORT_FIELD = "node"
13348 def ExpandNames(self, lu):
13349 lu.needed_locks = {}
13351 # The following variables interact with _QueryBase._GetNames
13353 self.wanted = _GetWantedNodes(lu, self.names)
13355 self.wanted = locking.ALL_SET
13357 self.do_locking = self.use_locking
13359 if self.do_locking:
13360 lu.share_locks = _ShareAll()
13361 lu.needed_locks = {
13362 locking.LEVEL_NODE: self.wanted,
13365 def DeclareLocks(self, lu, level):
13368 def _GetQueryData(self, lu):
13369 """Computes the list of nodes and their attributes.
13372 # Locking is not used
13374 assert not (compat.any(lu.glm.is_owned(level)
13375 for level in locking.LEVELS
13376 if level != locking.LEVEL_CLUSTER) or
13377 self.do_locking or self.use_locking)
13379 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13383 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13385 result.append((node, None))
13387 result.extend((node, expname) for expname in nres.payload)
13392 class LUBackupPrepare(NoHooksLU):
13393 """Prepares an instance for an export and returns useful information.
13398 def ExpandNames(self):
13399 self._ExpandAndLockInstance()
13401 def CheckPrereq(self):
13402 """Check prerequisites.
13405 instance_name = self.op.instance_name
13407 self.instance = self.cfg.GetInstanceInfo(instance_name)
13408 assert self.instance is not None, \
13409 "Cannot retrieve locked instance %s" % self.op.instance_name
13410 _CheckNodeOnline(self, self.instance.primary_node)
13412 self._cds = _GetClusterDomainSecret()
13414 def Exec(self, feedback_fn):
13415 """Prepares an instance for an export.
13418 instance = self.instance
13420 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13421 salt = utils.GenerateSecret(8)
13423 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13424 result = self.rpc.call_x509_cert_create(instance.primary_node,
13425 constants.RIE_CERT_VALIDITY)
13426 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13428 (name, cert_pem) = result.payload
13430 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13434 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13435 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13437 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13443 class LUBackupExport(LogicalUnit):
13444 """Export an instance to an image in the cluster.
13447 HPATH = "instance-export"
13448 HTYPE = constants.HTYPE_INSTANCE
13451 def CheckArguments(self):
13452 """Check the arguments.
13455 self.x509_key_name = self.op.x509_key_name
13456 self.dest_x509_ca_pem = self.op.destination_x509_ca
13458 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13459 if not self.x509_key_name:
13460 raise errors.OpPrereqError("Missing X509 key name for encryption",
13461 errors.ECODE_INVAL)
13463 if not self.dest_x509_ca_pem:
13464 raise errors.OpPrereqError("Missing destination X509 CA",
13465 errors.ECODE_INVAL)
13467 def ExpandNames(self):
13468 self._ExpandAndLockInstance()
13470 # Lock all nodes for local exports
13471 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13472 # FIXME: lock only instance primary and destination node
13474 # Sad but true, for now we have to lock all nodes, as we don't know where
13475 # the previous export might be, and in this LU we search for it and
13476 # remove it from its current node. In the future we could fix this by:
13477 # - making a tasklet to search (share-lock all), then create the
13478 # new one, then one to remove, after
13479 # - removing the removal operation altogether
13480 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13482 def DeclareLocks(self, level):
13483 """Last minute lock declaration."""
13484 # All nodes are locked anyway, so nothing to do here.
13486 def BuildHooksEnv(self):
13487 """Build hooks env.
13489 This will run on the master, primary node and target node.
13493 "EXPORT_MODE": self.op.mode,
13494 "EXPORT_NODE": self.op.target_node,
13495 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13496 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13497 # TODO: Generic function for boolean env variables
13498 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13501 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13505 def BuildHooksNodes(self):
13506 """Build hooks nodes.
13509 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13511 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13512 nl.append(self.op.target_node)
13516 def CheckPrereq(self):
13517 """Check prerequisites.
13519 This checks that the instance and node names are valid.
13522 instance_name = self.op.instance_name
13524 self.instance = self.cfg.GetInstanceInfo(instance_name)
13525 assert self.instance is not None, \
13526 "Cannot retrieve locked instance %s" % self.op.instance_name
13527 _CheckNodeOnline(self, self.instance.primary_node)
13529 if (self.op.remove_instance and
13530 self.instance.admin_state == constants.ADMINST_UP and
13531 not self.op.shutdown):
13532 raise errors.OpPrereqError("Can not remove instance without shutting it"
13535 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13536 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13537 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13538 assert self.dst_node is not None
13540 _CheckNodeOnline(self, self.dst_node.name)
13541 _CheckNodeNotDrained(self, self.dst_node.name)
13544 self.dest_disk_info = None
13545 self.dest_x509_ca = None
13547 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13548 self.dst_node = None
13550 if len(self.op.target_node) != len(self.instance.disks):
13551 raise errors.OpPrereqError(("Received destination information for %s"
13552 " disks, but instance %s has %s disks") %
13553 (len(self.op.target_node), instance_name,
13554 len(self.instance.disks)),
13555 errors.ECODE_INVAL)
13557 cds = _GetClusterDomainSecret()
13559 # Check X509 key name
13561 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13562 except (TypeError, ValueError), err:
13563 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13565 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13566 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13567 errors.ECODE_INVAL)
13569 # Load and verify CA
13571 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13572 except OpenSSL.crypto.Error, err:
13573 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13574 (err, ), errors.ECODE_INVAL)
13576 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13577 if errcode is not None:
13578 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13579 (msg, ), errors.ECODE_INVAL)
13581 self.dest_x509_ca = cert
13583 # Verify target information
13585 for idx, disk_data in enumerate(self.op.target_node):
13587 (host, port, magic) = \
13588 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13589 except errors.GenericError, err:
13590 raise errors.OpPrereqError("Target info for disk %s: %s" %
13591 (idx, err), errors.ECODE_INVAL)
13593 disk_info.append((host, port, magic))
13595 assert len(disk_info) == len(self.op.target_node)
13596 self.dest_disk_info = disk_info
13599 raise errors.ProgrammerError("Unhandled export mode %r" %
13602 # instance disk type verification
13603 # TODO: Implement export support for file-based disks
13604 for disk in self.instance.disks:
13605 if disk.dev_type == constants.LD_FILE:
13606 raise errors.OpPrereqError("Export not supported for instances with"
13607 " file-based disks", errors.ECODE_INVAL)
13609 def _CleanupExports(self, feedback_fn):
13610 """Removes exports of current instance from all other nodes.
13612 If an instance in a cluster with nodes A..D was exported to node C, its
13613 exports will be removed from the nodes A, B and D.
13616 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13618 nodelist = self.cfg.GetNodeList()
13619 nodelist.remove(self.dst_node.name)
13621 # on one-node clusters nodelist will be empty after the removal
13622 # if we proceed, the backup would be removed because OpBackupQuery
13623 # substitutes an empty list with the full cluster node list.
13624 iname = self.instance.name
13626 feedback_fn("Removing old exports for instance %s" % iname)
13627 exportlist = self.rpc.call_export_list(nodelist)
13628 for node in exportlist:
13629 if exportlist[node].fail_msg:
13631 if iname in exportlist[node].payload:
13632 msg = self.rpc.call_export_remove(node, iname).fail_msg
13634 self.LogWarning("Could not remove older export for instance %s"
13635 " on node %s: %s", iname, node, msg)
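# Illustrative sketch: export cleanup queries the export list of every node
# except the destination and removes any stale export of this instance, e.g.:
#
#   nodelist = ["nodeA", "nodeB", "nodeC", "nodeD"]   # hypothetical cluster
#   nodelist.remove("nodeC")                          # the export target
#   # -> old exports of the instance are removed from nodeA, nodeB and nodeD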
13637 def Exec(self, feedback_fn):
13638 """Export an instance to an image in the cluster.
13641 assert self.op.mode in constants.EXPORT_MODES
13643 instance = self.instance
13644 src_node = instance.primary_node
13646 if self.op.shutdown:
13647 # shutdown the instance, but not the disks
13648 feedback_fn("Shutting down instance %s" % instance.name)
13649 result = self.rpc.call_instance_shutdown(src_node, instance,
13650 self.op.shutdown_timeout)
13651 # TODO: Maybe ignore failures if ignore_remove_failures is set
13652 result.Raise("Could not shutdown instance %s on"
13653 " node %s" % (instance.name, src_node))
13655 # set the disks ID correctly since call_instance_start needs the
13656 # correct drbd minor to create the symlinks
13657 for disk in instance.disks:
13658 self.cfg.SetDiskID(disk, src_node)
13660 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13663 # Activate the instance disks if we're exporting a stopped instance
13664 feedback_fn("Activating disks for %s" % instance.name)
13665 _StartInstanceDisks(self, instance, None)
13668 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13671 helper.CreateSnapshots()
13673 if (self.op.shutdown and
13674 instance.admin_state == constants.ADMINST_UP and
13675 not self.op.remove_instance):
13676 assert not activate_disks
13677 feedback_fn("Starting instance %s" % instance.name)
13678 result = self.rpc.call_instance_start(src_node,
13679 (instance, None, None), False)
13680 msg = result.fail_msg
13682 feedback_fn("Failed to start instance: %s" % msg)
13683 _ShutdownInstanceDisks(self, instance)
13684 raise errors.OpExecError("Could not start instance: %s" % msg)
13686 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13687 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13688 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13689 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13690 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13692 (key_name, _, _) = self.x509_key_name
13695 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13698 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13699 key_name, dest_ca_pem,
13704 # Check for backwards compatibility
13705 assert len(dresults) == len(instance.disks)
13706 assert compat.all(isinstance(i, bool) for i in dresults), \
13707 "Not all results are boolean: %r" % dresults
13711 feedback_fn("Deactivating disks for %s" % instance.name)
13712 _ShutdownInstanceDisks(self, instance)
13714 if not (compat.all(dresults) and fin_resu):
13717 failures.append("export finalization")
13718 if not compat.all(dresults):
13719 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13721 failures.append("disk export: disk(s) %s" % fdsk)
13723 raise errors.OpExecError("Export failed, errors in %s" %
13724 utils.CommaJoin(failures))
13726 # At this point, the export was successful, we can cleanup/finish
13728 # Remove instance if requested
13729 if self.op.remove_instance:
13730 feedback_fn("Removing instance %s" % instance.name)
13731 _RemoveInstance(self, feedback_fn, instance,
13732 self.op.ignore_remove_failures)
13734 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13735 self._CleanupExports(feedback_fn)
13737 return fin_resu, dresults
13740 class LUBackupRemove(NoHooksLU):
13741 """Remove exports related to the named instance.
13746 def ExpandNames(self):
13747 self.needed_locks = {}
13748 # We need all nodes to be locked in order for RemoveExport to work, but we
13749 # don't need to lock the instance itself, as nothing will happen to it (and
13750 # we can remove exports also for a removed instance)
13751 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13753 def Exec(self, feedback_fn):
13754 """Remove any export.
13757 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13758 # If the instance was not found we'll try with the name that was passed in.
13759 # This will only work if it was an FQDN, though.
13761 if not instance_name:
13763 instance_name = self.op.instance_name
13765 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13766 exportlist = self.rpc.call_export_list(locked_nodes)
13768 for node in exportlist:
13769 msg = exportlist[node].fail_msg
13771 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13773 if instance_name in exportlist[node].payload:
13775 result = self.rpc.call_export_remove(node, instance_name)
13776 msg = result.fail_msg
13778 logging.error("Could not remove export for instance %s"
13779 " on node %s: %s", instance_name, node, msg)
13781 if fqdn_warn and not found:
13782 feedback_fn("Export not found. If trying to remove an export belonging"
13783 " to a deleted instance please use its Fully Qualified"
13787 class LUGroupAdd(LogicalUnit):
13788 """Logical unit for creating node groups.
13791 HPATH = "group-add"
13792 HTYPE = constants.HTYPE_GROUP
13795 def ExpandNames(self):
13796 # We need the new group's UUID here so that we can create and acquire the
13797 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13798 # that it should not check whether the UUID exists in the configuration.
13799 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13800 self.needed_locks = {}
13801 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13803 def CheckPrereq(self):
13804 """Check prerequisites.
13806 This checks that the given group name is not already an existing node group.
13811 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13812 except errors.OpPrereqError:
13815 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13816 " node group (UUID: %s)" %
13817 (self.op.group_name, existing_uuid),
13818 errors.ECODE_EXISTS)
13820 if self.op.ndparams:
13821 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13823 if self.op.hv_state:
13824 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13826 self.new_hv_state = None
13828 if self.op.disk_state:
13829 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13831 self.new_disk_state = None
13833 if self.op.diskparams:
13834 for templ in constants.DISK_TEMPLATES:
13835 if templ in self.op.diskparams:
13836 utils.ForceDictType(self.op.diskparams[templ],
13837 constants.DISK_DT_TYPES)
13838 self.new_diskparams = self.op.diskparams
13840 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13841 except errors.OpPrereqError, err:
13842 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
13843 errors.ECODE_INVAL)
13845 self.new_diskparams = {}
13847 if self.op.ipolicy:
13848 cluster = self.cfg.GetClusterInfo()
13849 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13851 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13852 except errors.ConfigurationError, err:
13853 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13854 errors.ECODE_INVAL)
13856 def BuildHooksEnv(self):
13857 """Build hooks env.
13861 "GROUP_NAME": self.op.group_name,
13864 def BuildHooksNodes(self):
13865 """Build hooks nodes.
13868 mn = self.cfg.GetMasterNode()
13869 return ([mn], [mn])
13871 def Exec(self, feedback_fn):
13872 """Add the node group to the cluster.
13875 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13876 uuid=self.group_uuid,
13877 alloc_policy=self.op.alloc_policy,
13878 ndparams=self.op.ndparams,
13879 diskparams=self.new_diskparams,
13880 ipolicy=self.op.ipolicy,
13881 hv_state_static=self.new_hv_state,
13882 disk_state_static=self.new_disk_state)
13884 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13885 del self.remove_locks[locking.LEVEL_NODEGROUP]
13888 class LUGroupAssignNodes(NoHooksLU):
13889 """Logical unit for assigning nodes to groups.
13894 def ExpandNames(self):
13895 # These raise errors.OpPrereqError on their own:
13896 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13897 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13899 # We want to lock all the affected nodes and groups. We have readily
13900 # available the list of nodes, and the *destination* group. To gather the
13901 # list of "source" groups, we need to fetch node information later on.
13902 self.needed_locks = {
13903 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13904 locking.LEVEL_NODE: self.op.nodes,
13907 def DeclareLocks(self, level):
13908 if level == locking.LEVEL_NODEGROUP:
13909 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13911 # Try to get all affected nodes' groups without having the group or node
13912 # lock yet. Needs verification later in the code flow.
13913 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13915 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13917 def CheckPrereq(self):
13918 """Check prerequisites.
13921 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13922 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13923 frozenset(self.op.nodes))
13925 expected_locks = (set([self.group_uuid]) |
13926 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13927 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13928 if actual_locks != expected_locks:
13929 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13930 " current groups are '%s', used to be '%s'" %
13931 (utils.CommaJoin(expected_locks),
13932 utils.CommaJoin(actual_locks)))
13934 self.node_data = self.cfg.GetAllNodesInfo()
13935 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13936 instance_data = self.cfg.GetAllInstancesInfo()
13938 if self.group is None:
13939 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13940 (self.op.group_name, self.group_uuid))
13942 (new_splits, previous_splits) = \
13943 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13944 for node in self.op.nodes],
13945 self.node_data, instance_data)
13948 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13950 if not self.op.force:
13951 raise errors.OpExecError("The following instances get split by this"
13952 " change and --force was not given: %s" %
13955 self.LogWarning("This operation will split the following instances: %s",
13958 if previous_splits:
13959 self.LogWarning("In addition, these already-split instances continue"
13960 " to be split across groups: %s",
13961 utils.CommaJoin(utils.NiceSort(previous_splits)))
13963 def Exec(self, feedback_fn):
13964 """Assign nodes to a new group.
13967 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13969 self.cfg.AssignGroupNodes(mods)
13972 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13973 """Check for split instances after a node assignment.
13975 This method considers a series of node assignments as an atomic operation,
13976 and returns information about split instances after applying the set of changes.
13979 In particular, it returns information about newly split instances, and
13980 instances that were already split, and remain so after the change.
13982 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
13985 @type changes: list of (node_name, new_group_uuid) pairs.
13986 @param changes: list of node assignments to consider.
13987 @param node_data: a dict with data for all nodes
13988 @param instance_data: a dict with all instances to consider
13989 @rtype: a two-tuple
13990 @return: a list of instances that were previously okay and result split as a
13991 consequence of this change, and a list of instances that were previously
13992 split and this change does not fix.
13995 changed_nodes = dict((node, group) for node, group in changes
13996 if node_data[node].group != group)
13998 all_split_instances = set()
13999 previously_split_instances = set()
14001 def InstanceNodes(instance):
14002 return [instance.primary_node] + list(instance.secondary_nodes)
14004 for inst in instance_data.values():
14005 if inst.disk_template not in constants.DTS_INT_MIRROR:
14008 instance_nodes = InstanceNodes(inst)
14010 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14011 previously_split_instances.add(inst.name)
14013 if len(set(changed_nodes.get(node, node_data[node].group)
14014 for node in instance_nodes)) > 1:
14015 all_split_instances.add(inst.name)
14017 return (list(all_split_instances - previously_split_instances),
14018 list(previously_split_instances & all_split_instances))
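# Illustrative worked example (hypothetical data): with a DRBD instance on
# nodes n1 and n2, both in group g1, reassigning only n1 to group g2 splits
# the instance, while an instance already spanning two groups merely remains
# split:
#
#   node_group = {"n1": "g1", "n2": "g1"}
#   changes = {"n1": "g2"}
#   groups_after = set(changes.get(n, node_group[n]) for n in ("n1", "n2"))
#   # len(groups_after) == 2  -> the instance becomes newly split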
14021 class _GroupQuery(_QueryBase):
14022 FIELDS = query.GROUP_FIELDS
14024 def ExpandNames(self, lu):
14025 lu.needed_locks = {}
14027 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14028 self._cluster = lu.cfg.GetClusterInfo()
14029 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14032 self.wanted = [name_to_uuid[name]
14033 for name in utils.NiceSort(name_to_uuid.keys())]
14035 # Accept names to be either names or UUIDs.
14038 all_uuid = frozenset(self._all_groups.keys())
14040 for name in self.names:
14041 if name in all_uuid:
14042 self.wanted.append(name)
14043 elif name in name_to_uuid:
14044 self.wanted.append(name_to_uuid[name])
14046 missing.append(name)
14049 raise errors.OpPrereqError("Some groups do not exist: %s" %
14050 utils.CommaJoin(missing),
14051 errors.ECODE_NOENT)
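# Illustrative sketch (simplified; the real code checks against the set of
# all known UUIDs): the query accepts either group names or UUIDs, resolves
# everything to UUIDs, and collects unknown entries for a single error:
#
#   name_to_uuid = {"default": "uuid-1"}
#   wanted, missing = [], []
#   for name in ("uuid-1", "default", "nosuchgroup"):
#       if name in name_to_uuid.values():
#           wanted.append(name)
#       elif name in name_to_uuid:
#           wanted.append(name_to_uuid[name])
#       else:
#           missing.append(name)          # -> reported as ECODE_NOENT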
14053 def DeclareLocks(self, lu, level):
14056 def _GetQueryData(self, lu):
14057 """Computes the list of node groups and their attributes.
14060 do_nodes = query.GQ_NODE in self.requested_data
14061 do_instances = query.GQ_INST in self.requested_data
14063 group_to_nodes = None
14064 group_to_instances = None
14066 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14067 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14068 # latter GetAllInstancesInfo() is not enough, for we have to go through
14069 # instance->node. Hence, we will need to process nodes even if we only need
14070 # instance information.
14071 if do_nodes or do_instances:
14072 all_nodes = lu.cfg.GetAllNodesInfo()
14073 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14076 for node in all_nodes.values():
14077 if node.group in group_to_nodes:
14078 group_to_nodes[node.group].append(node.name)
14079 node_to_group[node.name] = node.group
14082 all_instances = lu.cfg.GetAllInstancesInfo()
14083 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14085 for instance in all_instances.values():
14086 node = instance.primary_node
14087 if node in node_to_group:
14088 group_to_instances[node_to_group[node]].append(instance.name)
14091 # Do not pass on node information if it was not requested.
14092 group_to_nodes = None
14094 return query.GroupQueryData(self._cluster,
14095 [self._all_groups[uuid]
14096 for uuid in self.wanted],
14097 group_to_nodes, group_to_instances,
14098 query.GQ_DISKPARAMS in self.requested_data)
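# Illustrative sketch: mapping instances to groups goes through the primary
# node, since instances only reference nodes, not groups:
#
#   nodes = {"n1": "g1", "n2": "g2"}                  # node -> group uuid
#   instances = {"inst1": "n1", "inst2": "n2"}        # instance -> pnode
#   group_to_instances = {}
#   for inst, pnode in instances.items():
#       group_to_instances.setdefault(nodes[pnode], []).append(inst)
#   # {"g1": ["inst1"], "g2": ["inst2"]}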
14101 class LUGroupQuery(NoHooksLU):
14102 """Logical unit for querying node groups.
14107 def CheckArguments(self):
14108 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14109 self.op.output_fields, False)
14111 def ExpandNames(self):
14112 self.gq.ExpandNames(self)
14114 def DeclareLocks(self, level):
14115 self.gq.DeclareLocks(self, level)
14117 def Exec(self, feedback_fn):
14118 return self.gq.OldStyleQuery(self)
14121 class LUGroupSetParams(LogicalUnit):
14122 """Modifies the parameters of a node group.
14125 HPATH = "group-modify"
14126 HTYPE = constants.HTYPE_GROUP
14129 def CheckArguments(self):
14132 self.op.diskparams,
14133 self.op.alloc_policy,
14135 self.op.disk_state,
14139 if all_changes.count(None) == len(all_changes):
14140 raise errors.OpPrereqError("Please pass at least one modification",
14141 errors.ECODE_INVAL)
14143 def ExpandNames(self):
14144 # This raises errors.OpPrereqError on its own:
14145 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14147 self.needed_locks = {
14148 locking.LEVEL_INSTANCE: [],
14149 locking.LEVEL_NODEGROUP: [self.group_uuid],
14152 self.share_locks[locking.LEVEL_INSTANCE] = 1
14154 def DeclareLocks(self, level):
14155 if level == locking.LEVEL_INSTANCE:
14156 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14158 # Lock instances optimistically, needs verification once group lock has
14160 self.needed_locks[locking.LEVEL_INSTANCE] = \
14161 self.cfg.GetNodeGroupInstances(self.group_uuid)
14164 def _UpdateAndVerifyDiskParams(old, new):
14165 """Updates and verifies disk parameters.
14168 new_params = _GetUpdatedParams(old, new)
14169 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14172 def CheckPrereq(self):
14173 """Check prerequisites.
14176 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14178 # Check if locked instances are still correct
14179 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14181 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14182 cluster = self.cfg.GetClusterInfo()
14184 if self.group is None:
14185 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14186 (self.op.group_name, self.group_uuid))
14188 if self.op.ndparams:
14189 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14190 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14191 self.new_ndparams = new_ndparams
14193 if self.op.diskparams:
14194 diskparams = self.group.diskparams
14195 uavdp = self._UpdateAndVerifyDiskParams
14196 # For each disktemplate subdict update and verify the values
14197 new_diskparams = dict((dt,
14198 uavdp(diskparams.get(dt, {}),
14199 self.op.diskparams[dt]))
14200 for dt in constants.DISK_TEMPLATES
14201 if dt in self.op.diskparams)
14202 # Now that we have all the subdicts of diskparams ready, let's merge the
14203 # actual dict with all the updated subdicts
14204 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14206 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14207 except errors.OpPrereqError, err:
14208 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14209 errors.ECODE_INVAL)
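# Illustrative sketch: each disk template's sub-dict is updated independently
# and the result is merged over the group's existing diskparams, e.g.:
#
#   old = {"drbd": {"resync-rate": 1024}, "plain": {}}
#   new = {"drbd": {"metavg": "xenvg"}}
#   merged = dict(old)
#   for templ, params in new.items():
#       merged[templ] = dict(old.get(templ, {}), **params)
#   # merged["drbd"] == {"resync-rate": 1024, "metavg": "xenvg"}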
14211 if self.op.hv_state:
14212 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14213 self.group.hv_state_static)
14215 if self.op.disk_state:
14216 self.new_disk_state = \
14217 _MergeAndVerifyDiskState(self.op.disk_state,
14218 self.group.disk_state_static)
14220 if self.op.ipolicy:
14221 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14225 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14226 inst_filter = lambda inst: inst.name in owned_instances
14227 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14229 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14231 new_ipolicy, instances)
14234 self.LogWarning("After the ipolicy change the following instances"
14235 " violate them: %s",
14236 utils.CommaJoin(violations))
14238 def BuildHooksEnv(self):
14239 """Build hooks env.
14243 "GROUP_NAME": self.op.group_name,
14244 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14247 def BuildHooksNodes(self):
14248 """Build hooks nodes.
14251 mn = self.cfg.GetMasterNode()
14252 return ([mn], [mn])
14254 def Exec(self, feedback_fn):
14255 """Modifies the node group.
14260 if self.op.ndparams:
14261 self.group.ndparams = self.new_ndparams
14262 result.append(("ndparams", str(self.group.ndparams)))
14264 if self.op.diskparams:
14265 self.group.diskparams = self.new_diskparams
14266 result.append(("diskparams", str(self.group.diskparams)))
14268 if self.op.alloc_policy:
14269 self.group.alloc_policy = self.op.alloc_policy
14271 if self.op.hv_state:
14272 self.group.hv_state_static = self.new_hv_state
14274 if self.op.disk_state:
14275 self.group.disk_state_static = self.new_disk_state
14277 if self.op.ipolicy:
14278 self.group.ipolicy = self.new_ipolicy
14280 self.cfg.Update(self.group, feedback_fn)
14284 class LUGroupRemove(LogicalUnit):
14285 HPATH = "group-remove"
14286 HTYPE = constants.HTYPE_GROUP
14289 def ExpandNames(self):
14290 # This raises errors.OpPrereqError on its own:
14291 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14292 self.needed_locks = {
14293 locking.LEVEL_NODEGROUP: [self.group_uuid],
14296 def CheckPrereq(self):
14297 """Check prerequisites.
14299 This checks that the given group name exists as a node group, that it is
14300 empty (i.e., contains no nodes), and that it is not the last group of the
14304 # Verify that the group is empty.
14305 group_nodes = [node.name
14306 for node in self.cfg.GetAllNodesInfo().values()
14307 if node.group == self.group_uuid]
14310 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14312 (self.op.group_name,
14313 utils.CommaJoin(utils.NiceSort(group_nodes))),
14314 errors.ECODE_STATE)
14316 # Verify the cluster would not be left group-less.
14317 if len(self.cfg.GetNodeGroupList()) == 1:
14318 raise errors.OpPrereqError("Group '%s' is the only group,"
14319 " cannot be removed" %
14320 self.op.group_name,
14321 errors.ECODE_STATE)
14323 def BuildHooksEnv(self):
14324 """Build hooks env.
14328 "GROUP_NAME": self.op.group_name,
14331 def BuildHooksNodes(self):
14332 """Build hooks nodes.
14335 mn = self.cfg.GetMasterNode()
14336 return ([mn], [mn])
14338 def Exec(self, feedback_fn):
14339 """Remove the node group.
14343 self.cfg.RemoveNodeGroup(self.group_uuid)
14344 except errors.ConfigurationError:
14345 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14346 (self.op.group_name, self.group_uuid))
14348 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14351 class LUGroupRename(LogicalUnit):
14352 HPATH = "group-rename"
14353 HTYPE = constants.HTYPE_GROUP
14356 def ExpandNames(self):
14357 # This raises errors.OpPrereqError on its own:
14358 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14360 self.needed_locks = {
14361 locking.LEVEL_NODEGROUP: [self.group_uuid],
14364 def CheckPrereq(self):
14365 """Check prerequisites.
14367 Ensures requested new name is not yet used.
14371 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14372 except errors.OpPrereqError:
14375 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14376 " node group (UUID: %s)" %
14377 (self.op.new_name, new_name_uuid),
14378 errors.ECODE_EXISTS)
14380 def BuildHooksEnv(self):
14381 """Build hooks env.
14385 "OLD_NAME": self.op.group_name,
14386 "NEW_NAME": self.op.new_name,
14389 def BuildHooksNodes(self):
14390 """Build hooks nodes.
14393 mn = self.cfg.GetMasterNode()
14395 all_nodes = self.cfg.GetAllNodesInfo()
14396 all_nodes.pop(mn, None)
14399 run_nodes.extend(node.name for node in all_nodes.values()
14400 if node.group == self.group_uuid)
14402 return (run_nodes, run_nodes)
14404 def Exec(self, feedback_fn):
14405 """Rename the node group.
14408 group = self.cfg.GetNodeGroup(self.group_uuid)
14411 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14412 (self.op.group_name, self.group_uuid))
14414 group.name = self.op.new_name
14415 self.cfg.Update(group, feedback_fn)
14417 return self.op.new_name
14420 class LUGroupEvacuate(LogicalUnit):
14421 HPATH = "group-evacuate"
14422 HTYPE = constants.HTYPE_GROUP
14425 def ExpandNames(self):
14426 # This raises errors.OpPrereqError on its own:
14427 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14429 if self.op.target_groups:
14430 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14431 self.op.target_groups)
14433 self.req_target_uuids = []
14435 if self.group_uuid in self.req_target_uuids:
14436 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14437 " as a target group (targets are %s)" %
14439 utils.CommaJoin(self.req_target_uuids)),
14440 errors.ECODE_INVAL)
14442 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14444 self.share_locks = _ShareAll()
14445 self.needed_locks = {
14446 locking.LEVEL_INSTANCE: [],
14447 locking.LEVEL_NODEGROUP: [],
14448 locking.LEVEL_NODE: [],
14451 def DeclareLocks(self, level):
14452 if level == locking.LEVEL_INSTANCE:
14453 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14455 # Lock instances optimistically, needs verification once node and group
14456 # locks have been acquired
14457 self.needed_locks[locking.LEVEL_INSTANCE] = \
14458 self.cfg.GetNodeGroupInstances(self.group_uuid)
14460 elif level == locking.LEVEL_NODEGROUP:
14461 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14463 if self.req_target_uuids:
14464 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14466 # Lock all groups used by instances optimistically; this requires going
14467 # via the node before it's locked, requiring verification later on
14468 lock_groups.update(group_uuid
14469 for instance_name in
14470 self.owned_locks(locking.LEVEL_INSTANCE)
14472 self.cfg.GetInstanceNodeGroups(instance_name))
14474 # No target groups, need to lock all of them
14475 lock_groups = locking.ALL_SET
14477 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14479 elif level == locking.LEVEL_NODE:
14480 # This will only lock the nodes in the group to be evacuated which
14481 # contain actual instances
14482 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14483 self._LockInstancesNodes()
14485 # Lock all nodes in group to be evacuated and target groups
14486 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14487 assert self.group_uuid in owned_groups
14488 member_nodes = [node_name
14489 for group in owned_groups
14490 for node_name in self.cfg.GetNodeGroup(group).members]
14491 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14493 def CheckPrereq(self):
14494 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14495 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14496 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14498 assert owned_groups.issuperset(self.req_target_uuids)
14499 assert self.group_uuid in owned_groups
14501 # Check if locked instances are still correct
14502 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14504 # Get instance information
14505 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14507 # Check if node groups for locked instances are still correct
14508 _CheckInstancesNodeGroups(self.cfg, self.instances,
14509 owned_groups, owned_nodes, self.group_uuid)
14511 if self.req_target_uuids:
14512 # User requested specific target groups
14513 self.target_uuids = self.req_target_uuids
14515 # All groups except the one to be evacuated are potential targets
14516 self.target_uuids = [group_uuid for group_uuid in owned_groups
14517 if group_uuid != self.group_uuid]
14519 if not self.target_uuids:
14520 raise errors.OpPrereqError("There are no possible target groups",
14521 errors.ECODE_INVAL)
14523 def BuildHooksEnv(self):
14524 """Build hooks env.
14528 "GROUP_NAME": self.op.group_name,
14529 "TARGET_GROUPS": " ".join(self.target_uuids),
14532 def BuildHooksNodes(self):
14533 """Build hooks nodes.
14536 mn = self.cfg.GetMasterNode()
14538 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14540 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14542 return (run_nodes, run_nodes)
14544 def Exec(self, feedback_fn):
14545 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14547 assert self.group_uuid not in self.target_uuids
14549 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14550 instances=instances, target_groups=self.target_uuids)
14552 ial.Run(self.op.iallocator)
14554 if not ial.success:
14555 raise errors.OpPrereqError("Can't compute group evacuation using"
14556 " iallocator '%s': %s" %
14557 (self.op.iallocator, ial.info),
14558 errors.ECODE_NORES)
14560 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14562 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14563 len(jobs), self.op.group_name)
14565 return ResultWithJobs(jobs)
14568 class TagsLU(NoHooksLU): # pylint: disable=W0223
14569 """Generic tags LU.
14571 This is an abstract class which is the parent of all the other tags LUs.
14574 def ExpandNames(self):
14575 self.group_uuid = None
14576 self.needed_locks = {}
14578 if self.op.kind == constants.TAG_NODE:
14579 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14580 lock_level = locking.LEVEL_NODE
14581 lock_name = self.op.name
14582 elif self.op.kind == constants.TAG_INSTANCE:
14583 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14584 lock_level = locking.LEVEL_INSTANCE
14585 lock_name = self.op.name
14586 elif self.op.kind == constants.TAG_NODEGROUP:
14587 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14588 lock_level = locking.LEVEL_NODEGROUP
14589 lock_name = self.group_uuid
14590 elif self.op.kind == constants.TAG_NETWORK:
14591 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
14592 lock_level = locking.LEVEL_NETWORK
14593 lock_name = self.network_uuid
14598 if lock_level and getattr(self.op, "use_locking", True):
14599 self.needed_locks[lock_level] = lock_name
14601 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14602 # not possible to acquire the BGL based on opcode parameters)
14604 def CheckPrereq(self):
14605 """Check prerequisites.
14608 if self.op.kind == constants.TAG_CLUSTER:
14609 self.target = self.cfg.GetClusterInfo()
14610 elif self.op.kind == constants.TAG_NODE:
14611 self.target = self.cfg.GetNodeInfo(self.op.name)
14612 elif self.op.kind == constants.TAG_INSTANCE:
14613 self.target = self.cfg.GetInstanceInfo(self.op.name)
14614 elif self.op.kind == constants.TAG_NODEGROUP:
14615 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14616 elif self.op.kind == constants.TAG_NETWORK:
14617 self.target = self.cfg.GetNetwork(self.network_uuid)
14618 else:
14619 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14620 str(self.op.kind), errors.ECODE_INVAL)
14623 class LUTagsGet(TagsLU):
14624 """Returns the tags of a given object.
14629 def ExpandNames(self):
14630 TagsLU.ExpandNames(self)
14632 # Share locks as this is only a read operation
14633 self.share_locks = _ShareAll()
14635 def Exec(self, feedback_fn):
14636 """Returns the tag list.
14639 return list(self.target.GetTags())
14642 class LUTagsSearch(NoHooksLU):
14643 """Searches the tags for a given pattern.
14648 def ExpandNames(self):
14649 self.needed_locks = {}
14651 def CheckPrereq(self):
14652 """Check prerequisites.
14654 This checks the pattern passed for validity by compiling it.
14657 try:
14658 self.re = re.compile(self.op.pattern)
14659 except re.error, err:
14660 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14661 (self.op.pattern, err), errors.ECODE_INVAL)
14663 def Exec(self, feedback_fn):
14664 """Returns the tag list.
14667 cfg = self.cfg
14668 tgts = [("/cluster", cfg.GetClusterInfo())]
14669 ilist = cfg.GetAllInstancesInfo().values()
14670 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14671 nlist = cfg.GetAllNodesInfo().values()
14672 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14673 tgts.extend(("/nodegroup/%s" % n.name, n)
14674 for n in cfg.GetAllNodeGroupsInfo().values())
14675 results = []
14676 for path, target in tgts:
14677 for tag in target.GetTags():
14678 if self.re.search(tag):
14679 results.append((path, tag))
14680 return results
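# Illustrative sketch (not part of the original module): the list returned by
# Exec consists of (path, tag) tuples, for example:
#
#   [("/cluster", "production"),
#    ("/instances/inst1.example.com", "production")]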
14683 class LUTagsSet(TagsLU):
14684 """Sets a tag on a given object.
14689 def CheckPrereq(self):
14690 """Check prerequisites.
14692 This checks the type and length of the tag name and value.
14695 TagsLU.CheckPrereq(self)
14696 for tag in self.op.tags:
14697 objects.TaggableObject.ValidateTag(tag)
14699 def Exec(self, feedback_fn):
14703 try:
14704 for tag in self.op.tags:
14705 self.target.AddTag(tag)
14706 except errors.TagError, err:
14707 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14708 self.cfg.Update(self.target, feedback_fn)
14711 class LUTagsDel(TagsLU):
14712 """Delete a list of tags from a given object.
14717 def CheckPrereq(self):
14718 """Check prerequisites.
14720 This checks that we have the given tag.
14723 TagsLU.CheckPrereq(self)
14724 for tag in self.op.tags:
14725 objects.TaggableObject.ValidateTag(tag)
14726 del_tags = frozenset(self.op.tags)
14727 cur_tags = self.target.GetTags()
14729 diff_tags = del_tags - cur_tags
14730 if diff_tags:
14731 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14732 raise errors.OpPrereqError("Tag(s) %s not found" %
14733 (utils.CommaJoin(diff_names), ),
14734 errors.ECODE_NOENT)
14736 def Exec(self, feedback_fn):
14737 """Remove the tag from the object.
14740 for tag in self.op.tags:
14741 self.target.RemoveTag(tag)
14742 self.cfg.Update(self.target, feedback_fn)
14745 class LUTestDelay(NoHooksLU):
14746 """Sleep for a specified amount of time.
14748 This LU sleeps on the master and/or nodes for a specified amount of
14754 def ExpandNames(self):
14755 """Expand names and set required locks.
14757 This expands the node list, if any.
14760 self.needed_locks = {}
14761 if self.op.on_nodes:
14762 # _GetWantedNodes can be used here, but is not always appropriate to use
14763 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14764 # more information.
14765 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14766 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14768 def _TestDelay(self):
14769 """Do the actual sleep.
14772 if self.op.on_master:
14773 if not utils.TestDelay(self.op.duration):
14774 raise errors.OpExecError("Error during master delay test")
14775 if self.op.on_nodes:
14776 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14777 for node, node_result in result.items():
14778 node_result.Raise("Failure during rpc call to node %s" % node)
14780 def Exec(self, feedback_fn):
14781 """Execute the test delay opcode, with the wanted repetitions.
14784 if self.op.repeat == 0:
14785 self._TestDelay()
14786 else:
14787 top_value = self.op.repeat - 1
14788 for i in range(self.op.repeat):
14789 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14790 self._TestDelay()
14793 class LUTestJqueue(NoHooksLU):
14794 """Utility LU to test some aspects of the job queue.
14799 # Must be lower than default timeout for WaitForJobChange to see whether it
14800 # notices changed jobs
14801 _CLIENT_CONNECT_TIMEOUT = 20.0
14802 _CLIENT_CONFIRM_TIMEOUT = 60.0
14805 def _NotifyUsingSocket(cls, cb, errcls):
14806 """Opens a Unix socket and waits for another program to connect.
14809 @param cb: Callback to send socket name to client
14810 @type errcls: class
14811 @param errcls: Exception class to use for errors
14814 # Using a temporary directory as there's no easy way to create temporary
14815 # sockets without writing a custom loop around tempfile.mktemp and
14817 tmpdir = tempfile.mkdtemp()
14819 tmpsock = utils.PathJoin(tmpdir, "sock")
14821 logging.debug("Creating temporary socket at %s", tmpsock)
14822 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14827 # Send details to client
14830 # Wait for client to connect before continuing
14831 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14832 try:
14833 (conn, _) = sock.accept()
14834 except socket.error, err:
14835 raise errcls("Client didn't connect in time (%s)" % err)
14839 # Remove as soon as client is connected
14840 shutil.rmtree(tmpdir)
14842 # Wait for client to close
14845 # pylint: disable=E1101
14846 # Instance of '_socketobject' has no ... member
14847 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14849 except socket.error, err:
14850 raise errcls("Client failed to confirm notification (%s)" % err)
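# Illustrative sketch (not part of the original module): a test client for
# the notification protocol above would connect to the announced socket path,
# perform its checks while the LU is blocked, and close the connection to
# confirm; the socket path below is an assumed example value.
#
#   import socket
#   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   client.connect("/tmp/tmpXXXXXX/sock")   # path received via the callback
#   # ... inspect the job queue behaviour here ...
#   client.close()                          # confirms the notification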
14854 def _SendNotification(self, test, arg, sockname):
14855 """Sends a notification to the client.
14858 @param test: Test name
14859 @param arg: Test argument (depends on test)
14860 @type sockname: string
14861 @param sockname: Socket path
14864 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14866 def _Notify(self, prereq, test, arg):
14867 """Notifies the client of a test.
14870 @param prereq: Whether this is a prereq-phase test
14872 @param test: Test name
14873 @param arg: Test argument (depends on test)
14877 errcls = errors.OpPrereqError
14879 errcls = errors.OpExecError
14881 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14885 def CheckArguments(self):
14886 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14887 self.expandnames_calls = 0
14889 def ExpandNames(self):
14890 checkargs_calls = getattr(self, "checkargs_calls", 0)
14891 if checkargs_calls < 1:
14892 raise errors.ProgrammerError("CheckArguments was not called")
14894 self.expandnames_calls += 1
14896 if self.op.notify_waitlock:
14897 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14899 self.LogInfo("Expanding names")
14901 # Get lock on master node (just to get a lock, not for a particular reason)
14902 self.needed_locks = {
14903 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14906 def Exec(self, feedback_fn):
14907 if self.expandnames_calls < 1:
14908 raise errors.ProgrammerError("ExpandNames was not called")
14910 if self.op.notify_exec:
14911 self._Notify(False, constants.JQT_EXEC, None)
14913 self.LogInfo("Executing")
14915 if self.op.log_messages:
14916 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14917 for idx, msg in enumerate(self.op.log_messages):
14918 self.LogInfo("Sending log message %s", idx + 1)
14919 feedback_fn(constants.JQT_MSGPREFIX + msg)
14920 # Report how many test messages have been sent
14921 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14923 if self.op.fail:
14924 raise errors.OpExecError("Opcode failure was requested")
14929 class IAllocator(object):
14930 """IAllocator framework.
14932 An IAllocator instance has three sets of attributes:
14933 - cfg that is needed to query the cluster
14934 - input data (all members of the _KEYS class attribute are required)
14935 - four buffer attributes (in|out_data|text), that represent the
14936 input (to the external script) in text and data structure format,
14937 and the output from it, again in two formats
14938 - the result variables from the script (success, info, nodes) for
14939 easy usage
14942 # pylint: disable=R0902
14943 # lots of instance attributes
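# Illustrative usage sketch (not part of the original module): a relocation
# request is built and executed roughly as follows; the allocator name "hail"
# and the instance/node names are assumptions for the example.
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")
#   if not ial.success:
#     raise errors.OpPrereqError("Relocation failed: %s" % ial.info,
#                                errors.ECODE_NORES)
#   new_nodes = ial.result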
14945 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14946 self.cfg = cfg
14947 self.rpc = rpc_runner
14948 # init buffer variables
14949 self.in_text = self.out_text = self.in_data = self.out_data = None
14950 # init all input fields so that pylint is happy
14951 self.mode = mode
14952 self.memory = self.disks = self.disk_template = self.spindle_use = None
14953 self.os = self.tags = self.nics = self.vcpus = None
14954 self.hypervisor = None
14955 self.relocate_from = None
14957 self.instances = None
14958 self.evac_mode = None
14959 self.target_groups = []
14961 self.required_nodes = None
14962 # init result fields
14963 self.success = self.info = self.result = None
14965 try:
14966 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14967 except KeyError:
14968 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14969 " IAllocator" % self.mode)
14971 keyset = [n for (n, _) in keydata]
14973 for key in kwargs:
14974 if key not in keyset:
14975 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14976 " IAllocator" % key)
14977 setattr(self, key, kwargs[key])
14979 for key in keyset:
14980 if key not in kwargs:
14981 raise errors.ProgrammerError("Missing input parameter '%s' to"
14982 " IAllocator" % key)
14983 self._BuildInputData(compat.partial(fn, self), keydata)
14985 def _ComputeClusterData(self):
14986 """Compute the generic allocator input data.
14988 This is the data that is independent of the actual operation.
14991 cfg = self.cfg
14992 cluster_info = cfg.GetClusterInfo()
14995 "version": constants.IALLOCATOR_VERSION,
14996 "cluster_name": cfg.GetClusterName(),
14997 "cluster_tags": list(cluster_info.GetTags()),
14998 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14999 "ipolicy": cluster_info.ipolicy,
15001 ninfo = cfg.GetAllNodesInfo()
15002 iinfo = cfg.GetAllInstancesInfo().values()
15003 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
15006 node_list = [n.name for n in ninfo.values() if n.vm_capable]
15008 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
15009 hypervisor_name = self.hypervisor
15010 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
15011 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
15012 else:
15013 hypervisor_name = cluster_info.primary_hypervisor
15015 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
15018 self.rpc.call_all_instances_info(node_list,
15019 cluster_info.enabled_hypervisors)
15021 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
15023 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
15024 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
15025 i_list, config_ndata)
15026 assert len(data["nodes"]) == len(ninfo), \
15027 "Incomplete node data computed"
15029 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
15031 self.in_data = data
15034 def _ComputeNodeGroupData(cfg):
15035 """Compute node groups data.
15038 cluster = cfg.GetClusterInfo()
15039 ng = dict((guuid, {
15040 "name": gdata.name,
15041 "alloc_policy": gdata.alloc_policy,
15042 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
15044 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
15046 return ng
15049 def _ComputeBasicNodeData(cfg, node_cfg):
15050 """Compute global node data.
15053 @returns: a dict of name: (node dict, node config)
15056 # fill in static (config-based) values
15057 node_results = dict((ninfo.name, {
15058 "tags": list(ninfo.GetTags()),
15059 "primary_ip": ninfo.primary_ip,
15060 "secondary_ip": ninfo.secondary_ip,
15061 "offline": ninfo.offline,
15062 "drained": ninfo.drained,
15063 "master_candidate": ninfo.master_candidate,
15064 "group": ninfo.group,
15065 "master_capable": ninfo.master_capable,
15066 "vm_capable": ninfo.vm_capable,
15067 "ndparams": cfg.GetNdParams(ninfo),
15069 for ninfo in node_cfg.values())
15071 return node_results
15074 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
15076 """Compute global node data.
15078 @param node_results: the basic node structures as filled from the config
15081 #TODO(dynmem): compute the right data on MAX and MIN memory
15082 # make a copy of the current dict
15083 node_results = dict(node_results)
15084 for nname, nresult in node_data.items():
15085 assert nname in node_results, "Missing basic data for node %s" % nname
15086 ninfo = node_cfg[nname]
15088 if not (ninfo.offline or ninfo.drained):
15089 nresult.Raise("Can't get data for node %s" % nname)
15090 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
15092 remote_info = _MakeLegacyNodeInfo(nresult.payload)
15094 for attr in ["memory_total", "memory_free", "memory_dom0",
15095 "vg_size", "vg_free", "cpu_total"]:
15096 if attr not in remote_info:
15097 raise errors.OpExecError("Node '%s' didn't return attribute"
15098 " '%s'" % (nname, attr))
15099 if not isinstance(remote_info[attr], int):
15100 raise errors.OpExecError("Node '%s' returned invalid value"
15102 (nname, attr, remote_info[attr]))
15103 # compute memory used by primary instances
15104 i_p_mem = i_p_up_mem = 0
15105 for iinfo, beinfo in i_list:
15106 if iinfo.primary_node == nname:
15107 i_p_mem += beinfo[constants.BE_MAXMEM]
15108 if iinfo.name not in node_iinfo[nname].payload:
15109 i_used_mem = 0
15110 else:
15111 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
15112 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
15113 remote_info["memory_free"] -= max(0, i_mem_diff)
15115 if iinfo.admin_state == constants.ADMINST_UP:
15116 i_p_up_mem += beinfo[constants.BE_MAXMEM]
15118 # compute memory used by instances
15120 "total_memory": remote_info["memory_total"],
15121 "reserved_memory": remote_info["memory_dom0"],
15122 "free_memory": remote_info["memory_free"],
15123 "total_disk": remote_info["vg_size"],
15124 "free_disk": remote_info["vg_free"],
15125 "total_cpus": remote_info["cpu_total"],
15126 "i_pri_memory": i_p_mem,
15127 "i_pri_up_memory": i_p_up_mem,
15129 pnr_dyn.update(node_results[nname])
15130 node_results[nname] = pnr_dyn
15132 return node_results
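# Illustrative sketch (not part of the original module): for a vm_capable
# node the resulting entry combines the static and dynamic data built above,
# roughly (values made up):
#
#   "node1.example.com": {
#     "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "198.51.100.1",
#     "offline": False, "drained": False, "master_candidate": True,
#     "group": "uuid-group1", "master_capable": True, "vm_capable": True,
#     "ndparams": {},
#     "total_memory": 16384, "reserved_memory": 1024, "free_memory": 8192,
#     "total_disk": 512000, "free_disk": 256000, "total_cpus": 8,
#     "i_pri_memory": 4096, "i_pri_up_memory": 2048,
#   }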
15135 def _ComputeInstanceData(cluster_info, i_list):
15136 """Compute global instance data.
15139 instance_data = {}
15140 for iinfo, beinfo in i_list:
15141 nic_data = []
15142 for nic in iinfo.nics:
15143 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15147 "mode": filled_params[constants.NIC_MODE],
15148 "link": filled_params[constants.NIC_LINK],
15149 "network": nic.network,
15151 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15152 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15153 nic_data.append(nic_dict)
15155 "tags": list(iinfo.GetTags()),
15156 "admin_state": iinfo.admin_state,
15157 "vcpus": beinfo[constants.BE_VCPUS],
15158 "memory": beinfo[constants.BE_MAXMEM],
15159 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15161 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15163 "disks": [{constants.IDISK_SIZE: dsk.size,
15164 constants.IDISK_MODE: dsk.mode}
15165 for dsk in iinfo.disks],
15166 "disk_template": iinfo.disk_template,
15167 "hypervisor": iinfo.hypervisor,
15169 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15171 instance_data[iinfo.name] = pir
15173 return instance_data
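# Illustrative sketch (not part of the original module): each instance entry
# produced above looks roughly like this (values made up):
#
#   "inst1.example.com": {
#     "tags": [], "admin_state": "up", "vcpus": 2, "memory": 1024,
#     "spindle_use": 1,
#     "nodes": ["node1.example.com", "node2.example.com"],
#     "nics": [{"mode": "bridged", "link": "xen-br0", "network": None,
#               "bridge": "xen-br0"}],
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd", "hypervisor": "xen-pvm",
#     "disk_space_total": 20608,
#   }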
15175 def _AddNewInstance(self):
15176 """Add new instance data to allocator structure.
15178 This in combination with _AllocatorGetClusterData will create the
15179 correct structure needed as input for the allocator.
15181 The checks for the completeness of the opcode must have already been
15185 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15187 if self.disk_template in constants.DTS_INT_MIRROR:
15188 self.required_nodes = 2
15189 else:
15190 self.required_nodes = 1
15194 "disk_template": self.disk_template,
15197 "vcpus": self.vcpus,
15198 "memory": self.memory,
15199 "spindle_use": self.spindle_use,
15200 "disks": self.disks,
15201 "disk_space_total": disk_space,
15203 "required_nodes": self.required_nodes,
15204 "hypervisor": self.hypervisor,
15209 def _AddRelocateInstance(self):
15210 """Add relocate instance data to allocator structure.
15212 This in combination with _IAllocatorGetClusterData will create the
15213 correct structure needed as input for the allocator.
15215 The checks for the completeness of the opcode must have already been
15219 instance = self.cfg.GetInstanceInfo(self.name)
15220 if instance is None:
15221 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15222 " IAllocator" % self.name)
15224 if instance.disk_template not in constants.DTS_MIRRORED:
15225 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15226 errors.ECODE_INVAL)
15228 if instance.disk_template in constants.DTS_INT_MIRROR and \
15229 len(instance.secondary_nodes) != 1:
15230 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15231 errors.ECODE_STATE)
15233 self.required_nodes = 1
15234 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15235 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15239 "disk_space_total": disk_space,
15240 "required_nodes": self.required_nodes,
15241 "relocate_from": self.relocate_from,
15245 def _AddNodeEvacuate(self):
15246 """Get data for node-evacuate requests.
15250 "instances": self.instances,
15251 "evac_mode": self.evac_mode,
15254 def _AddChangeGroup(self):
15255 """Get data for node-evacuate requests.
15259 "instances": self.instances,
15260 "target_groups": self.target_groups,
15263 def _BuildInputData(self, fn, keydata):
15264 """Build input data structures.
15267 self._ComputeClusterData()
15269 request = fn()
15270 request["type"] = self.mode
15271 for keyname, keytype in keydata:
15272 if keyname not in request:
15273 raise errors.ProgrammerError("Request parameter %s is missing" %
15275 val = request[keyname]
15276 if not keytype(val):
15277 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15278 " validation, value %s, expected"
15279 " type %s" % (keyname, val, keytype))
15280 self.in_data["request"] = request
15282 self.in_text = serializer.Dump(self.in_data)
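# Illustrative sketch (not part of the original module): after _BuildInputData
# the text handed to the external script is the serialized form of a structure
# roughly like the following (cluster-wide sections abbreviated, names made
# up, request shown for a relocation):
#
#   {
#     "version": constants.IALLOCATOR_VERSION,
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [],
#     "enabled_hypervisors": ["xen-pvm"],
#     "ipolicy": {},
#     "nodegroups": {},
#     "nodes": {},
#     "instances": {},
#     "request": {
#       "type": constants.IALLOCATOR_MODE_RELOC,
#       "name": "inst1.example.com",
#       "relocate_from": ["node2.example.com"],
#       "required_nodes": 1,
#       "disk_space_total": 10240,
#     },
#   }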
15284 _STRING_LIST = ht.TListOf(ht.TString)
15285 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15286 # pylint: disable=E1101
15287 # Class '...' has no 'OP_ID' member
15288 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15289 opcodes.OpInstanceMigrate.OP_ID,
15290 opcodes.OpInstanceReplaceDisks.OP_ID])
15293 _NEVAC_MOVED = \
15294 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15295 ht.TItems([ht.TNonEmptyString,
15296 ht.TNonEmptyString,
15297 ht.TListOf(ht.TNonEmptyString),
15299 _NEVAC_FAILED = \
15300 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15301 ht.TItems([ht.TNonEmptyString,
15304 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15305 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
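# Illustrative sketch (not part of the original module): a node-evacuation
# result accepted by _NEVAC_RESULT is a (moved, failed, jobs) triple, e.g.
# (all names made up):
#
#   [
#     [["inst1.example.com", "group2", ["node3.example.com"]]],    # moved
#     [["inst2.example.com", "instance is not mirrored"]],         # failed
#     [[{"OP_ID": "OP_INSTANCE_MIGRATE",
#        "instance_name": "inst1.example.com"}]],                  # jobs
#   ]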
15307 _MODE_DATA = {
15308 constants.IALLOCATOR_MODE_ALLOC:
15309 (_AddNewInstance, [
15311 ("name", ht.TString),
15312 ("memory", ht.TInt),
15313 ("spindle_use", ht.TInt),
15314 ("disks", ht.TListOf(ht.TDict)),
15315 ("disk_template", ht.TString),
15316 ("os", ht.TString),
15317 ("tags", _STRING_LIST),
15318 ("nics", ht.TListOf(ht.TDict)),
15319 ("vcpus", ht.TInt),
15320 ("hypervisor", ht.TString),
15322 constants.IALLOCATOR_MODE_RELOC:
15323 (_AddRelocateInstance,
15324 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15326 constants.IALLOCATOR_MODE_NODE_EVAC:
15327 (_AddNodeEvacuate, [
15328 ("instances", _STRING_LIST),
15329 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15331 constants.IALLOCATOR_MODE_CHG_GROUP:
15332 (_AddChangeGroup, [
15333 ("instances", _STRING_LIST),
15334 ("target_groups", _STRING_LIST),
15338 def Run(self, name, validate=True, call_fn=None):
15339 """Run an instance allocator and return the results.
15342 if call_fn is None:
15343 call_fn = self.rpc.call_iallocator_runner
15345 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15346 result.Raise("Failure while running the iallocator script")
15348 self.out_text = result.payload
15349 if validate:
15350 self._ValidateResult()
15352 def _ValidateResult(self):
15353 """Process the allocator results.
15355 This will process and if successful save the result in
15356 self.out_data and the other parameters.
15359 try:
15360 rdict = serializer.Load(self.out_text)
15361 except Exception, err:
15362 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15364 if not isinstance(rdict, dict):
15365 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15367 # TODO: remove backwards compatibility in later versions
15368 if "nodes" in rdict and "result" not in rdict:
15369 rdict["result"] = rdict["nodes"]
15372 for key in "success", "info", "result":
15373 if key not in rdict:
15374 raise errors.OpExecError("Can't parse iallocator results:"
15375 " missing key '%s'" % key)
15376 setattr(self, key, rdict[key])
15378 if not self._result_check(self.result):
15379 raise errors.OpExecError("Iallocator returned invalid result,"
15380 " expected %s, got %s" %
15381 (self._result_check, self.result),
15382 errors.ECODE_INVAL)
15384 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15385 assert self.relocate_from is not None
15386 assert self.required_nodes == 1
15388 node2group = dict((name, ndata["group"])
15389 for (name, ndata) in self.in_data["nodes"].items())
15391 fn = compat.partial(self._NodesToGroups, node2group,
15392 self.in_data["nodegroups"])
15394 instance = self.cfg.GetInstanceInfo(self.name)
15395 request_groups = fn(self.relocate_from + [instance.primary_node])
15396 result_groups = fn(rdict["result"] + [instance.primary_node])
15398 if self.success and not set(result_groups).issubset(request_groups):
15399 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15400 " differ from original groups (%s)" %
15401 (utils.CommaJoin(result_groups),
15402 utils.CommaJoin(request_groups)))
15404 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15405 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15407 self.out_data = rdict
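# Illustrative sketch (not part of the original module): a minimal allocator
# reply that passes the generic checks above looks like this before the
# mode-specific result validation (node names made up):
#
#   {
#     "success": true,
#     "info": "allocation successful",
#     "result": ["node1.example.com", "node2.example.com"]
#   }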
15410 def _NodesToGroups(node2group, groups, nodes):
15411 """Returns a list of unique group names for a list of nodes.
15413 @type node2group: dict
15414 @param node2group: Map from node name to group UUID
15416 @param groups: Group information
15418 @param nodes: Node names
15425 group_uuid = node2group[node]
15427 # Ignore unknown node
15431 group = groups[group_uuid]
15433 # Can't find group, let's use UUID
15434 group_name = group_uuid
15436 group_name = group["name"]
15438 result.add(group_name)
15440 return sorted(result)
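# Illustrative usage sketch (not part of the original module), with made-up
# data; unknown nodes are silently ignored and group UUIDs without a known
# name fall back to the UUID itself:
#
#   node2group = {"node1": "uuid-g1", "node2": "uuid-g2"}
#   groups = {"uuid-g1": {"name": "group1"}}
#   IAllocator._NodesToGroups(node2group, groups, ["node1", "node2", "node9"])
#   --> ["group1", "uuid-g2"]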
15443 class LUTestAllocator(NoHooksLU):
15444 """Run allocator tests.
15446 This LU runs the allocator tests
15449 def CheckPrereq(self):
15450 """Check prerequisites.
15452 This checks the opcode parameters depending on the director and mode test.
15455 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15456 for attr in ["memory", "disks", "disk_template",
15457 "os", "tags", "nics", "vcpus"]:
15458 if not hasattr(self.op, attr):
15459 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15460 attr, errors.ECODE_INVAL)
15461 iname = self.cfg.ExpandInstanceName(self.op.name)
15462 if iname is not None:
15463 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15464 iname, errors.ECODE_EXISTS)
15465 if not isinstance(self.op.nics, list):
15466 raise errors.OpPrereqError("Invalid parameter 'nics'",
15467 errors.ECODE_INVAL)
15468 if not isinstance(self.op.disks, list):
15469 raise errors.OpPrereqError("Invalid parameter 'disks'",
15470 errors.ECODE_INVAL)
15471 for row in self.op.disks:
15472 if (not isinstance(row, dict) or
15473 constants.IDISK_SIZE not in row or
15474 not isinstance(row[constants.IDISK_SIZE], int) or
15475 constants.IDISK_MODE not in row or
15476 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15477 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15478 " parameter", errors.ECODE_INVAL)
15479 if self.op.hypervisor is None:
15480 self.op.hypervisor = self.cfg.GetHypervisorType()
15481 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15482 fname = _ExpandInstanceName(self.cfg, self.op.name)
15483 self.op.name = fname
15484 self.relocate_from = \
15485 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15486 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15487 constants.IALLOCATOR_MODE_NODE_EVAC):
15488 if not self.op.instances:
15489 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15490 self.op.instances = _GetWantedInstances(self, self.op.instances)
15491 else:
15492 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15493 self.op.mode, errors.ECODE_INVAL)
15495 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15496 if self.op.allocator is None:
15497 raise errors.OpPrereqError("Missing allocator name",
15498 errors.ECODE_INVAL)
15499 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15500 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15501 self.op.direction, errors.ECODE_INVAL)
15503 def Exec(self, feedback_fn):
15504 """Run the allocator test.
15507 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15508 ial = IAllocator(self.cfg, self.rpc,
15511 memory=self.op.memory,
15512 disks=self.op.disks,
15513 disk_template=self.op.disk_template,
15517 vcpus=self.op.vcpus,
15518 hypervisor=self.op.hypervisor,
15520 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15521 ial = IAllocator(self.cfg, self.rpc,
15524 relocate_from=list(self.relocate_from),
15526 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15527 ial = IAllocator(self.cfg, self.rpc,
15529 instances=self.op.instances,
15530 target_groups=self.op.target_groups)
15531 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15532 ial = IAllocator(self.cfg, self.rpc,
15534 instances=self.op.instances,
15535 evac_mode=self.op.evac_mode)
15536 else:
15537 raise errors.ProgrammerError("Unhandled mode %s in"
15538 " LUTestAllocator.Exec" % self.op.mode)
15540 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15541 result = ial.in_text
15542 else:
15543 ial.Run(self.op.allocator, validate=False)
15544 result = ial.out_text
15545 return result
15548 class LUNetworkAdd(LogicalUnit):
15549 """Logical unit for creating networks.
15552 HPATH = "network-add"
15553 HTYPE = constants.HTYPE_NETWORK
15556 def BuildHooksNodes(self):
15557 """Build hooks nodes.
15560 mn = self.cfg.GetMasterNode()
15561 return ([mn], [mn])
15563 def ExpandNames(self):
15564 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15565 self.needed_locks = {}
15566 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15568 def CheckPrereq(self):
15569 """Check prerequisites.
15571 This checks that the given group name is not an existing node group
15575 if self.op.network is None:
15576 raise errors.OpPrereqError("Network must be given",
15577 errors.ECODE_INVAL)
15579 uuid = self.cfg.LookupNetwork(self.op.network_name)
15581 if uuid:
15582 raise errors.OpPrereqError("Network '%s' already defined" %
15583 self.op.network_name, errors.ECODE_EXISTS)
15585 if self.op.mac_prefix:
15586 utils.NormalizeAndValidateMac(self.op.mac_prefix+":00:00:00")
15588 # Check tag validity
15589 for tag in self.op.tags:
15590 objects.TaggableObject.ValidateTag(tag)
15593 def BuildHooksEnv(self):
15594 """Build hooks env.
15598 "name": self.op.network_name,
15599 "network": self.op.network,
15600 "gateway": self.op.gateway,
15601 "network6": self.op.network6,
15602 "gateway6": self.op.gateway6,
15603 "mac_prefix": self.op.mac_prefix,
15604 "network_type": self.op.network_type,
15605 "tags": self.op.tags,
15607 return _BuildNetworkHookEnv(**args)
15609 def Exec(self, feedback_fn):
15610 """Add the ip pool to the cluster.
15613 nobj = objects.Network(name=self.op.network_name,
15614 network=self.op.network,
15615 gateway=self.op.gateway,
15616 network6=self.op.network6,
15617 gateway6=self.op.gateway6,
15618 mac_prefix=self.op.mac_prefix,
15619 network_type=self.op.network_type,
15620 uuid=self.network_uuid,
15622 # Initialize the associated address pool
15624 pool = network.AddressPool.InitializeNetwork(nobj)
15625 except errors.AddressPoolError, e:
15626 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15628 # Check if we need to reserve the nodes and the cluster master IP
15629 # These may not be allocated to any instances in routed mode, as
15630 # they wouldn't function anyway.
15631 for node in self.cfg.GetAllNodesInfo().values():
15632 for ip in [node.primary_ip, node.secondary_ip]:
15633 try:
15634 pool.Reserve(ip)
15635 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15637 except errors.AddressPoolError:
15638 pass
15640 master_ip = self.cfg.GetClusterInfo().master_ip
15641 try:
15642 pool.Reserve(master_ip)
15643 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15644 except errors.AddressPoolError:
15645 pass
15647 if self.op.add_reserved_ips:
15648 for ip in self.op.add_reserved_ips:
15650 pool.Reserve(ip, external=True)
15651 except errors.AddressPoolError, e:
15652 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15655 for tag in self.op.tags:
15656 nobj.AddTag(tag)
15658 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15659 del self.remove_locks[locking.LEVEL_NETWORK]
15662 class LUNetworkRemove(LogicalUnit):
15663 HPATH = "network-remove"
15664 HTYPE = constants.HTYPE_NETWORK
15667 def ExpandNames(self):
15668 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15670 self.needed_locks = {
15671 locking.LEVEL_NETWORK: [self.network_uuid],
15675 def CheckPrereq(self):
15676 """Check prerequisites.
15678 This checks that the given network name exists as a network and that it
15679 is not connected to any node group.
15683 if not self.network_uuid:
15684 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15685 errors.ECODE_INVAL)
15687 # Verify that the network is not connected.
15688 node_groups = [group.name
15689 for group in self.cfg.GetAllNodeGroupsInfo().values()
15690 for network in group.networks.keys()
15691 if network == self.network_uuid]
15693 if node_groups:
15694 self.LogWarning("Network '%s' is connected to the following"
15695 " node groups: %s" % (self.op.network_name,
15696 utils.CommaJoin(utils.NiceSort(node_groups))))
15697 raise errors.OpPrereqError("Network still connected",
15698 errors.ECODE_STATE)
15700 def BuildHooksEnv(self):
15701 """Build hooks env.
15705 "NETWORK_NAME": self.op.network_name,
15708 def BuildHooksNodes(self):
15709 """Build hooks nodes.
15712 mn = self.cfg.GetMasterNode()
15713 return ([mn], [mn])
15715 def Exec(self, feedback_fn):
15716 """Remove the network.
15720 self.cfg.RemoveNetwork(self.network_uuid)
15721 except errors.ConfigurationError:
15722 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15723 (self.op.network_name, self.network_uuid))
15726 class LUNetworkSetParams(LogicalUnit):
15727 """Modifies the parameters of a network.
15730 HPATH = "network-modify"
15731 HTYPE = constants.HTYPE_NETWORK
15734 def CheckArguments(self):
15735 if (self.op.gateway and
15736 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15737 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15738 " at once", errors.ECODE_INVAL)
15741 def ExpandNames(self):
15742 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15743 self.network = self.cfg.GetNetwork(self.network_uuid)
15744 self.needed_locks = {
15745 locking.LEVEL_NETWORK: [self.network_uuid],
15749 if self.network is None:
15750 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15751 (self.op.network_name, self.network_uuid),
15752 errors.ECODE_INVAL)
15754 def CheckPrereq(self):
15755 """Check prerequisites.
15758 self.gateway = self.network.gateway
15759 self.network_type = self.network.network_type
15760 self.mac_prefix = self.network.mac_prefix
15761 self.network6 = self.network.network6
15762 self.gateway6 = self.network.gateway6
15763 self.tags = self.network.tags
15765 self.pool = network.AddressPool(self.network)
15767 if self.op.gateway:
15768 if self.op.gateway == constants.VALUE_NONE:
15769 self.gateway = None
15770 else:
15771 self.gateway = self.op.gateway
15772 if self.pool.IsReserved(self.gateway):
15773 raise errors.OpPrereqError("%s is already reserved" %
15774 self.gateway, errors.ECODE_INVAL)
15776 if self.op.network_type:
15777 if self.op.network_type == constants.VALUE_NONE:
15778 self.network_type = None
15779 else:
15780 self.network_type = self.op.network_type
15782 if self.op.mac_prefix:
15783 if self.op.mac_prefix == constants.VALUE_NONE:
15784 self.mac_prefix = None
15785 else:
15786 utils.NormalizeAndValidateMac(self.op.mac_prefix+":00:00:00")
15787 self.mac_prefix = self.op.mac_prefix
15789 if self.op.gateway6:
15790 if self.op.gateway6 == constants.VALUE_NONE:
15791 self.gateway6 = None
15792 else:
15793 self.gateway6 = self.op.gateway6
15795 if self.op.network6:
15796 if self.op.network6 == constants.VALUE_NONE:
15797 self.network6 = None
15798 else:
15799 self.network6 = self.op.network6
15803 def BuildHooksEnv(self):
15804 """Build hooks env.
15808 "name": self.op.network_name,
15809 "network": self.network.network,
15810 "gateway": self.gateway,
15811 "network6": self.network6,
15812 "gateway6": self.gateway6,
15813 "mac_prefix": self.mac_prefix,
15814 "network_type": self.network_type,
15817 return _BuildNetworkHookEnv(**args)
15819 def BuildHooksNodes(self):
15820 """Build hooks nodes.
15823 mn = self.cfg.GetMasterNode()
15824 return ([mn], [mn])
15826 def Exec(self, feedback_fn):
15827 """Modifies the network.
15830 #TODO: reserve/release via temporary reservation manager
15831 # extend cfg.ReserveIp/ReleaseIp with the external flag
15832 if self.op.gateway:
15833 if self.gateway == self.network.gateway:
15834 self.LogWarning("Gateway is already %s" % self.gateway)
15835 else:
15836 if self.gateway:
15837 self.pool.Reserve(self.gateway, external=True)
15838 if self.network.gateway:
15839 self.pool.Release(self.network.gateway, external=True)
15840 self.network.gateway = self.gateway
15842 if self.op.add_reserved_ips:
15843 for ip in self.op.add_reserved_ips:
15844 try:
15845 if self.pool.IsReserved(ip):
15846 self.LogWarning("IP %s is already reserved" % ip)
15847 else:
15848 self.pool.Reserve(ip, external=True)
15849 except errors.AddressPoolError, e:
15850 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15852 if self.op.remove_reserved_ips:
15853 for ip in self.op.remove_reserved_ips:
15854 if ip == self.network.gateway:
15855 self.LogWarning("Cannot unreserve Gateway's IP")
15856 continue
15857 try:
15858 if not self.pool.IsReserved(ip):
15859 self.LogWarning("IP %s is already unreserved" % ip)
15860 else:
15861 self.pool.Release(ip, external=True)
15862 except errors.AddressPoolError, e:
15863 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15865 if self.op.mac_prefix:
15866 self.network.mac_prefix = self.mac_prefix
15868 if self.op.network6:
15869 self.network.network6 = self.network6
15871 if self.op.gateway6:
15872 self.network.gateway6 = self.gateway6
15874 if self.op.network_type:
15875 self.network.network_type = self.network_type
15877 self.pool.Validate()
15879 self.cfg.Update(self.network, feedback_fn)
15882 class _NetworkQuery(_QueryBase):
15883 FIELDS = query.NETWORK_FIELDS
15885 def ExpandNames(self, lu):
15886 lu.needed_locks = {}
15888 self._all_networks = lu.cfg.GetAllNetworksInfo()
15889 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15891 if not self.names:
15892 self.wanted = [name_to_uuid[name]
15893 for name in utils.NiceSort(name_to_uuid.keys())]
15894 else:
15895 # Accept names to be either names or UUIDs.
15898 all_uuid = frozenset(self._all_networks.keys())
15900 for name in self.names:
15901 if name in all_uuid:
15902 self.wanted.append(name)
15903 elif name in name_to_uuid:
15904 self.wanted.append(name_to_uuid[name])
15905 else:
15906 missing.append(name)
15908 if missing:
15909 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15910 errors.ECODE_NOENT)
15912 def DeclareLocks(self, lu, level):
15913 pass
15915 def _GetQueryData(self, lu):
15916 """Computes the list of networks and their attributes.
15919 do_instances = query.NETQ_INST in self.requested_data
15920 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15921 do_stats = query.NETQ_STATS in self.requested_data
15922 cluster = lu.cfg.GetClusterInfo()
15924 network_to_groups = None
15925 network_to_instances = None
15927 if do_groups:
15928 # For NETQ_GROUP, we need to map network->[groups]
15930 all_groups = lu.cfg.GetAllNodeGroupsInfo()
15931 network_to_groups = dict((uuid, []) for uuid in self.wanted)
15932 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
15934 if do_instances:
15935 all_instances = lu.cfg.GetAllInstancesInfo()
15936 all_nodes = lu.cfg.GetAllNodesInfo()
15937 network_to_instances = dict((uuid, []) for uuid in self.wanted)
15940 for group in all_groups.values():
15942 group_nodes = [node.name for node in all_nodes.values() if
15943 node.group == group.uuid]
15944 group_instances = [instance for instance in all_instances.values()
15945 if instance.primary_node in group_nodes]
15947 for net_uuid in group.networks.keys():
15948 if net_uuid in network_to_groups:
15949 netparams = group.networks[net_uuid]
15950 mode = netparams[constants.NIC_MODE]
15951 link = netparams[constants.NIC_LINK]
15952 info = group.name + '(' + mode + ', ' + link + ')'
15953 network_to_groups[net_uuid].append(info)
15955 if do_instances:
15956 for instance in group_instances:
15957 for nic in instance.nics:
15958 if nic.network == self._all_networks[net_uuid].name:
15959 network_to_instances[net_uuid].append(instance.name)
15964 for uuid, net in self._all_networks.items():
15965 if uuid in self.wanted:
15966 pool = network.AddressPool(net)
15968 "free_count": pool.GetFreeCount(),
15969 "reserved_count": pool.GetReservedCount(),
15970 "map": pool.GetMap(),
15971 "external_reservations": ", ".join(pool.GetExternalReservations()),
15974 return query.NetworkQueryData([self._all_networks[uuid]
15975 for uuid in self.wanted],
15977 network_to_instances,
15981 class LUNetworkQuery(NoHooksLU):
15982 """Logical unit for querying networks.
15987 def CheckArguments(self):
15988 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
15989 self.op.output_fields, False)
15991 def ExpandNames(self):
15992 self.nq.ExpandNames(self)
15994 def Exec(self, feedback_fn):
15995 return self.nq.OldStyleQuery(self)
15999 class LUNetworkConnect(LogicalUnit):
16000 """Connect a network to a nodegroup
16003 HPATH = "network-connect"
16004 HTYPE = constants.HTYPE_NETWORK
16007 def ExpandNames(self):
16008 self.network_name = self.op.network_name
16009 self.group_name = self.op.group_name
16010 self.network_mode = self.op.network_mode
16011 self.network_link = self.op.network_link
16013 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16014 self.network = self.cfg.GetNetwork(self.network_uuid)
16015 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16016 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16018 self.needed_locks = {
16019 locking.LEVEL_INSTANCE: [],
16020 locking.LEVEL_NODEGROUP: [self.group_uuid],
16022 self.share_locks[locking.LEVEL_INSTANCE] = 1
16024 def DeclareLocks(self, level):
16025 if level == locking.LEVEL_INSTANCE:
16026 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16028 # Lock instances optimistically, needs verification once group lock has
16030 self.needed_locks[locking.LEVEL_INSTANCE] = \
16031 self.cfg.GetNodeGroupInstances(self.group_uuid)
16033 def BuildHooksEnv(self):
16035 ret["GROUP_NAME"] = self.group_name
16036 ret["GROUP_NETWORK_MODE"] = self.network_mode
16037 ret["GROUP_NETWORK_LINK"] = self.network_link
16038 ret.update(_BuildNetworkHookEnvByObject(self, self.network))
16039 return ret
16041 def BuildHooksNodes(self):
16042 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16043 return (nodes, nodes)
16046 def CheckPrereq(self):
16047 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16048 for i in value)
16050 if self.network is None:
16051 raise errors.OpPrereqError("Network %s does not exist" %
16052 self.network_name, errors.ECODE_INVAL)
16054 self.netparams = dict()
16055 self.netparams[constants.NIC_MODE] = self.network_mode
16056 self.netparams[constants.NIC_LINK] = self.network_link
16057 objects.NIC.CheckParameterSyntax(self.netparams)
16059 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16060 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16061 self.connected = False
16062 if self.network_uuid in self.group.networks:
16063 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16064 (self.network_name, self.group.name))
16065 self.connected = True
16066 return
16068 pool = network.AddressPool(self.network)
16069 if self.op.conflicts_check:
16070 groupinstances = []
16071 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16072 groupinstances.append(self.cfg.GetInstanceInfo(n))
16073 instances = [(instance.name, idx, nic.ip)
16074 for instance in groupinstances
16075 for idx, nic in enumerate(instance.nics)
16076 if (not nic.network and pool._Contains(nic.ip))]
16077 if instances:
16078 self.LogWarning("The following occurrences use IPs from network %s"
16079 " that is about to be connected to nodegroup %s: %s" %
16080 (self.network_name, self.group.name,
16082 raise errors.OpPrereqError("Conflicting IPs found."
16083 " Please remove/modify"
16084 " corresponding NICs",
16085 errors.ECODE_INVAL)
16087 def Exec(self, feedback_fn):
16089 if self.connected:
16090 return
16091 self.group.networks[self.network_uuid] = self.netparams
16092 self.cfg.Update(self.group, feedback_fn)
16095 class LUNetworkDisconnect(LogicalUnit):
16096 """Disconnect a network to a nodegroup
16099 HPATH = "network-disconnect"
16100 HTYPE = constants.HTYPE_NETWORK
16103 def ExpandNames(self):
16104 self.network_name = self.op.network_name
16105 self.group_name = self.op.group_name
16107 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16108 self.network = self.cfg.GetNetwork(self.network_uuid)
16109 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16110 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16112 self.needed_locks = {
16113 locking.LEVEL_INSTANCE: [],
16114 locking.LEVEL_NODEGROUP: [self.group_uuid],
16116 self.share_locks[locking.LEVEL_INSTANCE] = 1
16118 def DeclareLocks(self, level):
16119 if level == locking.LEVEL_INSTANCE:
16120 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16122 # Lock instances optimistically, needs verification once group lock has
16124 self.needed_locks[locking.LEVEL_INSTANCE] = \
16125 self.cfg.GetNodeGroupInstances(self.group_uuid)
16127 def BuildHooksEnv(self):
16129 ret["GROUP_NAME"] = self.group_name
16130 ret.update(_BuildNetworkHookEnvByObject(self, self.network))
16131 return ret
16133 def BuildHooksNodes(self):
16134 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16135 return (nodes, nodes)
16138 def CheckPrereq(self):
16139 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16140 for i in value)
16142 self.connected = True
16143 if self.network_uuid not in self.group.networks:
16144 self.LogWarning("Network '%s' is"
16145 " not mapped to group '%s'" %
16146 (self.network_name, self.group.name))
16147 self.connected = False
16148 return
16150 if self.op.conflicts_check:
16151 groupinstances = []
16152 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16153 groupinstances.append(self.cfg.GetInstanceInfo(n))
16154 instances = [(instance.name, idx, nic.ip)
16155 for instance in groupinstances
16156 for idx, nic in enumerate(instance.nics)
16157 if nic.network == self.network_name]
16158 if instances:
16159 self.LogWarning("The following occurrences use IPs from network %s"
16160 " that is about to be disconnected from the nodegroup"
16162 (self.network_name, self.group.name,
16164 raise errors.OpPrereqError("Conflicting IPs."
16165 " Please remove/modify"
16166 " corresponding NICS",
16167 errors.ECODE_INVAL)
16169 def Exec(self, feedback_fn):
16170 if not self.connected:
16171 return
16173 del self.group.networks[self.network_uuid]
16174 self.cfg.Update(self.group, feedback_fn)
16177 #: Query type implementations
16179 constants.QR_CLUSTER: _ClusterQuery,
16180 constants.QR_INSTANCE: _InstanceQuery,
16181 constants.QR_NODE: _NodeQuery,
16182 constants.QR_GROUP: _GroupQuery,
16183 constants.QR_NETWORK: _NetworkQuery,
16184 constants.QR_OS: _OsQuery,
16185 constants.QR_EXPORT: _ExportQuery,
16188 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16191 def _GetQueryImplementation(name):
16192 """Returns the implemtnation for a query type.
16194 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16197 try:
16198 return _QUERY_IMPL[name]
16199 except KeyError:
16200 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16201 errors.ECODE_INVAL)
16203 def _CheckForConflictingIp(lu, ip, node):
16204 """In case of conflicting ip raise error.
16207 @param ip: ip address
16209 @param node: node name
16212 (conf_net, conf_netparams) = lu.cfg.CheckIPInNodeGroup(ip, node)
16213 if conf_net is not None:
16214 raise errors.OpPrereqError("Conflicting IP found:"
16215 " %s <> %s." % (ip, conf_net),
16216 errors.ECODE_INVAL)
16218 return (None, None)
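# Illustrative usage sketch (not part of the original module): callers pass
# the LU, the candidate NIC address and the node the instance will run on;
# the address and node name below are made up.
#
#   _CheckForConflictingIp(self, "192.0.2.10", pnode.name)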