4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
63 from ganeti import runtime
64 from ganeti import network
66 import ganeti.masterd.instance # pylint: disable=W0611
69 #: Size of DRBD meta block device
73 INSTANCE_DOWN = [constants.ADMINST_DOWN]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
77 #: Instance status in which an instance can be marked as offline/online
78 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
79 constants.ADMINST_OFFLINE,
84 """Data container for LU results with jobs.
86 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
87 by L{mcpu._ProcessResult}. The latter will then submit the jobs
88 contained in the C{jobs} attribute and include the job IDs in the opcode
92 def __init__(self, jobs, **kwargs):
93 """Initializes this class.
95 Additional return values can be specified as keyword arguments.
97 @type jobs: list of lists of L{opcodes.OpCode}
98 @param jobs: A list of lists of opcode objects
105 class LogicalUnit(object):
106 """Logical Unit base class.
108 Subclasses must follow these rules:
109 - implement ExpandNames
110 - implement CheckPrereq (except when tasklets are used)
111 - implement Exec (except when tasklets are used)
112 - implement BuildHooksEnv
113 - implement BuildHooksNodes
114 - redefine HPATH and HTYPE
115 - optionally redefine their run requirements:
116 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
118 Note that all commands require root permissions.
120 @ivar dry_run_result: the value (if any) that will be returned to the caller
121 in dry-run mode (signalled by opcode dry_run parameter)
128 def __init__(self, processor, op, context, rpc_runner):
129 """Constructor for LogicalUnit.
131 This needs to be overridden in derived classes in order to check op
135 self.proc = processor
137 self.cfg = context.cfg
138 self.glm = context.glm
140 self.owned_locks = context.glm.list_owned
141 self.context = context
142 self.rpc = rpc_runner
143 # Dicts used to declare locking needs to mcpu
144 self.needed_locks = None
145 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
147 self.remove_locks = {}
148 # Used to force good behavior when calling helper functions
149 self.recalculate_locks = {}
151 self.Log = processor.Log # pylint: disable=C0103
152 self.LogWarning = processor.LogWarning # pylint: disable=C0103
153 self.LogInfo = processor.LogInfo # pylint: disable=C0103
154 self.LogStep = processor.LogStep # pylint: disable=C0103
155 # support for dry-run
156 self.dry_run_result = None
157 # support for generic debug attribute
158 if (not hasattr(self.op, "debug_level") or
159 not isinstance(self.op.debug_level, int)):
160 self.op.debug_level = 0
165 # Validate opcode parameters and set defaults
166 self.op.Validate(True)
168 self.CheckArguments()
170 def CheckArguments(self):
171 """Check syntactic validity for the opcode arguments.
173 This method is for doing a simple syntactic check and ensuring the
174 validity of opcode parameters, without any cluster-related
175 checks. While the same can be accomplished in ExpandNames and/or
176 CheckPrereq, doing these separately is better because:
178 - ExpandNames is left as purely a lock-related function
179 - CheckPrereq is run after we have acquired locks (and possible
182 The function is allowed to change the self.op attribute so that
183 later methods need not worry about missing parameters.
188 def ExpandNames(self):
189 """Expand names for this LU.
191 This method is called before starting to execute the opcode, and it should
192 update all the parameters of the opcode to their canonical form (e.g. a
193 short node name must be fully expanded after this method has successfully
194 completed). This way locking, hooks, logging, etc. can work correctly.
196 LUs which implement this method must also populate the self.needed_locks
197 member, as a dict with lock levels as keys, and a list of needed lock names
200 - use an empty dict if you don't need any lock
201 - if you don't need any lock at a particular level, omit that
202 level (note that in this case C{DeclareLocks} won't be called
203 at all for that level)
204 - if you need locks at a level, but you can't calculate it in
205 this function, initialise that level with an empty list and do
206 further processing in L{LogicalUnit.DeclareLocks} (see that
207 function's docstring)
208 - don't put anything for the BGL level
209 - if you want all locks at a level use L{locking.ALL_SET} as a value
211 If you need to share locks (rather than acquire them exclusively) at one
212 level you can modify self.share_locks, setting a true value (usually 1) for
213 that level. By default locks are not shared.
215 This function can also define a list of tasklets, which then will be
216 executed in order instead of the usual LU-level CheckPrereq and Exec
217 functions, if those are not defined by the LU.
221 # Acquire all nodes and one instance
222 self.needed_locks = {
223 locking.LEVEL_NODE: locking.ALL_SET,
224 locking.LEVEL_INSTANCE: ['instance1.example.com'],
226 # Acquire just two nodes
227 self.needed_locks = {
228 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
231 self.needed_locks = {} # No, you can't leave it to the default value None
234 # The implementation of this method is mandatory only if the new LU is
235 # concurrent, so that old LUs don't need to be changed all at the same
238 self.needed_locks = {} # Exclusive LUs don't need locks.
240 raise NotImplementedError
242 def DeclareLocks(self, level):
243 """Declare LU locking needs for a level
245 While most LUs can just declare their locking needs at ExpandNames time,
246 sometimes there's the need to calculate some locks after having acquired
247 the ones before. This function is called just before acquiring locks at a
248 particular level, but after acquiring the ones at lower levels, and permits
249 such calculations. It can be used to modify self.needed_locks, and by
250 default it does nothing.
252 This function is only called if you have something already set in
253 self.needed_locks for the level.
255 @param level: Locking level which is going to be locked
256 @type level: member of L{ganeti.locking.LEVELS}
260 def CheckPrereq(self):
261 """Check prerequisites for this LU.
263 This method should check that the prerequisites for the execution
264 of this LU are fulfilled. It can do internode communication, but
265 it should be idempotent - no cluster or system changes are
268 The method should raise errors.OpPrereqError in case something is
269 not fulfilled. Its return value is ignored.
271 This method should also update all the parameters of the opcode to
272 their canonical form if it hasn't been done by ExpandNames before.
275 if self.tasklets is not None:
276 for (idx, tl) in enumerate(self.tasklets):
277 logging.debug("Checking prerequisites for tasklet %s/%s",
278 idx + 1, len(self.tasklets))
283 def Exec(self, feedback_fn):
286 This method should implement the actual work. It should raise
287 errors.OpExecError for failures that are somewhat dealt with in
291 if self.tasklets is not None:
292 for (idx, tl) in enumerate(self.tasklets):
293 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
296 raise NotImplementedError
298 def BuildHooksEnv(self):
299 """Build hooks environment for this LU.
302 @return: Dictionary containing the environment that will be used for
303 running the hooks for this LU. The keys of the dict must not be prefixed
304 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
305 will extend the environment with additional variables. If no environment
306 should be defined, an empty dictionary should be returned (not C{None}).
307 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311 raise NotImplementedError
313 def BuildHooksNodes(self):
314 """Build list of nodes to run LU's hooks.
316 @rtype: tuple; (list, list)
317 @return: Tuple containing a list of node names on which the hook
318 should run before the execution and a list of node names on which the
319 hook should run after the execution. If there are no nodes, an
320 empty list should be returned (and not None).
321 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
325 raise NotImplementedError
327 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
328 """Notify the LU about the results of its hooks.
330 This method is called every time a hooks phase is executed, and notifies
331 the Logical Unit about the hooks' result. The LU can then use it to alter
332 its result based on the hooks. By default the method does nothing and the
333 previous result is passed back unchanged, but any LU can override it if it
334 wants to use the local cluster hook-scripts somehow.
336 @param phase: one of L{constants.HOOKS_PHASE_POST} or
337 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
338 @param hook_results: the results of the multi-node hooks rpc call
339 @param feedback_fn: function used to send feedback back to the caller
340 @param lu_result: the previous Exec result this LU had, or None
342 @return: the new Exec result, based on the previous result
346 # API must be kept, thus we ignore the "unused argument" and "could
347 # be a function" warnings
348 # pylint: disable=W0613,R0201
351 def _ExpandAndLockInstance(self):
352 """Helper function to expand and lock an instance.
354 Many LUs that work on an instance take its name in self.op.instance_name
355 and need to expand it and then declare the expanded name for locking. This
356 function does it, and then updates self.op.instance_name to the expanded
357 name. It also initializes needed_locks as a dict, if this hasn't been done
361 if self.needed_locks is None:
362 self.needed_locks = {}
364 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
365 "_ExpandAndLockInstance called with instance-level locks set"
366 self.op.instance_name = _ExpandInstanceName(self.cfg,
367 self.op.instance_name)
368 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
370 def _LockInstancesNodes(self, primary_only=False,
371 level=locking.LEVEL_NODE):
372 """Helper function to declare instances' nodes for locking.
374 This function should be called after locking one or more instances to lock
375 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
376 with all primary or secondary nodes for instances already locked and
377 present in self.needed_locks[locking.LEVEL_INSTANCE].
379 It should be called from DeclareLocks, and for safety only works if
380 self.recalculate_locks[locking.LEVEL_NODE] is set.
382 In the future it may grow parameters to just lock some instance's nodes, or
383 to just lock primaries or secondary nodes, if needed.
385 It should be called in DeclareLocks in a way similar to::
387 if level == locking.LEVEL_NODE:
388 self._LockInstancesNodes()
390 @type primary_only: boolean
391 @param primary_only: only lock primary nodes of locked instances
392 @param level: Which lock level to use for locking nodes
395 assert level in self.recalculate_locks, \
396 "_LockInstancesNodes helper function called with no nodes to recalculate"
398 # TODO: check if we've really been called with the instance locks held
400 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
401 # future we might want to have different behaviors depending on the value
402 # of self.recalculate_locks[locking.LEVEL_NODE]
404 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
405 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
406 wanted_nodes.append(instance.primary_node)
408 wanted_nodes.extend(instance.secondary_nodes)
410 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
411 self.needed_locks[level] = wanted_nodes
412 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
413 self.needed_locks[level].extend(wanted_nodes)
415 raise errors.ProgrammerError("Unknown recalculation mode")
417 del self.recalculate_locks[level]
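# A minimal sketch of the usual pattern built from the two helpers above; the
# exact lock levels and opcode fields vary per LU:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()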
420 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
421 """Simple LU which runs no hooks.
423 This LU is intended as a parent for other LogicalUnits which will
424 run no hooks, in order to reduce duplicate code.
430 def BuildHooksEnv(self):
431 """Empty BuildHooksEnv for NoHooksLu.
433 This just raises an error.
436 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
438 def BuildHooksNodes(self):
439 """Empty BuildHooksNodes for NoHooksLU.
442 raise AssertionError("BuildHooksNodes called for NoHooksLU")
446 """Tasklet base class.
448 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
449 they can mix legacy code with tasklets. Locking needs to be done in the LU,
450 tasklets know nothing about locks.
452 Subclasses must follow these rules:
453 - Implement CheckPrereq
457 def __init__(self, lu):
464 def CheckPrereq(self):
465 """Check prerequisites for this tasklet.
467 This method should check whether the prerequisites for the execution of
468 this tasklet are fulfilled. It can do internode communication, but it
469 should be idempotent - no cluster or system changes are allowed.
471 The method should raise errors.OpPrereqError in case something is not
472 fulfilled. Its return value is ignored.
474 This method should also update all parameters to their canonical form if it
475 hasn't been done before.
480 def Exec(self, feedback_fn):
481 """Execute the tasklet.
483 This method should implement the actual work. It should raise
484 errors.OpExecError for failures that are somewhat dealt with in code, or
488 raise NotImplementedError
492 """Base for query utility classes.
495 #: Attribute holding field definitions
501 def __init__(self, qfilter, fields, use_locking):
502 """Initializes this class.
505 self.use_locking = use_locking
507 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
508 namefield=self.SORT_FIELD)
509 self.requested_data = self.query.RequestedData()
510 self.names = self.query.RequestedNames()
512 # Sort only if no names were requested
513 self.sort_by_name = not self.names
515 self.do_locking = None
518 def _GetNames(self, lu, all_names, lock_level):
519 """Helper function to determine names asked for in the query.
523 names = lu.owned_locks(lock_level)
527 if self.wanted == locking.ALL_SET:
528 assert not self.names
529 # caller didn't specify names, so ordering is not important
530 return utils.NiceSort(names)
532 # caller specified names and we must keep the same order
534 assert not self.do_locking or lu.glm.is_owned(lock_level)
536 missing = set(self.wanted).difference(names)
538 raise errors.OpExecError("Some items were removed before retrieving"
539 " their data: %s" % missing)
541 # Return expanded names
544 def ExpandNames(self, lu):
545 """Expand names for this query.
547 See L{LogicalUnit.ExpandNames}.
550 raise NotImplementedError()
552 def DeclareLocks(self, lu, level):
553 """Declare locks for this query.
555 See L{LogicalUnit.DeclareLocks}.
558 raise NotImplementedError()
560 def _GetQueryData(self, lu):
561 """Collects all data for this query.
563 @return: Query data object
566 raise NotImplementedError()
568 def NewStyleQuery(self, lu):
569 """Collect data and execute query.
572 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
573 sort_by_name=self.sort_by_name)
575 def OldStyleQuery(self, lu):
576 """Collect data and execute query.
579 return self.query.OldStyleQuery(self._GetQueryData(lu),
580 sort_by_name=self.sort_by_name)
584 """Returns a dict declaring all lock levels shared.
587 return dict.fromkeys(locking.LEVELS, 1)
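# A minimal sketch of how _ShareAll is typically used by read-only LUs (see
# for instance LUClusterVerifyConfig.ExpandNames further down):
#
#   def ExpandNames(self):
#     self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
#     self.share_locks = _ShareAll()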
590 def _MakeLegacyNodeInfo(data):
591 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
593 Converts the data into a single dictionary. This is fine for most use cases,
594 but some require information from more than one volume group or hypervisor.
597 (bootid, (vg_info, ), (hv_info, )) = data
599 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
604 def _AnnotateDiskParams(instance, devs, cfg):
605 """Little helper wrapper around the rpc annotation method.
607 @param instance: The instance object
608 @type devs: List of L{objects.Disk}
609 @param devs: The root devices (not any of its children!)
610 @param cfg: The config object
611 @return: The annotated disk copies
612 @see L{rpc.AnnotateDiskParams}
615 return rpc.AnnotateDiskParams(instance.disk_template, devs,
616 cfg.GetInstanceDiskParams(instance))
619 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
621 """Checks if node groups for locked instances are still correct.
623 @type cfg: L{config.ConfigWriter}
624 @param cfg: Cluster configuration
625 @type instances: dict; string as key, L{objects.Instance} as value
626 @param instances: Dictionary, instance name as key, instance object as value
627 @type owned_groups: iterable of string
628 @param owned_groups: List of owned groups
629 @type owned_nodes: iterable of string
630 @param owned_nodes: List of owned nodes
631 @type cur_group_uuid: string or None
632 @param cur_group_uuid: Optional group UUID to check against instance's groups
635 for (name, inst) in instances.items():
636 assert owned_nodes.issuperset(inst.all_nodes), \
637 "Instance %s's nodes changed while we kept the lock" % name
639 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
641 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
642 "Instance %s has no node in group %s" % (name, cur_group_uuid)
645 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
646 """Checks if the owned node groups are still correct for an instance.
648 @type cfg: L{config.ConfigWriter}
649 @param cfg: The cluster configuration
650 @type instance_name: string
651 @param instance_name: Instance name
652 @type owned_groups: set or frozenset
653 @param owned_groups: List of currently owned node groups
656 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
658 if not owned_groups.issuperset(inst_groups):
659 raise errors.OpPrereqError("Instance %s's node groups changed since"
660 " locks were acquired, current groups"
661 " are '%s', owning groups '%s'; retry the"
664 utils.CommaJoin(inst_groups),
665 utils.CommaJoin(owned_groups)),
671 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
672 """Checks if the instances in a node group are still correct.
674 @type cfg: L{config.ConfigWriter}
675 @param cfg: The cluster configuration
676 @type group_uuid: string
677 @param group_uuid: Node group UUID
678 @type owned_instances: set or frozenset
679 @param owned_instances: List of currently owned instances
682 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
683 if owned_instances != wanted_instances:
684 raise errors.OpPrereqError("Instances in node group '%s' changed since"
685 " locks were acquired, wanted '%s', have '%s';"
686 " retry the operation" %
688 utils.CommaJoin(wanted_instances),
689 utils.CommaJoin(owned_instances)),
692 return wanted_instances
695 def _SupportsOob(cfg, node):
696 """Tells if node supports OOB.
698 @type cfg: L{config.ConfigWriter}
699 @param cfg: The cluster configuration
700 @type node: L{objects.Node}
701 @param node: The node
702 @return: The OOB script if supported or an empty string otherwise
705 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
708 def _GetWantedNodes(lu, nodes):
709 """Returns list of checked and expanded node names.
711 @type lu: L{LogicalUnit}
712 @param lu: the logical unit on whose behalf we execute
714 @param nodes: list of node names or None for all nodes
716 @return: the list of nodes, sorted
717 @raise errors.ProgrammerError: if the nodes parameter is wrong type
721 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
723 return utils.NiceSort(lu.cfg.GetNodeList())
726 def _GetWantedInstances(lu, instances):
727 """Returns list of checked and expanded instance names.
729 @type lu: L{LogicalUnit}
730 @param lu: the logical unit on whose behalf we execute
731 @type instances: list
732 @param instances: list of instance names or None for all instances
734 @return: the list of instances, sorted
735 @raise errors.OpPrereqError: if the instances parameter is wrong type
736 @raise errors.OpPrereqError: if any of the passed instances is not found
740 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
742 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
746 def _GetUpdatedParams(old_params, update_dict,
747 use_default=True, use_none=False):
748 """Return the new version of a parameter dictionary.
750 @type old_params: dict
751 @param old_params: old parameters
752 @type update_dict: dict
753 @param update_dict: dict containing new parameter values, or
754 constants.VALUE_DEFAULT to reset the parameter to its default
756 @type use_default: boolean
757 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
758 values as 'to be deleted' values
759 @type use_none: boolean
760 @param use_none: whether to recognise C{None} values as 'to be
763 @return: the new parameter dictionary
766 params_copy = copy.deepcopy(old_params)
767 for key, val in update_dict.iteritems():
768 if ((use_default and val == constants.VALUE_DEFAULT) or
769 (use_none and val is None)):
775 params_copy[key] = val
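# Illustrative example with made-up parameter names: entries set to
# constants.VALUE_DEFAULT (with use_default=True) delete the key, all other
# entries override or extend the old dictionary:
#
#   _GetUpdatedParams({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                     {"acpi": constants.VALUE_DEFAULT, "pae": False})
#   -> {"kernel_path": "/boot/vmlinuz", "pae": False}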
779 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
780 """Return the new version of an instance policy.
782 @param group_policy: whether this policy applies to a group and thus
783 we should support removal of policy entries
786 use_none = use_default = group_policy
787 ipolicy = copy.deepcopy(old_ipolicy)
788 for key, value in new_ipolicy.items():
789 if key not in constants.IPOLICY_ALL_KEYS:
790 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
792 if key in constants.IPOLICY_ISPECS:
793 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
794 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
796 use_default=use_default)
798 if (not value or value == [constants.VALUE_DEFAULT] or
799 value == constants.VALUE_DEFAULT):
803 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
804 " on the cluster" % key,
807 if key in constants.IPOLICY_PARAMETERS:
808 # FIXME: we assume all such values are float
810 ipolicy[key] = float(value)
811 except (TypeError, ValueError), err:
812 raise errors.OpPrereqError("Invalid value for attribute"
813 " '%s': '%s', error: %s" %
814 (key, value, err), errors.ECODE_INVAL)
816 # FIXME: we assume all others are lists; this should be redone
818 ipolicy[key] = list(value)
820 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
821 except errors.ConfigurationError, err:
822 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
827 def _UpdateAndVerifySubDict(base, updates, type_check):
828 """Updates and verifies a dict with sub dicts of the same type.
830 @param base: The dict with the old data
831 @param updates: The dict with the new data
832 @param type_check: Dict suitable for ForceDictType to verify correct types
833 @returns: A new dict with updated and verified values
837 new = _GetUpdatedParams(old, value)
838 utils.ForceDictType(new, type_check)
841 ret = copy.deepcopy(base)
842 ret.update(dict((key, fn(base.get(key, {}), value))
843 for key, value in updates.items()))
847 def _MergeAndVerifyHvState(op_input, obj_input):
848 """Combines the hv state from an opcode with that of the object.
850 @param op_input: The input dict from the opcode
851 @param obj_input: The input dict from the objects
852 @return: The verified and updated dict
856 invalid_hvs = set(op_input) - constants.HYPER_TYPES
858 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
859 " %s" % utils.CommaJoin(invalid_hvs),
861 if obj_input is None:
863 type_check = constants.HVSTS_PARAMETER_TYPES
864 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
869 def _MergeAndVerifyDiskState(op_input, obj_input):
870 """Combines the disk state from an opcode with that of the object.
872 @param op_input: The input dict from the opcode
873 @param obj_input: The input dict from the objects
874 @return: The verified and updated dict
877 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
879 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
880 utils.CommaJoin(invalid_dst),
882 type_check = constants.DSS_PARAMETER_TYPES
883 if obj_input is None:
885 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
887 for key, value in op_input.items())
892 def _ReleaseLocks(lu, level, names=None, keep=None):
893 """Releases locks owned by an LU.
895 @type lu: L{LogicalUnit}
896 @param level: Lock level
897 @type names: list or None
898 @param names: Names of locks to release
899 @type keep: list or None
900 @param keep: Names of locks to retain
903 assert not (keep is not None and names is not None), \
904 "Only one of the 'names' and the 'keep' parameters can be given"
906 if names is not None:
907 should_release = names.__contains__
909 should_release = lambda name: name not in keep
911 should_release = None
913 owned = lu.owned_locks(level)
915 # Not owning any lock at this level, do nothing
922 # Determine which locks to release
924 if should_release(name):
929 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
931 # Release just some locks
932 lu.glm.release(level, names=release)
934 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
937 lu.glm.release(level)
939 assert not lu.glm.is_owned(level), "No locks should be owned"
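# A minimal usage sketch (the opcode field is only an example): once an LU has
# determined which node it really needs, it can drop the other node locks it
# acquired earlier:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])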
942 def _MapInstanceDisksToNodes(instances):
943 """Creates a map from (node, volume) to instance name.
945 @type instances: list of L{objects.Instance}
946 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
949 return dict(((node, vol), inst.name)
950 for inst in instances
951 for (node, vols) in inst.MapLVsByNode().items()
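# Illustrative shape of the returned mapping, with made-up node, volume and
# instance names (a DRBD instance shows up once per node holding its LVs):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}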
955 def _RunPostHook(lu, node_name):
956 """Runs the post-hook for an opcode on a single node.
959 hm = lu.proc.BuildHooksManager(lu)
961 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
963 # pylint: disable=W0702
964 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
967 def _CheckOutputFields(static, dynamic, selected):
968 """Checks whether all selected fields are valid.
970 @type static: L{utils.FieldSet}
971 @param static: static fields set
972 @type dynamic: L{utils.FieldSet}
973 @param dynamic: dynamic fields set
980 delta = f.NonMatching(selected)
982 raise errors.OpPrereqError("Unknown output fields selected: %s"
983 % ",".join(delta), errors.ECODE_INVAL)
986 def _CheckGlobalHvParams(params):
987 """Validates that given hypervisor params are not global ones.
989 This will ensure that instances don't get customised versions of
993 used_globals = constants.HVC_GLOBALS.intersection(params)
995 msg = ("The following hypervisor parameters are global and cannot"
996 " be customized at instance level, please modify them at"
997 " cluster level: %s" % utils.CommaJoin(used_globals))
998 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1001 def _CheckNodeOnline(lu, node, msg=None):
1002 """Ensure that a given node is online.
1004 @param lu: the LU on behalf of which we make the check
1005 @param node: the node to check
1006 @param msg: if passed, should be a message to replace the default one
1007 @raise errors.OpPrereqError: if the node is offline
1011 msg = "Can't use offline node"
1012 if lu.cfg.GetNodeInfo(node).offline:
1013 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1016 def _CheckNodeNotDrained(lu, node):
1017 """Ensure that a given node is not drained.
1019 @param lu: the LU on behalf of which we make the check
1020 @param node: the node to check
1021 @raise errors.OpPrereqError: if the node is drained
1024 if lu.cfg.GetNodeInfo(node).drained:
1025 raise errors.OpPrereqError("Can't use drained node %s" % node,
1029 def _CheckNodeVmCapable(lu, node):
1030 """Ensure that a given node is vm capable.
1032 @param lu: the LU on behalf of which we make the check
1033 @param node: the node to check
1034 @raise errors.OpPrereqError: if the node is not vm capable
1037 if not lu.cfg.GetNodeInfo(node).vm_capable:
1038 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1042 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1043 """Ensure that a node supports a given OS.
1045 @param lu: the LU on behalf of which we make the check
1046 @param node: the node to check
1047 @param os_name: the OS to query about
1048 @param force_variant: whether to ignore variant errors
1049 @raise errors.OpPrereqError: if the node is not supporting the OS
1052 result = lu.rpc.call_os_get(node, os_name)
1053 result.Raise("OS '%s' not in supported OS list for node %s" %
1055 prereq=True, ecode=errors.ECODE_INVAL)
1056 if not force_variant:
1057 _CheckOSVariant(result.payload, os_name)
1060 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1061 """Ensure that a node has the given secondary ip.
1063 @type lu: L{LogicalUnit}
1064 @param lu: the LU on behalf of which we make the check
1066 @param node: the node to check
1067 @type secondary_ip: string
1068 @param secondary_ip: the ip to check
1069 @type prereq: boolean
1070 @param prereq: whether to throw a prerequisite or an execute error
1071 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1072 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1075 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1076 result.Raise("Failure checking secondary ip on node %s" % node,
1077 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1078 if not result.payload:
1079 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1080 " please fix and re-run this command" % secondary_ip)
1082 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1084 raise errors.OpExecError(msg)
1087 def _GetClusterDomainSecret():
1088 """Reads the cluster domain secret.
1091 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1095 def _CheckInstanceState(lu, instance, req_states, msg=None):
1096 """Ensure that an instance is in one of the required states.
1098 @param lu: the LU on behalf of which we make the check
1099 @param instance: the instance to check
1100 @param msg: if passed, should be a message to replace the default one
1101 @raise errors.OpPrereqError: if the instance is not in the required state
1105 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1106 if instance.admin_state not in req_states:
1107 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1108 (instance.name, instance.admin_state, msg),
1111 if constants.ADMINST_UP not in req_states:
1112 pnode = instance.primary_node
1113 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1114 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1115 prereq=True, ecode=errors.ECODE_ENVIRON)
1117 if instance.name in ins_l.payload:
1118 raise errors.OpPrereqError("Instance %s is running, %s" %
1119 (instance.name, msg), errors.ECODE_STATE)
1122 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1123 """Computes if value is in the desired range.
1125 @param name: name of the parameter for which we perform the check
1126 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1128 @param ipolicy: dictionary containing min, max and std values
1129 @param value: actual value that we want to use
1130 @return: None or element not meeting the criteria
1134 if value in [None, constants.VALUE_AUTO]:
1136 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1137 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1138 if value > max_v or min_v > value:
1140 fqn = "%s/%s" % (name, qualifier)
1143 return ("%s value %s is not in range [%s, %s]" %
1144 (fqn, value, min_v, max_v))
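# Illustrative behaviour with made-up policy values: if the ipolicy allows
# memory sizes between 128 and 32768, a value of 512 yields None (in range),
# while 65536 yields a message of the form
# "<name>/<qualifier> value 65536 is not in range [128, 32768]".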
1148 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1149 nic_count, disk_sizes, spindle_use,
1150 _compute_fn=_ComputeMinMaxSpec):
1151 """Verifies ipolicy against provided specs.
1154 @param ipolicy: The ipolicy
1156 @param mem_size: The memory size
1157 @type cpu_count: int
1158 @param cpu_count: Used cpu cores
1159 @type disk_count: int
1160 @param disk_count: Number of disks used
1161 @type nic_count: int
1162 @param nic_count: Number of nics used
1163 @type disk_sizes: list of ints
1164 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1165 @type spindle_use: int
1166 @param spindle_use: The number of spindles this instance uses
1167 @param _compute_fn: The compute function (unittest only)
1168 @return: A list of violations, or an empty list if no violations are found
1171 assert disk_count == len(disk_sizes)
1174 (constants.ISPEC_MEM_SIZE, "", mem_size),
1175 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1176 (constants.ISPEC_DISK_COUNT, "", disk_count),
1177 (constants.ISPEC_NIC_COUNT, "", nic_count),
1178 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1179 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1180 for idx, d in enumerate(disk_sizes)]
1183 (_compute_fn(name, qualifier, ipolicy, value)
1184 for (name, qualifier, value) in test_settings))
1187 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1188 _compute_fn=_ComputeIPolicySpecViolation):
1189 """Compute if instance meets the specs of ipolicy.
1192 @param ipolicy: The ipolicy to verify against
1193 @type instance: L{objects.Instance}
1194 @param instance: The instance to verify
1195 @param _compute_fn: The function to verify ipolicy (unittest only)
1196 @see: L{_ComputeIPolicySpecViolation}
1199 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1200 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1201 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1202 disk_count = len(instance.disks)
1203 disk_sizes = [disk.size for disk in instance.disks]
1204 nic_count = len(instance.nics)
1206 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1207 disk_sizes, spindle_use)
1210 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1211 _compute_fn=_ComputeIPolicySpecViolation):
1212 """Compute if instance specs meet the specs of ipolicy.
1215 @param ipolicy: The ipolicy to verify against
1216 @type instance_spec: dict
1217 @param instance_spec: The instance spec to verify
1218 @param _compute_fn: The function to verify ipolicy (unittest only)
1219 @see: L{_ComputeIPolicySpecViolation}
1222 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1223 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1224 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1225 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1226 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1227 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1229 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1230 disk_sizes, spindle_use)
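# Illustrative instance_spec dictionary with made-up sizes, matching the keys
# read above:
#
#   {constants.ISPEC_MEM_SIZE: 512, constants.ISPEC_CPU_COUNT: 1,
#    constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: [10240],
#    constants.ISPEC_NIC_COUNT: 1, constants.ISPEC_SPINDLE_USE: 1}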
1233 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1235 _compute_fn=_ComputeIPolicyInstanceViolation):
1236 """Compute if instance meets the specs of the new target group.
1238 @param ipolicy: The ipolicy to verify
1239 @param instance: The instance object to verify
1240 @param current_group: The current group of the instance
1241 @param target_group: The new group of the instance
1242 @param _compute_fn: The function to verify ipolicy (unittest only)
1243 @see: L{_ComputeIPolicySpecViolation}
1246 if current_group == target_group:
1249 return _compute_fn(ipolicy, instance)
1252 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1253 _compute_fn=_ComputeIPolicyNodeViolation):
1254 """Checks that the target node is correct in terms of instance policy.
1256 @param ipolicy: The ipolicy to verify
1257 @param instance: The instance object to verify
1258 @param node: The new node to relocate
1259 @param ignore: Ignore violations of the ipolicy
1260 @param _compute_fn: The function to verify ipolicy (unittest only)
1261 @see: L{_ComputeIPolicySpecViolation}
1264 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1265 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1268 msg = ("Instance does not meet target node group's (%s) instance"
1269 " policy: %s") % (node.group, utils.CommaJoin(res))
1273 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1276 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1277 """Computes a set of any instances that would violate the new ipolicy.
1279 @param old_ipolicy: The current (still in-place) ipolicy
1280 @param new_ipolicy: The new (to become) ipolicy
1281 @param instances: List of instances to verify
1282 @return: A list of instances which violate the new ipolicy but
1286 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1287 _ComputeViolatingInstances(old_ipolicy, instances))
1290 def _ExpandItemName(fn, name, kind):
1291 """Expand an item name.
1293 @param fn: the function to use for expansion
1294 @param name: requested item name
1295 @param kind: text description ('Node' or 'Instance')
1296 @return: the resolved (full) name
1297 @raise errors.OpPrereqError: if the item is not found
1300 full_name = fn(name)
1301 if full_name is None:
1302 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1307 def _ExpandNodeName(cfg, name):
1308 """Wrapper over L{_ExpandItemName} for nodes."""
1309 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1312 def _ExpandInstanceName(cfg, name):
1313 """Wrapper over L{_ExpandItemName} for instance."""
1314 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
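# Illustrative behaviour with a made-up name: if "node1" unambiguously expands
# to "node1.example.com" in the configuration, _ExpandNodeName returns the
# full name; an unknown name raises OpPrereqError("Node 'node1' not known").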
1317 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1318 minmem, maxmem, vcpus, nics, disk_template, disks,
1319 bep, hvp, hypervisor_name, tags):
1320 """Builds instance related env variables for hooks
1322 This builds the hook environment from individual variables.
1325 @param name: the name of the instance
1326 @type primary_node: string
1327 @param primary_node: the name of the instance's primary node
1328 @type secondary_nodes: list
1329 @param secondary_nodes: list of secondary nodes as strings
1330 @type os_type: string
1331 @param os_type: the name of the instance's OS
1332 @type status: string
1333 @param status: the desired status of the instance
1334 @type minmem: string
1335 @param minmem: the minimum memory size of the instance
1336 @type maxmem: string
1337 @param maxmem: the maximum memory size of the instance
1339 @param vcpus: the count of VCPUs the instance has
1341 @param nics: list of tuples (ip, mac, mode, link, network) representing
1342 the NICs the instance has
1343 @type disk_template: string
1344 @param disk_template: the disk template of the instance
1346 @param disks: the list of (size, mode) pairs
1348 @param bep: the backend parameters for the instance
1350 @param hvp: the hypervisor parameters for the instance
1351 @type hypervisor_name: string
1352 @param hypervisor_name: the hypervisor for the instance
1354 @param tags: list of instance tags as strings
1356 @return: the hook environment for this instance
1361 "INSTANCE_NAME": name,
1362 "INSTANCE_PRIMARY": primary_node,
1363 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1364 "INSTANCE_OS_TYPE": os_type,
1365 "INSTANCE_STATUS": status,
1366 "INSTANCE_MINMEM": minmem,
1367 "INSTANCE_MAXMEM": maxmem,
1368 # TODO(2.7) remove deprecated "memory" value
1369 "INSTANCE_MEMORY": maxmem,
1370 "INSTANCE_VCPUS": vcpus,
1371 "INSTANCE_DISK_TEMPLATE": disk_template,
1372 "INSTANCE_HYPERVISOR": hypervisor_name,
1375 nic_count = len(nics)
1376 for idx, (ip, mac, mode, link, network) in enumerate(nics):
1379 env["INSTANCE_NIC%d_IP" % idx] = ip
1380 env["INSTANCE_NIC%d_MAC" % idx] = mac
1381 env["INSTANCE_NIC%d_MODE" % idx] = mode
1382 env["INSTANCE_NIC%d_LINK" % idx] = link
1383 env["INSTANCE_NIC%d_NETWORK" % idx] = network
1384 if mode == constants.NIC_MODE_BRIDGED:
1385 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1389 env["INSTANCE_NIC_COUNT"] = nic_count
1392 disk_count = len(disks)
1393 for idx, (size, mode) in enumerate(disks):
1394 env["INSTANCE_DISK%d_SIZE" % idx] = size
1395 env["INSTANCE_DISK%d_MODE" % idx] = mode
1399 env["INSTANCE_DISK_COUNT"] = disk_count
1404 env["INSTANCE_TAGS"] = " ".join(tags)
1406 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1407 for key, value in source.items():
1408 env["INSTANCE_%s_%s" % (kind, key)] = value
1413 def _NICListToTuple(lu, nics):
1414 """Build a list of nic information tuples.
1416 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1417 value in LUInstanceQueryData.
1419 @type lu: L{LogicalUnit}
1420 @param lu: the logical unit on whose behalf we execute
1421 @type nics: list of L{objects.NIC}
1422 @param nics: list of nics to convert to hooks tuples
1426 cluster = lu.cfg.GetClusterInfo()
1430 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1431 mode = filled_params[constants.NIC_MODE]
1432 link = filled_params[constants.NIC_LINK]
1433 network = nic.network
1434 hooks_nics.append((ip, mac, mode, link, network))
1438 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1439 """Builds instance related env variables for hooks from an object.
1441 @type lu: L{LogicalUnit}
1442 @param lu: the logical unit on whose behalf we execute
1443 @type instance: L{objects.Instance}
1444 @param instance: the instance for which we should build the
1446 @type override: dict
1447 @param override: dictionary with key/values that will override
1450 @return: the hook environment dictionary
1453 cluster = lu.cfg.GetClusterInfo()
1454 bep = cluster.FillBE(instance)
1455 hvp = cluster.FillHV(instance)
1457 "name": instance.name,
1458 "primary_node": instance.primary_node,
1459 "secondary_nodes": instance.secondary_nodes,
1460 "os_type": instance.os,
1461 "status": instance.admin_state,
1462 "maxmem": bep[constants.BE_MAXMEM],
1463 "minmem": bep[constants.BE_MINMEM],
1464 "vcpus": bep[constants.BE_VCPUS],
1465 "nics": _NICListToTuple(lu, instance.nics),
1466 "disk_template": instance.disk_template,
1467 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1470 "hypervisor_name": instance.hypervisor,
1471 "tags": instance.tags,
1474 args.update(override)
1475 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1478 def _AdjustCandidatePool(lu, exceptions):
1479 """Adjust the candidate pool after node operations.
1482 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1484 lu.LogInfo("Promoted nodes to master candidate role: %s",
1485 utils.CommaJoin(node.name for node in mod_list))
1486 for name in mod_list:
1487 lu.context.ReaddNode(name)
1488 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1490 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1494 def _DecideSelfPromotion(lu, exceptions=None):
1495 """Decide whether I should promote myself as a master candidate.
1498 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1499 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1500 # the new node will increase mc_max by one, so:
1501 mc_should = min(mc_should + 1, cp_size)
1502 return mc_now < mc_should
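# Worked example with made-up numbers: with candidate_pool_size = 10 and
# currently 3 master candidates out of a desired 3, adding this node raises
# the desired count to min(3 + 1, 10) = 4, so 3 < 4 and the node promotes
# itself.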
1505 def _CalculateGroupIPolicy(cluster, group):
1506 """Calculate instance policy for group.
1509 return cluster.SimpleFillIPolicy(group.ipolicy)
1512 def _ComputeViolatingInstances(ipolicy, instances):
1513 """Computes the set of instances that violate the given ipolicy.
1515 @param ipolicy: The ipolicy to verify
1516 @type instances: list of L{objects.Instance}
1517 @param instances: List of instances to verify
1518 @return: A frozenset of instance names violating the ipolicy
1521 return frozenset([inst.name for inst in instances
1522 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1525 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1526 """Check that the bridges needed by a list of nics exist.
1529 cluster = lu.cfg.GetClusterInfo()
1530 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1531 brlist = [params[constants.NIC_LINK] for params in paramslist
1532 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1534 result = lu.rpc.call_bridges_exist(target_node, brlist)
1535 result.Raise("Error checking bridges on destination node '%s'" %
1536 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1539 def _CheckInstanceBridgesExist(lu, instance, node=None):
1540 """Check that the bridges needed by an instance exist.
1544 node = instance.primary_node
1545 _CheckNicsBridgesExist(lu, instance.nics, node)
1548 def _CheckOSVariant(os_obj, name):
1549 """Check whether an OS name conforms to the os variants specification.
1551 @type os_obj: L{objects.OS}
1552 @param os_obj: OS object to check
1554 @param name: OS name passed by the user, to check for validity
1557 variant = objects.OS.GetVariant(name)
1558 if not os_obj.supported_variants:
1560 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1561 " passed)" % (os_obj.name, variant),
1565 raise errors.OpPrereqError("OS name must include a variant",
1568 if variant not in os_obj.supported_variants:
1569 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
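# Illustrative example with a made-up OS name: for an OS declaring supported
# variants, the user-supplied name is expected in the "<os>+<variant>" form
# (e.g. "debootstrap+default"); a bare "debootstrap" fails with "OS name must
# include a variant" and an unknown variant with "Unsupported OS variant".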
1572 def _GetNodeInstancesInner(cfg, fn):
1573 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1576 def _GetNodeInstances(cfg, node_name):
1577 """Returns a list of all primary and secondary instances on a node.
1581 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1584 def _GetNodePrimaryInstances(cfg, node_name):
1585 """Returns primary instances on a node.
1588 return _GetNodeInstancesInner(cfg,
1589 lambda inst: node_name == inst.primary_node)
1592 def _GetNodeSecondaryInstances(cfg, node_name):
1593 """Returns secondary instances on a node.
1596 return _GetNodeInstancesInner(cfg,
1597 lambda inst: node_name in inst.secondary_nodes)
1600 def _GetStorageTypeArgs(cfg, storage_type):
1601 """Returns the arguments for a storage type.
1604 # Special case for file storage
1605 if storage_type == constants.ST_FILE:
1606 # storage.FileStorage wants a list of storage directories
1607 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1612 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1615 for dev in instance.disks:
1616 cfg.SetDiskID(dev, node_name)
1618 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1620 result.Raise("Failed to get disk status from node %s" % node_name,
1621 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1623 for idx, bdev_status in enumerate(result.payload):
1624 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1630 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1631 """Check the sanity of iallocator and node arguments and use the
1632 cluster-wide iallocator if appropriate.
1634 Check that at most one of (iallocator, node) is specified. If none is
1635 specified, then the LU's opcode's iallocator slot is filled with the
1636 cluster-wide default iallocator.
1638 @type iallocator_slot: string
1639 @param iallocator_slot: the name of the opcode iallocator slot
1640 @type node_slot: string
1641 @param node_slot: the name of the opcode target node slot
1644 node = getattr(lu.op, node_slot, None)
1645 iallocator = getattr(lu.op, iallocator_slot, None)
1647 if node is not None and iallocator is not None:
1648 raise errors.OpPrereqError("Do not specify both iallocator and node",
1650 elif node is None and iallocator is None:
1651 default_iallocator = lu.cfg.GetDefaultIAllocator()
1652 if default_iallocator:
1653 setattr(lu.op, iallocator_slot, default_iallocator)
1655 raise errors.OpPrereqError("No iallocator or node given and no"
1656 " cluster-wide default iallocator found;"
1657 " please specify either an iallocator or a"
1658 " node, or set a cluster-wide default"
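# A minimal usage sketch with hypothetical opcode slot names: an LU accepting
# either an iallocator or an explicit target node would call, from
# CheckArguments or ExpandNames,
#
#   _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
#
# so the two are never both set, and the cluster-wide default iallocator is
# filled in when neither was given.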
1662 def _GetDefaultIAllocator(cfg, iallocator):
1663 """Decides on which iallocator to use.
1665 @type cfg: L{config.ConfigWriter}
1666 @param cfg: Cluster configuration object
1667 @type iallocator: string or None
1668 @param iallocator: Iallocator specified in opcode
1670 @return: Iallocator name
1674 # Use default iallocator
1675 iallocator = cfg.GetDefaultIAllocator()
1678 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1679 " opcode nor as a cluster-wide default",
1685 class LUClusterPostInit(LogicalUnit):
1686 """Logical unit for running hooks after cluster initialization.
1689 HPATH = "cluster-init"
1690 HTYPE = constants.HTYPE_CLUSTER
1692 def BuildHooksEnv(self):
1697 "OP_TARGET": self.cfg.GetClusterName(),
1700 def BuildHooksNodes(self):
1701 """Build hooks nodes.
1704 return ([], [self.cfg.GetMasterNode()])
1706 def Exec(self, feedback_fn):
1713 class LUClusterDestroy(LogicalUnit):
1714 """Logical unit for destroying the cluster.
1717 HPATH = "cluster-destroy"
1718 HTYPE = constants.HTYPE_CLUSTER
1720 def BuildHooksEnv(self):
1725 "OP_TARGET": self.cfg.GetClusterName(),
1728 def BuildHooksNodes(self):
1729 """Build hooks nodes.
1734 def CheckPrereq(self):
1735 """Check prerequisites.
1737 This checks whether the cluster is empty.
1739 Any errors are signaled by raising errors.OpPrereqError.
1742 master = self.cfg.GetMasterNode()
1744 nodelist = self.cfg.GetNodeList()
1745 if len(nodelist) != 1 or nodelist[0] != master:
1746 raise errors.OpPrereqError("There are still %d node(s) in"
1747 " this cluster." % (len(nodelist) - 1),
1749 instancelist = self.cfg.GetInstanceList()
1751 raise errors.OpPrereqError("There are still %d instance(s) in"
1752 " this cluster." % len(instancelist),
1755 def Exec(self, feedback_fn):
1756 """Destroys the cluster.
1759 master_params = self.cfg.GetMasterNetworkParameters()
1761 # Run post hooks on master node before it's removed
1762 _RunPostHook(self, master_params.name)
1764 ems = self.cfg.GetUseExternalMipScript()
1765 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1768 self.LogWarning("Error disabling the master IP address: %s",
1771 return master_params.name
1774 def _VerifyCertificate(filename):
1775 """Verifies a certificate for L{LUClusterVerifyConfig}.
1777 @type filename: string
1778 @param filename: Path to PEM file
1782 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1783 utils.ReadFile(filename))
1784 except Exception, err: # pylint: disable=W0703
1785 return (LUClusterVerifyConfig.ETYPE_ERROR,
1786 "Failed to load X509 certificate %s: %s" % (filename, err))
1789 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1790 constants.SSL_CERT_EXPIRATION_ERROR)
1793 fnamemsg = "While verifying %s: %s" % (filename, msg)
1798 return (None, fnamemsg)
1799 elif errcode == utils.CERT_WARNING:
1800 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1801 elif errcode == utils.CERT_ERROR:
1802 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1804 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1807 def _GetAllHypervisorParameters(cluster, instances):
1808 """Compute the set of all hypervisor parameters.
1810 @type cluster: L{objects.Cluster}
1811 @param cluster: the cluster object
1812 @type instances: list of L{objects.Instance}
1813 @param instances: additional instances from which to obtain parameters
1814 @rtype: list of (origin, hypervisor, parameters)
1815 @return: a list with all parameters found, indicating the hypervisor they
1816 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1821 for hv_name in cluster.enabled_hypervisors:
1822 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1824 for os_name, os_hvp in cluster.os_hvp.items():
1825 for hv_name, hv_params in os_hvp.items():
1827 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1828 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1830 # TODO: collapse identical parameter values in a single one
1831 for instance in instances:
1832 if instance.hvparams:
1833 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1834 cluster.FillHV(instance)))
1839 class _VerifyErrors(object):
1840 """Mix-in for cluster/group verify LUs.
1842 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1843 self.op and self._feedback_fn to be available.)
1847 ETYPE_FIELD = "code"
1848 ETYPE_ERROR = "ERROR"
1849 ETYPE_WARNING = "WARNING"
1851 def _Error(self, ecode, item, msg, *args, **kwargs):
1852 """Format an error message.
1854 Based on the opcode's error_codes parameter, either format a
1855 parseable error code, or a simpler error string.
1857 This must be called only from Exec and functions called from Exec.
1860 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1861 itype, etxt, _ = ecode
1862 # first complete the msg
1865 # then format the whole message
1866 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1867 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1873 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1874 # and finally report it via the feedback_fn
1875 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1877 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1878 """Log an error message if the passed condition is True.
1882 or self.op.debug_simulate_errors) # pylint: disable=E1101
1884 # If the error code is in the list of ignored errors, demote the error to a
1886 (_, etxt, _) = ecode
1887 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1888 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1891 self._Error(ecode, *args, **kwargs)
1893 # do not mark the operation as failed for WARN cases only
1894 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1895 self.bad = self.bad or cond
1898 class LUClusterVerify(NoHooksLU):
1899 """Submits all jobs necessary to verify the cluster.
1904 def ExpandNames(self):
1905 self.needed_locks = {}
1907 def Exec(self, feedback_fn):
1910 if self.op.group_name:
1911 groups = [self.op.group_name]
1912 depends_fn = lambda: None
1914 groups = self.cfg.GetNodeGroupList()
1916 # Verify global configuration
1918 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1921 # Always depend on global verification
1922 depends_fn = lambda: [(-len(jobs), [])]
1924 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1925 ignore_errors=self.op.ignore_errors,
1926 depends=depends_fn())]
1927 for group in groups)
1929 # Fix up all parameters
1930 for op in itertools.chain(*jobs): # pylint: disable=W0142
1931 op.debug_simulate_errors = self.op.debug_simulate_errors
1932 op.verbose = self.op.verbose
1933 op.error_codes = self.op.error_codes
1935 op.skip_checks = self.op.skip_checks
1936 except AttributeError:
1937 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1939 return ResultWithJobs(jobs)
1942 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1943 """Verifies the cluster config.
1948 def _VerifyHVP(self, hvp_data):
1949 """Verifies locally the syntax of the hypervisor parameters.
1952 for item, hv_name, hv_params in hvp_data:
1953 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1956 hv_class = hypervisor.GetHypervisor(hv_name)
1957 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1958 hv_class.CheckParameterSyntax(hv_params)
1959 except errors.GenericError, err:
1960 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
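# Illustrative sketch (hypothetical values, not part of the LU): hvp_data as
# built by _GetAllHypervisorParameters is a list of
# (source, hypervisor, parameters) tuples, e.g.
#   ("os debian-image", "xen-pvm", {...full defaults for that OS...})
#   ("instance inst1", "kvm", {...cluster defaults plus instance overrides...})
# and each parameter dictionary is checked in isolation for type and syntax
# errors by the loop above.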
1962 def ExpandNames(self):
1963 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1964 self.share_locks = _ShareAll()
1966 def CheckPrereq(self):
1967 """Check prerequisites.
1970 # Retrieve all information
1971 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1972 self.all_node_info = self.cfg.GetAllNodesInfo()
1973 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1975 def Exec(self, feedback_fn):
1976 """Verify integrity of cluster, performing various test on nodes.
1980 self._feedback_fn = feedback_fn
1982 feedback_fn("* Verifying cluster config")
1984 for msg in self.cfg.VerifyConfig():
1985 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1987 feedback_fn("* Verifying cluster certificate files")
1989 for cert_filename in constants.ALL_CERT_FILES:
1990 (errcode, msg) = _VerifyCertificate(cert_filename)
1991 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1993 feedback_fn("* Verifying hypervisor parameters")
1995 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1996 self.all_inst_info.values()))
1998 feedback_fn("* Verifying all nodes belong to an existing group")
2000 # We do this verification here because, should this bogus circumstance
2001 # occur, it would never be caught by VerifyGroup, which only acts on
2002 # nodes/instances reachable from existing node groups.
2004 dangling_nodes = set(node.name for node in self.all_node_info.values()
2005 if node.group not in self.all_group_info)
2007 dangling_instances = {}
2008 no_node_instances = []
2010 for inst in self.all_inst_info.values():
2011 if inst.primary_node in dangling_nodes:
2012 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2013 elif inst.primary_node not in self.all_node_info:
2014 no_node_instances.append(inst.name)
2019 utils.CommaJoin(dangling_instances.get(node.name,
2021 for node in dangling_nodes]
2023 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2025 "the following nodes (and their instances) belong to a non"
2026 " existing group: %s", utils.CommaJoin(pretty_dangling))
2028 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2030 "the following instances have a non-existing primary-node:"
2031 " %s", utils.CommaJoin(no_node_instances))
2036 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2037 """Verifies the status of a node group.
2040 HPATH = "cluster-verify"
2041 HTYPE = constants.HTYPE_CLUSTER
2044 _HOOKS_INDENT_RE = re.compile("^", re.M)
2046 class NodeImage(object):
2047 """A class representing the logical and physical status of a node.
2050 @ivar name: the node name to which this object refers
2051 @ivar volumes: a structure as returned from
2052 L{ganeti.backend.GetVolumeList} (runtime)
2053 @ivar instances: a list of running instances (runtime)
2054 @ivar pinst: list of configured primary instances (config)
2055 @ivar sinst: list of configured secondary instances (config)
2056 @ivar sbp: dictionary of {primary-node: list of instances} for all
2057 instances for which this node is secondary (config)
2058 @ivar mfree: free memory, as reported by hypervisor (runtime)
2059 @ivar dfree: free disk, as reported by the node (runtime)
2060 @ivar offline: the offline status (config)
2061 @type rpc_fail: boolean
2062 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2063 not whether the individual keys were correct) (runtime)
2064 @type lvm_fail: boolean
2065 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2066 @type hyp_fail: boolean
2067 @ivar hyp_fail: whether the RPC call didn't return the instance list
2068 @type ghost: boolean
2069 @ivar ghost: whether this is a known node or not (config)
2070 @type os_fail: boolean
2071 @ivar os_fail: whether the RPC call didn't return valid OS data
2073 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2074 @type vm_capable: boolean
2075 @ivar vm_capable: whether the node can host instances
2078 def __init__(self, offline=False, name=None, vm_capable=True):
2087 self.offline = offline
2088 self.vm_capable = vm_capable
2089 self.rpc_fail = False
2090 self.lvm_fail = False
2091 self.hyp_fail = False
2093 self.os_fail = False
2096 def ExpandNames(self):
2097 # This raises errors.OpPrereqError on its own:
2098 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2100 # Get instances in node group; this is unsafe and needs verification later
2102 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2104 self.needed_locks = {
2105 locking.LEVEL_INSTANCE: inst_names,
2106 locking.LEVEL_NODEGROUP: [self.group_uuid],
2107 locking.LEVEL_NODE: [],
2110 self.share_locks = _ShareAll()
2112 def DeclareLocks(self, level):
2113 if level == locking.LEVEL_NODE:
2114 # Get members of node group; this is unsafe and needs verification later
2115 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2117 all_inst_info = self.cfg.GetAllInstancesInfo()
2119 # In Exec(), we warn about mirrored instances that have primary and
2120 # secondary living in separate node groups. To fully verify that
2121 # volumes for these instances are healthy, we will need to do an
2122 # extra call to their secondaries. We ensure here those nodes will
2124 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2125 # Important: access only the instances whose lock is owned
2126 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2127 nodes.update(all_inst_info[inst].secondary_nodes)
2129 self.needed_locks[locking.LEVEL_NODE] = nodes
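# Illustrative sketch (hypothetical names, not part of the LU): if this group
# holds the lock for a DRBD instance "inst1" whose secondary node "nodeX"
# belongs to another group, "nodeX" is added to the node-level locks here so
# that Exec() can still query its LVs when checking inst1's volumes:
#   all_inst_info["inst1"].disk_template in constants.DTS_INT_MIRROR  # True
#   all_inst_info["inst1"].secondary_nodes == ["nodeX"]
#   => nodes == set(<group members>) | set(["nodeX"])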
2131 def CheckPrereq(self):
2132 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2133 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2135 group_nodes = set(self.group_info.members)
2136 group_instances = \
2137 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2139 unlocked_nodes = \
2140 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2142 unlocked_instances = \
2143 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2145 if unlocked_nodes:
2146 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2147 utils.CommaJoin(unlocked_nodes),
2150 if unlocked_instances:
2151 raise errors.OpPrereqError("Missing lock for instances: %s" %
2152 utils.CommaJoin(unlocked_instances),
2155 self.all_node_info = self.cfg.GetAllNodesInfo()
2156 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2158 self.my_node_names = utils.NiceSort(group_nodes)
2159 self.my_inst_names = utils.NiceSort(group_instances)
2161 self.my_node_info = dict((name, self.all_node_info[name])
2162 for name in self.my_node_names)
2164 self.my_inst_info = dict((name, self.all_inst_info[name])
2165 for name in self.my_inst_names)
2167 # We detect here the nodes that will need the extra RPC calls for verifying
2168 # split LV volumes; they should be locked.
2169 extra_lv_nodes = set()
2171 for inst in self.my_inst_info.values():
2172 if inst.disk_template in constants.DTS_INT_MIRROR:
2173 for nname in inst.all_nodes:
2174 if self.all_node_info[nname].group != self.group_uuid:
2175 extra_lv_nodes.add(nname)
2177 unlocked_lv_nodes = \
2178 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2180 if unlocked_lv_nodes:
2181 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2182 utils.CommaJoin(unlocked_lv_nodes),
2184 self.extra_lv_nodes = list(extra_lv_nodes)
2186 def _VerifyNode(self, ninfo, nresult):
2187 """Perform some basic validation on data returned from a node.
2189 - check the result data structure is well formed and has all the expected keys
2191 - check ganeti version
2193 @type ninfo: L{objects.Node}
2194 @param ninfo: the node to check
2195 @param nresult: the results from the node
2197 @return: whether overall this call was successful (and we can expect
2198 reasonable values in the response)
2202 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2204 # main result, nresult should be a non-empty dict
2205 test = not nresult or not isinstance(nresult, dict)
2206 _ErrorIf(test, constants.CV_ENODERPC, node,
2207 "unable to verify node: no data returned")
2211 # compares ganeti version
2212 local_version = constants.PROTOCOL_VERSION
2213 remote_version = nresult.get("version", None)
2214 test = not (remote_version and
2215 isinstance(remote_version, (list, tuple)) and
2216 len(remote_version) == 2)
2217 _ErrorIf(test, constants.CV_ENODERPC, node,
2218 "connection to node returned invalid data")
2222 test = local_version != remote_version[0]
2223 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2224 "incompatible protocol versions: master %s,"
2225 " node %s", local_version, remote_version[0])
2229 # node seems compatible, we can actually try to look into its results
2231 # full package version
2232 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2233 constants.CV_ENODEVERSION, node,
2234 "software version mismatch: master %s, node %s",
2235 constants.RELEASE_VERSION, remote_version[1],
2236 code=self.ETYPE_WARNING)
2238 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2239 if ninfo.vm_capable and isinstance(hyp_result, dict):
2240 for hv_name, hv_result in hyp_result.iteritems():
2241 test = hv_result is not None
2242 _ErrorIf(test, constants.CV_ENODEHV, node,
2243 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2245 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2246 if ninfo.vm_capable and isinstance(hvp_result, list):
2247 for item, hv_name, hv_result in hvp_result:
2248 _ErrorIf(True, constants.CV_ENODEHV, node,
2249 "hypervisor %s parameter verify failure (source %s): %s",
2250 hv_name, item, hv_result)
2252 test = nresult.get(constants.NV_NODESETUP,
2253 ["Missing NODESETUP results"])
2254 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2259 def _VerifyNodeTime(self, ninfo, nresult,
2260 nvinfo_starttime, nvinfo_endtime):
2261 """Check the node time.
2263 @type ninfo: L{objects.Node}
2264 @param ninfo: the node to check
2265 @param nresult: the remote results for the node
2266 @param nvinfo_starttime: the start time of the RPC call
2267 @param nvinfo_endtime: the end time of the RPC call
2271 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2273 ntime = nresult.get(constants.NV_TIME, None)
2275 ntime_merged = utils.MergeTime(ntime)
2276 except (ValueError, TypeError):
2277 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2280 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2281 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2282 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2283 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2287 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2288 "Node time diverges by at least %s from master node time",
2291 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2292 """Check the node LVM results.
2294 @type ninfo: L{objects.Node}
2295 @param ninfo: the node to check
2296 @param nresult: the remote results for the node
2297 @param vg_name: the configured VG name
2304 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2306 # checks vg existence and size > 20G
2307 vglist = nresult.get(constants.NV_VGLIST, None)
2309 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2311 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2312 constants.MIN_VG_SIZE)
2313 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2316 pvlist = nresult.get(constants.NV_PVLIST, None)
2317 test = pvlist is None
2318 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2320 # check that ':' is not present in PV names, since it's a
2321 # special character for lvcreate (denotes the range of PEs to allocate on)
2323 for _, pvname, owner_vg in pvlist:
2324 test = ":" in pvname
2325 _ErrorIf(test, constants.CV_ENODELVM, node,
2326 "Invalid character ':' in PV '%s' of VG '%s'",
2329 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2330 """Check the node bridges.
2332 @type ninfo: L{objects.Node}
2333 @param ninfo: the node to check
2334 @param nresult: the remote results for the node
2335 @param bridges: the expected list of bridges
2342 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2344 missing = nresult.get(constants.NV_BRIDGES, None)
2345 test = not isinstance(missing, list)
2346 _ErrorIf(test, constants.CV_ENODENET, node,
2347 "did not return valid bridge information")
2349 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2350 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2352 def _VerifyNodeUserScripts(self, ninfo, nresult):
2353 """Check the results of user scripts presence and executability on the node
2355 @type ninfo: L{objects.Node}
2356 @param ninfo: the node to check
2357 @param nresult: the remote results for the node
2362 test = constants.NV_USERSCRIPTS not in nresult
2363 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2364 "did not return user scripts information")
2366 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2368 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2369 "user scripts not present or not executable: %s" %
2370 utils.CommaJoin(sorted(broken_scripts)))
2372 def _VerifyNodeNetwork(self, ninfo, nresult):
2373 """Check the node network connectivity results.
2375 @type ninfo: L{objects.Node}
2376 @param ninfo: the node to check
2377 @param nresult: the remote results for the node
2381 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2383 test = constants.NV_NODELIST not in nresult
2384 _ErrorIf(test, constants.CV_ENODESSH, node,
2385 "node hasn't returned node ssh connectivity data")
2387 if nresult[constants.NV_NODELIST]:
2388 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2389 _ErrorIf(True, constants.CV_ENODESSH, node,
2390 "ssh communication with node '%s': %s", a_node, a_msg)
2392 test = constants.NV_NODENETTEST not in nresult
2393 _ErrorIf(test, constants.CV_ENODENET, node,
2394 "node hasn't returned node tcp connectivity data")
2396 if nresult[constants.NV_NODENETTEST]:
2397 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2398 for anode in nlist:
2399 _ErrorIf(True, constants.CV_ENODENET, node,
2400 "tcp communication with node '%s': %s",
2401 anode, nresult[constants.NV_NODENETTEST][anode])
2403 test = constants.NV_MASTERIP not in nresult
2404 _ErrorIf(test, constants.CV_ENODENET, node,
2405 "node hasn't returned node master IP reachability data")
2407 if not nresult[constants.NV_MASTERIP]:
2408 if node == self.master_node:
2409 msg = "the master node cannot reach the master IP (not configured?)"
2411 msg = "cannot reach the master IP"
2412 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2414 def _VerifyInstance(self, instance, instanceconfig, node_image, diskstatus):
2416 """Verify an instance.
2418 This function checks to see if the required block devices are
2419 available on the instance's node.
2422 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2423 node_current = instanceconfig.primary_node
2425 node_vol_should = {}
2426 instanceconfig.MapLVsByNode(node_vol_should)
2428 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2429 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2430 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2432 for node in node_vol_should:
2433 n_img = node_image[node]
2434 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2435 # ignore missing volumes on offline or broken nodes
2437 for volume in node_vol_should[node]:
2438 test = volume not in n_img.volumes
2439 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2440 "volume %s missing on node %s", volume, node)
2442 if instanceconfig.admin_state == constants.ADMINST_UP:
2443 pri_img = node_image[node_current]
2444 test = instance not in pri_img.instances and not pri_img.offline
2445 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2446 "instance not running on its primary node %s",
2449 diskdata = [(nname, success, status, idx)
2450 for (nname, disks) in diskstatus.items()
2451 for idx, (success, status) in enumerate(disks)]
2453 for nname, success, bdev_status, idx in diskdata:
2454 # the 'ghost node' construction in Exec() ensures that we have a node here
2456 snode = node_image[nname]
2457 bad_snode = snode.ghost or snode.offline
2458 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2459 not success and not bad_snode,
2460 constants.CV_EINSTANCEFAULTYDISK, instance,
2461 "couldn't retrieve status for disk/%s on %s: %s",
2462 idx, nname, bdev_status)
2463 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2464 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2465 constants.CV_EINSTANCEFAULTYDISK, instance,
2466 "disk/%s on %s is faulty", idx, nname)
2468 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2469 """Verify if there are any unknown volumes in the cluster.
2471 The .os, .swap and backup volumes are ignored. All other volumes are
2472 reported as unknown.
2474 @type reserved: L{ganeti.utils.FieldSet}
2475 @param reserved: a FieldSet of reserved volume names
2478 for node, n_img in node_image.items():
2479 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2480 self.all_node_info[node].group != self.group_uuid):
2481 # skip non-healthy nodes
2483 for volume in n_img.volumes:
2484 test = ((node not in node_vol_should or
2485 volume not in node_vol_should[node]) and
2486 not reserved.Matches(volume))
2487 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2488 "volume %s is unknown", volume)
2490 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2491 """Verify N+1 Memory Resilience.
2493 Check that if one single node dies we can still start all the
2494 instances it was primary for.
2497 cluster_info = self.cfg.GetClusterInfo()
2498 for node, n_img in node_image.items():
2499 # This code checks that every node which is now listed as
2500 # secondary has enough memory to host all instances it is
2501 # supposed to, should a single other node in the cluster fail.
2502 # FIXME: not ready for failover to an arbitrary node
2503 # FIXME: does not support file-backed instances
2504 # WARNING: we currently take into account down instances as well
2505 # as up ones, considering that even if they're down someone
2506 # might want to start them even in the event of a node failure.
2507 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2508 # we're skipping nodes marked offline and nodes in other groups from
2509 # the N+1 warning, since most likely we don't have good memory
2510 # information from them; we already list instances living on such
2511 # nodes, and that's enough warning
2513 #TODO(dynmem): also consider ballooning out other instances
2514 for prinode, instances in n_img.sbp.items():
2515 needed_mem = 0
2516 for instance in instances:
2517 bep = cluster_info.FillBE(instance_cfg[instance])
2518 if bep[constants.BE_AUTO_BALANCE]:
2519 needed_mem += bep[constants.BE_MINMEM]
2520 test = n_img.mfree < needed_mem
2521 self._ErrorIf(test, constants.CV_ENODEN1, node,
2522 "not enough memory to accomodate instance failovers"
2523 " should node %s fail (%dMiB needed, %dMiB available)",
2524 prinode, needed_mem, n_img.mfree)
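# Illustrative worked example (hypothetical numbers, not part of the LU): if
# this node is secondary for "inst1" (BE_MINMEM=1024) and "inst2"
# (BE_MINMEM=2048), both auto-balanced and both with primary node "nodeP",
# then needed_mem for prinode "nodeP" is 3072 MiB; with n_img.mfree == 2048
# the CV_ENODEN1 error above fires, because this node could not take over
# both instances should "nodeP" fail.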
2527 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2528 (files_all, files_opt, files_mc, files_vm)):
2529 """Verifies file checksums collected from all nodes.
2531 @param errorif: Callback for reporting errors
2532 @param nodeinfo: List of L{objects.Node} objects
2533 @param master_node: Name of master node
2534 @param all_nvinfo: RPC results
2537 # Define functions determining which nodes to consider for a file
2538 files2nodefn = [
2539 (files_all, None),
2540 (files_mc, lambda node: (node.master_candidate or
2541 node.name == master_node)),
2542 (files_vm, lambda node: node.vm_capable),
2545 # Build mapping from filename to list of nodes which should have the file
2547 for (files, fn) in files2nodefn:
2548 if fn is None:
2549 filenodes = nodeinfo
2550 else:
2551 filenodes = filter(fn, nodeinfo)
2552 nodefiles.update((filename,
2553 frozenset(map(operator.attrgetter("name"), filenodes)))
2554 for filename in files)
2556 assert set(nodefiles) == (files_all | files_mc | files_vm)
2558 fileinfo = dict((filename, {}) for filename in nodefiles)
2559 ignore_nodes = set()
2561 for node in nodeinfo:
2562 if node.offline:
2563 ignore_nodes.add(node.name)
2566 nresult = all_nvinfo[node.name]
2568 if nresult.fail_msg or not nresult.payload:
2571 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2573 test = not (node_files and isinstance(node_files, dict))
2574 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2575 "Node did not return file checksum data")
2577 ignore_nodes.add(node.name)
2580 # Build per-checksum mapping from filename to nodes having it
2581 for (filename, checksum) in node_files.items():
2582 assert filename in nodefiles
2583 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2585 for (filename, checksums) in fileinfo.items():
2586 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2588 # Nodes having the file
2589 with_file = frozenset(node_name
2590 for nodes in fileinfo[filename].values()
2591 for node_name in nodes) - ignore_nodes
2593 expected_nodes = nodefiles[filename] - ignore_nodes
2595 # Nodes missing file
2596 missing_file = expected_nodes - with_file
2598 if filename in files_opt:
2600 errorif(missing_file and missing_file != expected_nodes,
2601 constants.CV_ECLUSTERFILECHECK, None,
2602 "File %s is optional, but it must exist on all or no"
2603 " nodes (not found on %s)",
2604 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2606 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2607 "File %s is missing from node(s) %s", filename,
2608 utils.CommaJoin(utils.NiceSort(missing_file)))
2610 # Warn if a node has a file it shouldn't
2611 unexpected = with_file - expected_nodes
2613 constants.CV_ECLUSTERFILECHECK, None,
2614 "File %s should not exist on node(s) %s",
2615 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2617 # See if there are multiple versions of the file
2618 test = len(checksums) > 1
2620 variants = ["variant %s on %s" %
2621 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2622 for (idx, (checksum, nodes)) in
2623 enumerate(sorted(checksums.items()))]
2627 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2628 "File %s found with %s different checksums (%s)",
2629 filename, len(checksums), "; ".join(variants))
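# Illustrative sketch (hypothetical data, not part of the LU): after the
# per-node loop, fileinfo maps each checked file to the checksums seen and
# the nodes reporting them, e.g.
#   fileinfo["/path/to/file"] == {
#     "0123abcd...": set(["node1", "node2"]),
#     "89efcdab...": set(["node3"]),
#   }
# which the code above reports as one file found with 2 different checksums.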
2631 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2633 """Verifies and the node DRBD status.
2635 @type ninfo: L{objects.Node}
2636 @param ninfo: the node to check
2637 @param nresult: the remote results for the node
2638 @param instanceinfo: the dict of instances
2639 @param drbd_helper: the configured DRBD usermode helper
2640 @param drbd_map: the DRBD map as returned by
2641 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2645 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2648 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2649 test = (helper_result is None)
2650 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2651 "no drbd usermode helper returned")
2653 status, payload = helper_result
2654 test = not status
2655 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2656 "drbd usermode helper check unsuccessful: %s", payload)
2657 test = status and (payload != drbd_helper)
2658 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2659 "wrong drbd usermode helper: %s", payload)
2661 # compute the DRBD minors
2662 node_drbd = {}
2663 for minor, instance in drbd_map[node].items():
2664 test = instance not in instanceinfo
2665 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2666 "ghost instance '%s' in temporary DRBD map", instance)
2667 # ghost instance should not be running, but otherwise we
2668 # don't give double warnings (both ghost instance and
2669 # unallocated minor in use)
2671 node_drbd[minor] = (instance, False)
2672 else:
2673 instance = instanceinfo[instance]
2674 node_drbd[minor] = (instance.name,
2675 instance.admin_state == constants.ADMINST_UP)
2677 # and now check them
2678 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2679 test = not isinstance(used_minors, (tuple, list))
2680 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2681 "cannot parse drbd status file: %s", str(used_minors))
2683 # we cannot check drbd status
2686 for minor, (iname, must_exist) in node_drbd.items():
2687 test = minor not in used_minors and must_exist
2688 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2689 "drbd minor %d of instance %s is not active", minor, iname)
2690 for minor in used_minors:
2691 test = minor not in node_drbd
2692 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2693 "unallocated drbd minor %d is in use", minor)
2695 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2696 """Builds the node OS structures.
2698 @type ninfo: L{objects.Node}
2699 @param ninfo: the node to check
2700 @param nresult: the remote results for the node
2701 @param nimg: the node image object
2705 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2707 remote_os = nresult.get(constants.NV_OSLIST, None)
2708 test = (not isinstance(remote_os, list) or
2709 not compat.all(isinstance(v, list) and len(v) == 7
2710 for v in remote_os))
2712 _ErrorIf(test, constants.CV_ENODEOS, node,
2713 "node hasn't returned valid OS data")
2722 for (name, os_path, status, diagnose,
2723 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2725 if name not in os_dict:
2728 # parameters is a list of lists instead of list of tuples due to
2729 # JSON lacking a real tuple type, fix it:
2730 parameters = [tuple(v) for v in parameters]
2731 os_dict[name].append((os_path, status, diagnose,
2732 set(variants), set(parameters), set(api_ver)))
2734 nimg.oslist = os_dict
2736 def _VerifyNodeOS(self, ninfo, nimg, base):
2737 """Verifies the node OS list.
2739 @type ninfo: L{objects.Node}
2740 @param ninfo: the node to check
2741 @param nimg: the node image object
2742 @param base: the 'template' node we match against (e.g. from the master)
2746 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2748 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2750 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2751 for os_name, os_data in nimg.oslist.items():
2752 assert os_data, "Empty OS status for OS %s?!" % os_name
2753 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2754 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2755 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2756 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2757 "OS '%s' has multiple entries (first one shadows the rest): %s",
2758 os_name, utils.CommaJoin([v[0] for v in os_data]))
2759 # comparisons with the 'base' image
2760 test = os_name not in base.oslist
2761 _ErrorIf(test, constants.CV_ENODEOS, node,
2762 "Extra OS %s not present on reference node (%s)",
2766 assert base.oslist[os_name], "Base node has empty OS status?"
2767 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2769 # base OS is invalid, skipping
2771 for kind, a, b in [("API version", f_api, b_api),
2772 ("variants list", f_var, b_var),
2773 ("parameters", beautify_params(f_param),
2774 beautify_params(b_param))]:
2775 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2776 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2777 kind, os_name, base.name,
2778 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2780 # check any missing OSes
2781 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2782 _ErrorIf(missing, constants.CV_ENODEOS, node,
2783 "OSes present on reference node %s but missing on this node: %s",
2784 base.name, utils.CommaJoin(missing))
2786 def _VerifyOob(self, ninfo, nresult):
2787 """Verifies out of band functionality of a node.
2789 @type ninfo: L{objects.Node}
2790 @param ninfo: the node to check
2791 @param nresult: the remote results for the node
2795 # We just have to verify the paths on master and/or master candidates
2796 # as the oob helper is invoked on the master
2797 if ((ninfo.master_candidate or ninfo.master_capable) and
2798 constants.NV_OOB_PATHS in nresult):
2799 for path_result in nresult[constants.NV_OOB_PATHS]:
2800 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2802 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2803 """Verifies and updates the node volume data.
2805 This function will update a L{NodeImage}'s internal structures
2806 with data from the remote call.
2808 @type ninfo: L{objects.Node}
2809 @param ninfo: the node to check
2810 @param nresult: the remote results for the node
2811 @param nimg: the node image object
2812 @param vg_name: the configured VG name
2816 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2818 nimg.lvm_fail = True
2819 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2822 elif isinstance(lvdata, basestring):
2823 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2824 utils.SafeEncode(lvdata))
2825 elif not isinstance(lvdata, dict):
2826 _ErrorIf(True, constants.CV_ENODELVM, node,
2827 "rpc call to node failed (lvlist)")
2829 nimg.volumes = lvdata
2830 nimg.lvm_fail = False
2832 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2833 """Verifies and updates the node instance list.
2835 If the listing was successful, then updates this node's instance
2836 list. Otherwise, it marks the RPC call as failed for the instance list.
2839 @type ninfo: L{objects.Node}
2840 @param ninfo: the node to check
2841 @param nresult: the remote results for the node
2842 @param nimg: the node image object
2845 idata = nresult.get(constants.NV_INSTANCELIST, None)
2846 test = not isinstance(idata, list)
2847 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2848 "rpc call to node failed (instancelist): %s",
2849 utils.SafeEncode(str(idata)))
2851 nimg.hyp_fail = True
2853 nimg.instances = idata
2855 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2856 """Verifies and computes a node information map
2858 @type ninfo: L{objects.Node}
2859 @param ninfo: the node to check
2860 @param nresult: the remote results for the node
2861 @param nimg: the node image object
2862 @param vg_name: the configured VG name
2866 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2868 # try to read free memory (from the hypervisor)
2869 hv_info = nresult.get(constants.NV_HVINFO, None)
2870 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2871 _ErrorIf(test, constants.CV_ENODEHV, node,
2872 "rpc call to node failed (hvinfo)")
2875 nimg.mfree = int(hv_info["memory_free"])
2876 except (ValueError, TypeError):
2877 _ErrorIf(True, constants.CV_ENODERPC, node,
2878 "node returned invalid nodeinfo, check hypervisor")
2880 # FIXME: devise a free space model for file based instances as well
2881 if vg_name is not None:
2882 test = (constants.NV_VGLIST not in nresult or
2883 vg_name not in nresult[constants.NV_VGLIST])
2884 _ErrorIf(test, constants.CV_ENODELVM, node,
2885 "node didn't return data for the volume group '%s'"
2886 " - it is either missing or broken", vg_name)
2889 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2890 except (ValueError, TypeError):
2891 _ErrorIf(True, constants.CV_ENODERPC, node,
2892 "node returned invalid LVM info, check LVM status")
2894 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2895 """Gets per-disk status information for all instances.
2897 @type nodelist: list of strings
2898 @param nodelist: Node names
2899 @type node_image: dict of (name, L{objects.Node})
2900 @param node_image: Node objects
2901 @type instanceinfo: dict of (name, L{objects.Instance})
2902 @param instanceinfo: Instance objects
2903 @rtype: {instance: {node: [(success, payload)]}}
2904 @return: a dictionary of per-instance dictionaries with nodes as
2905 keys and disk information as values; the disk information is a
2906 list of tuples (success, payload)
2909 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2912 node_disks_devonly = {}
2913 diskless_instances = set()
2914 diskless = constants.DT_DISKLESS
2916 for nname in nodelist:
2917 node_instances = list(itertools.chain(node_image[nname].pinst,
2918 node_image[nname].sinst))
2919 diskless_instances.update(inst for inst in node_instances
2920 if instanceinfo[inst].disk_template == diskless)
2921 disks = [(inst, disk)
2922 for inst in node_instances
2923 for disk in instanceinfo[inst].disks]
2926 # No need to collect data
2929 node_disks[nname] = disks
2931 # _AnnotateDiskParams makes already copies of the disks
2933 for (inst, dev) in disks:
2934 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2935 self.cfg.SetDiskID(anno_disk, nname)
2936 devonly.append(anno_disk)
2938 node_disks_devonly[nname] = devonly
2940 assert len(node_disks) == len(node_disks_devonly)
2942 # Collect data from all nodes with disks
2943 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2946 assert len(result) == len(node_disks)
2950 for (nname, nres) in result.items():
2951 disks = node_disks[nname]
2954 # No data from this node
2955 data = len(disks) * [(False, "node offline")]
2958 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2959 "while getting disk information: %s", msg)
2961 # No data from this node
2962 data = len(disks) * [(False, msg)]
2965 for idx, i in enumerate(nres.payload):
2966 if isinstance(i, (tuple, list)) and len(i) == 2:
2969 logging.warning("Invalid result from node %s, entry %d: %s",
2971 data.append((False, "Invalid result from the remote node"))
2973 for ((inst, _), status) in zip(disks, data):
2974 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2976 # Add empty entries for diskless instances.
2977 for inst in diskless_instances:
2978 assert inst not in instdisk
2981 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2982 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2983 compat.all(isinstance(s, (tuple, list)) and
2984 len(s) == 2 for s in statuses)
2985 for inst, nnames in instdisk.items()
2986 for nname, statuses in nnames.items())
2987 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
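# Illustrative sketch (hypothetical names, not part of the LU): the resulting
# mapping has one entry per (instance, node) pair, e.g.
#   instdisk["inst1"]["node1"] == [(True, status_disk0), (True, status_disk1)]
#   instdisk["inst1"]["node2"] == [(False, "node offline")] * 2
# and diskless instances end up with an empty inner dictionary.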
2992 def _SshNodeSelector(group_uuid, all_nodes):
2993 """Create endless iterators for all potential SSH check hosts.
2996 nodes = [node for node in all_nodes
2997 if (node.group != group_uuid and
2999 keyfunc = operator.attrgetter("group")
3001 return map(itertools.cycle,
3002 [sorted(map(operator.attrgetter("name"), names))
3003 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3007 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3008 """Choose which nodes should talk to which other nodes.
3010 We will make nodes contact all nodes in their group, and one node from every other group.
3013 @warning: This algorithm has a known issue if one node group is much
3014 smaller than others (e.g. just one node). In such a case all other
3015 nodes will talk to the single node.
3018 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3019 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3021 return (online_nodes,
3022 dict((name, sorted([i.next() for i in sel]))
3023 for name in online_nodes))
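# Illustrative sketch (hypothetical layout, not part of the LU): with groups
# A == [a1, a2], B == [b1, b2] and C == [c1], verifying group A yields
#   online_nodes == ["a1", "a2"]
#   per-node targets == {"a1": ["b1", "c1"], "a2": ["b2", "c1"]}
# i.e. each node of A is asked to contact one node drawn round-robin from
# every other group, in addition to the nodes of its own group.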
3025 def BuildHooksEnv(self):
3028 Cluster-Verify hooks run only in the post phase; if they fail, their
3029 output is logged in the verify output and the verification fails.
3033 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3036 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3037 for node in self.my_node_info.values())
3041 def BuildHooksNodes(self):
3042 """Build hooks nodes.
3045 return ([], self.my_node_names)
3047 def Exec(self, feedback_fn):
3048 """Verify integrity of the node group, performing various test on nodes.
3051 # This method has too many local variables. pylint: disable=R0914
3052 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3054 if not self.my_node_names:
3056 feedback_fn("* Empty node group, skipping verification")
3060 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3061 verbose = self.op.verbose
3062 self._feedback_fn = feedback_fn
3064 vg_name = self.cfg.GetVGName()
3065 drbd_helper = self.cfg.GetDRBDHelper()
3066 cluster = self.cfg.GetClusterInfo()
3067 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3068 hypervisors = cluster.enabled_hypervisors
3069 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3071 i_non_redundant = [] # Non redundant instances
3072 i_non_a_balanced = [] # Non auto-balanced instances
3073 i_offline = 0 # Count of offline instances
3074 n_offline = 0 # Count of offline nodes
3075 n_drained = 0 # Count of nodes being drained
3076 node_vol_should = {}
3078 # FIXME: verify OS list
3081 filemap = _ComputeAncillaryFiles(cluster, False)
3083 # do local checksums
3084 master_node = self.master_node = self.cfg.GetMasterNode()
3085 master_ip = self.cfg.GetMasterIP()
3087 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3090 if self.cfg.GetUseExternalMipScript():
3091 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3093 node_verify_param = {
3094 constants.NV_FILELIST:
3095 utils.UniqueSequence(filename
3096 for files in filemap
3097 for filename in files),
3098 constants.NV_NODELIST:
3099 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3100 self.all_node_info.values()),
3101 constants.NV_HYPERVISOR: hypervisors,
3102 constants.NV_HVPARAMS:
3103 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3104 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3105 for node in node_data_list
3106 if not node.offline],
3107 constants.NV_INSTANCELIST: hypervisors,
3108 constants.NV_VERSION: None,
3109 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3110 constants.NV_NODESETUP: None,
3111 constants.NV_TIME: None,
3112 constants.NV_MASTERIP: (master_node, master_ip),
3113 constants.NV_OSLIST: None,
3114 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3115 constants.NV_USERSCRIPTS: user_scripts,
3118 if vg_name is not None:
3119 node_verify_param[constants.NV_VGLIST] = None
3120 node_verify_param[constants.NV_LVLIST] = vg_name
3121 node_verify_param[constants.NV_PVLIST] = [vg_name]
3122 node_verify_param[constants.NV_DRBDLIST] = None
3125 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3128 # FIXME: this needs to be changed per node-group, not cluster-wide
3130 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3131 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3132 bridges.add(default_nicpp[constants.NIC_LINK])
3133 for instance in self.my_inst_info.values():
3134 for nic in instance.nics:
3135 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3136 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3137 bridges.add(full_nic[constants.NIC_LINK])
3140 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3142 # Build our expected cluster state
3143 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3145 vm_capable=node.vm_capable))
3146 for node in node_data_list)
3150 for node in self.all_node_info.values():
3151 path = _SupportsOob(self.cfg, node)
3152 if path and path not in oob_paths:
3153 oob_paths.append(path)
3156 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3158 for instance in self.my_inst_names:
3159 inst_config = self.my_inst_info[instance]
3160 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3163 for nname in inst_config.all_nodes:
3164 if nname not in node_image:
3165 gnode = self.NodeImage(name=nname)
3166 gnode.ghost = (nname not in self.all_node_info)
3167 node_image[nname] = gnode
3169 inst_config.MapLVsByNode(node_vol_should)
3171 pnode = inst_config.primary_node
3172 node_image[pnode].pinst.append(instance)
3174 for snode in inst_config.secondary_nodes:
3175 nimg = node_image[snode]
3176 nimg.sinst.append(instance)
3177 if pnode not in nimg.sbp:
3178 nimg.sbp[pnode] = []
3179 nimg.sbp[pnode].append(instance)
3181 # At this point, we have the in-memory data structures complete,
3182 # except for the runtime information, which we'll gather next
3184 # Due to the way our RPC system works, exact response times cannot be
3185 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3186 # time before and after executing the request, we can at least have a time window.
3188 nvinfo_starttime = time.time()
3189 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3191 self.cfg.GetClusterName())
3192 nvinfo_endtime = time.time()
3194 if self.extra_lv_nodes and vg_name is not None:
3196 self.rpc.call_node_verify(self.extra_lv_nodes,
3197 {constants.NV_LVLIST: vg_name},
3198 self.cfg.GetClusterName())
3200 extra_lv_nvinfo = {}
3202 all_drbd_map = self.cfg.ComputeDRBDMap()
3204 feedback_fn("* Gathering disk information (%s nodes)" %
3205 len(self.my_node_names))
3206 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3209 feedback_fn("* Verifying configuration file consistency")
3211 # If not all nodes are being checked, we need to make sure the master node
3212 # and a non-checked vm_capable node are in the list.
3213 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3215 vf_nvinfo = all_nvinfo.copy()
3216 vf_node_info = list(self.my_node_info.values())
3217 additional_nodes = []
3218 if master_node not in self.my_node_info:
3219 additional_nodes.append(master_node)
3220 vf_node_info.append(self.all_node_info[master_node])
3221 # Add the first vm_capable node we find which is not included,
3222 # excluding the master node (which we already have)
3223 for node in absent_nodes:
3224 nodeinfo = self.all_node_info[node]
3225 if (nodeinfo.vm_capable and not nodeinfo.offline and
3226 node != master_node):
3227 additional_nodes.append(node)
3228 vf_node_info.append(self.all_node_info[node])
3230 key = constants.NV_FILELIST
3231 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3232 {key: node_verify_param[key]},
3233 self.cfg.GetClusterName()))
3235 vf_nvinfo = all_nvinfo
3236 vf_node_info = self.my_node_info.values()
3238 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3240 feedback_fn("* Verifying node status")
3244 for node_i in node_data_list:
3245 node = node_i.name
3246 nimg = node_image[node]
3250 feedback_fn("* Skipping offline node %s" % (node,))
3254 if node == master_node:
3256 elif node_i.master_candidate:
3257 ntype = "master candidate"
3258 elif node_i.drained:
3264 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3266 msg = all_nvinfo[node].fail_msg
3267 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3270 nimg.rpc_fail = True
3273 nresult = all_nvinfo[node].payload
3275 nimg.call_ok = self._VerifyNode(node_i, nresult)
3276 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3277 self._VerifyNodeNetwork(node_i, nresult)
3278 self._VerifyNodeUserScripts(node_i, nresult)
3279 self._VerifyOob(node_i, nresult)
3282 self._VerifyNodeLVM(node_i, nresult, vg_name)
3283 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3286 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3287 self._UpdateNodeInstances(node_i, nresult, nimg)
3288 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3289 self._UpdateNodeOS(node_i, nresult, nimg)
3291 if not nimg.os_fail:
3292 if refos_img is None:
3294 self._VerifyNodeOS(node_i, nimg, refos_img)
3295 self._VerifyNodeBridges(node_i, nresult, bridges)
3297 # Check whether all running instances are primary for the node. (This
3298 # can no longer be done from _VerifyInstance below, since some of the
3299 # wrong instances could be from other node groups.)
3300 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3302 for inst in non_primary_inst:
3303 test = inst in self.all_inst_info
3304 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3305 "instance should not run on node %s", node_i.name)
3306 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3307 "node is running unknown instance %s", inst)
3309 for node, result in extra_lv_nvinfo.items():
3310 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3311 node_image[node], vg_name)
3313 feedback_fn("* Verifying instance status")
3314 for instance in self.my_inst_names:
3316 feedback_fn("* Verifying instance %s" % instance)
3317 inst_config = self.my_inst_info[instance]
3318 self._VerifyInstance(instance, inst_config, node_image,
3320 inst_nodes_offline = []
3322 pnode = inst_config.primary_node
3323 pnode_img = node_image[pnode]
3324 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3325 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3326 " primary node failed", instance)
3328 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3330 constants.CV_EINSTANCEBADNODE, instance,
3331 "instance is marked as running and lives on offline node %s",
3332 inst_config.primary_node)
3334 # If the instance is non-redundant we cannot survive losing its primary
3335 # node, so we are not N+1 compliant. On the other hand we have no disk
3336 # templates with more than one secondary so that situation is not well supported either.
3338 # FIXME: does not support file-backed instances
3339 if not inst_config.secondary_nodes:
3340 i_non_redundant.append(instance)
3342 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3343 constants.CV_EINSTANCELAYOUT,
3344 instance, "instance has multiple secondary nodes: %s",
3345 utils.CommaJoin(inst_config.secondary_nodes),
3346 code=self.ETYPE_WARNING)
3348 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3349 pnode = inst_config.primary_node
3350 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3351 instance_groups = {}
3353 for node in instance_nodes:
3354 instance_groups.setdefault(self.all_node_info[node].group,
3358 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3359 # Sort so that we always list the primary node first.
3360 for group, nodes in sorted(instance_groups.items(),
3361 key=lambda (_, nodes): pnode in nodes,
3364 self._ErrorIf(len(instance_groups) > 1,
3365 constants.CV_EINSTANCESPLITGROUPS,
3366 instance, "instance has primary and secondary nodes in"
3367 " different groups: %s", utils.CommaJoin(pretty_list),
3368 code=self.ETYPE_WARNING)
3370 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3371 i_non_a_balanced.append(instance)
3373 for snode in inst_config.secondary_nodes:
3374 s_img = node_image[snode]
3375 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3376 snode, "instance %s, connection to secondary node failed",
3380 inst_nodes_offline.append(snode)
3382 # warn that the instance lives on offline nodes
3383 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3384 "instance has offline secondary node(s) %s",
3385 utils.CommaJoin(inst_nodes_offline))
3386 # ... or ghost/non-vm_capable nodes
3387 for node in inst_config.all_nodes:
3388 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3389 instance, "instance lives on ghost node %s", node)
3390 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3391 instance, "instance lives on non-vm_capable node %s", node)
3393 feedback_fn("* Verifying orphan volumes")
3394 reserved = utils.FieldSet(*cluster.reserved_lvs)
3396 # We will get spurious "unknown volume" warnings if any node of this group
3397 # is secondary for an instance whose primary is in another group. To avoid
3398 # them, we find these instances and add their volumes to node_vol_should.
3399 for inst in self.all_inst_info.values():
3400 for secondary in inst.secondary_nodes:
3401 if (secondary in self.my_node_info
3402 and inst.name not in self.my_inst_info):
3403 inst.MapLVsByNode(node_vol_should)
3406 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3408 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3409 feedback_fn("* Verifying N+1 Memory redundancy")
3410 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3412 feedback_fn("* Other Notes")
3414 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3415 % len(i_non_redundant))
3417 if i_non_a_balanced:
3418 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3419 % len(i_non_a_balanced))
3422 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3425 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3428 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3432 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3433 """Analyze the post-hooks' result
3435 This method analyses the hook result, handles it, and sends some
3436 nicely-formatted feedback back to the user.
3438 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3439 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3440 @param hooks_results: the results of the multi-node hooks rpc call
3441 @param feedback_fn: function used to send feedback back to the caller
3442 @param lu_result: previous Exec result
3443 @return: the new Exec result, based on the previous result
3447 # We only really run POST phase hooks, only for non-empty groups,
3448 # and are only interested in their results
3449 if not self.my_node_names:
3452 elif phase == constants.HOOKS_PHASE_POST:
3453 # Used to change hooks' output to proper indentation
3454 feedback_fn("* Hooks Results")
3455 assert hooks_results, "invalid result from hooks"
3457 for node_name in hooks_results:
3458 res = hooks_results[node_name]
3460 test = msg and not res.offline
3461 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3462 "Communication failure in hooks execution: %s", msg)
3463 if res.offline or msg:
3464 # No need to investigate payload if node is offline or gave an error
3467 for script, hkr, output in res.payload:
3468 test = hkr == constants.HKR_FAIL
3469 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3470 "Script %s failed, output:", script)
3472 output = self._HOOKS_INDENT_RE.sub(" ", output)
3473 feedback_fn("%s" % output)
3479 class LUClusterVerifyDisks(NoHooksLU):
3480 """Verifies the cluster disks status.
3485 def ExpandNames(self):
3486 self.share_locks = _ShareAll()
3487 self.needed_locks = {
3488 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3491 def Exec(self, feedback_fn):
3492 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3494 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3495 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3496 for group in group_names])
3499 class LUGroupVerifyDisks(NoHooksLU):
3500 """Verifies the status of all disks in a node group.
3505 def ExpandNames(self):
3506 # Raises errors.OpPrereqError on its own if group can't be found
3507 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3509 self.share_locks = _ShareAll()
3510 self.needed_locks = {
3511 locking.LEVEL_INSTANCE: [],
3512 locking.LEVEL_NODEGROUP: [],
3513 locking.LEVEL_NODE: [],
3516 def DeclareLocks(self, level):
3517 if level == locking.LEVEL_INSTANCE:
3518 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3520 # Lock instances optimistically, needs verification once node and group
3521 # locks have been acquired
3522 self.needed_locks[locking.LEVEL_INSTANCE] = \
3523 self.cfg.GetNodeGroupInstances(self.group_uuid)
3525 elif level == locking.LEVEL_NODEGROUP:
3526 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3528 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3529 set([self.group_uuid] +
3530 # Lock all groups used by instances optimistically; this requires
3531 # going via the node before it's locked, requiring verification later on
3533 [group_uuid
3534 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3535 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3537 elif level == locking.LEVEL_NODE:
3538 # This will only lock the nodes in the group to be verified which contain actual instances
3540 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3541 self._LockInstancesNodes()
3543 # Lock all nodes in group to be verified
3544 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3545 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3546 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3548 def CheckPrereq(self):
3549 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3550 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3551 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3553 assert self.group_uuid in owned_groups
3555 # Check if locked instances are still correct
3556 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3558 # Get instance information
3559 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3561 # Check if node groups for locked instances are still correct
3562 _CheckInstancesNodeGroups(self.cfg, self.instances,
3563 owned_groups, owned_nodes, self.group_uuid)
3565 def Exec(self, feedback_fn):
3566 """Verify integrity of cluster disks.
3568 @rtype: tuple of three items
3569 @return: a tuple of (dict of node-to-node_error, list of instances
3570 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3575 res_instances = set()
3578 nv_dict = _MapInstanceDisksToNodes([inst
3579 for inst in self.instances.values()
3580 if inst.admin_state == constants.ADMINST_UP])
3583 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3584 set(self.cfg.GetVmCapableNodeList()))
3586 node_lvs = self.rpc.call_lv_list(nodes, [])
3588 for (node, node_res) in node_lvs.items():
3589 if node_res.offline:
3592 msg = node_res.fail_msg
3594 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3595 res_nodes[node] = msg
3598 for lv_name, (_, _, lv_online) in node_res.payload.items():
3599 inst = nv_dict.pop((node, lv_name), None)
3600 if not (lv_online or inst is None):
3601 res_instances.add(inst)
3603 # any leftover items in nv_dict are missing LVs, let's arrange the data
3605 for key, inst in nv_dict.iteritems():
3606 res_missing.setdefault(inst, []).append(list(key))
3608 return (res_nodes, list(res_instances), res_missing)
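# Illustrative sketch (hypothetical values, not part of the LU): a return
# value of
#   ({"node3": "Error ..."}, ["inst2"], {"inst5": [["node1", "xenvg/lv1"]]})
# means node3 could not be queried for LVs at all, inst2 has a known but
# offline LV (it needs activate-disks), and inst5 is missing the listed LV on
# node1 entirely.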
3611 class LUClusterRepairDiskSizes(NoHooksLU):
3612 """Verifies the cluster disks sizes.
3617 def ExpandNames(self):
3618 if self.op.instances:
3619 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3620 self.needed_locks = {
3621 locking.LEVEL_NODE_RES: [],
3622 locking.LEVEL_INSTANCE: self.wanted_names,
3624 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3626 self.wanted_names = None
3627 self.needed_locks = {
3628 locking.LEVEL_NODE_RES: locking.ALL_SET,
3629 locking.LEVEL_INSTANCE: locking.ALL_SET,
3631 self.share_locks = {
3632 locking.LEVEL_NODE_RES: 1,
3633 locking.LEVEL_INSTANCE: 0,
3636 def DeclareLocks(self, level):
3637 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3638 self._LockInstancesNodes(primary_only=True, level=level)
3640 def CheckPrereq(self):
3641 """Check prerequisites.
3643 This only checks the optional instance list against the existing names.
3646 if self.wanted_names is None:
3647 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3649 self.wanted_instances = \
3650 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3652 def _EnsureChildSizes(self, disk):
3653 """Ensure children of the disk have the needed disk size.
3655 This is valid mainly for DRBD8 and fixes an issue where the
3656 children have smaller disk size.
3658 @param disk: an L{ganeti.objects.Disk} object
3661 if disk.dev_type == constants.LD_DRBD8:
3662 assert disk.children, "Empty children for DRBD8?"
3663 fchild = disk.children[0]
3664 mismatch = fchild.size < disk.size
3666 self.LogInfo("Child disk has size %d, parent %d, fixing",
3667 fchild.size, disk.size)
3668 fchild.size = disk.size
3670 # and we recurse on this child only, not on the metadev
3671 return self._EnsureChildSizes(fchild) or mismatch
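# Illustrative worked example (hypothetical sizes, not part of the LU): for a
# DRBD8 disk recorded with size 10240 MiB whose data child was recorded with
# only 10000 MiB, the child size is bumped to 10240 and True is returned, so
# the caller knows the configuration must be written out; matching sizes
# return False and nothing is modified.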
3675 def Exec(self, feedback_fn):
3676 """Verify the size of cluster disks.
3679 # TODO: check child disks too
3680 # TODO: check differences in size between primary/secondary nodes
3682 for instance in self.wanted_instances:
3683 pnode = instance.primary_node
3684 if pnode not in per_node_disks:
3685 per_node_disks[pnode] = []
3686 for idx, disk in enumerate(instance.disks):
3687 per_node_disks[pnode].append((instance, idx, disk))
3689 assert not (frozenset(per_node_disks.keys()) -
3690 self.owned_locks(locking.LEVEL_NODE_RES)), \
3691 "Not owning correct locks"
3692 assert not self.owned_locks(locking.LEVEL_NODE)
3695 for node, dskl in per_node_disks.items():
3696 newl = [v[2].Copy() for v in dskl]
3697 for dsk in newl:
3698 self.cfg.SetDiskID(dsk, node)
3699 result = self.rpc.call_blockdev_getsize(node, newl)
3700 if result.fail_msg:
3701 self.LogWarning("Failure in blockdev_getsize call to node"
3702 " %s, ignoring", node)
3704 if len(result.payload) != len(dskl):
3705 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3706 " result.payload=%s", node, len(dskl), result.payload)
3707 self.LogWarning("Invalid result from node %s, ignoring node results",
3710 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3712 self.LogWarning("Disk %d of instance %s did not return size"
3713 " information, ignoring", idx, instance.name)
3715 if not isinstance(size, (int, long)):
3716 self.LogWarning("Disk %d of instance %s did not return valid"
3717 " size information, ignoring", idx, instance.name)
3720 if size != disk.size:
3721 self.LogInfo("Disk %d of instance %s has mismatched size,"
3722 " correcting: recorded %d, actual %d", idx,
3723 instance.name, disk.size, size)
3725 self.cfg.Update(instance, feedback_fn)
3726 changed.append((instance.name, idx, size))
3727 if self._EnsureChildSizes(disk):
3728 self.cfg.Update(instance, feedback_fn)
3729 changed.append((instance.name, idx, disk.size))
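# Illustrative sketch, not part of the original module: the "changed" list
# built above holds one (instance_name, disk_index, size) tuple per corrected
# disk record.  A hypothetical caller could render it like this:
def _ExampleFormatRepairedDisks(changed):
  """Formats LUClusterRepairDiskSizes results for display (sketch)."""
  return ["instance %s, disk %d: recorded size corrected to %d" % entry
          for entry in changed]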
3733 class LUClusterRename(LogicalUnit):
3734 """Rename the cluster.
3737 HPATH = "cluster-rename"
3738 HTYPE = constants.HTYPE_CLUSTER
3740 def BuildHooksEnv(self):
3745 "OP_TARGET": self.cfg.GetClusterName(),
3746 "NEW_NAME": self.op.name,
3749 def BuildHooksNodes(self):
3750 """Build hooks nodes.
3753 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3755 def CheckPrereq(self):
3756 """Verify that the passed name is a valid one.
3759 hostname = netutils.GetHostname(name=self.op.name,
3760 family=self.cfg.GetPrimaryIPFamily())
3762 new_name = hostname.name
3763 self.ip = new_ip = hostname.ip
3764 old_name = self.cfg.GetClusterName()
3765 old_ip = self.cfg.GetMasterIP()
3766 if new_name == old_name and new_ip == old_ip:
3767 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3768 " cluster has changed",
3770 if new_ip != old_ip:
3771 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3772 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3773 " reachable on the network" %
3774 new_ip, errors.ECODE_NOTUNIQUE)
3776 self.op.name = new_name
3778 def Exec(self, feedback_fn):
3779 """Rename the cluster.
3782 clustername = self.op.name
3785 # shutdown the master IP
3786 master_params = self.cfg.GetMasterNetworkParameters()
3787 ems = self.cfg.GetUseExternalMipScript()
3788 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3790 result.Raise("Could not disable the master role")
3793 cluster = self.cfg.GetClusterInfo()
3794 cluster.cluster_name = clustername
3795 cluster.master_ip = new_ip
3796 self.cfg.Update(cluster, feedback_fn)
3798 # update the known hosts file
3799 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3800 node_list = self.cfg.GetOnlineNodeList()
3802 node_list.remove(master_params.name)
3805 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3807 master_params.ip = new_ip
3808 result = self.rpc.call_node_activate_master_ip(master_params.name,
3810 msg = result.fail_msg
3812 self.LogWarning("Could not re-enable the master role on"
3813 " the master, please restart manually: %s", msg)
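# Illustrative sketch, not part of the original module: the uniqueness probe
# used by CheckPrereq above, expressed stand-alone.  It assumes the candidate
# cluster IP must not answer on the node daemon port before the rename.
def _ExampleClusterIpIsFree(new_ip):
  """Returns True if nothing answers on the noded port at new_ip (sketch)."""
  return not netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT)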
3818 def _ValidateNetmask(cfg, netmask):
3819 """Checks if a netmask is valid.
3821 @type cfg: L{config.ConfigWriter}
3822 @param cfg: The cluster configuration
3824 @param netmask: the netmask to be verified
3825 @raise errors.OpPrereqError: if the validation fails
3828 ip_family = cfg.GetPrimaryIPFamily()
3830 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3831 except errors.ProgrammerError:
3832 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3834 if not ipcls.ValidateNetmask(netmask):
3835 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
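# Usage sketch, not part of the original module: _ValidateNetmask takes the
# cluster configuration and a CIDR prefix length and raises OpPrereqError on
# values the address class rejects; a caller preferring a fallback over the
# exception might wrap it like this (the default of 32 is an arbitrary
# example).
def _ExampleNetmaskOrDefault(cfg, netmask, default=32):
  """Returns netmask if _ValidateNetmask accepts it, else default (sketch)."""
  try:
    _ValidateNetmask(cfg, netmask)
  except errors.OpPrereqError:
    return default
  return netmask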
3839 class LUClusterSetParams(LogicalUnit):
3840 """Change the parameters of the cluster.
3843 HPATH = "cluster-modify"
3844 HTYPE = constants.HTYPE_CLUSTER
3847 def CheckArguments(self):
3851 if self.op.uid_pool:
3852 uidpool.CheckUidPool(self.op.uid_pool)
3854 if self.op.add_uids:
3855 uidpool.CheckUidPool(self.op.add_uids)
3857 if self.op.remove_uids:
3858 uidpool.CheckUidPool(self.op.remove_uids)
3860 if self.op.master_netmask is not None:
3861 _ValidateNetmask(self.cfg, self.op.master_netmask)
3863 if self.op.diskparams:
3864 for dt_params in self.op.diskparams.values():
3865 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3867 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3868 except errors.OpPrereqError, err:
3869 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
3872 def ExpandNames(self):
3873 # FIXME: in the future maybe other cluster params won't require checking on
3874 # all nodes to be modified.
3875 self.needed_locks = {
3876 locking.LEVEL_NODE: locking.ALL_SET,
3877 locking.LEVEL_INSTANCE: locking.ALL_SET,
3878 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3880 self.share_locks = {
3881 locking.LEVEL_NODE: 1,
3882 locking.LEVEL_INSTANCE: 1,
3883 locking.LEVEL_NODEGROUP: 1,
3886 def BuildHooksEnv(self):
3891 "OP_TARGET": self.cfg.GetClusterName(),
3892 "NEW_VG_NAME": self.op.vg_name,
3895 def BuildHooksNodes(self):
3896 """Build hooks nodes.
3899 mn = self.cfg.GetMasterNode()
3902 def CheckPrereq(self):
3903 """Check prerequisites.
3905 This checks that the given parameters don't conflict and
3906 that the given volume group is valid.
3909 if self.op.vg_name is not None and not self.op.vg_name:
3910 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3911 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3912 " instances exist", errors.ECODE_INVAL)
3914 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3915 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3916 raise errors.OpPrereqError("Cannot disable drbd helper while"
3917 " drbd-based instances exist",
3920 node_list = self.owned_locks(locking.LEVEL_NODE)
3922 # if vg_name not None, checks given volume group on all nodes
3924 vglist = self.rpc.call_vg_list(node_list)
3925 for node in node_list:
3926 msg = vglist[node].fail_msg
3928 # ignoring down node
3929 self.LogWarning("Error while gathering data on node %s"
3930 " (ignoring node): %s", node, msg)
3932 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3934 constants.MIN_VG_SIZE)
3936 raise errors.OpPrereqError("Error on node '%s': %s" %
3937 (node, vgstatus), errors.ECODE_ENVIRON)
3939 if self.op.drbd_helper:
3940 # checks given drbd helper on all nodes
3941 helpers = self.rpc.call_drbd_helper(node_list)
3942 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3944 self.LogInfo("Not checking drbd helper on offline node %s", node)
3946 msg = helpers[node].fail_msg
3948 raise errors.OpPrereqError("Error checking drbd helper on node"
3949 " '%s': %s" % (node, msg),
3950 errors.ECODE_ENVIRON)
3951 node_helper = helpers[node].payload
3952 if node_helper != self.op.drbd_helper:
3953 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3954 (node, node_helper), errors.ECODE_ENVIRON)
3956 self.cluster = cluster = self.cfg.GetClusterInfo()
3957 # validate params changes
3958 if self.op.beparams:
3959 objects.UpgradeBeParams(self.op.beparams)
3960 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3961 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3963 if self.op.ndparams:
3964 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3965 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3967 # TODO: we need a more general way to handle resetting
3968 # cluster-level parameters to default values
3969 if self.new_ndparams["oob_program"] == "":
3970 self.new_ndparams["oob_program"] = \
3971 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3973 if self.op.hv_state:
3974 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3975 self.cluster.hv_state_static)
3976 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3977 for hv, values in new_hv_state.items())
3979 if self.op.disk_state:
3980 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3981 self.cluster.disk_state_static)
3982 self.new_disk_state = \
3983 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3984 for name, values in svalues.items()))
3985 for storage, svalues in new_disk_state.items())
3988 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3991 all_instances = self.cfg.GetAllInstancesInfo().values()
3993 for group in self.cfg.GetAllNodeGroupsInfo().values():
3994 instances = frozenset([inst for inst in all_instances
3995 if compat.any(node in group.members
3996 for node in inst.all_nodes)])
3997 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3998 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4000 new_ipolicy, instances)
4002 violations.update(new)
4005 self.LogWarning("After the ipolicy change the following instances"
4006 " violate it: %s",
4007 utils.CommaJoin(utils.NiceSort(violations)))
4009 if self.op.nicparams:
4010 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4011 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4012 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4015 # check all instances for consistency
4016 for instance in self.cfg.GetAllInstancesInfo().values():
4017 for nic_idx, nic in enumerate(instance.nics):
4018 params_copy = copy.deepcopy(nic.nicparams)
4019 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4021 # check parameter syntax
4023 objects.NIC.CheckParameterSyntax(params_filled)
4024 except errors.ConfigurationError, err:
4025 nic_errors.append("Instance %s, nic/%d: %s" %
4026 (instance.name, nic_idx, err))
4028 # if we're moving instances to routed, check that they have an ip
4029 target_mode = params_filled[constants.NIC_MODE]
4030 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4031 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4032 " address" % (instance.name, nic_idx))
4034 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4035 "\n".join(nic_errors))
4037 # hypervisor list/parameters
4038 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4039 if self.op.hvparams:
4040 for hv_name, hv_dict in self.op.hvparams.items():
4041 if hv_name not in self.new_hvparams:
4042 self.new_hvparams[hv_name] = hv_dict
4044 self.new_hvparams[hv_name].update(hv_dict)
4046 # disk template parameters
4047 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4048 if self.op.diskparams:
4049 for dt_name, dt_params in self.op.diskparams.items():
4050 if dt_name not in self.new_diskparams:
4051 self.new_diskparams[dt_name] = dt_params
4053 self.new_diskparams[dt_name].update(dt_params)
4055 # os hypervisor parameters
4056 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4058 for os_name, hvs in self.op.os_hvp.items():
4059 if os_name not in self.new_os_hvp:
4060 self.new_os_hvp[os_name] = hvs
4062 for hv_name, hv_dict in hvs.items():
4063 if hv_name not in self.new_os_hvp[os_name]:
4064 self.new_os_hvp[os_name][hv_name] = hv_dict
4066 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4069 self.new_osp = objects.FillDict(cluster.osparams, {})
4070 if self.op.osparams:
4071 for os_name, osp in self.op.osparams.items():
4072 if os_name not in self.new_osp:
4073 self.new_osp[os_name] = {}
4075 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4078 if not self.new_osp[os_name]:
4079 # we removed all parameters
4080 del self.new_osp[os_name]
4082 # check the parameter validity (remote check)
4083 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4084 os_name, self.new_osp[os_name])
4086 # changes to the hypervisor list
4087 if self.op.enabled_hypervisors is not None:
4088 self.hv_list = self.op.enabled_hypervisors
4089 for hv in self.hv_list:
4090 # if the hypervisor doesn't already exist in the cluster
4091 # hvparams, we initialize it to empty, and then (in both
4092 # cases) we make sure to fill the defaults, as we might not
4093 # have a complete defaults list if the hypervisor wasn't
4095 if hv not in new_hvp:
4097 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4098 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4100 self.hv_list = cluster.enabled_hypervisors
4102 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4103 # either the enabled list has changed, or the parameters have, validate
4104 for hv_name, hv_params in self.new_hvparams.items():
4105 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4106 (self.op.enabled_hypervisors and
4107 hv_name in self.op.enabled_hypervisors)):
4108 # either this is a new hypervisor, or its parameters have changed
4109 hv_class = hypervisor.GetHypervisor(hv_name)
4110 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4111 hv_class.CheckParameterSyntax(hv_params)
4112 _CheckHVParams(self, node_list, hv_name, hv_params)
4115 # no need to check any newly-enabled hypervisors, since the
4116 # defaults have already been checked in the above code-block
4117 for os_name, os_hvp in self.new_os_hvp.items():
4118 for hv_name, hv_params in os_hvp.items():
4119 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4120 # we need to fill in the new os_hvp on top of the actual hv_p
4121 cluster_defaults = self.new_hvparams.get(hv_name, {})
4122 new_osp = objects.FillDict(cluster_defaults, hv_params)
4123 hv_class = hypervisor.GetHypervisor(hv_name)
4124 hv_class.CheckParameterSyntax(new_osp)
4125 _CheckHVParams(self, node_list, hv_name, new_osp)
4127 if self.op.default_iallocator:
4128 alloc_script = utils.FindFile(self.op.default_iallocator,
4129 constants.IALLOCATOR_SEARCH_PATH,
4131 if alloc_script is None:
4132 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4133 " specified" % self.op.default_iallocator,
4136 def Exec(self, feedback_fn):
4137 """Change the parameters of the cluster.
4140 if self.op.vg_name is not None:
4141 new_volume = self.op.vg_name
4144 if new_volume != self.cfg.GetVGName():
4145 self.cfg.SetVGName(new_volume)
4147 feedback_fn("Cluster LVM configuration already in desired"
4148 " state, not changing")
4149 if self.op.drbd_helper is not None:
4150 new_helper = self.op.drbd_helper
4153 if new_helper != self.cfg.GetDRBDHelper():
4154 self.cfg.SetDRBDHelper(new_helper)
4156 feedback_fn("Cluster DRBD helper already in desired state,"
4158 if self.op.hvparams:
4159 self.cluster.hvparams = self.new_hvparams
4161 self.cluster.os_hvp = self.new_os_hvp
4162 if self.op.enabled_hypervisors is not None:
4163 self.cluster.hvparams = self.new_hvparams
4164 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4165 if self.op.beparams:
4166 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4167 if self.op.nicparams:
4168 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4170 self.cluster.ipolicy = self.new_ipolicy
4171 if self.op.osparams:
4172 self.cluster.osparams = self.new_osp
4173 if self.op.ndparams:
4174 self.cluster.ndparams = self.new_ndparams
4175 if self.op.diskparams:
4176 self.cluster.diskparams = self.new_diskparams
4177 if self.op.hv_state:
4178 self.cluster.hv_state_static = self.new_hv_state
4179 if self.op.disk_state:
4180 self.cluster.disk_state_static = self.new_disk_state
4182 if self.op.candidate_pool_size is not None:
4183 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4184 # we need to update the pool size here, otherwise the save will fail
4185 _AdjustCandidatePool(self, [])
4187 if self.op.maintain_node_health is not None:
4188 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4189 feedback_fn("Note: CONFD was disabled at build time, node health"
4190 " maintenance is not useful (still enabling it)")
4191 self.cluster.maintain_node_health = self.op.maintain_node_health
4193 if self.op.prealloc_wipe_disks is not None:
4194 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4196 if self.op.add_uids is not None:
4197 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4199 if self.op.remove_uids is not None:
4200 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4202 if self.op.uid_pool is not None:
4203 self.cluster.uid_pool = self.op.uid_pool
4205 if self.op.default_iallocator is not None:
4206 self.cluster.default_iallocator = self.op.default_iallocator
4208 if self.op.reserved_lvs is not None:
4209 self.cluster.reserved_lvs = self.op.reserved_lvs
4211 if self.op.use_external_mip_script is not None:
4212 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4214 def helper_os(aname, mods, desc):
4216 lst = getattr(self.cluster, aname)
4217 for key, val in mods:
4218 if key == constants.DDM_ADD:
4220 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4223 elif key == constants.DDM_REMOVE:
4227 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4229 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4231 if self.op.hidden_os:
4232 helper_os("hidden_os", self.op.hidden_os, "hidden")
4234 if self.op.blacklisted_os:
4235 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4237 if self.op.master_netdev:
4238 master_params = self.cfg.GetMasterNetworkParameters()
4239 ems = self.cfg.GetUseExternalMipScript()
4240 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4241 self.cluster.master_netdev)
4242 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4244 result.Raise("Could not disable the master ip")
4245 feedback_fn("Changing master_netdev from %s to %s" %
4246 (master_params.netdev, self.op.master_netdev))
4247 self.cluster.master_netdev = self.op.master_netdev
4249 if self.op.master_netmask:
4250 master_params = self.cfg.GetMasterNetworkParameters()
4251 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4252 result = self.rpc.call_node_change_master_netmask(master_params.name,
4253 master_params.netmask,
4254 self.op.master_netmask,
4256 master_params.netdev)
4258 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4261 self.cluster.master_netmask = self.op.master_netmask
4263 self.cfg.Update(self.cluster, feedback_fn)
4265 if self.op.master_netdev:
4266 master_params = self.cfg.GetMasterNetworkParameters()
4267 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4268 self.op.master_netdev)
4269 ems = self.cfg.GetUseExternalMipScript()
4270 result = self.rpc.call_node_activate_master_ip(master_params.name,
4273 self.LogWarning("Could not re-enable the master ip on"
4274 " the master, please restart manually: %s",
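# Illustrative sketch, not part of the original module: the hypervisor
# parameter merge performed in CheckPrereq above, extracted into a helper.
# Opcode-supplied dictionaries are layered on top of the current cluster
# hvparams, per hypervisor.
def _ExampleMergeHvParams(cluster_hvparams, op_hvparams):
  """Returns cluster hvparams with the opcode overrides applied (sketch)."""
  new_hvparams = objects.FillDict(cluster_hvparams, {})
  for hv_name, hv_dict in op_hvparams.items():
    if hv_name not in new_hvparams:
      new_hvparams[hv_name] = hv_dict
    else:
      new_hvparams[hv_name].update(hv_dict)
  return new_hvparams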
4278 def _UploadHelper(lu, nodes, fname):
4279 """Helper for uploading a file and showing warnings.
4282 if os.path.exists(fname):
4283 result = lu.rpc.call_upload_file(nodes, fname)
4284 for to_node, to_result in result.items():
4285 msg = to_result.fail_msg
4287 msg = ("Copy of file %s to node %s failed: %s" %
4288 (fname, to_node, msg))
4289 lu.proc.LogWarning(msg)
4292 def _ComputeAncillaryFiles(cluster, redist):
4293 """Compute files external to Ganeti which need to be consistent.
4295 @type redist: boolean
4296 @param redist: Whether to include files which need to be redistributed
4299 # Compute files for all nodes
4301 constants.SSH_KNOWN_HOSTS_FILE,
4302 constants.CONFD_HMAC_KEY,
4303 constants.CLUSTER_DOMAIN_SECRET_FILE,
4304 constants.SPICE_CERT_FILE,
4305 constants.SPICE_CACERT_FILE,
4306 constants.RAPI_USERS_FILE,
4310 files_all.update(constants.ALL_CERT_FILES)
4311 files_all.update(ssconf.SimpleStore().GetFileList())
4313 # we need to ship at least the RAPI certificate
4314 files_all.add(constants.RAPI_CERT_FILE)
4316 if cluster.modify_etc_hosts:
4317 files_all.add(constants.ETC_HOSTS)
4319 if cluster.use_external_mip_script:
4320 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4322 # Files which are optional; these must:
4323 # - be present in one other category as well
4324 # - either exist or not exist on all nodes of that category (mc, vm all)
4326 constants.RAPI_USERS_FILE,
4329 # Files which should only be on master candidates
4333 files_mc.add(constants.CLUSTER_CONF_FILE)
4335 # Files which should only be on VM-capable nodes
4336 files_vm = set(filename
4337 for hv_name in cluster.enabled_hypervisors
4338 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4340 files_opt |= set(filename
4341 for hv_name in cluster.enabled_hypervisors
4342 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4344 # Filenames in each category must be unique
4345 all_files_set = files_all | files_mc | files_vm
4346 assert (len(all_files_set) ==
4347 sum(map(len, [files_all, files_mc, files_vm]))), \
4348 "Found file listed in more than one file list"
4350 # Optional files must be present in one other category
4351 assert all_files_set.issuperset(files_opt), \
4352 "Optional file not in a different required list"
4354 return (files_all, files_opt, files_mc, files_vm)
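# Illustrative sketch, not part of the original module: a small consumer of
# the four sets returned by _ComputeAncillaryFiles, telling which
# distribution categories a given file name belongs to.
def _ExampleClassifyAncillaryFile(cluster, fname, redist=True):
  """Returns the ancillary-file categories containing fname (sketch)."""
  (files_all, files_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, redist)
  categories = []
  if fname in files_all:
    categories.append("all nodes")
  if fname in files_mc:
    categories.append("master candidates")
  if fname in files_vm:
    categories.append("vm-capable nodes")
  if fname in files_opt:
    categories.append("optional")
  return categories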
4357 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4358 """Distribute additional files which are part of the cluster configuration.
4360 ConfigWriter takes care of distributing the config and ssconf files, but
4361 there are more files which should be distributed to all nodes. This function
4362 makes sure those are copied.
4364 @param lu: calling logical unit
4365 @param additional_nodes: list of nodes not in the config to distribute to
4366 @type additional_vm: boolean
4367 @param additional_vm: whether the additional nodes are vm-capable or not
4370 # Gather target nodes
4371 cluster = lu.cfg.GetClusterInfo()
4372 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4374 online_nodes = lu.cfg.GetOnlineNodeList()
4375 online_set = frozenset(online_nodes)
4376 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4378 if additional_nodes is not None:
4379 online_nodes.extend(additional_nodes)
4381 vm_nodes.extend(additional_nodes)
4383 # Never distribute to master node
4384 for nodelist in [online_nodes, vm_nodes]:
4385 if master_info.name in nodelist:
4386 nodelist.remove(master_info.name)
4389 (files_all, _, files_mc, files_vm) = \
4390 _ComputeAncillaryFiles(cluster, True)
4392 # Never re-distribute the configuration file from here
4393 assert not (constants.CLUSTER_CONF_FILE in files_all or
4394 constants.CLUSTER_CONF_FILE in files_vm)
4395 assert not files_mc, "Master candidates not handled in this function"
4398 (online_nodes, files_all),
4399 (vm_nodes, files_vm),
4403 for (node_list, files) in filemap:
4405 _UploadHelper(lu, node_list, fname)
4408 class LUClusterRedistConf(NoHooksLU):
4409 """Force the redistribution of cluster configuration.
4411 This is a very simple LU.
4416 def ExpandNames(self):
4417 self.needed_locks = {
4418 locking.LEVEL_NODE: locking.ALL_SET,
4420 self.share_locks[locking.LEVEL_NODE] = 1
4422 def Exec(self, feedback_fn):
4423 """Redistribute the configuration.
4426 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4427 _RedistributeAncillaryFiles(self)
4430 class LUClusterActivateMasterIp(NoHooksLU):
4431 """Activate the master IP on the master node.
4434 def Exec(self, feedback_fn):
4435 """Activate the master IP.
4438 master_params = self.cfg.GetMasterNetworkParameters()
4439 ems = self.cfg.GetUseExternalMipScript()
4440 result = self.rpc.call_node_activate_master_ip(master_params.name,
4442 result.Raise("Could not activate the master IP")
4445 class LUClusterDeactivateMasterIp(NoHooksLU):
4446 """Deactivate the master IP on the master node.
4449 def Exec(self, feedback_fn):
4450 """Deactivate the master IP.
4453 master_params = self.cfg.GetMasterNetworkParameters()
4454 ems = self.cfg.GetUseExternalMipScript()
4455 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4457 result.Raise("Could not deactivate the master IP")
4460 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4461 """Sleep and poll for an instance's disks to sync.
4464 if not instance.disks or disks is not None and not disks:
4467 disks = _ExpandCheckDisks(instance, disks)
4470 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4472 node = instance.primary_node
4475 lu.cfg.SetDiskID(dev, node)
4477 # TODO: Convert to utils.Retry
4480 degr_retries = 10 # in seconds, as we sleep 1 second each time
4484 cumul_degraded = False
4485 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4486 msg = rstats.fail_msg
4488 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4491 raise errors.RemoteError("Can't contact node %s for mirror data,"
4492 " aborting." % node)
4495 rstats = rstats.payload
4497 for i, mstat in enumerate(rstats):
4499 lu.LogWarning("Can't compute data for node %s/%s",
4500 node, disks[i].iv_name)
4503 cumul_degraded = (cumul_degraded or
4504 (mstat.is_degraded and mstat.sync_percent is None))
4505 if mstat.sync_percent is not None:
4507 if mstat.estimated_time is not None:
4508 rem_time = ("%s remaining (estimated)" %
4509 utils.FormatSeconds(mstat.estimated_time))
4510 max_time = mstat.estimated_time
4512 rem_time = "no time estimate"
4513 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4514 (disks[i].iv_name, mstat.sync_percent, rem_time))
4516 # if we're done but degraded, let's do a few small retries, to
4517 # make sure we see a stable and not transient situation; therefore
4518 # we force a restart of the loop
4519 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4520 logging.info("Degraded disks found, %d retries left", degr_retries)
4528 time.sleep(min(60, max_time))
4531 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4532 return not cumul_degraded
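# Usage sketch, not part of the original module: _WaitForSync returns True
# once no disk is left in a degraded state; a typical caller treats False as
# an error.  The exact error handling differs between logical units, so this
# is only an example.
def _ExampleEnsureDisksInSync(lu, instance):
  """Waits for instance disks to sync and raises if they stay degraded."""
  if not _WaitForSync(lu, instance):
    raise errors.OpExecError("Disks of instance %s are degraded" %
                             instance.name)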
4535 def _BlockdevFind(lu, node, dev, instance):
4536 """Wrapper around call_blockdev_find to annotate diskparams.
4538 @param lu: A reference to the lu object
4539 @param node: The node to call out to
4540 @param dev: The device to find
4541 @param instance: The instance object the device belongs to
4542 @return: The result of the RPC call
4545 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4546 return lu.rpc.call_blockdev_find(node, disk)
4549 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4550 """Wrapper around L{_CheckDiskConsistencyInner}.
4553 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4554 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4558 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4560 """Check that mirrors are not degraded.
4562 @attention: The device has to be annotated already.
4564 The ldisk parameter, if True, will change the test from the
4565 is_degraded attribute (which represents overall non-ok status for
4566 the device(s)) to the ldisk (representing the local storage status).
4569 lu.cfg.SetDiskID(dev, node)
4573 if on_primary or dev.AssembleOnSecondary():
4574 rstats = lu.rpc.call_blockdev_find(node, dev)
4575 msg = rstats.fail_msg
4577 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4579 elif not rstats.payload:
4580 lu.LogWarning("Can't find disk on node %s", node)
4584 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4586 result = result and not rstats.payload.is_degraded
4589 for child in dev.children:
4590 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
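# Illustrative sketch, not part of the original module: the two flavours of
# the consistency check above, as a caller would request them.  With
# ldisk=True the local storage status is tested instead of the overall
# degradation flag.
def _ExampleDiskHealth(lu, instance, dev, node, on_primary):
  """Returns (overall_ok, local_storage_ok) for one disk (sketch)."""
  overall_ok = _CheckDiskConsistency(lu, instance, dev, node, on_primary)
  local_ok = _CheckDiskConsistency(lu, instance, dev, node, on_primary,
                                   ldisk=True)
  return (overall_ok, local_ok)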
4596 class LUOobCommand(NoHooksLU):
4597 """Logical unit for OOB handling.
4601 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4603 def ExpandNames(self):
4604 """Gather locks we need.
4607 if self.op.node_names:
4608 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4609 lock_names = self.op.node_names
4611 lock_names = locking.ALL_SET
4613 self.needed_locks = {
4614 locking.LEVEL_NODE: lock_names,
4617 def CheckPrereq(self):
4618 """Check prerequisites.
4621 - the node exists in the configuration
4624 Any errors are signaled by raising errors.OpPrereqError.
4628 self.master_node = self.cfg.GetMasterNode()
4630 assert self.op.power_delay >= 0.0
4632 if self.op.node_names:
4633 if (self.op.command in self._SKIP_MASTER and
4634 self.master_node in self.op.node_names):
4635 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4636 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4638 if master_oob_handler:
4639 additional_text = ("run '%s %s %s' if you want to operate on the"
4640 " master regardless") % (master_oob_handler,
4644 additional_text = "it does not support out-of-band operations"
4646 raise errors.OpPrereqError(("Operating on the master node %s is not"
4647 " allowed for %s; %s") %
4648 (self.master_node, self.op.command,
4649 additional_text), errors.ECODE_INVAL)
4651 self.op.node_names = self.cfg.GetNodeList()
4652 if self.op.command in self._SKIP_MASTER:
4653 self.op.node_names.remove(self.master_node)
4655 if self.op.command in self._SKIP_MASTER:
4656 assert self.master_node not in self.op.node_names
4658 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4660 raise errors.OpPrereqError("Node %s not found" % node_name,
4663 self.nodes.append(node)
4665 if (not self.op.ignore_status and
4666 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4667 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4668 " not marked offline") % node_name,
4671 def Exec(self, feedback_fn):
4672 """Execute OOB and return result if we expect any.
4675 master_node = self.master_node
4678 for idx, node in enumerate(utils.NiceSort(self.nodes,
4679 key=lambda node: node.name)):
4680 node_entry = [(constants.RS_NORMAL, node.name)]
4681 ret.append(node_entry)
4683 oob_program = _SupportsOob(self.cfg, node)
4686 node_entry.append((constants.RS_UNAVAIL, None))
4689 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4690 self.op.command, oob_program, node.name)
4691 result = self.rpc.call_run_oob(master_node, oob_program,
4692 self.op.command, node.name,
4696 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4697 node.name, result.fail_msg)
4698 node_entry.append((constants.RS_NODATA, None))
4701 self._CheckPayload(result)
4702 except errors.OpExecError, err:
4703 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4705 node_entry.append((constants.RS_NODATA, None))
4707 if self.op.command == constants.OOB_HEALTH:
4708 # For health we should log important events
4709 for item, status in result.payload:
4710 if status in [constants.OOB_STATUS_WARNING,
4711 constants.OOB_STATUS_CRITICAL]:
4712 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4713 item, node.name, status)
4715 if self.op.command == constants.OOB_POWER_ON:
4717 elif self.op.command == constants.OOB_POWER_OFF:
4718 node.powered = False
4719 elif self.op.command == constants.OOB_POWER_STATUS:
4720 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4721 if powered != node.powered:
4722 logging.warning(("Recorded power state (%s) of node '%s' does not"
4723 " match actual power state (%s)"), node.powered,
4726 # For configuration changing commands we should update the node
4727 if self.op.command in (constants.OOB_POWER_ON,
4728 constants.OOB_POWER_OFF):
4729 self.cfg.Update(node, feedback_fn)
4731 node_entry.append((constants.RS_NORMAL, result.payload))
4733 if (self.op.command == constants.OOB_POWER_ON and
4734 idx < len(self.nodes) - 1):
4735 time.sleep(self.op.power_delay)
4739 def _CheckPayload(self, result):
4740 """Checks if the payload is valid.
4742 @param result: RPC result
4743 @raises errors.OpExecError: If payload is not valid
4747 if self.op.command == constants.OOB_HEALTH:
4748 if not isinstance(result.payload, list):
4749 errs.append("command 'health' is expected to return a list but got %s" %
4750 type(result.payload))
4752 for item, status in result.payload:
4753 if status not in constants.OOB_STATUSES:
4754 errs.append("health item '%s' has invalid status '%s'" %
4757 if self.op.command == constants.OOB_POWER_STATUS:
4758 if not isinstance(result.payload, dict):
4759 errs.append("power-status is expected to return a dict but got %s" %
4760 type(result.payload))
4762 if self.op.command in [
4763 constants.OOB_POWER_ON,
4764 constants.OOB_POWER_OFF,
4765 constants.OOB_POWER_CYCLE,
4767 if result.payload is not None:
4768 errs.append("%s is expected to not return payload but got '%s'" %
4769 (self.op.command, result.payload))
4772 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4773 utils.CommaJoin(errs))
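# Illustrative sketch, not part of the original module: LUOobCommand.Exec
# returns one list per node, each holding (result-status, data) tuples with
# the node name as the first entry.  A hypothetical consumer extracting the
# nodes whose out-of-band call did not succeed:
def _ExampleOobFailures(oob_result):
  """Returns names of nodes with a non-RS_NORMAL OOB entry (sketch)."""
  failed = []
  for node_entry in oob_result:
    (_, node_name) = node_entry[0]
    if compat.any(status != constants.RS_NORMAL
                  for (status, _) in node_entry[1:]):
      failed.append(node_name)
  return failed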
4776 class _OsQuery(_QueryBase):
4777 FIELDS = query.OS_FIELDS
4779 def ExpandNames(self, lu):
4780 # Lock all nodes in shared mode
4781 # Temporary removal of locks, should be reverted later
4782 # TODO: reintroduce locks when they are lighter-weight
4783 lu.needed_locks = {}
4784 #self.share_locks[locking.LEVEL_NODE] = 1
4785 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4787 # The following variables interact with _QueryBase._GetNames
4789 self.wanted = self.names
4791 self.wanted = locking.ALL_SET
4793 self.do_locking = self.use_locking
4795 def DeclareLocks(self, lu, level):
4799 def _DiagnoseByOS(rlist):
4800 """Remaps a per-node return list into a per-os per-node dictionary
4802 @param rlist: a map with node names as keys and OS objects as values
4805 @return: a dictionary with osnames as keys and as value another
4806 map, with nodes as keys and tuples of (path, status, diagnose,
4807 variants, parameters, api_versions) as values, eg::
4809 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4810 (/srv/..., False, "invalid api")],
4811 "node2": [(/srv/..., True, "", [], [])]}
4816 # we build here the list of nodes that didn't fail the RPC (at RPC
4817 # level), so that nodes with a non-responding node daemon don't
4818 # make all OSes invalid
4819 good_nodes = [node_name for node_name in rlist
4820 if not rlist[node_name].fail_msg]
4821 for node_name, nr in rlist.items():
4822 if nr.fail_msg or not nr.payload:
4824 for (name, path, status, diagnose, variants,
4825 params, api_versions) in nr.payload:
4826 if name not in all_os:
4827 # build a list of nodes for this os containing empty lists
4828 # for each node in node_list
4830 for nname in good_nodes:
4831 all_os[name][nname] = []
4832 # convert params from [name, help] to (name, help)
4833 params = [tuple(v) for v in params]
4834 all_os[name][node_name].append((path, status, diagnose,
4835 variants, params, api_versions))
4838 def _GetQueryData(self, lu):
4839 """Computes the list of nodes and their attributes.
4842 # Locking is not used
4843 assert not (compat.any(lu.glm.is_owned(level)
4844 for level in locking.LEVELS
4845 if level != locking.LEVEL_CLUSTER) or
4846 self.do_locking or self.use_locking)
4848 valid_nodes = [node.name
4849 for node in lu.cfg.GetAllNodesInfo().values()
4850 if not node.offline and node.vm_capable]
4851 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4852 cluster = lu.cfg.GetClusterInfo()
4856 for (os_name, os_data) in pol.items():
4857 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4858 hidden=(os_name in cluster.hidden_os),
4859 blacklisted=(os_name in cluster.blacklisted_os))
4863 api_versions = set()
4865 for idx, osl in enumerate(os_data.values()):
4866 info.valid = bool(info.valid and osl and osl[0][1])
4870 (node_variants, node_params, node_api) = osl[0][3:6]
4873 variants.update(node_variants)
4874 parameters.update(node_params)
4875 api_versions.update(node_api)
4877 # Filter out inconsistent values
4878 variants.intersection_update(node_variants)
4879 parameters.intersection_update(node_params)
4880 api_versions.intersection_update(node_api)
4882 info.variants = list(variants)
4883 info.parameters = list(parameters)
4884 info.api_versions = list(api_versions)
4886 data[os_name] = info
4888 # Prepare data in requested order
4889 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4893 class LUOsDiagnose(NoHooksLU):
4894 """Logical unit for OS diagnose/query.
4900 def _BuildFilter(fields, names):
4901 """Builds a filter for querying OSes.
4904 name_filter = qlang.MakeSimpleFilter("name", names)
4906 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4907 # respective field is not requested
4908 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4909 for fname in ["hidden", "blacklisted"]
4910 if fname not in fields]
4911 if "valid" not in fields:
4912 status_filter.append([qlang.OP_TRUE, "valid"])
4915 status_filter.insert(0, qlang.OP_AND)
4917 status_filter = None
4919 if name_filter and status_filter:
4920 return [qlang.OP_AND, name_filter, status_filter]
4924 return status_filter
4926 def CheckArguments(self):
4927 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4928 self.op.output_fields, False)
4930 def ExpandNames(self):
4931 self.oq.ExpandNames(self)
4933 def Exec(self, feedback_fn):
4934 return self.oq.OldStyleQuery(self)
4937 class LUNodeRemove(LogicalUnit):
4938 """Logical unit for removing a node.
4941 HPATH = "node-remove"
4942 HTYPE = constants.HTYPE_NODE
4944 def BuildHooksEnv(self):
4949 "OP_TARGET": self.op.node_name,
4950 "NODE_NAME": self.op.node_name,
4953 def BuildHooksNodes(self):
4954 """Build hooks nodes.
4956 This doesn't run on the target node in the pre phase as a failed
4957 node would then be impossible to remove.
4960 all_nodes = self.cfg.GetNodeList()
4962 all_nodes.remove(self.op.node_name)
4965 return (all_nodes, all_nodes)
4967 def CheckPrereq(self):
4968 """Check prerequisites.
4971 - the node exists in the configuration
4972 - it does not have primary or secondary instances
4973 - it's not the master
4975 Any errors are signaled by raising errors.OpPrereqError.
4978 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4979 node = self.cfg.GetNodeInfo(self.op.node_name)
4980 assert node is not None
4982 masternode = self.cfg.GetMasterNode()
4983 if node.name == masternode:
4984 raise errors.OpPrereqError("Node is the master node, failover to another"
4985 " node is required", errors.ECODE_INVAL)
4987 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4988 if node.name in instance.all_nodes:
4989 raise errors.OpPrereqError("Instance %s is still running on the node,"
4990 " please remove it first" % instance_name,
4992 self.op.node_name = node.name
4995 def Exec(self, feedback_fn):
4996 """Removes the node from the cluster.
5000 logging.info("Stopping the node daemon and removing configs from node %s",
5003 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5005 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5008 # Promote nodes to master candidate as needed
5009 _AdjustCandidatePool(self, exceptions=[node.name])
5010 self.context.RemoveNode(node.name)
5012 # Run post hooks on the node before it's removed
5013 _RunPostHook(self, node.name)
5015 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5016 msg = result.fail_msg
5018 self.LogWarning("Errors encountered on the remote node while leaving"
5019 " the cluster: %s", msg)
5021 # Remove node from our /etc/hosts
5022 if self.cfg.GetClusterInfo().modify_etc_hosts:
5023 master_node = self.cfg.GetMasterNode()
5024 result = self.rpc.call_etc_hosts_modify(master_node,
5025 constants.ETC_HOSTS_REMOVE,
5027 result.Raise("Can't update hosts file with new host data")
5028 _RedistributeAncillaryFiles(self)
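# Illustrative sketch, not part of the original module: the "node is still
# used by instances" test from CheckPrereq above, extracted into a predicate.
def _ExampleNodeHoldsInstances(cfg, node_name):
  """Returns True if any instance uses node_name as primary or secondary."""
  return compat.any(node_name in inst.all_nodes
                    for inst in cfg.GetAllInstancesInfo().values())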
5031 class _NodeQuery(_QueryBase):
5032 FIELDS = query.NODE_FIELDS
5034 def ExpandNames(self, lu):
5035 lu.needed_locks = {}
5036 lu.share_locks = _ShareAll()
5039 self.wanted = _GetWantedNodes(lu, self.names)
5041 self.wanted = locking.ALL_SET
5043 self.do_locking = (self.use_locking and
5044 query.NQ_LIVE in self.requested_data)
5047 # If any non-static field is requested we need to lock the nodes
5048 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5050 def DeclareLocks(self, lu, level):
5053 def _GetQueryData(self, lu):
5054 """Computes the list of nodes and their attributes.
5057 all_info = lu.cfg.GetAllNodesInfo()
5059 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5061 # Gather data as requested
5062 if query.NQ_LIVE in self.requested_data:
5063 # filter out non-vm_capable nodes
5064 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5066 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5067 [lu.cfg.GetHypervisorType()])
5068 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5069 for (name, nresult) in node_data.items()
5070 if not nresult.fail_msg and nresult.payload)
5074 if query.NQ_INST in self.requested_data:
5075 node_to_primary = dict([(name, set()) for name in nodenames])
5076 node_to_secondary = dict([(name, set()) for name in nodenames])
5078 inst_data = lu.cfg.GetAllInstancesInfo()
5080 for inst in inst_data.values():
5081 if inst.primary_node in node_to_primary:
5082 node_to_primary[inst.primary_node].add(inst.name)
5083 for secnode in inst.secondary_nodes:
5084 if secnode in node_to_secondary:
5085 node_to_secondary[secnode].add(inst.name)
5087 node_to_primary = None
5088 node_to_secondary = None
5090 if query.NQ_OOB in self.requested_data:
5091 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5092 for name, node in all_info.iteritems())
5096 if query.NQ_GROUP in self.requested_data:
5097 groups = lu.cfg.GetAllNodeGroupsInfo()
5101 return query.NodeQueryData([all_info[name] for name in nodenames],
5102 live_data, lu.cfg.GetMasterNode(),
5103 node_to_primary, node_to_secondary, groups,
5104 oob_support, lu.cfg.GetClusterInfo())
5107 class LUNodeQuery(NoHooksLU):
5108 """Logical unit for querying nodes.
5111 # pylint: disable=W0142
5114 def CheckArguments(self):
5115 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5116 self.op.output_fields, self.op.use_locking)
5118 def ExpandNames(self):
5119 self.nq.ExpandNames(self)
5121 def DeclareLocks(self, level):
5122 self.nq.DeclareLocks(self, level)
5124 def Exec(self, feedback_fn):
5125 return self.nq.OldStyleQuery(self)
5128 class LUNodeQueryvols(NoHooksLU):
5129 """Logical unit for getting volumes on node(s).
5133 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5134 _FIELDS_STATIC = utils.FieldSet("node")
5136 def CheckArguments(self):
5137 _CheckOutputFields(static=self._FIELDS_STATIC,
5138 dynamic=self._FIELDS_DYNAMIC,
5139 selected=self.op.output_fields)
5141 def ExpandNames(self):
5142 self.share_locks = _ShareAll()
5143 self.needed_locks = {}
5145 if not self.op.nodes:
5146 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5148 self.needed_locks[locking.LEVEL_NODE] = \
5149 _GetWantedNodes(self, self.op.nodes)
5151 def Exec(self, feedback_fn):
5152 """Computes the list of nodes and their attributes.
5155 nodenames = self.owned_locks(locking.LEVEL_NODE)
5156 volumes = self.rpc.call_node_volumes(nodenames)
5158 ilist = self.cfg.GetAllInstancesInfo()
5159 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5162 for node in nodenames:
5163 nresult = volumes[node]
5166 msg = nresult.fail_msg
5168 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5171 node_vols = sorted(nresult.payload,
5172 key=operator.itemgetter("dev"))
5174 for vol in node_vols:
5176 for field in self.op.output_fields:
5179 elif field == "phys":
5183 elif field == "name":
5185 elif field == "size":
5186 val = int(float(vol["size"]))
5187 elif field == "instance":
5188 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5190 raise errors.ParameterError(field)
5191 node_output.append(str(val))
5193 output.append(node_output)
5198 class LUNodeQueryStorage(NoHooksLU):
5199 """Logical unit for getting information on storage units on node(s).
5202 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5205 def CheckArguments(self):
5206 _CheckOutputFields(static=self._FIELDS_STATIC,
5207 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5208 selected=self.op.output_fields)
5210 def ExpandNames(self):
5211 self.share_locks = _ShareAll()
5212 self.needed_locks = {}
5215 self.needed_locks[locking.LEVEL_NODE] = \
5216 _GetWantedNodes(self, self.op.nodes)
5218 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5220 def Exec(self, feedback_fn):
5221 """Computes the list of nodes and their attributes.
5224 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5226 # Always get name to sort by
5227 if constants.SF_NAME in self.op.output_fields:
5228 fields = self.op.output_fields[:]
5230 fields = [constants.SF_NAME] + self.op.output_fields
5232 # Never ask for node or type, as they are only known to the LU
5233 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5234 while extra in fields:
5235 fields.remove(extra)
5237 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5238 name_idx = field_idx[constants.SF_NAME]
5240 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5241 data = self.rpc.call_storage_list(self.nodes,
5242 self.op.storage_type, st_args,
5243 self.op.name, fields)
5247 for node in utils.NiceSort(self.nodes):
5248 nresult = data[node]
5252 msg = nresult.fail_msg
5254 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5257 rows = dict([(row[name_idx], row) for row in nresult.payload])
5259 for name in utils.NiceSort(rows.keys()):
5264 for field in self.op.output_fields:
5265 if field == constants.SF_NODE:
5267 elif field == constants.SF_TYPE:
5268 val = self.op.storage_type
5269 elif field in field_idx:
5270 val = row[field_idx[field]]
5272 raise errors.ParameterError(field)
5281 class _InstanceQuery(_QueryBase):
5282 FIELDS = query.INSTANCE_FIELDS
5284 def ExpandNames(self, lu):
5285 lu.needed_locks = {}
5286 lu.share_locks = _ShareAll()
5289 self.wanted = _GetWantedInstances(lu, self.names)
5291 self.wanted = locking.ALL_SET
5293 self.do_locking = (self.use_locking and
5294 query.IQ_LIVE in self.requested_data)
5296 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5297 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5298 lu.needed_locks[locking.LEVEL_NODE] = []
5299 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5301 self.do_grouplocks = (self.do_locking and
5302 query.IQ_NODES in self.requested_data)
5304 def DeclareLocks(self, lu, level):
5306 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5307 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5309 # Lock all groups used by instances optimistically; this requires going
5310 # via the node before it's locked, requiring verification later on
5311 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5313 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5314 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5315 elif level == locking.LEVEL_NODE:
5316 lu._LockInstancesNodes() # pylint: disable=W0212
5319 def _CheckGroupLocks(lu):
5320 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5321 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5323 # Check if node groups for locked instances are still correct
5324 for instance_name in owned_instances:
5325 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5327 def _GetQueryData(self, lu):
5328 """Computes the list of instances and their attributes.
5331 if self.do_grouplocks:
5332 self._CheckGroupLocks(lu)
5334 cluster = lu.cfg.GetClusterInfo()
5335 all_info = lu.cfg.GetAllInstancesInfo()
5337 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5339 instance_list = [all_info[name] for name in instance_names]
5340 nodes = frozenset(itertools.chain(*(inst.all_nodes
5341 for inst in instance_list)))
5342 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5345 wrongnode_inst = set()
5347 # Gather data as requested
5348 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5350 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5352 result = node_data[name]
5354 # offline nodes will be in both lists
5355 assert result.fail_msg
5356 offline_nodes.append(name)
5358 bad_nodes.append(name)
5359 elif result.payload:
5360 for inst in result.payload:
5361 if inst in all_info:
5362 if all_info[inst].primary_node == name:
5363 live_data.update(result.payload)
5365 wrongnode_inst.add(inst)
5367 # orphan instance; we don't list it here as we don't
5368 # handle this case yet in the output of instance listing
5369 logging.warning("Orphan instance '%s' found on node %s",
5371 # else no instance is alive
5375 if query.IQ_DISKUSAGE in self.requested_data:
5376 disk_usage = dict((inst.name,
5377 _ComputeDiskSize(inst.disk_template,
5378 [{constants.IDISK_SIZE: disk.size}
5379 for disk in inst.disks]))
5380 for inst in instance_list)
5384 if query.IQ_CONSOLE in self.requested_data:
5386 for inst in instance_list:
5387 if inst.name in live_data:
5388 # Instance is running
5389 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5391 consinfo[inst.name] = None
5392 assert set(consinfo.keys()) == set(instance_names)
5396 if query.IQ_NODES in self.requested_data:
5397 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5399 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5400 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5401 for uuid in set(map(operator.attrgetter("group"),
5407 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5408 disk_usage, offline_nodes, bad_nodes,
5409 live_data, wrongnode_inst, consinfo,
5413 class LUQuery(NoHooksLU):
5414 """Query for resources/items of a certain kind.
5417 # pylint: disable=W0142
5420 def CheckArguments(self):
5421 qcls = _GetQueryImplementation(self.op.what)
5423 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5425 def ExpandNames(self):
5426 self.impl.ExpandNames(self)
5428 def DeclareLocks(self, level):
5429 self.impl.DeclareLocks(self, level)
5431 def Exec(self, feedback_fn):
5432 return self.impl.NewStyleQuery(self)
5435 class LUQueryFields(NoHooksLU):
5436 """Query for resources/items of a certain kind.
5439 # pylint: disable=W0142
5442 def CheckArguments(self):
5443 self.qcls = _GetQueryImplementation(self.op.what)
5445 def ExpandNames(self):
5446 self.needed_locks = {}
5448 def Exec(self, feedback_fn):
5449 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5452 class LUNodeModifyStorage(NoHooksLU):
5453 """Logical unit for modifying a storage volume on a node.
5458 def CheckArguments(self):
5459 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5461 storage_type = self.op.storage_type
5464 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5466 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5467 " modified" % storage_type,
5470 diff = set(self.op.changes.keys()) - modifiable
5472 raise errors.OpPrereqError("The following fields can not be modified for"
5473 " storage units of type '%s': %r" %
5474 (storage_type, list(diff)),
5477 def ExpandNames(self):
5478 self.needed_locks = {
5479 locking.LEVEL_NODE: self.op.node_name,
5482 def Exec(self, feedback_fn):
5483 """Computes the list of nodes and their attributes.
5486 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5487 result = self.rpc.call_storage_modify(self.op.node_name,
5488 self.op.storage_type, st_args,
5489 self.op.name, self.op.changes)
5490 result.Raise("Failed to modify storage unit '%s' on %s" %
5491 (self.op.name, self.op.node_name))
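# Illustrative sketch, not part of the original module: the argument check in
# LUNodeModifyStorage.CheckArguments reduces to a set difference against the
# per-type collection of modifiable fields; the same test stand-alone:
def _ExampleUnmodifiableFields(storage_type, changes):
  """Returns the requested change keys that cannot be modified (sketch)."""
  modifiable = constants.MODIFIABLE_STORAGE_FIELDS.get(storage_type,
                                                       frozenset())
  return set(changes.keys()) - modifiable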
5494 class LUNodeAdd(LogicalUnit):
5495 """Logical unit for adding node to the cluster.
5499 HTYPE = constants.HTYPE_NODE
5500 _NFLAGS = ["master_capable", "vm_capable"]
5502 def CheckArguments(self):
5503 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5504 # validate/normalize the node name
5505 self.hostname = netutils.GetHostname(name=self.op.node_name,
5506 family=self.primary_ip_family)
5507 self.op.node_name = self.hostname.name
5509 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5510 raise errors.OpPrereqError("Cannot readd the master node",
5513 if self.op.readd and self.op.group:
5514 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5515 " being readded", errors.ECODE_INVAL)
5517 def BuildHooksEnv(self):
5520 This will run on all nodes before, and on all nodes + the new node after.
5524 "OP_TARGET": self.op.node_name,
5525 "NODE_NAME": self.op.node_name,
5526 "NODE_PIP": self.op.primary_ip,
5527 "NODE_SIP": self.op.secondary_ip,
5528 "MASTER_CAPABLE": str(self.op.master_capable),
5529 "VM_CAPABLE": str(self.op.vm_capable),
5532 def BuildHooksNodes(self):
5533 """Build hooks nodes.
5536 # Exclude added node
5537 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5538 post_nodes = pre_nodes + [self.op.node_name, ]
5540 return (pre_nodes, post_nodes)
5542 def CheckPrereq(self):
5543 """Check prerequisites.
5546 - the new node is not already in the config
5548 - its parameters (single/dual homed) matches the cluster
5550 Any errors are signaled by raising errors.OpPrereqError.
5554 hostname = self.hostname
5555 node = hostname.name
5556 primary_ip = self.op.primary_ip = hostname.ip
5557 if self.op.secondary_ip is None:
5558 if self.primary_ip_family == netutils.IP6Address.family:
5559 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5560 " IPv4 address must be given as secondary",
5562 self.op.secondary_ip = primary_ip
5564 secondary_ip = self.op.secondary_ip
5565 if not netutils.IP4Address.IsValid(secondary_ip):
5566 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5567 " address" % secondary_ip, errors.ECODE_INVAL)
5569 node_list = cfg.GetNodeList()
5570 if not self.op.readd and node in node_list:
5571 raise errors.OpPrereqError("Node %s is already in the configuration" %
5572 node, errors.ECODE_EXISTS)
5573 elif self.op.readd and node not in node_list:
5574 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5577 self.changed_primary_ip = False
5579 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5580 if self.op.readd and node == existing_node_name:
5581 if existing_node.secondary_ip != secondary_ip:
5582 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5583 " address configuration as before",
5585 if existing_node.primary_ip != primary_ip:
5586 self.changed_primary_ip = True
5590 if (existing_node.primary_ip == primary_ip or
5591 existing_node.secondary_ip == primary_ip or
5592 existing_node.primary_ip == secondary_ip or
5593 existing_node.secondary_ip == secondary_ip):
5594 raise errors.OpPrereqError("New node ip address(es) conflict with"
5595 " existing node %s" % existing_node.name,
5596 errors.ECODE_NOTUNIQUE)
5598 # After this 'if' block, None is no longer a valid value for the
5599 # _capable op attributes
5601 old_node = self.cfg.GetNodeInfo(node)
5602 assert old_node is not None, "Can't retrieve locked node %s" % node
5603 for attr in self._NFLAGS:
5604 if getattr(self.op, attr) is None:
5605 setattr(self.op, attr, getattr(old_node, attr))
5607 for attr in self._NFLAGS:
5608 if getattr(self.op, attr) is None:
5609 setattr(self.op, attr, True)
5611 if self.op.readd and not self.op.vm_capable:
5612 pri, sec = cfg.GetNodeInstances(node)
5614 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5615 " flag set to false, but it already holds"
5616 " instances" % node,
5619 # check that the type of the node (single versus dual homed) is the
5620 # same as for the master
5621 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5622 master_singlehomed = myself.secondary_ip == myself.primary_ip
5623 newbie_singlehomed = secondary_ip == primary_ip
5624 if master_singlehomed != newbie_singlehomed:
5625 if master_singlehomed:
5626 raise errors.OpPrereqError("The master has no secondary ip but the"
5627 " new node has one",
5630 raise errors.OpPrereqError("The master has a secondary ip but the"
5631 " new node doesn't have one",
5634 # checks reachability
5635 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5636 raise errors.OpPrereqError("Node not reachable by ping",
5637 errors.ECODE_ENVIRON)
5639 if not newbie_singlehomed:
5640 # check reachability from my secondary ip to newbie's secondary ip
5641 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5642 source=myself.secondary_ip):
5643 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5644 " based ping to node daemon port",
5645 errors.ECODE_ENVIRON)
5652 if self.op.master_capable:
5653 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5655 self.master_candidate = False
5658 self.new_node = old_node
5660 node_group = cfg.LookupNodeGroup(self.op.group)
5661 self.new_node = objects.Node(name=node,
5662 primary_ip=primary_ip,
5663 secondary_ip=secondary_ip,
5664 master_candidate=self.master_candidate,
5665 offline=False, drained=False,
5668 if self.op.ndparams:
5669 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5671 if self.op.hv_state:
5672 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5674 if self.op.disk_state:
5675 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5677 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5678 # it a property on the base class.
5679 result = rpc.DnsOnlyRunner().call_version([node])[node]
5680 result.Raise("Can't get version information from node %s" % node)
5681 if constants.PROTOCOL_VERSION == result.payload:
5682 logging.info("Communication to node %s fine, sw version %s match",
5683 node, result.payload)
5685 raise errors.OpPrereqError("Version mismatch master version %s,"
5686 " node version %s" %
5687 (constants.PROTOCOL_VERSION, result.payload),
5688 errors.ECODE_ENVIRON)
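# Illustrative sketch (assumed hostname, not part of the LU): the version RPC
# used above returns the remote node daemon's protocol version as the result
# payload, so a compatible node satisfies something like:
#
#   vresult = rpc.DnsOnlyRunner().call_version(["node2.example.com"])
#   vresult["node2.example.com"].payload == constants.PROTOCOL_VERSION
#
# Any mismatch (or RPC failure) aborts the prerequisite check above.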
5690 def Exec(self, feedback_fn):
5691 """Adds the new node to the cluster.
5694 new_node = self.new_node
5695 node = new_node.name
5697 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5700 # We are adding a new node, so we assume it's powered
5701 new_node.powered = True
5703 # for re-adds, reset the offline/drained/master-candidate flags;
5704 # we need to reset here, otherwise offline would prevent RPC calls
5705 # later in the procedure; this also means that if the re-add
5706 # fails, we are left with a non-offlined, broken node
5708 new_node.drained = new_node.offline = False # pylint: disable=W0201
5709 self.LogInfo("Readding a node, the offline/drained flags were reset")
5710 # if we demote the node, we do cleanup later in the procedure
5711 new_node.master_candidate = self.master_candidate
5712 if self.changed_primary_ip:
5713 new_node.primary_ip = self.op.primary_ip
5715 # copy the master/vm_capable flags
5716 for attr in self._NFLAGS:
5717 setattr(new_node, attr, getattr(self.op, attr))
5719 # notify the user about any possible mc promotion
5720 if new_node.master_candidate:
5721 self.LogInfo("Node will be a master candidate")
5723 if self.op.ndparams:
5724 new_node.ndparams = self.op.ndparams
5726 new_node.ndparams = {}
5728 if self.op.hv_state:
5729 new_node.hv_state_static = self.new_hv_state
5731 if self.op.disk_state:
5732 new_node.disk_state_static = self.new_disk_state
5734 # Add node to our /etc/hosts, and add key to known_hosts
5735 if self.cfg.GetClusterInfo().modify_etc_hosts:
5736 master_node = self.cfg.GetMasterNode()
5737 result = self.rpc.call_etc_hosts_modify(master_node,
5738 constants.ETC_HOSTS_ADD,
5741 result.Raise("Can't update hosts file with new host data")
5743 if new_node.secondary_ip != new_node.primary_ip:
5744 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5747 node_verify_list = [self.cfg.GetMasterNode()]
5748 node_verify_param = {
5749 constants.NV_NODELIST: ([node], {}),
5750 # TODO: do a node-net-test as well?
5753 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5754 self.cfg.GetClusterName())
5755 for verifier in node_verify_list:
5756 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5757 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5759 for failed in nl_payload:
5760 feedback_fn("ssh/hostname verification failed"
5761 " (checking from %s): %s" %
5762 (verifier, nl_payload[failed]))
5763 raise errors.OpExecError("ssh/hostname verification failed")
5766 _RedistributeAncillaryFiles(self)
5767 self.context.ReaddNode(new_node)
5768 # make sure we redistribute the config
5769 self.cfg.Update(new_node, feedback_fn)
5770 # and make sure the new node will not have old files around
5771 if not new_node.master_candidate:
5772 result = self.rpc.call_node_demote_from_mc(new_node.name)
5773 msg = result.fail_msg
5775 self.LogWarning("Node failed to demote itself from master"
5776 " candidate status: %s" % msg)
5778 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5779 additional_vm=self.op.vm_capable)
5780 self.context.AddNode(new_node, self.proc.GetECId())
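# A minimal usage sketch, assuming an opcode-submitting client; the field
# names mirror the self.op attributes consulted by LUNodeAdd above and should
# be checked against the opcodes module before relying on them:
#
#   op = opcodes.OpNodeAdd(node_name="node4.example.com", readd=False,
#                          group="default", master_capable=True,
#                          vm_capable=True)
#
# Both "gnt-node add" and the RAPI typically end up submitting such an opcode,
# which is then processed by the LU above.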
5783 class LUNodeSetParams(LogicalUnit):
5784 """Modifies the parameters of a node.
5786 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5787 to the node role (as _ROLE_*)
5788 @cvar _R2F: a dictionary from node role to tuples of flags
5789 @cvar _FLAGS: a list of attribute names corresponding to the flags
5792 HPATH = "node-modify"
5793 HTYPE = constants.HTYPE_NODE
5795 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5797 (True, False, False): _ROLE_CANDIDATE,
5798 (False, True, False): _ROLE_DRAINED,
5799 (False, False, True): _ROLE_OFFLINE,
5800 (False, False, False): _ROLE_REGULAR,
5802 _R2F = dict((v, k) for k, v in _F2R.items())
5803 _FLAGS = ["master_candidate", "drained", "offline"]
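# Illustrative reading of the tables above: a node that is a master candidate
# but neither drained nor offline maps to _ROLE_CANDIDATE, and the reverse
# table recovers the flag tuple, e.g.:
#
#   _F2R[(True, False, False)] == _ROLE_CANDIDATE
#   _R2F[_ROLE_DRAINED] == (False, True, False)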
5805 def CheckArguments(self):
5806 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5807 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5808 self.op.master_capable, self.op.vm_capable,
5809 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5811 if all_mods.count(None) == len(all_mods):
5812 raise errors.OpPrereqError("Please pass at least one modification",
5814 if all_mods.count(True) > 1:
5815 raise errors.OpPrereqError("Can't set the node into more than one"
5816 " state at the same time",
5819 # Boolean value that tells us whether we might be demoting from MC
5820 self.might_demote = (self.op.master_candidate == False or
5821 self.op.offline == True or
5822 self.op.drained == True or
5823 self.op.master_capable == False)
5825 if self.op.secondary_ip:
5826 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5827 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5828 " address" % self.op.secondary_ip,
5831 self.lock_all = self.op.auto_promote and self.might_demote
5832 self.lock_instances = self.op.secondary_ip is not None
5834 def _InstanceFilter(self, instance):
5835 """Filter for getting affected instances.
5838 return (instance.disk_template in constants.DTS_INT_MIRROR and
5839 self.op.node_name in instance.all_nodes)
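# Illustrative behaviour: for an instance using an internally mirrored disk
# template (e.g. DRBD) that has this node among its nodes,
#
#   self._InstanceFilter(instance)  # -> True
#
# while a plain or file-based instance (not in DTS_INT_MIRROR) yields False.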
5841 def ExpandNames(self):
5843 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5845 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5847 # Since modifying a node can have severe effects on currently running
5848 # operations, the resource lock is at least acquired in shared mode
5849 self.needed_locks[locking.LEVEL_NODE_RES] = \
5850 self.needed_locks[locking.LEVEL_NODE]
5852 # Get node resource and instance locks in shared mode; they are not used
5853 # for anything but read-only access
5854 self.share_locks[locking.LEVEL_NODE_RES] = 1
5855 self.share_locks[locking.LEVEL_INSTANCE] = 1
5857 if self.lock_instances:
5858 self.needed_locks[locking.LEVEL_INSTANCE] = \
5859 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5861 def BuildHooksEnv(self):
5864 This runs on the master node.
5868 "OP_TARGET": self.op.node_name,
5869 "MASTER_CANDIDATE": str(self.op.master_candidate),
5870 "OFFLINE": str(self.op.offline),
5871 "DRAINED": str(self.op.drained),
5872 "MASTER_CAPABLE": str(self.op.master_capable),
5873 "VM_CAPABLE": str(self.op.vm_capable),
5876 def BuildHooksNodes(self):
5877 """Build hooks nodes.
5880 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5883 def CheckPrereq(self):
5884 """Check prerequisites.
5886 This only checks the instance list against the existing names.
5889 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5891 if self.lock_instances:
5892 affected_instances = \
5893 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5895 # Verify instance locks
5896 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5897 wanted_instances = frozenset(affected_instances.keys())
5898 if wanted_instances - owned_instances:
5899 raise errors.OpPrereqError("Instances affected by changing node %s's"
5900 " secondary IP address have changed since"
5901 " locks were acquired, wanted '%s', have"
5902 " '%s'; retry the operation" %
5904 utils.CommaJoin(wanted_instances),
5905 utils.CommaJoin(owned_instances)),
5908 affected_instances = None
5910 if (self.op.master_candidate is not None or
5911 self.op.drained is not None or
5912 self.op.offline is not None):
5913 # we can't change the master's node flags
5914 if self.op.node_name == self.cfg.GetMasterNode():
5915 raise errors.OpPrereqError("The master role can be changed"
5916 " only via master-failover",
5919 if self.op.master_candidate and not node.master_capable:
5920 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5921 " it a master candidate" % node.name,
5924 if self.op.vm_capable == False:
5925 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5927 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5928 " the vm_capable flag" % node.name,
5931 if node.master_candidate and self.might_demote and not self.lock_all:
5932 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5933 # check if after removing the current node, we're missing master candidates
5935 (mc_remaining, mc_should, _) = \
5936 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5937 if mc_remaining < mc_should:
5938 raise errors.OpPrereqError("Not enough master candidates, please"
5939 " pass auto promote option to allow"
5940 " promotion (--auto-promote or RAPI"
5941 " auto_promote=True)", errors.ECODE_STATE)
5943 self.old_flags = old_flags = (node.master_candidate,
5944 node.drained, node.offline)
5945 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5946 self.old_role = old_role = self._F2R[old_flags]
5948 # Check for ineffective changes
5949 for attr in self._FLAGS:
5950 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5951 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5952 setattr(self.op, attr, None)
5954 # Past this point, any flag change to False means a transition
5955 # away from the respective state, as only real changes are kept
5957 # TODO: We might query the real power state if it supports OOB
5958 if _SupportsOob(self.cfg, node):
5959 if self.op.offline is False and not (node.powered or
5960 self.op.powered == True):
5961 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5962 " offline status can be reset") %
5964 elif self.op.powered is not None:
5965 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5966 " as it does not support out-of-band"
5967 " handling") % self.op.node_name)
5969 # If we're being de-offlined or un-drained, we'll promote ourselves to MC if needed
5970 if (self.op.drained == False or self.op.offline == False or
5971 (self.op.master_capable and not node.master_capable)):
5972 if _DecideSelfPromotion(self):
5973 self.op.master_candidate = True
5974 self.LogInfo("Auto-promoting node to master candidate")
5976 # If we're no longer master capable, we'll demote ourselves from MC
5977 if self.op.master_capable == False and node.master_candidate:
5978 self.LogInfo("Demoting from master candidate")
5979 self.op.master_candidate = False
5982 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5983 if self.op.master_candidate:
5984 new_role = self._ROLE_CANDIDATE
5985 elif self.op.drained:
5986 new_role = self._ROLE_DRAINED
5987 elif self.op.offline:
5988 new_role = self._ROLE_OFFLINE
5989 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5990 # False is still in new flags, which means we're un-setting (the offline state)
5992 new_role = self._ROLE_REGULAR
5993 else: # no new flags, nothing, keep old role
5996 self.new_role = new_role
5998 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5999 # Trying to transition out of offline status
6000 result = self.rpc.call_version([node.name])[node.name]
6002 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6003 " to report its version: %s" %
6004 (node.name, result.fail_msg),
6007 self.LogWarning("Transitioning node from offline to online state"
6008 " without using re-add. Please make sure the node"
6011 if self.op.secondary_ip:
6012 # Ok even without locking, because this can't be changed by any LU
6013 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6014 master_singlehomed = master.secondary_ip == master.primary_ip
6015 if master_singlehomed and self.op.secondary_ip:
6016 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6017 " homed cluster", errors.ECODE_INVAL)
6019 assert not (frozenset(affected_instances) -
6020 self.owned_locks(locking.LEVEL_INSTANCE))
6023 if affected_instances:
6024 raise errors.OpPrereqError("Cannot change secondary IP address:"
6025 " offline node has instances (%s)"
6026 " configured to use it" %
6027 utils.CommaJoin(affected_instances.keys()))
6029 # On online nodes, check that no instances are running, and that
6030 # the node has the new ip and we can reach it.
6031 for instance in affected_instances.values():
6032 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6033 msg="cannot change secondary ip")
6035 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6036 if master.name != node.name:
6037 # check reachability from master secondary ip to new secondary ip
6038 if not netutils.TcpPing(self.op.secondary_ip,
6039 constants.DEFAULT_NODED_PORT,
6040 source=master.secondary_ip):
6041 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6042 " based ping to node daemon port",
6043 errors.ECODE_ENVIRON)
6045 if self.op.ndparams:
6046 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6047 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6048 self.new_ndparams = new_ndparams
6050 if self.op.hv_state:
6051 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6052 self.node.hv_state_static)
6054 if self.op.disk_state:
6055 self.new_disk_state = \
6056 _MergeAndVerifyDiskState(self.op.disk_state,
6057 self.node.disk_state_static)
6059 def Exec(self, feedback_fn):
6064 old_role = self.old_role
6065 new_role = self.new_role
6069 if self.op.ndparams:
6070 node.ndparams = self.new_ndparams
6072 if self.op.powered is not None:
6073 node.powered = self.op.powered
6075 if self.op.hv_state:
6076 node.hv_state_static = self.new_hv_state
6078 if self.op.disk_state:
6079 node.disk_state_static = self.new_disk_state
6081 for attr in ["master_capable", "vm_capable"]:
6082 val = getattr(self.op, attr)
6084 setattr(node, attr, val)
6085 result.append((attr, str(val)))
6087 if new_role != old_role:
6088 # Tell the node to demote itself, if no longer MC and not offline
6089 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6090 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6092 self.LogWarning("Node failed to demote itself: %s", msg)
6094 new_flags = self._R2F[new_role]
6095 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6097 result.append((desc, str(nf)))
6098 (node.master_candidate, node.drained, node.offline) = new_flags
6100 # we locked all nodes, so we adjust the candidate pool before updating this node
6102 _AdjustCandidatePool(self, [node.name])
6104 if self.op.secondary_ip:
6105 node.secondary_ip = self.op.secondary_ip
6106 result.append(("secondary_ip", self.op.secondary_ip))
6108 # this will trigger configuration file update, if needed
6109 self.cfg.Update(node, feedback_fn)
6111 # this will trigger job queue propagation or cleanup if the mc flag changed
6113 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6114 self.context.ReaddNode(node)
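# The accumulated "result" above is a list of (parameter, new value as a
# string) pairs handed back to the caller, e.g. (illustrative values):
#
#   [("master_candidate", "True"), ("secondary_ip", "192.0.2.10")]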
6119 class LUNodePowercycle(NoHooksLU):
6120 """Powercycles a node.
6125 def CheckArguments(self):
6126 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6127 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6128 raise errors.OpPrereqError("The node is the master and the force"
6129 " parameter was not set",
6132 def ExpandNames(self):
6133 """Locking for PowercycleNode.
6135 This is a last-resort option and shouldn't block on other
6136 jobs. Therefore, we grab no locks.
6139 self.needed_locks = {}
6141 def Exec(self, feedback_fn):
6145 result = self.rpc.call_node_powercycle(self.op.node_name,
6146 self.cfg.GetHypervisorType())
6147 result.Raise("Failed to schedule the reboot")
6148 return result.payload
6151 class LUClusterQuery(NoHooksLU):
6152 """Query cluster configuration.
6157 def ExpandNames(self):
6158 self.needed_locks = {}
6160 def Exec(self, feedback_fn):
6161 """Return cluster config.
6164 cluster = self.cfg.GetClusterInfo()
6167 # Filter just for enabled hypervisors
6168 for os_name, hv_dict in cluster.os_hvp.items():
6169 os_hvp[os_name] = {}
6170 for hv_name, hv_params in hv_dict.items():
6171 if hv_name in cluster.enabled_hypervisors:
6172 os_hvp[os_name][hv_name] = hv_params
6174 # Convert ip_family to ip_version
6175 primary_ip_version = constants.IP4_VERSION
6176 if cluster.primary_ip_family == netutils.IP6Address.family:
6177 primary_ip_version = constants.IP6_VERSION
6180 "software_version": constants.RELEASE_VERSION,
6181 "protocol_version": constants.PROTOCOL_VERSION,
6182 "config_version": constants.CONFIG_VERSION,
6183 "os_api_version": max(constants.OS_API_VERSIONS),
6184 "export_version": constants.EXPORT_VERSION,
6185 "architecture": runtime.GetArchInfo(),
6186 "name": cluster.cluster_name,
6187 "master": cluster.master_node,
6188 "default_hypervisor": cluster.primary_hypervisor,
6189 "enabled_hypervisors": cluster.enabled_hypervisors,
6190 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6191 for hypervisor_name in cluster.enabled_hypervisors]),
6193 "beparams": cluster.beparams,
6194 "osparams": cluster.osparams,
6195 "ipolicy": cluster.ipolicy,
6196 "nicparams": cluster.nicparams,
6197 "ndparams": cluster.ndparams,
6198 "diskparams": cluster.diskparams,
6199 "candidate_pool_size": cluster.candidate_pool_size,
6200 "master_netdev": cluster.master_netdev,
6201 "master_netmask": cluster.master_netmask,
6202 "use_external_mip_script": cluster.use_external_mip_script,
6203 "volume_group_name": cluster.volume_group_name,
6204 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6205 "file_storage_dir": cluster.file_storage_dir,
6206 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6207 "maintain_node_health": cluster.maintain_node_health,
6208 "ctime": cluster.ctime,
6209 "mtime": cluster.mtime,
6210 "uuid": cluster.uuid,
6211 "tags": list(cluster.GetTags()),
6212 "uid_pool": cluster.uid_pool,
6213 "default_iallocator": cluster.default_iallocator,
6214 "reserved_lvs": cluster.reserved_lvs,
6215 "primary_ip_version": primary_ip_version,
6216 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6217 "hidden_os": cluster.hidden_os,
6218 "blacklisted_os": cluster.blacklisted_os,
6224 class LUClusterConfigQuery(NoHooksLU):
6225 """Return configuration values.
6230 def CheckArguments(self):
6231 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6233 def ExpandNames(self):
6234 self.cq.ExpandNames(self)
6236 def DeclareLocks(self, level):
6237 self.cq.DeclareLocks(self, level)
6239 def Exec(self, feedback_fn):
6240 result = self.cq.OldStyleQuery(self)
6242 assert len(result) == 1
6247 class _ClusterQuery(_QueryBase):
6248 FIELDS = query.CLUSTER_FIELDS
6250 #: Do not sort (there is only one item)
6253 def ExpandNames(self, lu):
6254 lu.needed_locks = {}
6256 # The following variables interact with _QueryBase._GetNames
6257 self.wanted = locking.ALL_SET
6258 self.do_locking = self.use_locking
6261 raise errors.OpPrereqError("Can not use locking for cluster queries",
6264 def DeclareLocks(self, lu, level):
6267 def _GetQueryData(self, lu):
6268 Computes the requested cluster data.
6271 # Locking is not used
6272 assert not (compat.any(lu.glm.is_owned(level)
6273 for level in locking.LEVELS
6274 if level != locking.LEVEL_CLUSTER) or
6275 self.do_locking or self.use_locking)
6277 if query.CQ_CONFIG in self.requested_data:
6278 cluster = lu.cfg.GetClusterInfo()
6280 cluster = NotImplemented
6282 if query.CQ_QUEUE_DRAINED in self.requested_data:
6283 drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6285 drain_flag = NotImplemented
6287 if query.CQ_WATCHER_PAUSE in self.requested_data:
6288 watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6290 watcher_pause = NotImplemented
6292 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6295 class LUInstanceActivateDisks(NoHooksLU):
6296 """Bring up an instance's disks.
6301 def ExpandNames(self):
6302 self._ExpandAndLockInstance()
6303 self.needed_locks[locking.LEVEL_NODE] = []
6304 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6306 def DeclareLocks(self, level):
6307 if level == locking.LEVEL_NODE:
6308 self._LockInstancesNodes()
6310 def CheckPrereq(self):
6311 """Check prerequisites.
6313 This checks that the instance is in the cluster.
6316 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6317 assert self.instance is not None, \
6318 "Cannot retrieve locked instance %s" % self.op.instance_name
6319 _CheckNodeOnline(self, self.instance.primary_node)
6321 def Exec(self, feedback_fn):
6322 """Activate the disks.
6325 disks_ok, disks_info = \
6326 _AssembleInstanceDisks(self, self.instance,
6327 ignore_size=self.op.ignore_size)
6329 raise errors.OpExecError("Cannot activate block devices")
6334 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6336 """Prepare the block devices for an instance.
6338 This sets up the block devices on all nodes.
6340 @type lu: L{LogicalUnit}
6341 @param lu: the logical unit on whose behalf we execute
6342 @type instance: L{objects.Instance}
6343 @param instance: the instance for whose disks we assemble
6344 @type disks: list of L{objects.Disk} or None
6345 @param disks: which disks to assemble (or all, if None)
6346 @type ignore_secondaries: boolean
6347 @param ignore_secondaries: if true, errors on secondary nodes
6348 won't result in an error return from the function
6349 @type ignore_size: boolean
6350 @param ignore_size: if true, the current known size of the disk
6351 will not be used during the disk activation, useful for cases
6352 when the size is wrong
6353 @return: False if the operation failed, otherwise a list of
6354 (host, instance_visible_name, node_visible_name)
6355 with the mapping from node devices to instance devices
6360 iname = instance.name
6361 disks = _ExpandCheckDisks(instance, disks)
6363 # With the two-pass mechanism we try to reduce the window of
6364 # opportunity for the race condition of switching DRBD to primary
6365 # before the handshake has occurred, but we do not eliminate it
6367 # The proper fix would be to wait (with some limits) until the
6368 # connection has been made and drbd transitions from WFConnection
6369 # into any other network-connected state (Connected, SyncTarget,
6372 # 1st pass, assemble on all nodes in secondary mode
6373 for idx, inst_disk in enumerate(disks):
6374 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6376 node_disk = node_disk.Copy()
6377 node_disk.UnsetSize()
6378 lu.cfg.SetDiskID(node_disk, node)
6379 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6381 msg = result.fail_msg
6383 is_offline_secondary = (node in instance.secondary_nodes and
6385 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6386 " (is_primary=False, pass=1): %s",
6387 inst_disk.iv_name, node, msg)
6388 if not (ignore_secondaries or is_offline_secondary):
6391 # FIXME: race condition on drbd migration to primary
6393 # 2nd pass, do only the primary node
6394 for idx, inst_disk in enumerate(disks):
6397 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6398 if node != instance.primary_node:
6401 node_disk = node_disk.Copy()
6402 node_disk.UnsetSize()
6403 lu.cfg.SetDiskID(node_disk, node)
6404 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6406 msg = result.fail_msg
6408 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6409 " (is_primary=True, pass=2): %s",
6410 inst_disk.iv_name, node, msg)
6413 dev_path = result.payload
6415 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6417 # leave the disks configured for the primary node
6418 # this is a workaround that would be fixed better by
6419 # improving the logical/physical id handling
6421 lu.cfg.SetDiskID(disk, instance.primary_node)
6423 return disks_ok, device_info
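# Illustrative return value (hypothetical names): for a healthy single-disk
# DRBD instance, the function returns roughly
#
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
#
# i.e. a success flag plus one (node, instance-visible name, node-visible
# device path) tuple per disk assembled on the primary node.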
6426 def _StartInstanceDisks(lu, instance, force):
6427 """Start the disks of an instance.
6430 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6431 ignore_secondaries=force)
6433 _ShutdownInstanceDisks(lu, instance)
6434 if force is not None and not force:
6435 lu.proc.LogWarning("", hint="If the message above refers to a"
6437 " you can retry the operation using '--force'.")
6438 raise errors.OpExecError("Disk consistency error")
6441 class LUInstanceDeactivateDisks(NoHooksLU):
6442 """Shutdown an instance's disks.
6447 def ExpandNames(self):
6448 self._ExpandAndLockInstance()
6449 self.needed_locks[locking.LEVEL_NODE] = []
6450 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6452 def DeclareLocks(self, level):
6453 if level == locking.LEVEL_NODE:
6454 self._LockInstancesNodes()
6456 def CheckPrereq(self):
6457 """Check prerequisites.
6459 This checks that the instance is in the cluster.
6462 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6463 assert self.instance is not None, \
6464 "Cannot retrieve locked instance %s" % self.op.instance_name
6466 def Exec(self, feedback_fn):
6467 """Deactivate the disks
6470 instance = self.instance
6472 _ShutdownInstanceDisks(self, instance)
6474 _SafeShutdownInstanceDisks(self, instance)
6477 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6478 """Shutdown block devices of an instance.
6480 This function checks if an instance is running, before calling
6481 _ShutdownInstanceDisks.
6484 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6485 _ShutdownInstanceDisks(lu, instance, disks=disks)
6488 def _ExpandCheckDisks(instance, disks):
6489 """Return the instance disks selected by the disks list
6491 @type disks: list of L{objects.Disk} or None
6492 @param disks: selected disks
6493 @rtype: list of L{objects.Disk}
6494 @return: selected instance disks to act on
6498 return instance.disks
6500 if not set(disks).issubset(instance.disks):
6501 raise errors.ProgrammerError("Can only act on disks belonging to the"
6506 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6507 """Shutdown block devices of an instance.
6509 This does the shutdown on all nodes of the instance.
6511 If ignore_primary is false, errors on the primary node make the function report failure; otherwise they are ignored.
6516 disks = _ExpandCheckDisks(instance, disks)
6519 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6520 lu.cfg.SetDiskID(top_disk, node)
6521 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6522 msg = result.fail_msg
6524 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6525 disk.iv_name, node, msg)
6526 if ((node == instance.primary_node and not ignore_primary) or
6527 (node != instance.primary_node and not result.offline)):
6532 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6533 """Checks if a node has enough free memory.
6535 This function checks if a given node has the needed amount of free
6536 memory. In case the node has less memory or we cannot get the
6537 information from the node, this function raises an OpPrereqError
6540 @type lu: C{LogicalUnit}
6541 @param lu: a logical unit from which we get configuration data
6543 @param node: the node to check
6544 @type reason: C{str}
6545 @param reason: string to use in the error message
6546 @type requested: C{int}
6547 @param requested: the amount of memory in MiB to check for
6548 @type hypervisor_name: C{str}
6549 @param hypervisor_name: the hypervisor to ask for memory stats
6551 @return: node current free memory
6552 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6553 we cannot check the node
6556 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6557 nodeinfo[node].Raise("Can't get data from node %s" % node,
6558 prereq=True, ecode=errors.ECODE_ENVIRON)
6559 (_, _, (hv_info, )) = nodeinfo[node].payload
6561 free_mem = hv_info.get("memory_free", None)
6562 if not isinstance(free_mem, int):
6563 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6564 " was '%s'" % (node, free_mem),
6565 errors.ECODE_ENVIRON)
6566 if requested > free_mem:
6567 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6568 " needed %s MiB, available %s MiB" %
6569 (node, reason, requested, free_mem),
6574 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6575 Checks if nodes have enough free disk space in all the VGs.
6577 This function checks if all given nodes have the needed amount of
6578 free disk. In case any node has less disk or we cannot get the
6579 information from the node, this function raises an OpPrereqError
6582 @type lu: C{LogicalUnit}
6583 @param lu: a logical unit from which we get configuration data
6584 @type nodenames: C{list}
6585 @param nodenames: the list of node names to check
6586 @type req_sizes: C{dict}
6587 @param req_sizes: the hash of vg and corresponding amount of disk in MiB
6589 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6590 or we cannot check the node
6593 for vg, req_size in req_sizes.items():
6594 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
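# Usage sketch (illustrative values): req_sizes maps each volume group to the
# total space, in MiB, that must be free on every node, e.g.:
#
#   _CheckNodesFreeDiskPerVG(self, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 10240})
#
# which checks for 10 GiB of free space in "xenvg" on both nodes.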
6597 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6598 """Checks if nodes have enough free disk space in the specified VG.
6600 This function checks if all given nodes have the needed amount of
6601 free disk. In case any node has less disk or we cannot get the
6602 information from the node, this function raises an OpPrereqError
6605 @type lu: C{LogicalUnit}
6606 @param lu: a logical unit from which we get configuration data
6607 @type nodenames: C{list}
6608 @param nodenames: the list of node names to check
6610 @param vg: the volume group to check
6611 @type requested: C{int}
6612 @param requested: the amount of disk in MiB to check for
6613 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6614 or we cannot check the node
6617 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6618 for node in nodenames:
6619 info = nodeinfo[node]
6620 info.Raise("Cannot get current information from node %s" % node,
6621 prereq=True, ecode=errors.ECODE_ENVIRON)
6622 (_, (vg_info, ), _) = info.payload
6623 vg_free = vg_info.get("vg_free", None)
6624 if not isinstance(vg_free, int):
6625 raise errors.OpPrereqError("Can't compute free disk space on node"
6626 " %s for vg %s, result was '%s'" %
6627 (node, vg, vg_free), errors.ECODE_ENVIRON)
6628 if requested > vg_free:
6629 raise errors.OpPrereqError("Not enough disk space on target node %s"
6630 " vg %s: required %d MiB, available %d MiB" %
6631 (node, vg, requested, vg_free),
6635 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6636 """Checks if nodes have enough physical CPUs
6638 This function checks if all given nodes have the needed number of
6639 physical CPUs. In case any node has fewer CPUs or we cannot get the
6640 information from the node, this function raises an OpPrereqError
6643 @type lu: C{LogicalUnit}
6644 @param lu: a logical unit from which we get configuration data
6645 @type nodenames: C{list}
6646 @param nodenames: the list of node names to check
6647 @type requested: C{int}
6648 @param requested: the minimum acceptable number of physical CPUs
6649 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6650 or we cannot check the node
6653 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6654 for node in nodenames:
6655 info = nodeinfo[node]
6656 info.Raise("Cannot get current information from node %s" % node,
6657 prereq=True, ecode=errors.ECODE_ENVIRON)
6658 (_, _, (hv_info, )) = info.payload
6659 num_cpus = hv_info.get("cpu_total", None)
6660 if not isinstance(num_cpus, int):
6661 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6662 " on node %s, result was '%s'" %
6663 (node, num_cpus), errors.ECODE_ENVIRON)
6664 if requested > num_cpus:
6665 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6666 "required" % (node, num_cpus, requested),
6670 class LUInstanceStartup(LogicalUnit):
6671 """Starts an instance.
6674 HPATH = "instance-start"
6675 HTYPE = constants.HTYPE_INSTANCE
6678 def CheckArguments(self):
6680 if self.op.beparams:
6681 # fill the beparams dict
6682 objects.UpgradeBeParams(self.op.beparams)
6683 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6685 def ExpandNames(self):
6686 self._ExpandAndLockInstance()
6687 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6689 def DeclareLocks(self, level):
6690 if level == locking.LEVEL_NODE_RES:
6691 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6693 def BuildHooksEnv(self):
6696 This runs on master, primary and secondary nodes of the instance.
6700 "FORCE": self.op.force,
6703 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6707 def BuildHooksNodes(self):
6708 """Build hooks nodes.
6711 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6714 def CheckPrereq(self):
6715 """Check prerequisites.
6717 This checks that the instance is in the cluster.
6720 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6721 assert self.instance is not None, \
6722 "Cannot retrieve locked instance %s" % self.op.instance_name
6725 if self.op.hvparams:
6726 # check hypervisor parameter syntax (locally)
6727 cluster = self.cfg.GetClusterInfo()
6728 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6729 filled_hvp = cluster.FillHV(instance)
6730 filled_hvp.update(self.op.hvparams)
6731 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6732 hv_type.CheckParameterSyntax(filled_hvp)
6733 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6735 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6737 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6739 if self.primary_offline and self.op.ignore_offline_nodes:
6740 self.proc.LogWarning("Ignoring offline primary node")
6742 if self.op.hvparams or self.op.beparams:
6743 self.proc.LogWarning("Overridden parameters are ignored")
6745 _CheckNodeOnline(self, instance.primary_node)
6747 bep = self.cfg.GetClusterInfo().FillBE(instance)
6748 bep.update(self.op.beparams)
6750 # check that the instance's bridges exist
6751 _CheckInstanceBridgesExist(self, instance)
6753 remote_info = self.rpc.call_instance_info(instance.primary_node,
6755 instance.hypervisor)
6756 remote_info.Raise("Error checking node %s" % instance.primary_node,
6757 prereq=True, ecode=errors.ECODE_ENVIRON)
6758 if not remote_info.payload: # not running already
6759 _CheckNodeFreeMemory(self, instance.primary_node,
6760 "starting instance %s" % instance.name,
6761 bep[constants.BE_MINMEM], instance.hypervisor)
6763 def Exec(self, feedback_fn):
6764 """Start the instance.
6767 instance = self.instance
6768 force = self.op.force
6770 if not self.op.no_remember:
6771 self.cfg.MarkInstanceUp(instance.name)
6773 if self.primary_offline:
6774 assert self.op.ignore_offline_nodes
6775 self.proc.LogInfo("Primary node offline, marked instance as started")
6777 node_current = instance.primary_node
6779 _StartInstanceDisks(self, instance, force)
6782 self.rpc.call_instance_start(node_current,
6783 (instance, self.op.hvparams,
6785 self.op.startup_paused)
6786 msg = result.fail_msg
6788 _ShutdownInstanceDisks(self, instance)
6789 raise errors.OpExecError("Could not start instance: %s" % msg)
6792 class LUInstanceReboot(LogicalUnit):
6793 """Reboot an instance.
6796 HPATH = "instance-reboot"
6797 HTYPE = constants.HTYPE_INSTANCE
6800 def ExpandNames(self):
6801 self._ExpandAndLockInstance()
6803 def BuildHooksEnv(self):
6806 This runs on master, primary and secondary nodes of the instance.
6810 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6811 "REBOOT_TYPE": self.op.reboot_type,
6812 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6815 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6819 def BuildHooksNodes(self):
6820 """Build hooks nodes.
6823 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6826 def CheckPrereq(self):
6827 """Check prerequisites.
6829 This checks that the instance is in the cluster.
6832 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6833 assert self.instance is not None, \
6834 "Cannot retrieve locked instance %s" % self.op.instance_name
6835 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6836 _CheckNodeOnline(self, instance.primary_node)
6838 # check that the instance's bridges exist
6839 _CheckInstanceBridgesExist(self, instance)
6841 def Exec(self, feedback_fn):
6842 """Reboot the instance.
6845 instance = self.instance
6846 ignore_secondaries = self.op.ignore_secondaries
6847 reboot_type = self.op.reboot_type
6849 remote_info = self.rpc.call_instance_info(instance.primary_node,
6851 instance.hypervisor)
6852 remote_info.Raise("Error checking node %s" % instance.primary_node)
6853 instance_running = bool(remote_info.payload)
6855 node_current = instance.primary_node
6857 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6858 constants.INSTANCE_REBOOT_HARD]:
6859 for disk in instance.disks:
6860 self.cfg.SetDiskID(disk, node_current)
6861 result = self.rpc.call_instance_reboot(node_current, instance,
6863 self.op.shutdown_timeout)
6864 result.Raise("Could not reboot instance")
6866 if instance_running:
6867 result = self.rpc.call_instance_shutdown(node_current, instance,
6868 self.op.shutdown_timeout)
6869 result.Raise("Could not shutdown instance for full reboot")
6870 _ShutdownInstanceDisks(self, instance)
6872 self.LogInfo("Instance %s was already stopped, starting now",
6874 _StartInstanceDisks(self, instance, ignore_secondaries)
6875 result = self.rpc.call_instance_start(node_current,
6876 (instance, None, None), False)
6877 msg = result.fail_msg
6879 _ShutdownInstanceDisks(self, instance)
6880 raise errors.OpExecError("Could not start instance for"
6881 " full reboot: %s" % msg)
6883 self.cfg.MarkInstanceUp(instance.name)
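# Summary of the branch above: INSTANCE_REBOOT_SOFT and INSTANCE_REBOOT_HARD
# are handled by a single reboot RPC on the primary node, while a full reboot
# is emulated as shutdown + disk deactivation + start. An illustrative opcode
# (field names assumed from the self.op attributes used above):
#
#   opcodes.OpInstanceReboot(instance_name="inst1.example.com",
#                            reboot_type=constants.INSTANCE_REBOOT_HARD)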
6886 class LUInstanceShutdown(LogicalUnit):
6887 """Shutdown an instance.
6890 HPATH = "instance-stop"
6891 HTYPE = constants.HTYPE_INSTANCE
6894 def ExpandNames(self):
6895 self._ExpandAndLockInstance()
6897 def BuildHooksEnv(self):
6900 This runs on master, primary and secondary nodes of the instance.
6903 env = _BuildInstanceHookEnvByObject(self, self.instance)
6904 env["TIMEOUT"] = self.op.timeout
6907 def BuildHooksNodes(self):
6908 """Build hooks nodes.
6911 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6914 def CheckPrereq(self):
6915 """Check prerequisites.
6917 This checks that the instance is in the cluster.
6920 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6921 assert self.instance is not None, \
6922 "Cannot retrieve locked instance %s" % self.op.instance_name
6924 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6926 self.primary_offline = \
6927 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6929 if self.primary_offline and self.op.ignore_offline_nodes:
6930 self.proc.LogWarning("Ignoring offline primary node")
6932 _CheckNodeOnline(self, self.instance.primary_node)
6934 def Exec(self, feedback_fn):
6935 """Shutdown the instance.
6938 instance = self.instance
6939 node_current = instance.primary_node
6940 timeout = self.op.timeout
6942 if not self.op.no_remember:
6943 self.cfg.MarkInstanceDown(instance.name)
6945 if self.primary_offline:
6946 assert self.op.ignore_offline_nodes
6947 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6949 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6950 msg = result.fail_msg
6952 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6954 _ShutdownInstanceDisks(self, instance)
6957 class LUInstanceReinstall(LogicalUnit):
6958 """Reinstall an instance.
6961 HPATH = "instance-reinstall"
6962 HTYPE = constants.HTYPE_INSTANCE
6965 def ExpandNames(self):
6966 self._ExpandAndLockInstance()
6968 def BuildHooksEnv(self):
6971 This runs on master, primary and secondary nodes of the instance.
6974 return _BuildInstanceHookEnvByObject(self, self.instance)
6976 def BuildHooksNodes(self):
6977 """Build hooks nodes.
6980 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6983 def CheckPrereq(self):
6984 """Check prerequisites.
6986 This checks that the instance is in the cluster and is not running.
6989 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6990 assert instance is not None, \
6991 "Cannot retrieve locked instance %s" % self.op.instance_name
6992 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6993 " offline, cannot reinstall")
6995 if instance.disk_template == constants.DT_DISKLESS:
6996 raise errors.OpPrereqError("Instance '%s' has no disks" %
6997 self.op.instance_name,
6999 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7001 if self.op.os_type is not None:
7003 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7004 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7005 instance_os = self.op.os_type
7007 instance_os = instance.os
7009 nodelist = list(instance.all_nodes)
7011 if self.op.osparams:
7012 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7013 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7014 self.os_inst = i_osdict # the new dict (without defaults)
7018 self.instance = instance
7020 def Exec(self, feedback_fn):
7021 """Reinstall the instance.
7024 inst = self.instance
7026 if self.op.os_type is not None:
7027 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7028 inst.os = self.op.os_type
7029 # Write to configuration
7030 self.cfg.Update(inst, feedback_fn)
7032 _StartInstanceDisks(self, inst, None)
7034 feedback_fn("Running the instance OS create scripts...")
7035 # FIXME: pass debug option from opcode to backend
7036 result = self.rpc.call_instance_os_add(inst.primary_node,
7037 (inst, self.os_inst), True,
7038 self.op.debug_level)
7039 result.Raise("Could not install OS for instance %s on node %s" %
7040 (inst.name, inst.primary_node))
7042 _ShutdownInstanceDisks(self, inst)
7045 class LUInstanceRecreateDisks(LogicalUnit):
7046 """Recreate an instance's missing disks.
7049 HPATH = "instance-recreate-disks"
7050 HTYPE = constants.HTYPE_INSTANCE
7053 _MODIFYABLE = frozenset([
7054 constants.IDISK_SIZE,
7055 constants.IDISK_MODE,
7058 # New or changed disk parameters may have different semantics
7059 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7060 constants.IDISK_ADOPT,
7062 # TODO: Implement support for changing the VG while recreating
7064 constants.IDISK_METAVG,
7067 def CheckArguments(self):
7068 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7069 # Normalize and convert deprecated list of disk indices
7070 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
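# e.g. a legacy request of disks=[2, 0] is normalized to [(0, {}), (2, {})]
# (illustrative values), matching the (index, params) pairs handled below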
7072 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7074 raise errors.OpPrereqError("Some disks have been specified more than"
7075 " once: %s" % utils.CommaJoin(duplicates),
7078 for (idx, params) in self.op.disks:
7079 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7080 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7082 raise errors.OpPrereqError("Parameters for disk %s try to change"
7083 " unmodifyable parameter(s): %s" %
7084 (idx, utils.CommaJoin(unsupported)),
7087 def ExpandNames(self):
7088 self._ExpandAndLockInstance()
7089 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7091 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7092 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7094 self.needed_locks[locking.LEVEL_NODE] = []
7095 self.needed_locks[locking.LEVEL_NODE_RES] = []
7097 def DeclareLocks(self, level):
7098 if level == locking.LEVEL_NODE:
7099 # if we replace the nodes, we only need to lock the old primary,
7100 # otherwise we need to lock all nodes for disk re-creation
7101 primary_only = bool(self.op.nodes)
7102 self._LockInstancesNodes(primary_only=primary_only)
7103 elif level == locking.LEVEL_NODE_RES:
7105 self.needed_locks[locking.LEVEL_NODE_RES] = \
7106 self.needed_locks[locking.LEVEL_NODE][:]
7108 def BuildHooksEnv(self):
7111 This runs on master, primary and secondary nodes of the instance.
7114 return _BuildInstanceHookEnvByObject(self, self.instance)
7116 def BuildHooksNodes(self):
7117 """Build hooks nodes.
7120 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7123 def CheckPrereq(self):
7124 """Check prerequisites.
7126 This checks that the instance is in the cluster and is not running.
7129 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7130 assert instance is not None, \
7131 "Cannot retrieve locked instance %s" % self.op.instance_name
7133 if len(self.op.nodes) != len(instance.all_nodes):
7134 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7135 " %d replacement nodes were specified" %
7136 (instance.name, len(instance.all_nodes),
7137 len(self.op.nodes)),
7139 assert instance.disk_template != constants.DT_DRBD8 or \
7140 len(self.op.nodes) == 2
7141 assert instance.disk_template != constants.DT_PLAIN or \
7142 len(self.op.nodes) == 1
7143 primary_node = self.op.nodes[0]
7145 primary_node = instance.primary_node
7146 _CheckNodeOnline(self, primary_node)
7148 if instance.disk_template == constants.DT_DISKLESS:
7149 raise errors.OpPrereqError("Instance '%s' has no disks" %
7150 self.op.instance_name, errors.ECODE_INVAL)
7152 # if we replace nodes *and* the old primary is offline, we don't check the instance state
7154 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7155 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7156 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7157 if not (self.op.nodes and old_pnode.offline):
7158 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7159 msg="cannot recreate disks")
7162 self.disks = dict(self.op.disks)
7164 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7166 maxidx = max(self.disks.keys())
7167 if maxidx >= len(instance.disks):
7168 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7171 if (self.op.nodes and
7172 sorted(self.disks.keys()) != range(len(instance.disks))):
7173 raise errors.OpPrereqError("Can't recreate disks partially and"
7174 " change the nodes at the same time",
7177 self.instance = instance
7179 def Exec(self, feedback_fn):
7180 """Recreate the disks.
7183 instance = self.instance
7185 assert (self.owned_locks(locking.LEVEL_NODE) ==
7186 self.owned_locks(locking.LEVEL_NODE_RES))
7189 mods = [] # keeps track of needed changes
7191 for idx, disk in enumerate(instance.disks):
7193 changes = self.disks[idx]
7195 # Disk should not be recreated
7199 # update secondaries for disks, if needed
7200 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7201 # need to update the nodes and minors
7202 assert len(self.op.nodes) == 2
7203 assert len(disk.logical_id) == 6 # otherwise disk internals have changed
7205 (_, _, old_port, _, _, old_secret) = disk.logical_id
7206 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7207 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7208 new_minors[0], new_minors[1], old_secret)
7209 assert len(disk.logical_id) == len(new_id)
7213 mods.append((idx, new_id, changes))
7215 # now that we have passed all asserts above, we can apply the mods
7216 # in a single run (to avoid partial changes)
7217 for idx, new_id, changes in mods:
7218 disk = instance.disks[idx]
7219 if new_id is not None:
7220 assert disk.dev_type == constants.LD_DRBD8
7221 disk.logical_id = new_id
7223 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7224 mode=changes.get(constants.IDISK_MODE, None))
7226 # change primary node, if needed
7228 instance.primary_node = self.op.nodes[0]
7229 self.LogWarning("Changing the instance's nodes, you will have to"
7230 " remove any disks left on the older nodes manually")
7233 self.cfg.Update(instance, feedback_fn)
7235 _CreateDisks(self, instance, to_skip=to_skip)
7238 class LUInstanceRename(LogicalUnit):
7239 """Rename an instance.
7242 HPATH = "instance-rename"
7243 HTYPE = constants.HTYPE_INSTANCE
7245 def CheckArguments(self):
7249 if self.op.ip_check and not self.op.name_check:
7250 # TODO: make the ip check more flexible and not depend on the name check
7251 raise errors.OpPrereqError("IP address check requires a name check",
7254 def BuildHooksEnv(self):
7257 This runs on master, primary and secondary nodes of the instance.
7260 env = _BuildInstanceHookEnvByObject(self, self.instance)
7261 env["INSTANCE_NEW_NAME"] = self.op.new_name
7264 def BuildHooksNodes(self):
7265 """Build hooks nodes.
7268 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7271 def CheckPrereq(self):
7272 """Check prerequisites.
7274 This checks that the instance is in the cluster and is not running.
7277 self.op.instance_name = _ExpandInstanceName(self.cfg,
7278 self.op.instance_name)
7279 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7280 assert instance is not None
7281 _CheckNodeOnline(self, instance.primary_node)
7282 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7283 msg="cannot rename")
7284 self.instance = instance
7286 new_name = self.op.new_name
7287 if self.op.name_check:
7288 hostname = netutils.GetHostname(name=new_name)
7289 if hostname.name != new_name:
7290 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7292 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7293 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7294 " same as given hostname '%s'") %
7295 (hostname.name, self.op.new_name),
7297 new_name = self.op.new_name = hostname.name
7298 if (self.op.ip_check and
7299 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7300 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7301 (hostname.ip, new_name),
7302 errors.ECODE_NOTUNIQUE)
7304 instance_list = self.cfg.GetInstanceList()
7305 if new_name in instance_list and new_name != instance.name:
7306 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7307 new_name, errors.ECODE_EXISTS)
7309 def Exec(self, feedback_fn):
7310 """Rename the instance.
7313 inst = self.instance
7314 old_name = inst.name
7316 rename_file_storage = False
7317 if (inst.disk_template in constants.DTS_FILEBASED and
7318 self.op.new_name != inst.name):
7319 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7320 rename_file_storage = True
7322 self.cfg.RenameInstance(inst.name, self.op.new_name)
7323 # Change the instance lock. This is definitely safe while we hold the BGL.
7324 # Otherwise the new lock would have to be added in acquired mode.
7326 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7327 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7329 # re-read the instance from the configuration after rename
7330 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7332 if rename_file_storage:
7333 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7334 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7335 old_file_storage_dir,
7336 new_file_storage_dir)
7337 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7338 " (but the instance has been renamed in Ganeti)" %
7339 (inst.primary_node, old_file_storage_dir,
7340 new_file_storage_dir))
7342 _StartInstanceDisks(self, inst, None)
7344 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7345 old_name, self.op.debug_level)
7346 msg = result.fail_msg
7348 msg = ("Could not run OS rename script for instance %s on node %s"
7349 " (but the instance has been renamed in Ganeti): %s" %
7350 (inst.name, inst.primary_node, msg))
7351 self.proc.LogWarning(msg)
7353 _ShutdownInstanceDisks(self, inst)
7358 class LUInstanceRemove(LogicalUnit):
7359 """Remove an instance.
7362 HPATH = "instance-remove"
7363 HTYPE = constants.HTYPE_INSTANCE
7366 def ExpandNames(self):
7367 self._ExpandAndLockInstance()
7368 self.needed_locks[locking.LEVEL_NODE] = []
7369 self.needed_locks[locking.LEVEL_NODE_RES] = []
7370 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7372 def DeclareLocks(self, level):
7373 if level == locking.LEVEL_NODE:
7374 self._LockInstancesNodes()
7375 elif level == locking.LEVEL_NODE_RES:
7377 self.needed_locks[locking.LEVEL_NODE_RES] = \
7378 self.needed_locks[locking.LEVEL_NODE][:]
7380 def BuildHooksEnv(self):
7383 This runs on master, primary and secondary nodes of the instance.
7386 env = _BuildInstanceHookEnvByObject(self, self.instance)
7387 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7390 def BuildHooksNodes(self):
7391 """Build hooks nodes.
7394 nl = [self.cfg.GetMasterNode()]
7395 nl_post = list(self.instance.all_nodes) + nl
7396 return (nl, nl_post)
7398 def CheckPrereq(self):
7399 """Check prerequisites.
7401 This checks that the instance is in the cluster.
7404 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7405 assert self.instance is not None, \
7406 "Cannot retrieve locked instance %s" % self.op.instance_name
7408 def Exec(self, feedback_fn):
7409 """Remove the instance.
7412 instance = self.instance
7413 logging.info("Shutting down instance %s on node %s",
7414 instance.name, instance.primary_node)
7416 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7417 self.op.shutdown_timeout)
7418 msg = result.fail_msg
7420 if self.op.ignore_failures:
7421 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7423 raise errors.OpExecError("Could not shutdown instance %s on"
7425 (instance.name, instance.primary_node, msg))
7427 assert (self.owned_locks(locking.LEVEL_NODE) ==
7428 self.owned_locks(locking.LEVEL_NODE_RES))
7429 assert not (set(instance.all_nodes) -
7430 self.owned_locks(locking.LEVEL_NODE)), \
7431 "Not owning correct locks"
7433 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7436 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7437 """Utility function to remove an instance.
7440 logging.info("Removing block devices for instance %s", instance.name)
7442 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7443 if not ignore_failures:
7444 raise errors.OpExecError("Can't remove instance's disks")
7445 feedback_fn("Warning: can't remove instance's disks")
7447 logging.info("Removing instance %s out of cluster config", instance.name)
7449 lu.cfg.RemoveInstance(instance.name)
7451 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7452 "Instance lock removal conflict"
7454 # Remove lock for the instance
7455 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7458 class LUInstanceQuery(NoHooksLU):
7459 """Logical unit for querying instances.
7462 # pylint: disable=W0142
7465 def CheckArguments(self):
7466 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7467 self.op.output_fields, self.op.use_locking)
7469 def ExpandNames(self):
7470 self.iq.ExpandNames(self)
7472 def DeclareLocks(self, level):
7473 self.iq.DeclareLocks(self, level)
7475 def Exec(self, feedback_fn):
7476 return self.iq.OldStyleQuery(self)
7479 class LUInstanceFailover(LogicalUnit):
7480 """Failover an instance.
7483 HPATH = "instance-failover"
7484 HTYPE = constants.HTYPE_INSTANCE
7487 def CheckArguments(self):
7488 """Check the arguments.
7491 self.iallocator = getattr(self.op, "iallocator", None)
7492 self.target_node = getattr(self.op, "target_node", None)
7494 def ExpandNames(self):
7495 self._ExpandAndLockInstance()
7497 if self.op.target_node is not None:
7498 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7500 self.needed_locks[locking.LEVEL_NODE] = []
7501 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7503 self.needed_locks[locking.LEVEL_NODE_RES] = []
7504 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7506 ignore_consistency = self.op.ignore_consistency
7507 shutdown_timeout = self.op.shutdown_timeout
7508 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7511 ignore_consistency=ignore_consistency,
7512 shutdown_timeout=shutdown_timeout,
7513 ignore_ipolicy=self.op.ignore_ipolicy)
7514 self.tasklets = [self._migrater]
7516 def DeclareLocks(self, level):
7517 if level == locking.LEVEL_NODE:
7518 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7519 if instance.disk_template in constants.DTS_EXT_MIRROR:
7520 if self.op.target_node is None:
7521 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7523 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7524 self.op.target_node]
7525 del self.recalculate_locks[locking.LEVEL_NODE]
7527 self._LockInstancesNodes()
7528 elif level == locking.LEVEL_NODE_RES:
7530 self.needed_locks[locking.LEVEL_NODE_RES] = \
7531 self.needed_locks[locking.LEVEL_NODE][:]
7533 def BuildHooksEnv(self):
7536 This runs on master, primary and secondary nodes of the instance.
7539 instance = self._migrater.instance
7540 source_node = instance.primary_node
7541 target_node = self.op.target_node
7543 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7544 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7545 "OLD_PRIMARY": source_node,
7546 "NEW_PRIMARY": target_node,
7549 if instance.disk_template in constants.DTS_INT_MIRROR:
7550 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7551 env["NEW_SECONDARY"] = source_node
7553 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7555 env.update(_BuildInstanceHookEnvByObject(self, instance))
7559 def BuildHooksNodes(self):
7560 """Build hooks nodes.
7563 instance = self._migrater.instance
7564 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7565 return (nl, nl + [instance.primary_node])
7568 class LUInstanceMigrate(LogicalUnit):
7569 """Migrate an instance.
7571 This is migration without shutting down, compared to the failover,
7572 which is done with shutdown.
7575 HPATH = "instance-migrate"
7576 HTYPE = constants.HTYPE_INSTANCE
7579 def ExpandNames(self):
7580 self._ExpandAndLockInstance()
7582 if self.op.target_node is not None:
7583 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7585 self.needed_locks[locking.LEVEL_NODE] = []
7586 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7588 self.needed_locks[locking.LEVEL_NODE] = []
7589 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7592 TLMigrateInstance(self, self.op.instance_name,
7593 cleanup=self.op.cleanup,
7595 fallback=self.op.allow_failover,
7596 allow_runtime_changes=self.op.allow_runtime_changes,
7597 ignore_ipolicy=self.op.ignore_ipolicy)
7598 self.tasklets = [self._migrater]
7600 def DeclareLocks(self, level):
7601 if level == locking.LEVEL_NODE:
7602 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7603 if instance.disk_template in constants.DTS_EXT_MIRROR:
7604 if self.op.target_node is None:
7605 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7607 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7608 self.op.target_node]
7609 del self.recalculate_locks[locking.LEVEL_NODE]
7611 self._LockInstancesNodes()
7612 elif level == locking.LEVEL_NODE_RES:
7614 self.needed_locks[locking.LEVEL_NODE_RES] = \
7615 self.needed_locks[locking.LEVEL_NODE][:]
7617 def BuildHooksEnv(self):
7620 This runs on master, primary and secondary nodes of the instance.
7623 instance = self._migrater.instance
7624 source_node = instance.primary_node
7625 target_node = self.op.target_node
7626 env = _BuildInstanceHookEnvByObject(self, instance)
7628 "MIGRATE_LIVE": self._migrater.live,
7629 "MIGRATE_CLEANUP": self.op.cleanup,
7630 "OLD_PRIMARY": source_node,
7631 "NEW_PRIMARY": target_node,
7632 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7635 if instance.disk_template in constants.DTS_INT_MIRROR:
7636 env["OLD_SECONDARY"] = target_node
7637 env["NEW_SECONDARY"] = source_node
7639 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7643 def BuildHooksNodes(self):
7644 """Build hooks nodes.
7647 instance = self._migrater.instance
7648 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7649 return (nl, nl + [instance.primary_node])
7652 class LUInstanceMove(LogicalUnit):
7653 """Move an instance by data-copying.
7656 HPATH = "instance-move"
7657 HTYPE = constants.HTYPE_INSTANCE
7660 def ExpandNames(self):
7661 self._ExpandAndLockInstance()
7662 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7663 self.op.target_node = target_node
7664 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7665 self.needed_locks[locking.LEVEL_NODE_RES] = []
7666 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7668 def DeclareLocks(self, level):
7669 if level == locking.LEVEL_NODE:
7670 self._LockInstancesNodes(primary_only=True)
7671 elif level == locking.LEVEL_NODE_RES:
7673 self.needed_locks[locking.LEVEL_NODE_RES] = \
7674 self.needed_locks[locking.LEVEL_NODE][:]
7676 def BuildHooksEnv(self):
7679 This runs on master, primary and secondary nodes of the instance.
7683 "TARGET_NODE": self.op.target_node,
7684 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7686 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7689 def BuildHooksNodes(self):
7690 """Build hooks nodes.
7694 self.cfg.GetMasterNode(),
7695 self.instance.primary_node,
7696 self.op.target_node,
7700 def CheckPrereq(self):
7701 """Check prerequisites.
7703 This checks that the instance is in the cluster.
7706 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7707 assert self.instance is not None, \
7708 "Cannot retrieve locked instance %s" % self.op.instance_name
7710 node = self.cfg.GetNodeInfo(self.op.target_node)
7711 assert node is not None, \
7712 "Cannot retrieve locked node %s" % self.op.target_node
7714 self.target_node = target_node = node.name
7716 if target_node == instance.primary_node:
7717 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7718 (instance.name, target_node),
7721 bep = self.cfg.GetClusterInfo().FillBE(instance)
7723 for idx, dsk in enumerate(instance.disks):
7724 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7725 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7726 " cannot copy" % idx, errors.ECODE_STATE)
7728 _CheckNodeOnline(self, target_node)
7729 _CheckNodeNotDrained(self, target_node)
7730 _CheckNodeVmCapable(self, target_node)
7731 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7732 self.cfg.GetNodeGroup(node.group))
7733 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7734 ignore=self.op.ignore_ipolicy)
7736 if instance.admin_state == constants.ADMINST_UP:
7737 # check memory requirements on the secondary node
7738 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7739 instance.name, bep[constants.BE_MAXMEM],
7740 instance.hypervisor)
7742 self.LogInfo("Not checking memory on the secondary node as"
7743 " instance will not be started")
7745 # check bridge existence
7746 _CheckInstanceBridgesExist(self, instance, node=target_node)
7748 def Exec(self, feedback_fn):
7749 """Move an instance.
7751 The move is done by shutting it down on its present node, copying
7752 the data over (slow) and starting it on the new node.
7755 instance = self.instance
7757 source_node = instance.primary_node
7758 target_node = self.target_node
7760 self.LogInfo("Shutting down instance %s on source node %s",
7761 instance.name, source_node)
7763 assert (self.owned_locks(locking.LEVEL_NODE) ==
7764 self.owned_locks(locking.LEVEL_NODE_RES))
7766 result = self.rpc.call_instance_shutdown(source_node, instance,
7767 self.op.shutdown_timeout)
7768 msg = result.fail_msg
7770 if self.op.ignore_consistency:
7771 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7772 " Proceeding anyway. Please make sure node"
7773 " %s is down. Error details: %s",
7774 instance.name, source_node, source_node, msg)
7776 raise errors.OpExecError("Could not shutdown instance %s on"
7778 (instance.name, source_node, msg))
7780 # create the target disks
7782 _CreateDisks(self, instance, target_node=target_node)
7783 except errors.OpExecError:
7784 self.LogWarning("Device creation failed, reverting...")
7786 _RemoveDisks(self, instance, target_node=target_node)
7788 self.cfg.ReleaseDRBDMinors(instance.name)
7791 cluster_name = self.cfg.GetClusterInfo().cluster_name
7794 # activate, get path, copy the data over
7795 for idx, disk in enumerate(instance.disks):
7796 self.LogInfo("Copying data for disk %d", idx)
7797 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7798 instance.name, True, idx)
7800 self.LogWarning("Can't assemble newly created disk %d: %s",
7801 idx, result.fail_msg)
7802 errs.append(result.fail_msg)
7804 dev_path = result.payload
7805 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
7806 target_node, dev_path,
7809 self.LogWarning("Can't copy data over for disk %d: %s",
7810 idx, result.fail_msg)
7811 errs.append(result.fail_msg)
7815 self.LogWarning("Some disks failed to copy, aborting")
7817 _RemoveDisks(self, instance, target_node=target_node)
7819 self.cfg.ReleaseDRBDMinors(instance.name)
7820 raise errors.OpExecError("Errors during disk copy: %s" %
7823 instance.primary_node = target_node
7824 self.cfg.Update(instance, feedback_fn)
7826 self.LogInfo("Removing the disks on the original node")
7827 _RemoveDisks(self, instance, target_node=source_node)
7829 # Only start the instance if it's marked as up
7830 if instance.admin_state == constants.ADMINST_UP:
7831 self.LogInfo("Starting instance %s on node %s",
7832 instance.name, target_node)
7834 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7835 ignore_secondaries=True)
7837 _ShutdownInstanceDisks(self, instance)
7838 raise errors.OpExecError("Can't activate the instance's disks")
7840 result = self.rpc.call_instance_start(target_node,
7841 (instance, None, None), False)
7842 msg = result.fail_msg
7844 _ShutdownInstanceDisks(self, instance)
7845 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7846 (instance.name, target_node, msg))
7849 class LUNodeMigrate(LogicalUnit):
7850 """Migrate all instances from a node.
7853 HPATH = "node-migrate"
7854 HTYPE = constants.HTYPE_NODE
7857 def CheckArguments(self):
7860 def ExpandNames(self):
7861 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7863 self.share_locks = _ShareAll()
7864 self.needed_locks = {
7865 locking.LEVEL_NODE: [self.op.node_name],
7868 def BuildHooksEnv(self):
7871 This runs on the master, the primary and all the secondaries.
7875 "NODE_NAME": self.op.node_name,
7876 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7879 def BuildHooksNodes(self):
7880 """Build hooks nodes.
7883 nl = [self.cfg.GetMasterNode()]
7886 def CheckPrereq(self):
7889 def Exec(self, feedback_fn):
7890 # Prepare jobs for migration instances
7891 allow_runtime_changes = self.op.allow_runtime_changes
7893 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7896 iallocator=self.op.iallocator,
7897 target_node=self.op.target_node,
7898 allow_runtime_changes=allow_runtime_changes,
7899 ignore_ipolicy=self.op.ignore_ipolicy)]
7900 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7903 # TODO: Run iallocator in this opcode and pass correct placement options to
7904 # OpInstanceMigrate. Since other jobs can modify the cluster between
7905 # running the iallocator and the actual migration, a good consistency model
7906 # will have to be found.
7908 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7909 frozenset([self.op.node_name]))
7911 return ResultWithJobs(jobs)
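# A minimal sketch, for illustration only: the Exec method above builds one
# job per primary instance on the node, each job being a list containing a
# single OpInstanceMigrate opcode, and returns them wrapped in ResultWithJobs.
# The instance names used here are hypothetical.
def _ExampleNodeMigrateJobs():
  """Builds a jobs structure shaped like the one Exec returns above."""
  hypothetical_instances = ["inst1.example.com", "inst2.example.com"]
  return [[opcodes.OpInstanceMigrate(instance_name=name)]
          for name in hypothetical_instances]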
7914 class TLMigrateInstance(Tasklet):
7915 """Tasklet class for instance migration.
7918 @ivar live: whether the migration will be done live or non-live;
7919 this variable is initialized only after CheckPrereq has run
7920 @type cleanup: boolean
7921 @ivar cleanup: Whether we clean up from a failed migration
7922 @type iallocator: string
7923 @ivar iallocator: The iallocator used to determine target_node
7924 @type target_node: string
7925 @ivar target_node: If given, the target_node to reallocate the instance to
7926 @type failover: boolean
7927 @ivar failover: Whether operation results in failover or migration
7928 @type fallback: boolean
7929 @ivar fallback: Whether fallback to failover is allowed if migration not
7931 @type ignore_consistency: boolean
7932 @ivar ignore_consistency: Whether we should ignore consistency between source
7934 @type shutdown_timeout: int
7935 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7936 @type ignore_ipolicy: bool
7937 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7942 _MIGRATION_POLL_INTERVAL = 1 # seconds
7943 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7945 def __init__(self, lu, instance_name, cleanup=False,
7946 failover=False, fallback=False,
7947 ignore_consistency=False,
7948 allow_runtime_changes=True,
7949 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7950 ignore_ipolicy=False):
7951 """Initializes this class.
7954 Tasklet.__init__(self, lu)
7957 self.instance_name = instance_name
7958 self.cleanup = cleanup
7959 self.live = False # will be overridden later
7960 self.failover = failover
7961 self.fallback = fallback
7962 self.ignore_consistency = ignore_consistency
7963 self.shutdown_timeout = shutdown_timeout
7964 self.ignore_ipolicy = ignore_ipolicy
7965 self.allow_runtime_changes = allow_runtime_changes
7967 def CheckPrereq(self):
7968 """Check prerequisites.
7970 This checks that the instance is in the cluster.
7973 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7974 instance = self.cfg.GetInstanceInfo(instance_name)
7975 assert instance is not None
7976 self.instance = instance
7977 cluster = self.cfg.GetClusterInfo()
7979 if (not self.cleanup and
7980 not instance.admin_state == constants.ADMINST_UP and
7981 not self.failover and self.fallback):
7982 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7983 " switching to failover")
7984 self.failover = True
7986 if instance.disk_template not in constants.DTS_MIRRORED:
7991 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7992 " %s" % (instance.disk_template, text),
7995 if instance.disk_template in constants.DTS_EXT_MIRROR:
7996 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7998 if self.lu.op.iallocator:
7999 self._RunAllocator()
8001 # We set self.target_node as it is required by
8003 self.target_node = self.lu.op.target_node
8005 # Check that the target node is correct in terms of instance policy
8006 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8007 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8008 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8009 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8010 ignore=self.ignore_ipolicy)
8012 # self.target_node is already populated, either directly or by the
8014 target_node = self.target_node
8015 if self.target_node == instance.primary_node:
8016 raise errors.OpPrereqError("Cannot migrate instance %s"
8017 " to its primary (%s)" %
8018 (instance.name, instance.primary_node))
8020 if len(self.lu.tasklets) == 1:
8021 # It is safe to release locks only when we're the only tasklet
8023 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8024 keep=[instance.primary_node, self.target_node])
8027 secondary_nodes = instance.secondary_nodes
8028 if not secondary_nodes:
8029 raise errors.ConfigurationError("No secondary node but using"
8030 " %s disk template" %
8031 instance.disk_template)
8032 target_node = secondary_nodes[0]
8033 if self.lu.op.iallocator or (self.lu.op.target_node and
8034 self.lu.op.target_node != target_node):
8036 text = "failed over"
8039 raise errors.OpPrereqError("Instances with disk template %s cannot"
8040 " be %s to arbitrary nodes"
8041 " (neither an iallocator nor a target"
8042 " node can be passed)" %
8043 (instance.disk_template, text),
8045 nodeinfo = self.cfg.GetNodeInfo(target_node)
8046 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8047 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8048 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8049 ignore=self.ignore_ipolicy)
8051 i_be = cluster.FillBE(instance)
8053 # check memory requirements on the secondary node
8054 if (not self.cleanup and
8055 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8056 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8057 "migrating instance %s" %
8059 i_be[constants.BE_MINMEM],
8060 instance.hypervisor)
8062 self.lu.LogInfo("Not checking memory on the secondary node as"
8063 " instance will not be started")
8065 # check if failover must be forced instead of migration
8066 if (not self.cleanup and not self.failover and
8067 i_be[constants.BE_ALWAYS_FAILOVER]):
8069 self.lu.LogInfo("Instance configured to always failover; fallback"
8071 self.failover = True
8073 raise errors.OpPrereqError("This instance has been configured to"
8074 " always failover, please allow failover",
8077 # check bridge existence
8078 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8080 if not self.cleanup:
8081 _CheckNodeNotDrained(self.lu, target_node)
8082 if not self.failover:
8083 result = self.rpc.call_instance_migratable(instance.primary_node,
8085 if result.fail_msg and self.fallback:
8086 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8088 self.failover = True
8090 result.Raise("Can't migrate, please use failover",
8091 prereq=True, ecode=errors.ECODE_STATE)
8093 assert not (self.failover and self.cleanup)
8095 if not self.failover:
8096 if self.lu.op.live is not None and self.lu.op.mode is not None:
8097 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8098 " parameters are accepted",
8100 if self.lu.op.live is not None:
8102 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8104 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8105 # reset the 'live' parameter to None so that repeated
8106 # invocations of CheckPrereq do not raise an exception
8107 self.lu.op.live = None
8108 elif self.lu.op.mode is None:
8109 # read the default value from the hypervisor
8110 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8111 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8113 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8115 # Failover is never live
8118 if not (self.failover or self.cleanup):
8119 remote_info = self.rpc.call_instance_info(instance.primary_node,
8121 instance.hypervisor)
8122 remote_info.Raise("Error checking instance on node %s" %
8123 instance.primary_node)
8124 instance_running = bool(remote_info.payload)
8125 if instance_running:
8126 self.current_mem = int(remote_info.payload["memory"])
8128 def _RunAllocator(self):
8129 """Run the allocator based on input opcode.
8132 # FIXME: add a self.ignore_ipolicy option
8133 ial = IAllocator(self.cfg, self.rpc,
8134 mode=constants.IALLOCATOR_MODE_RELOC,
8135 name=self.instance_name,
8136 relocate_from=[self.instance.primary_node],
8139 ial.Run(self.lu.op.iallocator)
8142 raise errors.OpPrereqError("Can't compute nodes using"
8143 " iallocator '%s': %s" %
8144 (self.lu.op.iallocator, ial.info),
8146 if len(ial.result) != ial.required_nodes:
8147 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8148 " of nodes (%s), required %s" %
8149 (self.lu.op.iallocator, len(ial.result),
8150 ial.required_nodes), errors.ECODE_FAULT)
8151 self.target_node = ial.result[0]
8152 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8153 self.instance_name, self.lu.op.iallocator,
8154 utils.CommaJoin(ial.result))
8156 def _WaitUntilSync(self):
8157 """Poll with custom rpc for disk sync.
8159 This uses our own step-based rpc call.
8162 self.feedback_fn("* wait until resync is done")
8166 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8168 (self.instance.disks,
8171 for node, nres in result.items():
8172 nres.Raise("Cannot resync disks on node %s" % node)
8173 node_done, node_percent = nres.payload
8174 all_done = all_done and node_done
8175 if node_percent is not None:
8176 min_percent = min(min_percent, node_percent)
8178 if min_percent < 100:
8179 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8182 def _EnsureSecondary(self, node):
8183 """Demote a node to secondary.
8186 self.feedback_fn("* switching node %s to secondary mode" % node)
8188 for dev in self.instance.disks:
8189 self.cfg.SetDiskID(dev, node)
8191 result = self.rpc.call_blockdev_close(node, self.instance.name,
8192 self.instance.disks)
8193 result.Raise("Cannot change disk to secondary on node %s" % node)
8195 def _GoStandalone(self):
8196 """Disconnect from the network.
8199 self.feedback_fn("* changing into standalone mode")
8200 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8201 self.instance.disks)
8202 for node, nres in result.items():
8203 nres.Raise("Cannot disconnect disks on node %s" % node)
8205 def _GoReconnect(self, multimaster):
8206 """Reconnect to the network.
8212 msg = "single-master"
8213 self.feedback_fn("* changing disks into %s mode" % msg)
8214 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8215 (self.instance.disks, self.instance),
8216 self.instance.name, multimaster)
8217 for node, nres in result.items():
8218 nres.Raise("Cannot change disks config on node %s" % node)
8220 def _ExecCleanup(self):
8221 """Try to cleanup after a failed migration.
8223 The cleanup is done by:
8224 - check that the instance is running only on one node
8225 (and update the config if needed)
8226 - change disks on its secondary node to secondary
8227 - wait until disks are fully synchronized
8228 - disconnect from the network
8229 - change disks into single-master mode
8230 - wait again until disks are fully synchronized
8233 instance = self.instance
8234 target_node = self.target_node
8235 source_node = self.source_node
8237 # check running on only one node
8238 self.feedback_fn("* checking where the instance actually runs"
8239 " (if this hangs, the hypervisor might be in"
8241 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8242 for node, result in ins_l.items():
8243 result.Raise("Can't contact node %s" % node)
8245 runningon_source = instance.name in ins_l[source_node].payload
8246 runningon_target = instance.name in ins_l[target_node].payload
8248 if runningon_source and runningon_target:
8249 raise errors.OpExecError("Instance seems to be running on two nodes,"
8250 " or the hypervisor is confused; you will have"
8251 " to ensure manually that it runs only on one"
8252 " and restart this operation")
8254 if not (runningon_source or runningon_target):
8255 raise errors.OpExecError("Instance does not seem to be running at all;"
8256 " in this case it's safer to repair by"
8257 " running 'gnt-instance stop' to ensure disk"
8258 " shutdown, and then restarting it")
8260 if runningon_target:
8261 # the migration has actually succeeded, we need to update the config
8262 self.feedback_fn("* instance running on secondary node (%s),"
8263 " updating config" % target_node)
8264 instance.primary_node = target_node
8265 self.cfg.Update(instance, self.feedback_fn)
8266 demoted_node = source_node
8268 self.feedback_fn("* instance confirmed to be running on its"
8269 " primary node (%s)" % source_node)
8270 demoted_node = target_node
8272 if instance.disk_template in constants.DTS_INT_MIRROR:
8273 self._EnsureSecondary(demoted_node)
8275 self._WaitUntilSync()
8276 except errors.OpExecError:
8277 # we ignore here errors, since if the device is standalone, it
8278 # won't be able to sync
8280 self._GoStandalone()
8281 self._GoReconnect(False)
8282 self._WaitUntilSync()
8284 self.feedback_fn("* done")
8286 def _RevertDiskStatus(self):
8287 """Try to revert the disk status after a failed migration.
8290 target_node = self.target_node
8291 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8295 self._EnsureSecondary(target_node)
8296 self._GoStandalone()
8297 self._GoReconnect(False)
8298 self._WaitUntilSync()
8299 except errors.OpExecError, err:
8300 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8301 " please try to recover the instance manually;"
8302 " error '%s'" % str(err))
8304 def _AbortMigration(self):
8305 """Call the hypervisor code to abort a started migration.
8308 instance = self.instance
8309 target_node = self.target_node
8310 source_node = self.source_node
8311 migration_info = self.migration_info
8313 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8317 abort_msg = abort_result.fail_msg
8319 logging.error("Aborting migration failed on target node %s: %s",
8320 target_node, abort_msg)
8321 # Don't raise an exception here, as we still have to try to revert the
8322 # disk status, even if this step failed.
8324 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8325 instance, False, self.live)
8326 abort_msg = abort_result.fail_msg
8328 logging.error("Aborting migration failed on source node %s: %s",
8329 source_node, abort_msg)
8331 def _ExecMigration(self):
8332 """Migrate an instance.
8334 The migration is done by:
8335 - change the disks into dual-master mode
8336 - wait until disks are fully synchronized again
8337 - migrate the instance
8338 - change disks on the new secondary node (the old primary) to secondary
8339 - wait until disks are fully synchronized
8340 - change disks into single-master mode
8343 instance = self.instance
8344 target_node = self.target_node
8345 source_node = self.source_node
8347 # Check for hypervisor version mismatch and warn the user.
8348 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8349 None, [self.instance.hypervisor])
8350 for ninfo in nodeinfo.values():
8351 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8353 (_, _, (src_info, )) = nodeinfo[source_node].payload
8354 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8356 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8357 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8358 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8359 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8360 if src_version != dst_version:
8361 self.feedback_fn("* warning: hypervisor version mismatch between"
8362 " source (%s) and target (%s) node" %
8363 (src_version, dst_version))
8365 self.feedback_fn("* checking disk consistency between source and target")
8366 for (idx, dev) in enumerate(instance.disks):
8367 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8368 raise errors.OpExecError("Disk %s is degraded or not fully"
8369 " synchronized on target node,"
8370 " aborting migration" % idx)
8372 if self.current_mem > self.tgt_free_mem:
8373 if not self.allow_runtime_changes:
8374 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8375 " free memory to fit instance %s on target"
8376 " node %s (have %dMB, need %dMB)" %
8377 (instance.name, target_node,
8378 self.tgt_free_mem, self.current_mem))
8379 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8380 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8383 rpcres.Raise("Cannot modify instance runtime memory")
8385 # First get the migration information from the remote node
8386 result = self.rpc.call_migration_info(source_node, instance)
8387 msg = result.fail_msg
8389 log_err = ("Failed fetching source migration information from %s: %s" %
8391 logging.error(log_err)
8392 raise errors.OpExecError(log_err)
8394 self.migration_info = migration_info = result.payload
8396 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8397 # Then switch the disks to master/master mode
8398 self._EnsureSecondary(target_node)
8399 self._GoStandalone()
8400 self._GoReconnect(True)
8401 self._WaitUntilSync()
8403 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8404 result = self.rpc.call_accept_instance(target_node,
8407 self.nodes_ip[target_node])
8409 msg = result.fail_msg
8411 logging.error("Instance pre-migration failed, trying to revert"
8412 " disk status: %s", msg)
8413 self.feedback_fn("Pre-migration failed, aborting")
8414 self._AbortMigration()
8415 self._RevertDiskStatus()
8416 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8417 (instance.name, msg))
8419 self.feedback_fn("* migrating instance to %s" % target_node)
8420 result = self.rpc.call_instance_migrate(source_node, instance,
8421 self.nodes_ip[target_node],
8423 msg = result.fail_msg
8425 logging.error("Instance migration failed, trying to revert"
8426 " disk status: %s", msg)
8427 self.feedback_fn("Migration failed, aborting")
8428 self._AbortMigration()
8429 self._RevertDiskStatus()
8430 raise errors.OpExecError("Could not migrate instance %s: %s" %
8431 (instance.name, msg))
8433 self.feedback_fn("* starting memory transfer")
8434 last_feedback = time.time()
8436 result = self.rpc.call_instance_get_migration_status(source_node,
8438 msg = result.fail_msg
8439 ms = result.payload # MigrationStatus instance
8440 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8441 logging.error("Instance migration failed, trying to revert"
8442 " disk status: %s", msg)
8443 self.feedback_fn("Migration failed, aborting")
8444 self._AbortMigration()
8445 self._RevertDiskStatus()
8446 raise errors.OpExecError("Could not migrate instance %s: %s" %
8447 (instance.name, msg))
8449 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8450 self.feedback_fn("* memory transfer complete")
8453 if (utils.TimeoutExpired(last_feedback,
8454 self._MIGRATION_FEEDBACK_INTERVAL) and
8455 ms.transferred_ram is not None):
8456 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8457 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8458 last_feedback = time.time()
8460 time.sleep(self._MIGRATION_POLL_INTERVAL)
8462 result = self.rpc.call_instance_finalize_migration_src(source_node,
8466 msg = result.fail_msg
8468 logging.error("Instance migration succeeded, but finalization failed"
8469 " on the source node: %s", msg)
8470 raise errors.OpExecError("Could not finalize instance migration: %s" %
8473 instance.primary_node = target_node
8475 # distribute new instance config to the other nodes
8476 self.cfg.Update(instance, self.feedback_fn)
8478 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8482 msg = result.fail_msg
8484 logging.error("Instance migration succeeded, but finalization failed"
8485 " on the target node: %s", msg)
8486 raise errors.OpExecError("Could not finalize instance migration: %s" %
8489 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8490 self._EnsureSecondary(source_node)
8491 self._WaitUntilSync()
8492 self._GoStandalone()
8493 self._GoReconnect(False)
8494 self._WaitUntilSync()
8496 # If the instance's disk template is `rbd' and there was a successful
8497 # migration, unmap the device from the source node.
8498 if self.instance.disk_template == constants.DT_RBD:
8499 disks = _ExpandCheckDisks(instance, instance.disks)
8500 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8502 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8503 msg = result.fail_msg
8505 logging.error("Migration was successful, but couldn't unmap the"
8506 " block device %s on source node %s: %s",
8507 disk.iv_name, source_node, msg)
8508 logging.error("You need to unmap the device %s manually on %s",
8509 disk.iv_name, source_node)
8511 self.feedback_fn("* done")
8513 def _ExecFailover(self):
8514 """Failover an instance.
8516 The failover is done by shutting it down on its present node and
8517 starting it on the secondary.
8520 instance = self.instance
8521 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8523 source_node = instance.primary_node
8524 target_node = self.target_node
8526 if instance.admin_state == constants.ADMINST_UP:
8527 self.feedback_fn("* checking disk consistency between source and target")
8528 for (idx, dev) in enumerate(instance.disks):
8529 # for drbd, these are drbd over lvm
8530 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8532 if primary_node.offline:
8533 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8535 (primary_node.name, idx, target_node))
8536 elif not self.ignore_consistency:
8537 raise errors.OpExecError("Disk %s is degraded on target node,"
8538 " aborting failover" % idx)
8540 self.feedback_fn("* not checking disk consistency as instance is not"
8543 self.feedback_fn("* shutting down instance on source node")
8544 logging.info("Shutting down instance %s on node %s",
8545 instance.name, source_node)
8547 result = self.rpc.call_instance_shutdown(source_node, instance,
8548 self.shutdown_timeout)
8549 msg = result.fail_msg
8551 if self.ignore_consistency or primary_node.offline:
8552 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8553 " proceeding anyway; please make sure node"
8554 " %s is down; error details: %s",
8555 instance.name, source_node, source_node, msg)
8557 raise errors.OpExecError("Could not shutdown instance %s on"
8559 (instance.name, source_node, msg))
8561 self.feedback_fn("* deactivating the instance's disks on source node")
8562 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8563 raise errors.OpExecError("Can't shut down the instance's disks")
8565 instance.primary_node = target_node
8566 # distribute new instance config to the other nodes
8567 self.cfg.Update(instance, self.feedback_fn)
8569 # Only start the instance if it's marked as up
8570 if instance.admin_state == constants.ADMINST_UP:
8571 self.feedback_fn("* activating the instance's disks on target node %s" %
8573 logging.info("Starting instance %s on node %s",
8574 instance.name, target_node)
8576 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8577 ignore_secondaries=True)
8579 _ShutdownInstanceDisks(self.lu, instance)
8580 raise errors.OpExecError("Can't activate the instance's disks")
8582 self.feedback_fn("* starting the instance on the target node %s" %
8584 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8586 msg = result.fail_msg
8588 _ShutdownInstanceDisks(self.lu, instance)
8589 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8590 (instance.name, target_node, msg))
8592 def Exec(self, feedback_fn):
8593 """Perform the migration.
8596 self.feedback_fn = feedback_fn
8597 self.source_node = self.instance.primary_node
8599 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8600 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8601 self.target_node = self.instance.secondary_nodes[0]
8602 # Otherwise self.target_node has been populated either
8603 # directly, or through an iallocator.
8605 self.all_nodes = [self.source_node, self.target_node]
8606 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8607 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8610 feedback_fn("Failover instance %s" % self.instance.name)
8611 self._ExecFailover()
8613 feedback_fn("Migrating instance %s" % self.instance.name)
8616 return self._ExecCleanup()
8618 return self._ExecMigration()
8621 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8623 """Wrapper around L{_CreateBlockDevInner}.
8625 This method annotates the root device first.
8628 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8629 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8633 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8635 """Create a tree of block devices on a given node.
8637 If this device type has to be created on secondaries, create it and
8640 If not, just recurse to children keeping the same 'force' value.
8642 @attention: The device has to be annotated already.
8644 @param lu: the lu on whose behalf we execute
8645 @param node: the node on which to create the device
8646 @type instance: L{objects.Instance}
8647 @param instance: the instance which owns the device
8648 @type device: L{objects.Disk}
8649 @param device: the device to create
8650 @type force_create: boolean
8651 @param force_create: whether to force creation of this device; this
8652 will be changed to True whenever we find a device which has
8653 CreateOnSecondary() attribute
8654 @param info: the extra 'metadata' we should attach to the device
8655 (this will be represented as a LVM tag)
8656 @type force_open: boolean
8657 @param force_open: this parameter will be passed to the
8658 L{backend.BlockdevCreate} function where it specifies
8659 whether we run on primary or not, and it affects both
8660 the child assembly and the device's own Open() execution
8663 if device.CreateOnSecondary():
8667 for child in device.children:
8668 _CreateBlockDevInner(lu, node, instance, child, force_create,
8671 if not force_create:
8674 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8677 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8678 """Create a single block device on a given node.
8680 This will not recurse over children of the device, so they must be
8683 @param lu: the lu on whose behalf we execute
8684 @param node: the node on which to create the device
8685 @type instance: L{objects.Instance}
8686 @param instance: the instance which owns the device
8687 @type device: L{objects.Disk}
8688 @param device: the device to create
8689 @param info: the extra 'metadata' we should attach to the device
8690 (this will be represented as a LVM tag)
8691 @type force_open: boolean
8692 @param force_open: this parameter will be passed to the
8693 L{backend.BlockdevCreate} function where it specifies
8694 whether we run on primary or not, and it affects both
8695 the child assembly and the device's own Open() execution
8698 lu.cfg.SetDiskID(device, node)
8699 result = lu.rpc.call_blockdev_create(node, device, device.size,
8700 instance.name, force_open, info)
8701 result.Raise("Can't create block device %s on"
8702 " node %s for instance %s" % (device, node, instance.name))
8703 if device.physical_id is None:
8704 device.physical_id = result.payload
8707 def _GenerateUniqueNames(lu, exts):
8708 """Generate a suitable LV name.
8710 This will generate a logical volume name for the given instance.
8715 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8716 results.append("%s%s" % (new_id, val))
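# A minimal sketch, for illustration only: _GenerateUniqueNames concatenates a
# freshly generated unique ID with each requested extension.  The ID below is
# a made-up example value, not one produced by the configuration.
def _ExampleUniqueNames():
  """Shows the shape of the LV names produced for two extensions."""
  hypothetical_id = "3b0c0991-5bdb-4f8e-a21b-2d2f69a8a2a1"
  exts = [".disk0", ".disk1"]
  # -> ["3b0c0991-...-2d2f69a8a2a1.disk0", "3b0c0991-...-2d2f69a8a2a1.disk1"]
  return ["%s%s" % (hypothetical_id, ext) for ext in exts]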
8720 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8721 iv_name, p_minor, s_minor):
8722 """Generate a drbd8 device complete with its children.
8725 assert len(vgnames) == len(names) == 2
8726 port = lu.cfg.AllocatePort()
8727 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8729 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8730 logical_id=(vgnames[0], names[0]),
8732 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8733 logical_id=(vgnames[1], names[1]),
8735 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8736 logical_id=(primary, secondary, port,
8739 children=[dev_data, dev_meta],
8740 iv_name=iv_name, params={})
8744 _DISK_TEMPLATE_NAME_PREFIX = {
8745 constants.DT_PLAIN: "",
8746 constants.DT_RBD: ".rbd",
8750 _DISK_TEMPLATE_DEVICE_TYPE = {
8751 constants.DT_PLAIN: constants.LD_LV,
8752 constants.DT_FILE: constants.LD_FILE,
8753 constants.DT_SHARED_FILE: constants.LD_FILE,
8754 constants.DT_BLOCK: constants.LD_BLOCKDEV,
8755 constants.DT_RBD: constants.LD_RBD,
8759 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
8760 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
8761 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
8762 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
8763 """Generate the entire disk layout for a given template type.
8766 #TODO: compute space requirements
8768 vgname = lu.cfg.GetVGName()
8769 disk_count = len(disk_info)
8772 if template_name == constants.DT_DISKLESS:
8774 elif template_name == constants.DT_DRBD8:
8775 if len(secondary_nodes) != 1:
8776 raise errors.ProgrammerError("Wrong template configuration")
8777 remote_node = secondary_nodes[0]
8778 minors = lu.cfg.AllocateDRBDMinor(
8779 [primary_node, remote_node] * len(disk_info), instance_name)
8781 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
8783 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8786 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8787 for i in range(disk_count)]):
8788 names.append(lv_prefix + "_data")
8789 names.append(lv_prefix + "_meta")
8790 for idx, disk in enumerate(disk_info):
8791 disk_index = idx + base_index
8792 data_vg = disk.get(constants.IDISK_VG, vgname)
8793 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8794 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8795 disk[constants.IDISK_SIZE],
8797 names[idx * 2:idx * 2 + 2],
8798 "disk/%d" % disk_index,
8799 minors[idx * 2], minors[idx * 2 + 1])
8800 disk_dev.mode = disk[constants.IDISK_MODE]
8801 disks.append(disk_dev)
8804 raise errors.ProgrammerError("Wrong template configuration")
8806 if template_name == constants.DT_FILE:
8808 elif template_name == constants.DT_SHARED_FILE:
8809 _req_shr_file_storage()
8811 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
8812 if name_prefix is None:
8815 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
8816 (name_prefix, base_index + i)
8817 for i in range(disk_count)])
8819 if template_name == constants.DT_PLAIN:
8820 def logical_id_fn(idx, _, disk):
8821 vg = disk.get(constants.IDISK_VG, vgname)
8822 return (vg, names[idx])
8823 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
8825 lambda _, disk_index, disk: (file_driver,
8826 "%s/disk%d" % (file_storage_dir,
8828 elif template_name == constants.DT_BLOCK:
8830 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
8831 disk[constants.IDISK_ADOPT])
8832 elif template_name == constants.DT_RBD:
8833 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
8835 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
8837 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
8839 for idx, disk in enumerate(disk_info):
8840 disk_index = idx + base_index
8841 size = disk[constants.IDISK_SIZE]
8842 feedback_fn("* disk %s, size %s" %
8843 (disk_index, utils.FormatUnit(size, "h")))
8844 disks.append(objects.Disk(dev_type=dev_type, size=size,
8845 logical_id=logical_id_fn(idx, disk_index, disk),
8846 iv_name="disk/%d" % disk_index,
8847 mode=disk[constants.IDISK_MODE],
8853 def _GetInstanceInfoText(instance):
8854 """Compute the text that should be added to the disk's metadata.
8857 return "originstname+%s" % instance.name
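# A minimal sketch, for illustration only: the text above is attached to the
# instance's block devices (for LVM-based disks it is represented as an LVM
# tag).  The instance name used here is hypothetical.
def _ExampleInstanceInfoText():
  """Shows the metadata text produced for a sample instance name."""
  hypothetical_name = "web1.example.com"
  return "originstname+%s" % hypothetical_name  # "originstname+web1.example.com"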
8860 def _CalcEta(time_taken, written, total_size):
8861 """Calculates the ETA based on size written and total size.
8863 @param time_taken: The time taken so far
8864 @param written: amount written so far
8865 @param total_size: The total size of data to be written
8866 @return: The remaining time in seconds
8869 avg_time = time_taken / float(written)
8870 return (total_size - written) * avg_time
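# A worked example, for illustration only: with 256 units written in 30
# seconds out of 1024 in total, the average cost is 30 / 256 seconds per unit,
# so the remaining 768 units need roughly 90 more seconds.
def _ExampleCalcEta():
  """Reproduces the _CalcEta arithmetic on made-up numbers."""
  time_taken, written, total_size = 30.0, 256, 1024
  avg_time = time_taken / float(written)      # 0.1171875 seconds per unit
  return (total_size - written) * avg_time    # 90.0 seconds remaining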
8873 def _WipeDisks(lu, instance):
8874 """Wipes instance disks.
8876 @type lu: L{LogicalUnit}
8877 @param lu: the logical unit on whose behalf we execute
8878 @type instance: L{objects.Instance}
8879 @param instance: the instance whose disks we should wipe
8880 @return: the success of the wipe
8883 node = instance.primary_node
8885 for device in instance.disks:
8886 lu.cfg.SetDiskID(device, node)
8888 logging.info("Pause sync of instance %s disks", instance.name)
8889 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8890 (instance.disks, instance),
8892 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
8894 for idx, success in enumerate(result.payload):
8896 logging.warn("pause-sync of instance %s for disks %d failed",
8900 for idx, device in enumerate(instance.disks):
8901 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8902 # MAX_WIPE_CHUNK at max
8903 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8904 constants.MIN_WIPE_CHUNK_PERCENT)
8905 # we _must_ make this an int, otherwise rounding errors will occur
8907 wipe_chunk_size = int(wipe_chunk_size)
8909 lu.LogInfo("* Wiping disk %d", idx)
8910 logging.info("Wiping disk %d for instance %s, node %s using"
8911 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8916 start_time = time.time()
8918 while offset < size:
8919 wipe_size = min(wipe_chunk_size, size - offset)
8920 logging.debug("Wiping disk %d, offset %s, chunk %s",
8921 idx, offset, wipe_size)
8922 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
8924 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8925 (idx, offset, wipe_size))
8928 if now - last_output >= 60:
8929 eta = _CalcEta(now - start_time, offset, size)
8930 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8931 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8934 logging.info("Resume sync of instance %s disks", instance.name)
8936 result = lu.rpc.call_blockdev_pause_resume_sync(node,
8937 (instance.disks, instance),
8941 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
8942 " please have a look at the status and troubleshoot"
8943 " the issue: %s", node, result.fail_msg)
8945 for idx, success in enumerate(result.payload):
8947 lu.LogWarning("Resume sync of disk %d failed, please have a"
8948 " look at the status and troubleshoot the issue", idx)
8949 logging.warn("resume-sync of instance %s for disks %d failed",
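# A minimal sketch, for illustration only, of the chunk size computation used
# by _WipeDisks above.  The default values below (MAX_WIPE_CHUNK of 1024,
# MIN_WIPE_CHUNK_PERCENT of 10) are assumptions made for this example; the
# authoritative values live in ganeti.constants.
def _ExampleWipeChunkSize(disk_size, max_chunk=1024, min_chunk_percent=10):
  """Returns the wipe chunk size for a disk of the given size."""
  return int(min(max_chunk, disk_size / 100.0 * min_chunk_percent))

# _ExampleWipeChunkSize(2048) == 204 (10% of a small disk, rounded down)
# _ExampleWipeChunkSize(102400) == 1024 (capped at max_chunk for large disks)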
8953 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8954 """Create all disks for an instance.
8956 This abstracts away some work from AddInstance.
8958 @type lu: L{LogicalUnit}
8959 @param lu: the logical unit on whose behalf we execute
8960 @type instance: L{objects.Instance}
8961 @param instance: the instance whose disks we should create
8963 @param to_skip: list of indices to skip
8964 @type target_node: string
8965 @param target_node: if passed, overrides the target node for creation
8967 @return: the success of the creation
8970 info = _GetInstanceInfoText(instance)
8971 if target_node is None:
8972 pnode = instance.primary_node
8973 all_nodes = instance.all_nodes
8978 if instance.disk_template in constants.DTS_FILEBASED:
8979 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8980 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8982 result.Raise("Failed to create directory '%s' on"
8983 " node %s" % (file_storage_dir, pnode))
8985 # Note: this needs to be kept in sync with adding of disks in
8986 # LUInstanceSetParams
8987 for idx, device in enumerate(instance.disks):
8988 if to_skip and idx in to_skip:
8990 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
8992 for node in all_nodes:
8993 f_create = node == pnode
8994 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8997 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8998 """Remove all disks for an instance.
9000 This abstracts away some work from `AddInstance()` and
9001 `RemoveInstance()`. Note that in case some of the devices couldn't
9002 be removed, the removal will continue with the other ones (compare
9003 with `_CreateDisks()`).
9005 @type lu: L{LogicalUnit}
9006 @param lu: the logical unit on whose behalf we execute
9007 @type instance: L{objects.Instance}
9008 @param instance: the instance whose disks we should remove
9009 @type target_node: string
9010 @param target_node: used to override the node on which to remove the disks
9012 @return: the success of the removal
9015 logging.info("Removing block devices for instance %s", instance.name)
9018 ports_to_release = set()
9019 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9020 for (idx, device) in enumerate(anno_disks):
9022 edata = [(target_node, device)]
9024 edata = device.ComputeNodeTree(instance.primary_node)
9025 for node, disk in edata:
9026 lu.cfg.SetDiskID(disk, node)
9027 result = lu.rpc.call_blockdev_remove(node, disk)
9029 lu.LogWarning("Could not remove disk %s on node %s,"
9030 " continuing anyway: %s", idx, node, result.fail_msg)
9031 if not (result.offline and node != instance.primary_node):
9034 # if this is a DRBD disk, return its port to the pool
9035 if device.dev_type in constants.LDS_DRBD:
9036 ports_to_release.add(device.logical_id[2])
9038 if all_result or ignore_failures:
9039 for port in ports_to_release:
9040 lu.cfg.AddTcpUdpPort(port)
9042 if instance.disk_template == constants.DT_FILE:
9043 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9047 tgt = instance.primary_node
9048 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9050 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9051 file_storage_dir, instance.primary_node, result.fail_msg)
9057 def _ComputeDiskSizePerVG(disk_template, disks):
9058 """Compute disk size requirements in the volume group
9061 def _compute(disks, payload):
9062 """Universal algorithm.
9067 vgs[disk[constants.IDISK_VG]] = \
9068 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9072 # Required free disk space as a function of disk and swap space
9074 constants.DT_DISKLESS: {},
9075 constants.DT_PLAIN: _compute(disks, 0),
9076 # 128 MB are added for drbd metadata for each disk
9077 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9078 constants.DT_FILE: {},
9079 constants.DT_SHARED_FILE: {},
9082 if disk_template not in req_size_dict:
9083 raise errors.ProgrammerError("Disk template '%s' size requirement"
9084 " is unknown" % disk_template)
9086 return req_size_dict[disk_template]
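# A worked example, for illustration only: for two disks in "xenvg" and one in
# "othervg" with a payload of 0 (the DT_PLAIN case), the accumulation above
# yields one aggregated size per volume group.  Disk sizes are made up.
def _ExampleDiskSizePerVG():
  """Mimics the per-VG accumulation done by _compute for a zero payload."""
  disks = [{"vg": "xenvg", "size": 1024},
           {"vg": "xenvg", "size": 512},
           {"vg": "othervg", "size": 2048}]
  vgs = {}
  for disk in disks:
    vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"]
  return vgs  # {"xenvg": 1536, "othervg": 2048}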
9089 def _ComputeDiskSize(disk_template, disks):
9090 """Compute disk size requirements according to disk template
9093 # Required free disk space as a function of disk and swap space
9095 constants.DT_DISKLESS: None,
9096 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9097 # 128 MB are added for drbd metadata for each disk
9099 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9100 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9101 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9102 constants.DT_BLOCK: 0,
9103 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9106 if disk_template not in req_size_dict:
9107 raise errors.ProgrammerError("Disk template '%s' size requirement"
9108 " is unknown" % disk_template)
9110 return req_size_dict[disk_template]
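# A worked example, for illustration only: per the table above, a DRBD8
# instance with disks of 1024 and 2048 needs DRBD_META_SIZE (128, according to
# the comment above) of extra space per disk, while DT_PLAIN needs only the
# sum of the disk sizes.
def _ExampleComputeDiskSize(drbd_meta_size=128):
  """Recomputes the DT_PLAIN and DT_DRBD8 requirements for two sample disks."""
  disk_sizes = [1024, 2048]
  plain = sum(disk_sizes)                                    # 3072
  drbd8 = sum(size + drbd_meta_size for size in disk_sizes)  # 3328
  return (plain, drbd8)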
9113 def _FilterVmNodes(lu, nodenames):
9114 """Filters out non-vm_capable nodes from a list.
9116 @type lu: L{LogicalUnit}
9117 @param lu: the logical unit for which we check
9118 @type nodenames: list
9119 @param nodenames: the list of nodes on which we should check
9121 @return: the list of vm-capable nodes
9124 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9125 return [name for name in nodenames if name not in non_vm_nodes]
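# A minimal sketch, for illustration only, of the filtering behaviour above,
# using made-up node names.
def _ExampleFilterVmNodes():
  """Keeps only the nodes that are not in the non-vm_capable set."""
  nodenames = ["node1", "node2", "node3"]
  non_vm_capable = frozenset(["node2"])
  return [name for name in nodenames
          if name not in non_vm_capable]  # ["node1", "node3"]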
9128 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9129 """Hypervisor parameter validation.
9131 This function abstracts the hypervisor parameter validation to be
9132 used in both instance create and instance modify.
9134 @type lu: L{LogicalUnit}
9135 @param lu: the logical unit for which we check
9136 @type nodenames: list
9137 @param nodenames: the list of nodes on which we should check
9138 @type hvname: string
9139 @param hvname: the name of the hypervisor we should use
9140 @type hvparams: dict
9141 @param hvparams: the parameters which we need to check
9142 @raise errors.OpPrereqError: if the parameters are not valid
9145 nodenames = _FilterVmNodes(lu, nodenames)
9147 cluster = lu.cfg.GetClusterInfo()
9148 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9150 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9151 for node in nodenames:
9155 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9158 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9159 """OS parameters validation.
9161 @type lu: L{LogicalUnit}
9162 @param lu: the logical unit for which we check
9163 @type required: boolean
9164 @param required: whether the validation should fail if the OS is not
9166 @type nodenames: list
9167 @param nodenames: the list of nodes on which we should check
9168 @type osname: string
9169 @param osname: the name of the OS we should use
9170 @type osparams: dict
9171 @param osparams: the parameters which we need to check
9172 @raise errors.OpPrereqError: if the parameters are not valid
9175 nodenames = _FilterVmNodes(lu, nodenames)
9176 result = lu.rpc.call_os_validate(nodenames, required, osname,
9177 [constants.OS_VALIDATE_PARAMETERS],
9179 for node, nres in result.items():
9180 # we don't check for offline cases since this should be run only
9181 # against the master node and/or an instance's nodes
9182 nres.Raise("OS Parameters validation failed on node %s" % node)
9183 if not nres.payload:
9184 lu.LogInfo("OS %s not found on node %s, validation skipped",
9188 class LUInstanceCreate(LogicalUnit):
9189 """Create an instance.
9192 HPATH = "instance-add"
9193 HTYPE = constants.HTYPE_INSTANCE
9196 def CheckArguments(self):
9200 # do not require name_check to ease forward/backward compatibility
9202 if self.op.no_install and self.op.start:
9203 self.LogInfo("No-installation mode selected, disabling startup")
9204 self.op.start = False
9205 # validate/normalize the instance name
9206 self.op.instance_name = \
9207 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9209 if self.op.ip_check and not self.op.name_check:
9210 # TODO: make the ip check more flexible and not depend on the name check
9211 raise errors.OpPrereqError("Cannot do IP address check without a name"
9212 " check", errors.ECODE_INVAL)
9214 # check nics' parameter names
9215 for nic in self.op.nics:
9216 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9218 # check disks. parameter names and consistent adopt/no-adopt strategy
9219 has_adopt = has_no_adopt = False
9220 for disk in self.op.disks:
9221 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9222 if constants.IDISK_ADOPT in disk:
9226 if has_adopt and has_no_adopt:
9227 raise errors.OpPrereqError("Either all disks are adopted or none is",
9230 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9231 raise errors.OpPrereqError("Disk adoption is not supported for the"
9232 " '%s' disk template" %
9233 self.op.disk_template,
9235 if self.op.iallocator is not None:
9236 raise errors.OpPrereqError("Disk adoption not allowed with an"
9237 " iallocator script", errors.ECODE_INVAL)
9238 if self.op.mode == constants.INSTANCE_IMPORT:
9239 raise errors.OpPrereqError("Disk adoption not allowed for"
9240 " instance import", errors.ECODE_INVAL)
9242 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9243 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9244 " but no 'adopt' parameter given" %
9245 self.op.disk_template,
9248 self.adopt_disks = has_adopt
9250 # instance name verification
9251 if self.op.name_check:
9252 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9253 self.op.instance_name = self.hostname1.name
9254 # used in CheckPrereq for ip ping check
9255 self.check_ip = self.hostname1.ip
9257 self.check_ip = None
9259 # file storage checks
9260 if (self.op.file_driver and
9261 self.op.file_driver not in constants.FILE_DRIVER):
9262 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9263 self.op.file_driver, errors.ECODE_INVAL)
9265 if self.op.disk_template == constants.DT_FILE:
9266 opcodes.RequireFileStorage()
9267 elif self.op.disk_template == constants.DT_SHARED_FILE:
9268 opcodes.RequireSharedFileStorage()
9270 ### Node/iallocator related checks
9271 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9273 if self.op.pnode is not None:
9274 if self.op.disk_template in constants.DTS_INT_MIRROR:
9275 if self.op.snode is None:
9276 raise errors.OpPrereqError("The networked disk templates need"
9277 " a mirror node", errors.ECODE_INVAL)
9279 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9281 self.op.snode = None
9283 self._cds = _GetClusterDomainSecret()
9285 if self.op.mode == constants.INSTANCE_IMPORT:
9286 # On import force_variant must be True, because if we forced it at
9287 # initial install, our only chance when importing it back is that it
9289 self.op.force_variant = True
9291 if self.op.no_install:
9292 self.LogInfo("No-installation mode has no effect during import")
9294 elif self.op.mode == constants.INSTANCE_CREATE:
9295 if self.op.os_type is None:
9296 raise errors.OpPrereqError("No guest OS specified",
9298 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9299 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9300 " installation" % self.op.os_type,
9302 if self.op.disk_template is None:
9303 raise errors.OpPrereqError("No disk template specified",
9306 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9307 # Check handshake to ensure both clusters have the same domain secret
9308 src_handshake = self.op.source_handshake
9309 if not src_handshake:
9310 raise errors.OpPrereqError("Missing source handshake",
9313 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9316 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9319 # Load and check source CA
9320 self.source_x509_ca_pem = self.op.source_x509_ca
9321 if not self.source_x509_ca_pem:
9322 raise errors.OpPrereqError("Missing source X509 CA",
9326 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9328 except OpenSSL.crypto.Error, err:
9329 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9330 (err, ), errors.ECODE_INVAL)
9332 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9333 if errcode is not None:
9334 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9337 self.source_x509_ca = cert
9339 src_instance_name = self.op.source_instance_name
9340 if not src_instance_name:
9341 raise errors.OpPrereqError("Missing source instance name",
9344 self.source_instance_name = \
9345 netutils.GetHostname(name=src_instance_name).name
9348 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9349 self.op.mode, errors.ECODE_INVAL)
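# Editor's note: a hedged, illustrative example of how the three creation
# modes checked above are typically reached from the command line (the
# exact flags may vary between Ganeti versions; hostnames are placeholders):
#
#   gnt-instance add -t drbd -o debian-image -n node1:node2 \
#     --disk 0:size=10G inst1.example.com          # INSTANCE_CREATE
#   gnt-backup import -n node1 --src-node=node3 \
#     --src-dir=/srv/ganeti/export/inst1 inst1     # INSTANCE_IMPORT
#   (a remote import is driven across clusters and additionally carries the
#   handshake and X509 CA that are verified above)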
9351 def ExpandNames(self):
9352 """ExpandNames for CreateInstance.
9354 Figure out the right locks for instance creation.
9357 self.needed_locks = {}
9359 instance_name = self.op.instance_name
9360 # this is just a preventive check, but someone might still add this
9361 # instance in the meantime, and creation will fail at lock-add time
9362 if instance_name in self.cfg.GetInstanceList():
9363 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9364 instance_name, errors.ECODE_EXISTS)
9366 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9368 if self.op.iallocator:
9369 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9370 # specifying a group on instance creation and then selecting nodes from
9371 # that group
9372 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9373 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9374 else:
9375 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9376 nodelist = [self.op.pnode]
9377 if self.op.snode is not None:
9378 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9379 nodelist.append(self.op.snode)
9380 self.needed_locks[locking.LEVEL_NODE] = nodelist
9381 # Lock resources of instance's primary and secondary nodes (copy to
9382 # prevent accidental modification)
9383 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9385 # in case of import lock the source node too
9386 if self.op.mode == constants.INSTANCE_IMPORT:
9387 src_node = self.op.src_node
9388 src_path = self.op.src_path
9390 if src_path is None:
9391 self.op.src_path = src_path = self.op.instance_name
9393 if src_node is None:
9394 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9395 self.op.src_node = None
9396 if os.path.isabs(src_path):
9397 raise errors.OpPrereqError("Importing an instance from a path"
9398 " requires a source node option",
9401 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9402 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9403 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9404 if not os.path.isabs(src_path):
9405 self.op.src_path = src_path = \
9406 utils.PathJoin(constants.EXPORT_DIR, src_path)
9408 def _RunAllocator(self):
9409 """Run the allocator based on input opcode.
9412 #TODO Export network to iallocator so that it chooses a pnode
9413 # in a nodegroup that has the desired network connected to
9414 nics = [n.ToDict() for n in self.nics]
9415 ial = IAllocator(self.cfg, self.rpc,
9416 mode=constants.IALLOCATOR_MODE_ALLOC,
9417 name=self.op.instance_name,
9418 disk_template=self.op.disk_template,
9421 vcpus=self.be_full[constants.BE_VCPUS],
9422 memory=self.be_full[constants.BE_MAXMEM],
9423 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9426 hypervisor=self.op.hypervisor,
9429 ial.Run(self.op.iallocator)
9431 if not ial.success:
9432 raise errors.OpPrereqError("Can't compute nodes using"
9433 " iallocator '%s': %s" %
9434 (self.op.iallocator, ial.info),
9435 errors.ECODE_NORES)
9436 if len(ial.result) != ial.required_nodes:
9437 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9438 " of nodes (%s), required %s" %
9439 (self.op.iallocator, len(ial.result),
9440 ial.required_nodes), errors.ECODE_FAULT)
9441 self.op.pnode = ial.result[0]
9442 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9443 self.op.instance_name, self.op.iallocator,
9444 utils.CommaJoin(ial.result))
9445 if ial.required_nodes == 2:
9446 self.op.snode = ial.result[1]
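# Editor's sketch (assumption, for illustration only): the allocation
# request sent to the iallocator script is built from the fields above
# (name, disk template, disks, nics, memory, vcpus, spindle use,
# hypervisor), and the reply is a list of node names whose length must
# match ial.required_nodes, e.g. for a DRBD instance:
#
#   ial.result == ["node1.example.com", "node2.example.com"]
#
# so result[0] becomes the primary and result[1] the secondary node.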
9448 def BuildHooksEnv(self):
9449 """Build hooks env.
9451 This runs on master, primary and secondary nodes of the instance.
9453 """
9454 env = {
9455 "ADD_MODE": self.op.mode,
9456 }
9457 if self.op.mode == constants.INSTANCE_IMPORT:
9458 env["SRC_NODE"] = self.op.src_node
9459 env["SRC_PATH"] = self.op.src_path
9460 env["SRC_IMAGES"] = self.src_images
9462 env.update(_BuildInstanceHookEnv(
9463 name=self.op.instance_name,
9464 primary_node=self.op.pnode,
9465 secondary_nodes=self.secondaries,
9466 status=self.op.start,
9467 os_type=self.op.os_type,
9468 minmem=self.be_full[constants.BE_MINMEM],
9469 maxmem=self.be_full[constants.BE_MAXMEM],
9470 vcpus=self.be_full[constants.BE_VCPUS],
9471 nics=_NICListToTuple(self, self.nics),
9472 disk_template=self.op.disk_template,
9473 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9474 for d in self.disks],
9477 hypervisor_name=self.op.hypervisor,
9479 ))
9481 return env
9483 def BuildHooksNodes(self):
9484 """Build hooks nodes.
9487 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9488 return nl, nl
9490 def _ReadExportInfo(self):
9491 """Reads the export information from disk.
9493 It will override the opcode source node and path with the actual
9494 information, if these two were not specified before.
9496 @return: the export information
9499 assert self.op.mode == constants.INSTANCE_IMPORT
9501 src_node = self.op.src_node
9502 src_path = self.op.src_path
9504 if src_node is None:
9505 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9506 exp_list = self.rpc.call_export_list(locked_nodes)
9507 found = False
9508 for node in exp_list:
9509 if exp_list[node].fail_msg:
9510 continue
9511 if src_path in exp_list[node].payload:
9512 found = True
9513 self.op.src_node = src_node = node
9514 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9515 src_path)
9516 break
9517 if not found:
9518 raise errors.OpPrereqError("No export found for relative path %s" %
9519 src_path, errors.ECODE_INVAL)
9521 _CheckNodeOnline(self, src_node)
9522 result = self.rpc.call_export_info(src_node, src_path)
9523 result.Raise("No export or invalid export found in dir %s" % src_path)
9525 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9526 if not export_info.has_section(constants.INISECT_EXP):
9527 raise errors.ProgrammerError("Corrupted export config",
9528 errors.ECODE_ENVIRON)
9530 ei_version = export_info.get(constants.INISECT_EXP, "version")
9531 if (int(ei_version) != constants.EXPORT_VERSION):
9532 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9533 (ei_version, constants.EXPORT_VERSION),
9534 errors.ECODE_ENVIRON)
9536 return export_info
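# Editor's sketch (illustration only, not authoritative): the export info
# parsed above is an INI-style file; based on the options read back in
# _ReadExportParams below it looks roughly like:
#
#   [export]
#   version = 0
#   os = debian-image
#   [instance]
#   name = inst1.example.com
#   disk_template = plain
#   disk0_size = 10240
#   nic0_mac = aa:00:00:12:34:56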
9537 def _ReadExportParams(self, einfo):
9538 """Use export parameters as defaults.
9540 In case the opcode doesn't specify (as in override) some instance
9541 parameters, then try to use them from the export information, if
9542 that declares them
9545 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9547 if self.op.disk_template is None:
9548 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9549 self.op.disk_template = einfo.get(constants.INISECT_INS,
9550 "disk_template")
9551 if self.op.disk_template not in constants.DISK_TEMPLATES:
9552 raise errors.OpPrereqError("Disk template specified in configuration"
9553 " file is not one of the allowed values:"
9554 " %s" % " ".join(constants.DISK_TEMPLATES))
9555 else:
9556 raise errors.OpPrereqError("No disk template specified and the export"
9557 " is missing the disk_template information",
9558 errors.ECODE_INVAL)
9560 if not self.op.disks:
9561 disks = []
9562 # TODO: import the disk iv_name too
9563 for idx in range(constants.MAX_DISKS):
9564 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9565 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9566 disks.append({constants.IDISK_SIZE: disk_sz})
9567 self.op.disks = disks
9568 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9569 raise errors.OpPrereqError("No disk info specified and the export"
9570 " is missing the disk information",
9573 if not self.op.nics:
9574 nics = []
9575 for idx in range(constants.MAX_NICS):
9576 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9577 ndict = {}
9578 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9579 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9580 ndict[name] = v
9581 nics.append(ndict)
9582 else:
9583 break
9585 self.op.nics = nics
9586 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9587 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9589 if (self.op.hypervisor is None and
9590 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9591 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9593 if einfo.has_section(constants.INISECT_HYP):
9594 # use the export parameters but do not override the ones
9595 # specified by the user
9596 for name, value in einfo.items(constants.INISECT_HYP):
9597 if name not in self.op.hvparams:
9598 self.op.hvparams[name] = value
9600 if einfo.has_section(constants.INISECT_BEP):
9601 # use the parameters, without overriding
9602 for name, value in einfo.items(constants.INISECT_BEP):
9603 if name not in self.op.beparams:
9604 self.op.beparams[name] = value
9605 # Compatibility for the old "memory" be param
9606 if name == constants.BE_MEMORY:
9607 if constants.BE_MAXMEM not in self.op.beparams:
9608 self.op.beparams[constants.BE_MAXMEM] = value
9609 if constants.BE_MINMEM not in self.op.beparams:
9610 self.op.beparams[constants.BE_MINMEM] = value
9612 # try to read the parameters old style, from the main section
9613 for name in constants.BES_PARAMETERS:
9614 if (name not in self.op.beparams and
9615 einfo.has_option(constants.INISECT_INS, name)):
9616 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9618 if einfo.has_section(constants.INISECT_OSP):
9619 # use the parameters, without overriding
9620 for name, value in einfo.items(constants.INISECT_OSP):
9621 if name not in self.op.osparams:
9622 self.op.osparams[name] = value
9624 def _RevertToDefaults(self, cluster):
9625 """Revert the instance parameters to the default values.
9629 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9630 for name in self.op.hvparams.keys():
9631 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9632 del self.op.hvparams[name]
9634 be_defs = cluster.SimpleFillBE({})
9635 for name in self.op.beparams.keys():
9636 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9637 del self.op.beparams[name]
9639 nic_defs = cluster.SimpleFillNIC({})
9640 for nic in self.op.nics:
9641 for name in constants.NICS_PARAMETERS:
9642 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9643 del nic[name]
9645 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9646 for name in self.op.osparams.keys():
9647 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9648 del self.op.osparams[name]
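# Editor's sketch (illustration only): with identify_defaults set, any
# parameter whose value equals the cluster default is dropped again, so
# e.g. assuming the cluster default is root_path=/dev/xvda1:
#
#   self.op.hvparams == {"root_path": "/dev/xvda1", "kernel_args": "ro"}
#   # after _RevertToDefaults():
#   self.op.hvparams == {"kernel_args": "ro"}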
9650 def _CalculateFileStorageDir(self):
9651 """Calculate final instance file storage dir.
9654 # file storage dir calculation/check
9655 self.instance_file_storage_dir = None
9656 if self.op.disk_template in constants.DTS_FILEBASED:
9657 # build the full file storage dir path
9658 joinargs = []
9660 if self.op.disk_template == constants.DT_SHARED_FILE:
9661 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9662 else:
9663 get_fsd_fn = self.cfg.GetFileStorageDir
9665 cfg_storagedir = get_fsd_fn()
9666 if not cfg_storagedir:
9667 raise errors.OpPrereqError("Cluster file storage dir not defined")
9668 joinargs.append(cfg_storagedir)
9670 if self.op.file_storage_dir is not None:
9671 joinargs.append(self.op.file_storage_dir)
9673 joinargs.append(self.op.instance_name)
9675 # pylint: disable=W0142
9676 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
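# Editor's sketch (illustration only): the final directory is the cluster
# storage dir, the optional per-instance subdirectory from the opcode and
# the instance name joined together, e.g. assuming the usual defaults:
#
#   /srv/ganeti/file-storage/web-servers/inst1.example.com
#   (cfg_storagedir)        (file_storage_dir) (instance_name)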
9678 def CheckPrereq(self): # pylint: disable=R0914
9679 """Check prerequisites.
9682 self._CalculateFileStorageDir()
9684 if self.op.mode == constants.INSTANCE_IMPORT:
9685 export_info = self._ReadExportInfo()
9686 self._ReadExportParams(export_info)
9687 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9688 else:
9689 self._old_instance_name = None
9691 if (not self.cfg.GetVGName() and
9692 self.op.disk_template not in constants.DTS_NOT_LVM):
9693 raise errors.OpPrereqError("Cluster does not support lvm-based"
9694 " instances", errors.ECODE_STATE)
9696 if (self.op.hypervisor is None or
9697 self.op.hypervisor == constants.VALUE_AUTO):
9698 self.op.hypervisor = self.cfg.GetHypervisorType()
9700 cluster = self.cfg.GetClusterInfo()
9701 enabled_hvs = cluster.enabled_hypervisors
9702 if self.op.hypervisor not in enabled_hvs:
9703 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9704 " cluster (%s)" % (self.op.hypervisor,
9705 ",".join(enabled_hvs)),
9708 # Check tag validity
9709 for tag in self.op.tags:
9710 objects.TaggableObject.ValidateTag(tag)
9712 # check hypervisor parameter syntax (locally)
9713 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9714 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9715 self.op.hvparams)
9716 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9717 hv_type.CheckParameterSyntax(filled_hvp)
9718 self.hv_full = filled_hvp
9719 # check that we don't specify global parameters on an instance
9720 _CheckGlobalHvParams(self.op.hvparams)
9722 # fill and remember the beparams dict
9723 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9724 for param, value in self.op.beparams.iteritems():
9725 if value == constants.VALUE_AUTO:
9726 self.op.beparams[param] = default_beparams[param]
9727 objects.UpgradeBeParams(self.op.beparams)
9728 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9729 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9731 # build os parameters
9732 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9734 # now that hvp/bep are in final format, let's reset to defaults,
9736 if self.op.identify_defaults:
9737 self._RevertToDefaults(cluster)
9739 # NIC buildup
9740 self.nics = []
9741 for idx, nic in enumerate(self.op.nics):
9742 nic_mode_req = nic.get(constants.INIC_MODE, None)
9743 nic_mode = nic_mode_req
9744 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9745 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9747 net = nic.get(constants.INIC_NETWORK, None)
9748 link = nic.get(constants.NIC_LINK, None)
9749 ip = nic.get(constants.INIC_IP, None)
9751 if net is None or net.lower() == constants.VALUE_NONE:
9752 net = None
9753 else:
9754 if nic_mode_req is not None or link is not None:
9755 raise errors.OpPrereqError("If network is given, no mode or link"
9756 " is allowed to be passed",
9757 errors.ECODE_INVAL)
9759 # ip validity checks
9760 if ip is None or ip.lower() == constants.VALUE_NONE:
9761 nic_ip = None
9762 elif ip.lower() == constants.VALUE_AUTO:
9763 if not self.op.name_check:
9764 raise errors.OpPrereqError("IP address set to auto but name checks"
9765 " have been skipped",
9766 errors.ECODE_INVAL)
9767 nic_ip = self.hostname1.ip
9768 else:
9769 # We defer pool operations until later, so that the iallocator has
9770 # filled in the instance's node(s)
9771 if ip.lower() == constants.NIC_IP_POOL:
9772 if net is None:
9773 raise errors.OpPrereqError("if ip=pool, parameter network"
9774 " must be passed too",
9775 errors.ECODE_INVAL)
9777 elif not netutils.IPAddress.IsValid(ip):
9778 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9779 errors.ECODE_INVAL)
9781 nic_ip = ip
9783 # TODO: check the ip address for uniqueness
9784 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9785 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9788 # MAC address verification
9789 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9790 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9791 mac = utils.NormalizeAndValidateMac(mac)
9793 try:
9794 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9795 except errors.ReservationError:
9796 raise errors.OpPrereqError("MAC address %s already in use"
9797 " in cluster" % mac,
9798 errors.ECODE_NOTUNIQUE)
9800 # Build nic parameters
9801 nicparams = {}
9802 if nic_mode_req:
9803 nicparams[constants.NIC_MODE] = nic_mode
9804 if link:
9805 nicparams[constants.NIC_LINK] = link
9807 check_params = cluster.SimpleFillNIC(nicparams)
9808 objects.NIC.CheckParameterSyntax(check_params)
9809 self.nics.append(objects.NIC(mac=mac, ip=nic_ip,
9810 network=net, nicparams=check_params))
9812 # disk checks/pre-build
9813 default_vg = self.cfg.GetVGName()
9814 self.disks = []
9815 for disk in self.op.disks:
9816 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9817 if mode not in constants.DISK_ACCESS_SET:
9818 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9819 mode, errors.ECODE_INVAL)
9820 size = disk.get(constants.IDISK_SIZE, None)
9821 if size is None:
9822 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9823 try:
9824 size = int(size)
9825 except (TypeError, ValueError):
9826 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9827 errors.ECODE_INVAL)
9829 data_vg = disk.get(constants.IDISK_VG, default_vg)
9830 new_disk = {
9831 constants.IDISK_SIZE: size,
9832 constants.IDISK_MODE: mode,
9833 constants.IDISK_VG: data_vg,
9834 }
9835 if constants.IDISK_METAVG in disk:
9836 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9837 if constants.IDISK_ADOPT in disk:
9838 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9839 self.disks.append(new_disk)
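# Editor's sketch (illustration only): a pre-built entry of self.disks for
# a 10 GiB read-write disk on the default VG would look roughly like
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# plus IDISK_METAVG/IDISK_ADOPT when those were given in the opcode.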
9841 if self.op.mode == constants.INSTANCE_IMPORT:
9842 disk_images = []
9843 for idx in range(len(self.disks)):
9844 option = "disk%d_dump" % idx
9845 if export_info.has_option(constants.INISECT_INS, option):
9846 # FIXME: are the old os-es, disk sizes, etc. useful?
9847 export_name = export_info.get(constants.INISECT_INS, option)
9848 image = utils.PathJoin(self.op.src_path, export_name)
9849 disk_images.append(image)
9850 else:
9851 disk_images.append(False)
9853 self.src_images = disk_images
9855 if self.op.instance_name == self._old_instance_name:
9856 for idx, nic in enumerate(self.nics):
9857 if nic.mac == constants.VALUE_AUTO:
9858 nic_mac_ini = "nic%d_mac" % idx
9859 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9861 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9863 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9864 if self.op.ip_check:
9865 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9866 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9867 (self.check_ip, self.op.instance_name),
9868 errors.ECODE_NOTUNIQUE)
9870 #### mac address generation
9871 # By generating here the mac address both the allocator and the hooks get
9872 # the real final mac address rather than the 'auto' or 'generate' value.
9873 # There is a race condition between the generation and the instance object
9874 # creation, which means that we know the mac is valid now, but we're not
9875 # sure it will be when we actually add the instance. If things go bad
9876 # adding the instance will abort because of a duplicate mac, and the
9877 # creation job will fail.
9878 for nic in self.nics:
9879 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9880 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9884 if self.op.iallocator is not None:
9885 self._RunAllocator()
9887 # Release all unneeded node locks
9888 _ReleaseLocks(self, locking.LEVEL_NODE,
9889 keep=filter(None, [self.op.pnode, self.op.snode,
9890 self.op.src_node]))
9891 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9892 keep=filter(None, [self.op.pnode, self.op.snode,
9893 self.op.src_node]))
9895 #### node related checks
9897 # check primary node
9898 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9899 assert self.pnode is not None, \
9900 "Cannot retrieve locked node %s" % self.op.pnode
9902 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9903 pnode.name, errors.ECODE_STATE)
9905 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9906 pnode.name, errors.ECODE_STATE)
9907 if not pnode.vm_capable:
9908 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9909 " '%s'" % pnode.name, errors.ECODE_STATE)
9911 self.secondaries = []
9913 # Fill in any IPs from IP pools. This must happen here, because we need to
9914 # know the nic's primary node, as specified by the iallocator
9915 for idx, nic in enumerate(self.nics):
9916 net = nic.network
9917 if net is not None:
9918 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
9919 if netparams is None:
9920 raise errors.OpPrereqError("No netparams found for network"
9921 " %s. Propably not connected to"
9922 " node's %s nodegroup" %
9923 (net, self.pnode.name),
9925 self.LogInfo("NIC/%d inherits netparams %s" %
9926 (idx, netparams.values()))
9927 nic.nicparams = dict(netparams)
9928 if nic.ip is not None:
9929 filled_params = cluster.SimpleFillNIC(nic.nicparams)
9930 if nic.ip.lower() == constants.NIC_IP_POOL:
9931 try:
9932 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
9933 except errors.ReservationError:
9934 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
9935 " from the address pool" % idx,
9936 errors.ECODE_STATE)
9937 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
9938 else:
9939 try:
9940 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
9941 except errors.ReservationError:
9942 raise errors.OpPrereqError("IP address %s already in use"
9943 " or does not belong to network %s" %
9944 (nic.ip, net),
9945 errors.ECODE_NOTUNIQUE)
9946 else:
9947 # net is None, ip None or given
9948 if self.op.conflicts_check:
9949 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
9952 # mirror node verification
9953 if self.op.disk_template in constants.DTS_INT_MIRROR:
9954 if self.op.snode == pnode.name:
9955 raise errors.OpPrereqError("The secondary node cannot be the"
9956 " primary node", errors.ECODE_INVAL)
9957 _CheckNodeOnline(self, self.op.snode)
9958 _CheckNodeNotDrained(self, self.op.snode)
9959 _CheckNodeVmCapable(self, self.op.snode)
9960 self.secondaries.append(self.op.snode)
9962 snode = self.cfg.GetNodeInfo(self.op.snode)
9963 if pnode.group != snode.group:
9964 self.LogWarning("The primary and secondary nodes are in two"
9965 " different node groups; the disk parameters"
9966 " from the first disk's node group will be"
9969 nodenames = [pnode.name] + self.secondaries
9971 # Verify instance specs
9972 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
9973 ispec = {
9974 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9975 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9976 constants.ISPEC_DISK_COUNT: len(self.disks),
9977 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9978 constants.ISPEC_NIC_COUNT: len(self.nics),
9979 constants.ISPEC_SPINDLE_USE: spindle_use,
9980 }
9982 group_info = self.cfg.GetNodeGroup(pnode.group)
9983 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9984 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9985 if not self.op.ignore_ipolicy and res:
9986 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9987 " policy: %s") % (pnode.group,
9988 utils.CommaJoin(res)),
9989 errors.ECODE_INVAL)
9991 if not self.adopt_disks:
9992 if self.op.disk_template == constants.DT_RBD:
9993 # _CheckRADOSFreeSpace() is just a placeholder.
9994 # Any function that checks prerequisites can be placed here.
9995 # Check if there is enough space on the RADOS cluster.
9996 _CheckRADOSFreeSpace()
9998 # Check lv size requirements, if not adopting
9999 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10000 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10002 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10003 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10004 disk[constants.IDISK_ADOPT])
10005 for disk in self.disks])
10006 if len(all_lvs) != len(self.disks):
10007 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10008 errors.ECODE_INVAL)
10009 for lv_name in all_lvs:
10011 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10012 # to ReserveLV uses the same syntax
10013 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10014 except errors.ReservationError:
10015 raise errors.OpPrereqError("LV named %s used by another instance" %
10016 lv_name, errors.ECODE_NOTUNIQUE)
10018 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10019 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10021 node_lvs = self.rpc.call_lv_list([pnode.name],
10022 vg_names.payload.keys())[pnode.name]
10023 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10024 node_lvs = node_lvs.payload
10026 delta = all_lvs.difference(node_lvs.keys())
10028 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10029 utils.CommaJoin(delta),
10030 errors.ECODE_INVAL)
10031 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10033 raise errors.OpPrereqError("Online logical volumes found, cannot"
10034 " adopt: %s" % utils.CommaJoin(online_lvs),
10035 errors.ECODE_STATE)
10036 # update the size of disk based on what is found
10037 for dsk in self.disks:
10038 dsk[constants.IDISK_SIZE] = \
10039 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10040 dsk[constants.IDISK_ADOPT])][0]))
10042 elif self.op.disk_template == constants.DT_BLOCK:
10043 # Normalize and de-duplicate device paths
10044 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10045 for disk in self.disks])
10046 if len(all_disks) != len(self.disks):
10047 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10048 errors.ECODE_INVAL)
10049 baddisks = [d for d in all_disks
10050 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10052 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10053 " cannot be adopted" %
10054 (", ".join(baddisks),
10055 constants.ADOPTABLE_BLOCKDEV_ROOT),
10056 errors.ECODE_INVAL)
10058 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10059 list(all_disks))[pnode.name]
10060 node_disks.Raise("Cannot get block device information from node %s" %
10062 node_disks = node_disks.payload
10063 delta = all_disks.difference(node_disks.keys())
10065 raise errors.OpPrereqError("Missing block device(s): %s" %
10066 utils.CommaJoin(delta),
10067 errors.ECODE_INVAL)
10068 for dsk in self.disks:
10069 dsk[constants.IDISK_SIZE] = \
10070 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
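# Editor's sketch (illustration only): for adoption the opcode names
# existing storage instead of sizes, e.g.
#   plain:  --disk 0:adopt=lv_inst1      -> all_lvs == set(["xenvg/lv_inst1"])
#   block:  --disk 0:adopt=/dev/disk/xyz -> all_disks == set(["/dev/disk/xyz"])
# and the disk sizes are then overwritten with what the primary node reports.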
10072 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10074 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10075 # check OS parameters (remotely)
10076 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10078 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10080 # memory check on primary node
10081 #TODO(dynmem): use MINMEM for checking
10082 if self.op.start:
10083 _CheckNodeFreeMemory(self, self.pnode.name,
10084 "creating instance %s" % self.op.instance_name,
10085 self.be_full[constants.BE_MAXMEM],
10086 self.op.hypervisor)
10088 self.dry_run_result = list(nodenames)
10090 def Exec(self, feedback_fn):
10091 """Create and add the instance to the cluster.
10094 instance = self.op.instance_name
10095 pnode_name = self.pnode.name
10097 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10098 self.owned_locks(locking.LEVEL_NODE)), \
10099 "Node locks differ from node resource locks"
10101 ht_kind = self.op.hypervisor
10102 if ht_kind in constants.HTS_REQ_PORT:
10103 network_port = self.cfg.AllocatePort()
10105 network_port = None
10107 # This is ugly but we got a chicken-egg problem here
10108 # We can only take the group disk parameters, as the instance
10109 # has no disks yet (we are generating them right here).
10110 node = self.cfg.GetNodeInfo(pnode_name)
10111 nodegroup = self.cfg.GetNodeGroup(node.group)
10112 disks = _GenerateDiskTemplate(self,
10113 self.op.disk_template,
10114 instance, pnode_name,
10117 self.instance_file_storage_dir,
10118 self.op.file_driver,
10121 self.cfg.GetGroupDiskParams(nodegroup))
10123 iobj = objects.Instance(name=instance, os=self.op.os_type,
10124 primary_node=pnode_name,
10125 nics=self.nics, disks=disks,
10126 disk_template=self.op.disk_template,
10127 admin_state=constants.ADMINST_DOWN,
10128 network_port=network_port,
10129 beparams=self.op.beparams,
10130 hvparams=self.op.hvparams,
10131 hypervisor=self.op.hypervisor,
10132 osparams=self.op.osparams,
10133 )
10135 if self.op.tags:
10136 for tag in self.op.tags:
10137 iobj.AddTag(tag)
10139 if self.adopt_disks:
10140 if self.op.disk_template == constants.DT_PLAIN:
10141 # rename LVs to the newly-generated names; we need to construct
10142 # 'fake' LV disks with the old data, plus the new unique_id
10143 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10144 rename_to = []
10145 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10146 rename_to.append(t_dsk.logical_id)
10147 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10148 self.cfg.SetDiskID(t_dsk, pnode_name)
10149 result = self.rpc.call_blockdev_rename(pnode_name,
10150 zip(tmp_disks, rename_to))
10151 result.Raise("Failed to rename adoped LVs")
10153 feedback_fn("* creating instance disks...")
10155 _CreateDisks(self, iobj)
10156 except errors.OpExecError:
10157 self.LogWarning("Device creation failed, reverting...")
10159 _RemoveDisks(self, iobj)
10161 self.cfg.ReleaseDRBDMinors(instance)
10164 feedback_fn("adding instance %s to cluster config" % instance)
10166 self.cfg.AddInstance(iobj, self.proc.GetECId())
10168 # Declare that we don't want to remove the instance lock anymore, as we've
10169 # added the instance to the config
10170 del self.remove_locks[locking.LEVEL_INSTANCE]
10172 if self.op.mode == constants.INSTANCE_IMPORT:
10173 # Release unused nodes
10174 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10175 else:
10176 # Release all nodes
10177 _ReleaseLocks(self, locking.LEVEL_NODE)
10179 disk_abort = False
10180 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10181 feedback_fn("* wiping instance disks...")
10182 try:
10183 _WipeDisks(self, iobj)
10184 except errors.OpExecError, err:
10185 logging.exception("Wiping disks failed")
10186 self.LogWarning("Wiping instance disks failed (%s)", err)
10187 disk_abort = True
10189 if disk_abort:
10190 # Something is already wrong with the disks, don't do anything else
10191 pass
10192 elif self.op.wait_for_sync:
10193 disk_abort = not _WaitForSync(self, iobj)
10194 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10195 # make sure the disks are not degraded (still sync-ing is ok)
10196 feedback_fn("* checking mirrors status")
10197 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10198 else:
10199 disk_abort = False
10201 if disk_abort:
10202 _RemoveDisks(self, iobj)
10203 self.cfg.RemoveInstance(iobj.name)
10204 # Make sure the instance lock gets removed
10205 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10206 raise errors.OpExecError("There are some degraded disks for"
10207 " this instance")
10209 # Release all node resource locks
10210 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10212 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10213 # we need to set the disks ID to the primary node, since the
10214 # preceding code might or might have not done it, depending on
10215 # disk template and other options
10216 for disk in iobj.disks:
10217 self.cfg.SetDiskID(disk, pnode_name)
10218 if self.op.mode == constants.INSTANCE_CREATE:
10219 if not self.op.no_install:
10220 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10221 not self.op.wait_for_sync)
10223 feedback_fn("* pausing disk sync to install instance OS")
10224 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10227 for idx, success in enumerate(result.payload):
10228 if not success:
10229 logging.warn("pause-sync of instance %s for disk %d failed",
10230 instance, idx)
10232 feedback_fn("* running the instance OS create scripts...")
10233 # FIXME: pass debug option from opcode to backend
10234 os_add_result = \
10235 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10236 self.op.debug_level)
10237 if pause_sync:
10238 feedback_fn("* resuming disk sync")
10239 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10242 for idx, success in enumerate(result.payload):
10243 if not success:
10244 logging.warn("resume-sync of instance %s for disk %d failed",
10245 instance, idx)
10247 os_add_result.Raise("Could not add os for instance %s"
10248 " on node %s" % (instance, pnode_name))
10251 if self.op.mode == constants.INSTANCE_IMPORT:
10252 feedback_fn("* running the instance OS import scripts...")
10254 transfers = []
10256 for idx, image in enumerate(self.src_images):
10257 if not image:
10258 continue
10260 # FIXME: pass debug option from opcode to backend
10261 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10262 constants.IEIO_FILE, (image, ),
10263 constants.IEIO_SCRIPT,
10264 (iobj.disks[idx], idx),
10265 None)
10266 transfers.append(dt)
10268 import_result = \
10269 masterd.instance.TransferInstanceData(self, feedback_fn,
10270 self.op.src_node, pnode_name,
10271 self.pnode.secondary_ip,
10272 iobj, transfers)
10273 if not compat.all(import_result):
10274 self.LogWarning("Some disks for instance %s on node %s were not"
10275 " imported successfully" % (instance, pnode_name))
10277 rename_from = self._old_instance_name
10279 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10280 feedback_fn("* preparing remote import...")
10281 # The source cluster will stop the instance before attempting to make
10282 # a connection. In some cases stopping an instance can take a long
10283 # time, hence the shutdown timeout is added to the connection
10284 # timeout
10285 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10286 self.op.source_shutdown_timeout)
10287 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10289 assert iobj.primary_node == self.pnode.name
10290 disk_results = \
10291 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10292 self.source_x509_ca,
10293 self._cds, timeouts)
10294 if not compat.all(disk_results):
10295 # TODO: Should the instance still be started, even if some disks
10296 # failed to import (valid for local imports, too)?
10297 self.LogWarning("Some disks for instance %s on node %s were not"
10298 " imported successfully" % (instance, pnode_name))
10300 rename_from = self.source_instance_name
10302 else:
10303 # also checked in the prereq part
10304 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10305 % self.op.mode)
10307 # Run rename script on newly imported instance
10308 assert iobj.name == instance
10309 feedback_fn("Running rename script for %s" % instance)
10310 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10311 rename_from,
10312 self.op.debug_level)
10313 if result.fail_msg:
10314 self.LogWarning("Failed to run rename script for %s on node"
10315 " %s: %s" % (instance, pnode_name, result.fail_msg))
10317 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10319 if self.op.start:
10320 iobj.admin_state = constants.ADMINST_UP
10321 self.cfg.Update(iobj, feedback_fn)
10322 logging.info("Starting instance %s on node %s", instance, pnode_name)
10323 feedback_fn("* starting instance...")
10324 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10325 False)
10326 result.Raise("Could not start instance")
10328 return list(iobj.all_nodes)
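# --- Editor's illustrative sketch, not part of the original module ---
# LUInstanceCreate is normally reached by submitting an OpInstanceCreate
# opcode through the job queue. The helper below is only a hedged example
# of what such an opcode could look like for a simple plain/LVM instance;
# the exact parameter set accepted depends on the Ganeti version in use.
def _ExampleInstanceCreateOpcode():
  """Build a sample OpInstanceCreate opcode (illustration only)."""
  return opcodes.OpInstanceCreate(instance_name="inst1.example.com",
                                  mode=constants.INSTANCE_CREATE,
                                  os_type="debian-image",
                                  disk_template=constants.DT_PLAIN,
                                  disks=[{constants.IDISK_SIZE: 10240}],
                                  nics=[{}],
                                  pnode="node1.example.com",
                                  start=True)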
10331 def _CheckRADOSFreeSpace():
10332 """Compute disk size requirements inside the RADOS cluster.
10335 # For the RADOS cluster we assume there is always enough space.
10339 class LUInstanceConsole(NoHooksLU):
10340 """Connect to an instance's console.
10342 This is somewhat special in that it returns the command line that
10343 you need to run on the master node in order to connect to the
10344 console.
10349 def ExpandNames(self):
10350 self.share_locks = _ShareAll()
10351 self._ExpandAndLockInstance()
10353 def CheckPrereq(self):
10354 """Check prerequisites.
10356 This checks that the instance is in the cluster.
10359 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10360 assert self.instance is not None, \
10361 "Cannot retrieve locked instance %s" % self.op.instance_name
10362 _CheckNodeOnline(self, self.instance.primary_node)
10364 def Exec(self, feedback_fn):
10365 """Connect to the console of an instance
10368 instance = self.instance
10369 node = instance.primary_node
10371 node_insts = self.rpc.call_instance_list([node],
10372 [instance.hypervisor])[node]
10373 node_insts.Raise("Can't get node information from %s" % node)
10375 if instance.name not in node_insts.payload:
10376 if instance.admin_state == constants.ADMINST_UP:
10377 state = constants.INSTST_ERRORDOWN
10378 elif instance.admin_state == constants.ADMINST_DOWN:
10379 state = constants.INSTST_ADMINDOWN
10381 state = constants.INSTST_ADMINOFFLINE
10382 raise errors.OpExecError("Instance %s is not running (state %s)" %
10383 (instance.name, state))
10385 logging.debug("Connecting to console of %s on %s", instance.name, node)
10387 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10390 def _GetInstanceConsole(cluster, instance):
10391 """Returns console information for an instance.
10393 @type cluster: L{objects.Cluster}
10394 @type instance: L{objects.Instance}
10398 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10399 # beparams and hvparams are passed separately, to avoid editing the
10400 # instance and then saving the defaults in the instance itself.
10401 hvparams = cluster.FillHV(instance)
10402 beparams = cluster.FillBE(instance)
10403 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10405 assert console.instance == instance.name
10406 assert console.Validate()
10408 return console.ToDict()
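# Editor's sketch (illustration only): the dict returned above mirrors
# objects.InstanceConsole; for an SSH-based hypervisor console it is
# roughly of the form
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root",
#    "command": ["xm", "console", "inst1.example.com"]}
# with the exact keys depending on the hypervisor and Ganeti version.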
10411 class LUInstanceReplaceDisks(LogicalUnit):
10412 """Replace the disks of an instance.
10415 HPATH = "mirrors-replace"
10416 HTYPE = constants.HTYPE_INSTANCE
10417 REQ_BGL = False
10419 def CheckArguments(self):
10420 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10421 self.op.iallocator)
10423 def ExpandNames(self):
10424 self._ExpandAndLockInstance()
10426 assert locking.LEVEL_NODE not in self.needed_locks
10427 assert locking.LEVEL_NODE_RES not in self.needed_locks
10428 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10430 assert self.op.iallocator is None or self.op.remote_node is None, \
10431 "Conflicting options"
10433 if self.op.remote_node is not None:
10434 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10436 # Warning: do not remove the locking of the new secondary here
10437 # unless DRBD8.AddChildren is changed to work in parallel;
10438 # currently it doesn't since parallel invocations of
10439 # FindUnusedMinor will conflict
10440 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10441 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10442 else:
10443 self.needed_locks[locking.LEVEL_NODE] = []
10444 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10446 if self.op.iallocator is not None:
10447 # iallocator will select a new node in the same group
10448 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10450 self.needed_locks[locking.LEVEL_NODE_RES] = []
10452 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10453 self.op.iallocator, self.op.remote_node,
10454 self.op.disks, False, self.op.early_release,
10455 self.op.ignore_ipolicy)
10457 self.tasklets = [self.replacer]
10459 def DeclareLocks(self, level):
10460 if level == locking.LEVEL_NODEGROUP:
10461 assert self.op.remote_node is None
10462 assert self.op.iallocator is not None
10463 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10465 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10466 # Lock all groups used by instance optimistically; this requires going
10467 # via the node before it's locked, requiring verification later on
10468 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10469 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10471 elif level == locking.LEVEL_NODE:
10472 if self.op.iallocator is not None:
10473 assert self.op.remote_node is None
10474 assert not self.needed_locks[locking.LEVEL_NODE]
10476 # Lock member nodes of all locked groups
10477 self.needed_locks[locking.LEVEL_NODE] = [node_name
10478 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10479 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10480 else:
10481 self._LockInstancesNodes()
10482 elif level == locking.LEVEL_NODE_RES:
10484 self.needed_locks[locking.LEVEL_NODE_RES] = \
10485 self.needed_locks[locking.LEVEL_NODE]
10487 def BuildHooksEnv(self):
10488 """Build hooks env.
10490 This runs on the master, the primary and all the secondaries.
10493 instance = self.replacer.instance
10495 "MODE": self.op.mode,
10496 "NEW_SECONDARY": self.op.remote_node,
10497 "OLD_SECONDARY": instance.secondary_nodes[0],
10499 env.update(_BuildInstanceHookEnvByObject(self, instance))
10502 def BuildHooksNodes(self):
10503 """Build hooks nodes.
10506 instance = self.replacer.instance
10507 nl = [
10508 self.cfg.GetMasterNode(),
10509 instance.primary_node,
10510 ]
10511 if self.op.remote_node is not None:
10512 nl.append(self.op.remote_node)
10514 return nl, nl
10515 def CheckPrereq(self):
10516 """Check prerequisites.
10519 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10520 self.op.iallocator is None)
10522 # Verify if node group locks are still correct
10523 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10524 if owned_groups:
10525 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10527 return LogicalUnit.CheckPrereq(self)
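# --- Editor's illustrative sketch, not part of the original module ---
# A hedged example of the opcode that drives this LU; field names follow
# opcodes.py but the accepted set may differ between Ganeti versions.
def _ExampleReplaceDisksOpcode():
  """Build a sample OpInstanceReplaceDisks opcode (illustration only)."""
  return opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
                                        mode=constants.REPLACE_DISK_CHG,
                                        iallocator="hail",
                                        early_release=True)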
10530 class TLReplaceDisks(Tasklet):
10531 """Replaces disks for an instance.
10533 Note: Locking is not within the scope of this class.
10536 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10537 disks, delay_iallocator, early_release, ignore_ipolicy):
10538 """Initializes this class.
10541 Tasklet.__init__(self, lu)
10544 self.instance_name = instance_name
10545 self.mode = mode
10546 self.iallocator_name = iallocator_name
10547 self.remote_node = remote_node
10548 self.disks = disks
10549 self.delay_iallocator = delay_iallocator
10550 self.early_release = early_release
10551 self.ignore_ipolicy = ignore_ipolicy
10554 self.instance = None
10555 self.new_node = None
10556 self.target_node = None
10557 self.other_node = None
10558 self.remote_node_info = None
10559 self.node_secondary_ip = None
10561 @staticmethod
10562 def CheckArguments(mode, remote_node, iallocator):
10563 """Helper function for users of this class.
10566 # check for valid parameter combination
10567 if mode == constants.REPLACE_DISK_CHG:
10568 if remote_node is None and iallocator is None:
10569 raise errors.OpPrereqError("When changing the secondary either an"
10570 " iallocator script must be used or the"
10571 " new node given", errors.ECODE_INVAL)
10573 if remote_node is not None and iallocator is not None:
10574 raise errors.OpPrereqError("Give either the iallocator or the new"
10575 " secondary, not both", errors.ECODE_INVAL)
10577 elif remote_node is not None or iallocator is not None:
10578 # Not replacing the secondary
10579 raise errors.OpPrereqError("The iallocator and new node options can"
10580 " only be used when changing the"
10581 " secondary node", errors.ECODE_INVAL)
10583 @staticmethod
10584 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10585 """Compute a new secondary node using an IAllocator.
10588 ial = IAllocator(lu.cfg, lu.rpc,
10589 mode=constants.IALLOCATOR_MODE_RELOC,
10590 name=instance_name,
10591 relocate_from=list(relocate_from))
10593 ial.Run(iallocator_name)
10595 if not ial.success:
10596 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10597 " %s" % (iallocator_name, ial.info),
10598 errors.ECODE_NORES)
10600 if len(ial.result) != ial.required_nodes:
10601 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10602 " of nodes (%s), required %s" %
10603 (iallocator_name,
10604 len(ial.result), ial.required_nodes),
10605 errors.ECODE_FAULT)
10607 remote_node_name = ial.result[0]
10609 lu.LogInfo("Selected new secondary for instance '%s': %s",
10610 instance_name, remote_node_name)
10612 return remote_node_name
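# Editor's sketch (illustration only): in relocation mode the iallocator
# is asked to pick one replacement secondary, so for an instance currently
# on ["node1", "node2"] a valid reply is e.g.
#   ial.required_nodes == 1 and ial.result == ["node3.example.com"]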
10614 def _FindFaultyDisks(self, node_name):
10615 """Wrapper for L{_FindFaultyInstanceDisks}.
10618 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10619 node_name, True)
10621 def _CheckDisksActivated(self, instance):
10622 """Checks if the instance disks are activated.
10624 @param instance: The instance to check disks
10625 @return: True if they are activated, False otherwise
10628 nodes = instance.all_nodes
10630 for idx, dev in enumerate(instance.disks):
10631 for node in nodes:
10632 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10633 self.cfg.SetDiskID(dev, node)
10635 result = _BlockdevFind(self, node, dev, instance)
10637 if result.offline_node:
10638 continue
10639 elif result.fail_msg or not result.payload:
10640 return False
10642 return True
10644 def CheckPrereq(self):
10645 """Check prerequisites.
10647 This checks that the instance is in the cluster.
10650 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10651 assert instance is not None, \
10652 "Cannot retrieve locked instance %s" % self.instance_name
10654 if instance.disk_template != constants.DT_DRBD8:
10655 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10656 " instances", errors.ECODE_INVAL)
10658 if len(instance.secondary_nodes) != 1:
10659 raise errors.OpPrereqError("The instance has a strange layout,"
10660 " expected one secondary but found %d" %
10661 len(instance.secondary_nodes),
10662 errors.ECODE_FAULT)
10664 if not self.delay_iallocator:
10665 self._CheckPrereq2()
10667 def _CheckPrereq2(self):
10668 """Check prerequisites, second part.
10670 This function should always be part of CheckPrereq. It was separated and is
10671 now called from Exec because during node evacuation iallocator was only
10672 called with an unmodified cluster model, not taking planned changes into
10676 instance = self.instance
10677 secondary_node = instance.secondary_nodes[0]
10679 if self.iallocator_name is None:
10680 remote_node = self.remote_node
10681 else:
10682 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10683 instance.name, instance.secondary_nodes)
10685 if remote_node is None:
10686 self.remote_node_info = None
10688 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10689 "Remote node '%s' is not locked" % remote_node
10691 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10692 assert self.remote_node_info is not None, \
10693 "Cannot retrieve locked node %s" % remote_node
10695 if remote_node == self.instance.primary_node:
10696 raise errors.OpPrereqError("The specified node is the primary node of"
10697 " the instance", errors.ECODE_INVAL)
10699 if remote_node == secondary_node:
10700 raise errors.OpPrereqError("The specified node is already the"
10701 " secondary node of the instance",
10702 errors.ECODE_INVAL)
10704 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10705 constants.REPLACE_DISK_CHG):
10706 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10707 errors.ECODE_INVAL)
10709 if self.mode == constants.REPLACE_DISK_AUTO:
10710 if not self._CheckDisksActivated(instance):
10711 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10712 " first" % self.instance_name,
10713 errors.ECODE_STATE)
10714 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10715 faulty_secondary = self._FindFaultyDisks(secondary_node)
10717 if faulty_primary and faulty_secondary:
10718 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10719 " one node and can not be repaired"
10720 " automatically" % self.instance_name,
10721 errors.ECODE_STATE)
10723 if faulty_primary:
10724 self.disks = faulty_primary
10725 self.target_node = instance.primary_node
10726 self.other_node = secondary_node
10727 check_nodes = [self.target_node, self.other_node]
10728 elif faulty_secondary:
10729 self.disks = faulty_secondary
10730 self.target_node = secondary_node
10731 self.other_node = instance.primary_node
10732 check_nodes = [self.target_node, self.other_node]
10733 else:
10734 self.disks = []
10735 check_nodes = []
10737 else:
10738 # Non-automatic modes
10739 if self.mode == constants.REPLACE_DISK_PRI:
10740 self.target_node = instance.primary_node
10741 self.other_node = secondary_node
10742 check_nodes = [self.target_node, self.other_node]
10744 elif self.mode == constants.REPLACE_DISK_SEC:
10745 self.target_node = secondary_node
10746 self.other_node = instance.primary_node
10747 check_nodes = [self.target_node, self.other_node]
10749 elif self.mode == constants.REPLACE_DISK_CHG:
10750 self.new_node = remote_node
10751 self.other_node = instance.primary_node
10752 self.target_node = secondary_node
10753 check_nodes = [self.new_node, self.other_node]
10755 _CheckNodeNotDrained(self.lu, remote_node)
10756 _CheckNodeVmCapable(self.lu, remote_node)
10758 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10759 assert old_node_info is not None
10760 if old_node_info.offline and not self.early_release:
10761 # doesn't make sense to delay the release
10762 self.early_release = True
10763 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10764 " early-release mode", secondary_node)
10767 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10770 # If not specified all disks should be replaced
10772 self.disks = range(len(self.instance.disks))
10774 # TODO: This is ugly, but right now we can't distinguish between internal
10775 # submitted opcode and external one. We should fix that.
10776 if self.remote_node_info:
10777 # We change the node, lets verify it still meets instance policy
10778 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10779 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10780 new_group_info)
10781 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10782 ignore=self.ignore_ipolicy)
10784 for node in check_nodes:
10785 _CheckNodeOnline(self.lu, node)
10787 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10788 self.other_node,
10789 self.target_node]
10790 if node_name is not None)
10792 # Release unneeded node and node resource locks
10793 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10794 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10796 # Release any owned node group
10797 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10798 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10800 # Check whether disks are valid
10801 for disk_idx in self.disks:
10802 instance.FindDisk(disk_idx)
10804 # Get secondary node IP addresses
10805 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10806 in self.cfg.GetMultiNodeInfo(touched_nodes))
10808 def Exec(self, feedback_fn):
10809 """Execute disk replacement.
10811 This dispatches the disk replacement to the appropriate handler.
10814 if self.delay_iallocator:
10815 self._CheckPrereq2()
10817 if __debug__:
10818 # Verify owned locks before starting operation
10819 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10820 assert set(owned_nodes) == set(self.node_secondary_ip), \
10821 ("Incorrect node locks, owning %s, expected %s" %
10822 (owned_nodes, self.node_secondary_ip.keys()))
10823 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10824 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10826 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10827 assert list(owned_instances) == [self.instance_name], \
10828 "Instance '%s' not locked" % self.instance_name
10830 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10831 "Should not own any node group lock at this point"
10834 feedback_fn("No disks need replacement")
10837 feedback_fn("Replacing disk(s) %s for %s" %
10838 (utils.CommaJoin(self.disks), self.instance.name))
10840 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10842 # Activate the instance disks if we're replacing them on a down instance
10843 if activate_disks:
10844 _StartInstanceDisks(self.lu, self.instance, True)
10846 try:
10847 # Should we replace the secondary node?
10848 if self.new_node is not None:
10849 fn = self._ExecDrbd8Secondary
10850 else:
10851 fn = self._ExecDrbd8DiskOnly
10853 result = fn(feedback_fn)
10854 finally:
10855 # Deactivate the instance disks if we're replacing them on a
10856 # down instance
10857 if activate_disks:
10858 _SafeShutdownInstanceDisks(self.lu, self.instance)
10860 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10862 if __debug__:
10863 # Verify owned locks
10864 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10865 nodes = frozenset(self.node_secondary_ip)
10866 assert ((self.early_release and not owned_nodes) or
10867 (not self.early_release and not (set(owned_nodes) - nodes))), \
10868 ("Not owning the correct locks, early_release=%s, owned=%r,"
10869 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10873 def _CheckVolumeGroup(self, nodes):
10874 self.lu.LogInfo("Checking volume groups")
10876 vgname = self.cfg.GetVGName()
10878 # Make sure volume group exists on all involved nodes
10879 results = self.rpc.call_vg_list(nodes)
10881 raise errors.OpExecError("Can't list volume groups on the nodes")
10884 res = results[node]
10885 res.Raise("Error checking node %s" % node)
10886 if vgname not in res.payload:
10887 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10890 def _CheckDisksExistence(self, nodes):
10891 # Check disk existence
10892 for idx, dev in enumerate(self.instance.disks):
10893 if idx not in self.disks:
10894 continue
10896 for node in nodes:
10897 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10898 self.cfg.SetDiskID(dev, node)
10900 result = _BlockdevFind(self, node, dev, self.instance)
10902 msg = result.fail_msg
10903 if msg or not result.payload:
10905 msg = "disk not found"
10906 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10909 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10910 for idx, dev in enumerate(self.instance.disks):
10911 if idx not in self.disks:
10912 continue
10914 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10915 (idx, node_name))
10917 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
10918 on_primary, ldisk=ldisk):
10919 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10920 " replace disks for instance %s" %
10921 (node_name, self.instance.name))
10923 def _CreateNewStorage(self, node_name):
10924 """Create new storage on the primary or secondary node.
10926 This is only used for same-node replaces, not for changing the
10927 secondary node, hence we don't want to modify the existing disk.
10929 """
10931 iv_names = {}
10932 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
10933 for idx, dev in enumerate(disks):
10934 if idx not in self.disks:
10935 continue
10937 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10939 self.cfg.SetDiskID(dev, node_name)
10941 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10942 names = _GenerateUniqueNames(self.lu, lv_names)
10944 (data_disk, meta_disk) = dev.children
10945 vg_data = data_disk.logical_id[0]
10946 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10947 logical_id=(vg_data, names[0]),
10948 params=data_disk.params)
10949 vg_meta = meta_disk.logical_id[0]
10950 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10951 logical_id=(vg_meta, names[1]),
10952 params=meta_disk.params)
10954 new_lvs = [lv_data, lv_meta]
10955 old_lvs = [child.Copy() for child in dev.children]
10956 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10958 # we pass force_create=True to force the LVM creation
10959 for new_lv in new_lvs:
10960 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
10961 _GetInstanceInfoText(self.instance), False)
10963 return iv_names
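# Editor's sketch (illustration only): for a single-disk DRBD instance the
# mapping returned above has one entry per replaced disk, e.g.
#   iv_names == {"disk/0": (drbd_dev, [old_data_lv, old_meta_lv],
#                           [new_data_lv, new_meta_lv])}
# which later drives both the attach of the new LVs and the removal of the
# old ones.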
10965 def _CheckDevices(self, node_name, iv_names):
10966 for name, (dev, _, _) in iv_names.iteritems():
10967 self.cfg.SetDiskID(dev, node_name)
10969 result = _BlockdevFind(self, node_name, dev, self.instance)
10971 msg = result.fail_msg
10972 if msg or not result.payload:
10974 msg = "disk not found"
10975 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10978 if result.payload.is_degraded:
10979 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10981 def _RemoveOldStorage(self, node_name, iv_names):
10982 for name, (_, old_lvs, _) in iv_names.iteritems():
10983 self.lu.LogInfo("Remove logical volumes for %s" % name)
10985 for lv in old_lvs:
10986 self.cfg.SetDiskID(lv, node_name)
10988 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10989 if msg:
10990 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10991 hint="remove unused LVs manually")
10993 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10994 """Replace a disk on the primary or secondary for DRBD 8.
10996 The algorithm for replace is quite complicated:
10998 1. for each disk to be replaced:
11000 1. create new LVs on the target node with unique names
11001 1. detach old LVs from the drbd device
11002 1. rename old LVs to name_replaced.<time_t>
11003 1. rename new LVs to old LVs
11004 1. attach the new LVs (with the old names now) to the drbd device
11006 1. wait for sync across all devices
11008 1. for each modified disk:
11010 1. remove old LVs (which have the name name_replaces.<time_t>)
11012 Failures are not very well handled.
11014 """
11016 steps_total = 6
11017 # Step: check device activation
11018 self.lu.LogStep(1, steps_total, "Check device existence")
11019 self._CheckDisksExistence([self.other_node, self.target_node])
11020 self._CheckVolumeGroup([self.target_node, self.other_node])
11022 # Step: check other node consistency
11023 self.lu.LogStep(2, steps_total, "Check peer consistency")
11024 self._CheckDisksConsistency(self.other_node,
11025 self.other_node == self.instance.primary_node,
11028 # Step: create new storage
11029 self.lu.LogStep(3, steps_total, "Allocate new storage")
11030 iv_names = self._CreateNewStorage(self.target_node)
11032 # Step: for each lv, detach+rename*2+attach
11033 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11034 for dev, old_lvs, new_lvs in iv_names.itervalues():
11035 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11037 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11039 result.Raise("Can't detach drbd from local storage on node"
11040 " %s for device %s" % (self.target_node, dev.iv_name))
11042 #cfg.Update(instance)
11044 # ok, we created the new LVs, so now we know we have the needed
11045 # storage; as such, we proceed on the target node to rename
11046 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11047 # using the assumption that logical_id == physical_id (which in
11048 # turn is the unique_id on that node)
11050 # FIXME(iustin): use a better name for the replaced LVs
11051 temp_suffix = int(time.time())
11052 ren_fn = lambda d, suff: (d.physical_id[0],
11053 d.physical_id[1] + "_replaced-%s" % suff)
11055 # Build the rename list based on what LVs exist on the node
11056 rename_old_to_new = []
11057 for to_ren in old_lvs:
11058 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11059 if not result.fail_msg and result.payload:
11061 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11063 self.lu.LogInfo("Renaming the old LVs on the target node")
11064 result = self.rpc.call_blockdev_rename(self.target_node,
11066 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11068 # Now we rename the new LVs to the old LVs
11069 self.lu.LogInfo("Renaming the new LVs on the target node")
11070 rename_new_to_old = [(new, old.physical_id)
11071 for old, new in zip(old_lvs, new_lvs)]
11072 result = self.rpc.call_blockdev_rename(self.target_node,
11074 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11076 # Intermediate steps of in memory modifications
11077 for old, new in zip(old_lvs, new_lvs):
11078 new.logical_id = old.logical_id
11079 self.cfg.SetDiskID(new, self.target_node)
11081 # We need to modify old_lvs so that removal later removes the
11082 # right LVs, not the newly added ones; note that old_lvs is a
11084 for disk in old_lvs:
11085 disk.logical_id = ren_fn(disk, temp_suffix)
11086 self.cfg.SetDiskID(disk, self.target_node)
11088 # Now that the new lvs have the old name, we can add them to the device
11089 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11090 result = self.rpc.call_blockdev_addchildren(self.target_node,
11091 (dev, self.instance), new_lvs)
11092 msg = result.fail_msg
11094 for new_lv in new_lvs:
11095 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11098 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11099 hint=("cleanup manually the unused logical"
11101 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11103 cstep = itertools.count(5)
11105 if self.early_release:
11106 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11107 self._RemoveOldStorage(self.target_node, iv_names)
11108 # TODO: Check if releasing locks early still makes sense
11109 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11111 # Release all resource locks except those used by the instance
11112 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11113 keep=self.node_secondary_ip.keys())
11115 # Release all node locks while waiting for sync
11116 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11118 # TODO: Can the instance lock be downgraded here? Take the optional disk
11119 # shutdown in the caller into consideration.
11122 # This can fail as the old devices are degraded and _WaitForSync
11123 # does a combined result over all disks, so we don't check its return value
11124 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11125 _WaitForSync(self.lu, self.instance)
11127 # Check all devices manually
11128 self._CheckDevices(self.instance.primary_node, iv_names)
11130 # Step: remove old storage
11131 if not self.early_release:
11132 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11133 self._RemoveOldStorage(self.target_node, iv_names)
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6
    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
                             True, _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size,
                              params={})
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
                                             self.cfg)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
                              anno_new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise
    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node,
                                            (dev, self.instance)).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

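
# Illustrative note (not from the original module): LURepairNodeStorage only
# forwards the SO_FIX_CONSISTENCY storage operation to the node daemon.  A
# minimal opcode for it would look roughly like the following, assuming the
# fields used above (node_name, storage_type, name, ignore_consistency):
#
#   op = opcodes.OpRepairNodeStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_VG,
#                                    name="xenvg", ignore_consistency=False)
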
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # instead of an iallocator
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)

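
# Illustrative note (not part of the original module): with remote_node set,
# the "jobs" value built above is one single-opcode job per evacuated
# instance, e.g.
#   [[OpInstanceReplaceDisks(instance_name="inst1", ...)],
#    [OpInstanceReplaceDisks(instance_name="inst2", ...)]]
# so every instance is processed as its own job.
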
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]

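
# Illustrative sketch (assumed shapes, not from the original module): the
# iallocator result unpacked above is a triple (moved, failed, jobs), where
# "moved" holds (instance_name, target_group, target_nodes) entries, "failed"
# holds (instance_name, reason) pairs and "jobs" is a list of serialized
# opcode lists that OpCode.LoadOpCode can deserialize, roughly:
#   ([("inst1", "group2", ["nodeA", "nodeB"])], [], [[{"OP_ID": "..."}]])
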
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if self.op.absolute:
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE,
                                      constants.DT_RBD):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.delta))
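
  # Worked example (illustrative, not part of the original module): for a
  # 10240 MB disk, absolute=False with amount=2048 yields delta=2048 and
  # target=12288, while absolute=True with amount=20480 yields delta=10240
  # and target=20480; a negative delta is rejected in both branches above.
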
  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
        if instance.admin_state != constants.ADMINST_UP:
          _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name

    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; if None, no private data is added

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]

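
# Example (illustrative, not from the original module): with no private data
# factory the modifications are simply extended with None as fourth element:
#   PrepareContainerMods([(constants.DDM_ADD, -1, {})], None)
#   => [(constants.DDM_ADD, -1, {}, None)]
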
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))

def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)

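
# Example (illustrative, not from the original module): removing an
# instance's second NIC via
#   ApplyContainerMods("NIC", nics, chgdesc,
#                      [(constants.DDM_REMOVE, 1, {}, None)],
#                      None, None, None)
# deletes nics[1] and appends ("NIC/1", "remove") to chgdesc.
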
def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )

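
# Example (illustrative, not from the original module): after inserting a
# disk at index 1, _UpdateIvNames(1, disks[1:]) renames the remaining devices
# to "disk/1", "disk/2", ... so the iv_name sequence stays contiguous.
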
class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
      raise errors.OpPrereqError("Disk size change not possible, use"
                                 " grow-disk", errors.ECODE_INVAL)

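  # Example (illustrative, not part of the original module): a valid disk
  # addition passed through _VerifyDiskModification could look like
  #   (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024,
  #                            constants.IDISK_MODE: constants.DISK_RDWR})
  # whereas a size change on a DDM_MODIFY entry is rejected above.
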
  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If network is given"
                                     " mode or link should not",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " must be passed too",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)

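  # Example (illustrative, not part of the original module): attaching an
  # existing NIC to a network and letting the pool pick its address would be
  # expressed as
  #   (constants.DDM_MODIFY, 0, {constants.INIC_NETWORK: "mynet",
  #                              constants.INIC_IP: constants.NIC_IP_POOL})
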
  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = \
      self._UpgradeDiskNicMods("disk", self.op.disks,
                               opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = \
      self._UpgradeDiskNicMods("NIC", self.op.nics,
                               opcodes.OpInstanceSetParams.TestNicModifications)

    # Check disk modifications
    self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                    self._VerifyDiskModification)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # TODO: Acquire group lock in shared mode (disk parameters)
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        nics.append((nic.ip, nic.mac, mode, link, nic.network))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_net,
                              old_params, cluster, pnode):
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net = params.get(constants.INIC_NETWORK, old_net)
    if new_net is not None:
      netparams = self.cfg.GetGroupNetParams(new_net, pnode)
      if netparams is None:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected." % new_net,
                                   errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net != old_net:
      def get_net_prefix(net):
        if net:
          uuid = self.cfg.LookupNetwork(net)
          if uuid:
            nobj = self.cfg.GetNetwork(uuid)
            return nobj.mac_prefix
        return None

      new_prefix = get_net_prefix(new_net)
      old_prefix = get_net_prefix(old_net)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(self.proc.GetECId())

    # if there is a change in nic-network configuration
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net) != (old_ip, old_net):
      if new_ip:
        if new_net:
          if new_ip.lower() == constants.NIC_IP_POOL:
            try:
              new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP"
                                         " from the address pool",
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
            params[constants.INIC_IP] = new_ip
          elif new_ip != old_ip or new_net != old_net:
            try:
              self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
              self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("IP %s not available in network %s" %
                                         (new_ip, new_net),
                                         errors.ECODE_NOTUNIQUE)
        elif new_ip.lower() == constants.NIC_IP_POOL:
          raise errors.OpPrereqError("ip=pool, but no network found",
                                     errors.ECODE_INVAL)

        if self.op.conflicts_check:
          _CheckForConflictingIp(self, new_ip, pnode)

      # release the old IP if it was reserved in a network
      if old_ip and old_net:
        try:
          self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Release IP %s not contained in network %s",
                          old_ip, old_net)

    # there are no changes in (net, ip) tuple
    elif (old_net is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network.",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        snode_group = self.cfg.GetNodeGroup(snode_info.group)
        ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"
                          " used")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)
    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # been changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []
    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem,
                                       errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)
    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" % instance.name,
                                   errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have memory between %d"
                                   " and %d MB of memory unless --force is"
                                   " given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      if self.op.runtime_mem > current_memory:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name,
                             self.op.runtime_mem - current_memory,
                             instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
12719 def _PrepareNicCreate(_, params, private):
12720 self._PrepareNicModification(params, private, None, None,
12721 {}, cluster, pnode)
12722 return (None, None)
12724 def _PrepareNicMod(_, nic, params, private):
12725 self._PrepareNicModification(params, private, nic.ip, nic.network,
12726 nic.nicparams, cluster, pnode)
12727 return None
12729 def _PrepareNicRemove(_, params, private):
12730 ip = params.ip
12731 net = params.network
12732 if net is not None and ip is not None:
12733 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
12735 # Verify NIC changes (operating on copy)
12736 nics = instance.nics[:]
12737 ApplyContainerMods("NIC", nics, None, self.nicmod,
12738 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
12739 if len(nics) > constants.MAX_NICS:
12740 raise errors.OpPrereqError("Instance has too many network interfaces"
12741 " (%d), cannot add more" % constants.MAX_NICS,
12742 errors.ECODE_STATE)
12744 # Verify disk changes (operating on a copy)
12745 disks = instance.disks[:]
12746 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12747 if len(disks) > constants.MAX_DISKS:
12748 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12749 " more" % constants.MAX_DISKS,
12750 errors.ECODE_STATE)
12752 if self.op.offline is not None:
12753 if self.op.offline:
12754 msg = "can't change to offline"
12756 msg = "can't change to online"
12757 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12759 # Pre-compute NIC changes (necessary to use result in hooks)
12760 self._nic_chgdesc = []
12761 if self.nicmod:
12762 # Operate on copies as this is still in prereq
12763 nics = [nic.Copy() for nic in instance.nics]
12764 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12765 self._CreateNewNic, self._ApplyNicMods, None)
12766 self._new_nics = nics
12767 else:
12768 self._new_nics = None
12770 def _ConvertPlainToDrbd(self, feedback_fn):
12771 """Converts an instance from plain to drbd.
12774 feedback_fn("Converting template to drbd")
12775 instance = self.instance
12776 pnode = instance.primary_node
12777 snode = self.op.remote_node
12779 assert instance.disk_template == constants.DT_PLAIN
12781 # create a fake disk info for _GenerateDiskTemplate
12782 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12783 constants.IDISK_VG: d.logical_id[0]}
12784 for d in instance.disks]
12785 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12786 instance.name, pnode, [snode],
12787 disk_info, None, None, 0, feedback_fn,
12788 self.diskparams)
12789 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
12790 self.diskparams)
12791 info = _GetInstanceInfoText(instance)
12792 feedback_fn("Creating additional volumes...")
12793 # first, create the missing data and meta devices
12794 for disk in anno_disks:
12795 # unfortunately this is... not too nice
12796 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12797 info, True)
12798 for child in disk.children:
12799 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12800 # at this stage, all new LVs have been created, we can rename the
12802 feedback_fn("Renaming original volumes...")
12803 rename_list = [(o, n.children[0].logical_id)
12804 for (o, n) in zip(instance.disks, new_disks)]
12805 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12806 result.Raise("Failed to rename original LVs")
12808 feedback_fn("Initializing DRBD devices...")
12809 # all child devices are in place, we can now create the DRBD devices
12810 for disk in anno_disks:
12811 for node in [pnode, snode]:
12812 f_create = node == pnode
12813 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12815 # at this point, the instance has been modified
12816 instance.disk_template = constants.DT_DRBD8
12817 instance.disks = new_disks
12818 self.cfg.Update(instance, feedback_fn)
12820 # Release node locks while waiting for sync
12821 _ReleaseLocks(self, locking.LEVEL_NODE)
12823 # disks are created, waiting for sync
12824 disk_abort = not _WaitForSync(self, instance,
12825 oneshot=not self.op.wait_for_sync)
12826 if disk_abort:
12827 raise errors.OpExecError("There are some degraded disks for"
12828 " this instance, please cleanup manually")
12830 # Node resource locks will be released by caller
12832 def _ConvertDrbdToPlain(self, feedback_fn):
12833 """Converts an instance from drbd to plain.
12836 instance = self.instance
12838 assert len(instance.secondary_nodes) == 1
12839 assert instance.disk_template == constants.DT_DRBD8
12841 pnode = instance.primary_node
12842 snode = instance.secondary_nodes[0]
12843 feedback_fn("Converting template to plain")
12845 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
12846 new_disks = [d.children[0] for d in instance.disks]
12848 # copy over size and mode
12849 for parent, child in zip(old_disks, new_disks):
12850 child.size = parent.size
12851 child.mode = parent.mode
12853 # this is a DRBD disk, return its port to the pool
12854 # NOTE: this must be done right before the call to cfg.Update!
12855 for disk in old_disks:
12856 tcp_port = disk.logical_id[2]
12857 self.cfg.AddTcpUdpPort(tcp_port)
12859 # update instance structure
12860 instance.disks = new_disks
12861 instance.disk_template = constants.DT_PLAIN
12862 self.cfg.Update(instance, feedback_fn)
12864 # Release locks in case removing disks takes a while
12865 _ReleaseLocks(self, locking.LEVEL_NODE)
12867 feedback_fn("Removing volumes on the secondary node...")
12868 for disk in old_disks:
12869 self.cfg.SetDiskID(disk, snode)
12870 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12871 if msg:
12872 self.LogWarning("Could not remove block device %s on node %s,"
12873 " continuing anyway: %s", disk.iv_name, snode, msg)
12875 feedback_fn("Removing unneeded volumes on the primary node...")
12876 for idx, disk in enumerate(old_disks):
12877 meta = disk.children[1]
12878 self.cfg.SetDiskID(meta, pnode)
12879 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12880 if msg:
12881 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12882 " continuing anyway: %s", idx, pnode, msg)
12884 def _CreateNewDisk(self, idx, params, _):
12885 """Creates a new disk.
12888 instance = self.instance
12891 if instance.disk_template in constants.DTS_FILEBASED:
12892 (file_driver, file_path) = instance.disks[0].logical_id
12893 file_path = os.path.dirname(file_path)
12894 else:
12895 file_driver = file_path = None
12897 disk = \
12898 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12899 instance.primary_node, instance.secondary_nodes,
12900 [params], file_path, file_driver, idx,
12901 self.Log, self.diskparams)[0]
12903 info = _GetInstanceInfoText(instance)
12905 logging.info("Creating volume %s for instance %s",
12906 disk.iv_name, instance.name)
12907 # Note: this needs to be kept in sync with _CreateDisks
12909 for node in instance.all_nodes:
12910 f_create = (node == instance.primary_node)
12911 try:
12912 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12913 except errors.OpExecError, err:
12914 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12915 disk.iv_name, disk, node, err)
12917 return (disk, [
12918 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12919 ])
12921 @staticmethod
12922 def _ModifyDisk(idx, disk, params, _):
12923 """Modifies a disk.
12926 disk.mode = params[constants.IDISK_MODE]
12929 ("disk.mode/%d" % idx, disk.mode),
12932 def _RemoveDisk(self, idx, root, _):
12936 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
12937 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
12938 self.cfg.SetDiskID(disk, node)
12939 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12940 if msg:
12941 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12942 " continuing anyway", idx, node, msg)
12944 # if this is a DRBD disk, return its port to the pool
12945 if root.dev_type in constants.LDS_DRBD:
12946 self.cfg.AddTcpUdpPort(root.logical_id[2])
12948 @staticmethod
12949 def _CreateNewNic(idx, params, private):
12950 """Creates data structure for a new network interface.
12952 """
12953 mac = params[constants.INIC_MAC]
12954 ip = params.get(constants.INIC_IP, None)
12955 network = params.get(constants.INIC_NETWORK, None)
12956 #TODO: not private.filled?? can a nic have no nicparams??
12957 nicparams = private.filled
12959 return (objects.NIC(mac=mac, ip=ip, network=network, nicparams=nicparams), [
12960 ("nic.%d" % idx,
12961 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
12962 (mac, ip, private.filled[constants.NIC_MODE],
12963 private.filled[constants.NIC_LINK],
12964 network)),
12965 ])
12967 @staticmethod
12968 def _ApplyNicMods(idx, nic, params, private):
12969 """Modifies a network interface.
12974 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
12976 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12977 setattr(nic, key, params[key])
12979 if private.filled:
12980 nic.nicparams = private.filled
12982 for (key, val) in nic.nicparams.items():
12983 changes.append(("nic.%s/%d" % (key, idx), val))
12985 return changes
12987 def Exec(self, feedback_fn):
12988 """Modifies an instance.
12990 All parameters take effect only at the next restart of the instance.
12992 """
12993 # Process here the warnings from CheckPrereq, as we don't have a
12994 # feedback_fn there.
12995 # TODO: Replace with self.LogWarning
12996 for warn in self.warn:
12997 feedback_fn("WARNING: %s" % warn)
12999 assert ((self.op.disk_template is None) ^
13000 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13001 "Not owning any node resource locks"
13004 instance = self.instance
13007 if self.op.runtime_mem:
13008 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13010 self.op.runtime_mem)
13011 rpcres.Raise("Cannot modify instance runtime memory")
13012 result.append(("runtime_memory", self.op.runtime_mem))
13014 # Apply disk changes
13015 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13016 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13017 _UpdateIvNames(0, instance.disks)
13019 if self.op.disk_template:
13021 check_nodes = set(instance.all_nodes)
13022 if self.op.remote_node:
13023 check_nodes.add(self.op.remote_node)
13024 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13025 owned = self.owned_locks(level)
13026 assert not (check_nodes - owned), \
13027 ("Not owning the correct locks, owning %r, expected at least %r" %
13028 (owned, check_nodes))
13030 r_shut = _ShutdownInstanceDisks(self, instance)
13031 if not r_shut:
13032 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13033 " proceed with disk template conversion")
13034 mode = (instance.disk_template, self.op.disk_template)
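# The (current_template, target_template) pair is used as the key into the
# _DISK_CONVERSIONS dispatch map defined at the end of this class; only the
# plain<->drbd conversions listed there are supported.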
13035 try:
13036 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13037 except:
13038 self.cfg.ReleaseDRBDMinors(instance.name)
13039 raise
13040 result.append(("disk_template", self.op.disk_template))
13042 assert instance.disk_template == self.op.disk_template, \
13043 ("Expected disk template '%s', found '%s'" %
13044 (self.op.disk_template, instance.disk_template))
13046 # Release node and resource locks if there are any (they might already have
13047 # been released during disk conversion)
13048 _ReleaseLocks(self, locking.LEVEL_NODE)
13049 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13051 # Apply NIC changes
13052 if self._new_nics is not None:
13053 instance.nics = self._new_nics
13054 result.extend(self._nic_chgdesc)
13057 if self.op.hvparams:
13058 instance.hvparams = self.hv_inst
13059 for key, val in self.op.hvparams.iteritems():
13060 result.append(("hv/%s" % key, val))
13063 if self.op.beparams:
13064 instance.beparams = self.be_inst
13065 for key, val in self.op.beparams.iteritems():
13066 result.append(("be/%s" % key, val))
13069 if self.op.os_name:
13070 instance.os = self.op.os_name
13073 if self.op.osparams:
13074 instance.osparams = self.os_inst
13075 for key, val in self.op.osparams.iteritems():
13076 result.append(("os/%s" % key, val))
13078 if self.op.offline is None:
13081 elif self.op.offline:
13082 # Mark instance as offline
13083 self.cfg.MarkInstanceOffline(instance.name)
13084 result.append(("admin_state", constants.ADMINST_OFFLINE))
13086 # Mark instance as online, but stopped
13087 self.cfg.MarkInstanceDown(instance.name)
13088 result.append(("admin_state", constants.ADMINST_DOWN))
13090 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13092 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13093 self.owned_locks(locking.LEVEL_NODE)), \
13094 "All node locks should have been released by now"
13098 _DISK_CONVERSIONS = {
13099 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13100 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13101 }
13104 class LUInstanceChangeGroup(LogicalUnit):
13105 HPATH = "instance-change-group"
13106 HTYPE = constants.HTYPE_INSTANCE
13109 def ExpandNames(self):
13110 self.share_locks = _ShareAll()
13111 self.needed_locks = {
13112 locking.LEVEL_NODEGROUP: [],
13113 locking.LEVEL_NODE: [],
13114 }
13116 self._ExpandAndLockInstance()
13118 if self.op.target_groups:
13119 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13120 self.op.target_groups)
13121 else:
13122 self.req_target_uuids = None
13124 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13126 def DeclareLocks(self, level):
13127 if level == locking.LEVEL_NODEGROUP:
13128 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13130 if self.req_target_uuids:
13131 lock_groups = set(self.req_target_uuids)
13133 # Lock all groups used by instance optimistically; this requires going
13134 # via the node before it's locked, requiring verification later on
13135 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13136 lock_groups.update(instance_groups)
13137 else:
13138 # No target groups, need to lock all of them
13139 lock_groups = locking.ALL_SET
13141 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13143 elif level == locking.LEVEL_NODE:
13144 if self.req_target_uuids:
13145 # Lock all nodes used by instances
13146 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13147 self._LockInstancesNodes()
13149 # Lock all nodes in all potential target groups
13150 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13151 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13152 member_nodes = [node_name
13153 for group in lock_groups
13154 for node_name in self.cfg.GetNodeGroup(group).members]
13155 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13156 else:
13157 # Lock all nodes as all groups are potential targets
13158 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13160 def CheckPrereq(self):
13161 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13162 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13163 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13165 assert (self.req_target_uuids is None or
13166 owned_groups.issuperset(self.req_target_uuids))
13167 assert owned_instances == set([self.op.instance_name])
13169 # Get instance information
13170 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13172 # Check if node groups for locked instance are still correct
13173 assert owned_nodes.issuperset(self.instance.all_nodes), \
13174 ("Instance %s's nodes changed while we kept the lock" %
13175 self.op.instance_name)
13177 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13180 if self.req_target_uuids:
13181 # User requested specific target groups
13182 self.target_uuids = frozenset(self.req_target_uuids)
13183 else:
13184 # All groups except those used by the instance are potential targets
13185 self.target_uuids = owned_groups - inst_groups
13187 conflicting_groups = self.target_uuids & inst_groups
13188 if conflicting_groups:
13189 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13190 " used by the instance '%s'" %
13191 (utils.CommaJoin(conflicting_groups),
13192 self.op.instance_name),
13193 errors.ECODE_INVAL)
13195 if not self.target_uuids:
13196 raise errors.OpPrereqError("There are no possible target groups",
13197 errors.ECODE_INVAL)
13199 def BuildHooksEnv(self):
13200 """Build hooks env.
13203 assert self.target_uuids
13206 "TARGET_GROUPS": " ".join(self.target_uuids),
13209 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13213 def BuildHooksNodes(self):
13214 """Build hooks nodes.
13217 mn = self.cfg.GetMasterNode()
13218 return ([mn], [mn])
13220 def Exec(self, feedback_fn):
13221 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13223 assert instances == [self.op.instance_name], "Instance not locked"
13225 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13226 instances=instances, target_groups=list(self.target_uuids))
13228 ial.Run(self.op.iallocator)
13230 if not ial.success:
13231 raise errors.OpPrereqError("Can't compute solution for changing group of"
13232 " instance '%s' using iallocator '%s': %s" %
13233 (self.op.instance_name, self.op.iallocator,
13235 errors.ECODE_NORES)
13237 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13239 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13240 " instance '%s'", len(jobs), self.op.instance_name)
13242 return ResultWithJobs(jobs)
13245 class LUBackupQuery(NoHooksLU):
13246 """Query the exports list
13251 def CheckArguments(self):
13252 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13253 ["node", "export"], self.op.use_locking)
13255 def ExpandNames(self):
13256 self.expq.ExpandNames(self)
13258 def DeclareLocks(self, level):
13259 self.expq.DeclareLocks(self, level)
13261 def Exec(self, feedback_fn):
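# The result maps each node name to either False (the node could not be
# queried) or the list of export names found on it, e.g.
# {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}
# (names are purely illustrative).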
13262 result = {}
13264 for (node, expname) in self.expq.OldStyleQuery(self):
13265 if expname is None:
13266 result[node] = False
13267 else:
13268 result.setdefault(node, []).append(expname)
13270 return result
13273 class _ExportQuery(_QueryBase):
13274 FIELDS = query.EXPORT_FIELDS
13276 #: The node name is not a unique key for this query
13277 SORT_FIELD = "node"
13279 def ExpandNames(self, lu):
13280 lu.needed_locks = {}
13282 # The following variables interact with _QueryBase._GetNames
13283 if self.names:
13284 self.wanted = _GetWantedNodes(lu, self.names)
13285 else:
13286 self.wanted = locking.ALL_SET
13288 self.do_locking = self.use_locking
13290 if self.do_locking:
13291 lu.share_locks = _ShareAll()
13292 lu.needed_locks = {
13293 locking.LEVEL_NODE: self.wanted,
13294 }
13296 def DeclareLocks(self, lu, level):
13297 pass
13299 def _GetQueryData(self, lu):
13300 """Computes the list of nodes and their attributes.
13303 # Locking is not used
13305 assert not (compat.any(lu.glm.is_owned(level)
13306 for level in locking.LEVELS
13307 if level != locking.LEVEL_CLUSTER) or
13308 self.do_locking or self.use_locking)
13310 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13312 result = []
13314 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13315 if nres.fail_msg:
13316 result.append((node, None))
13317 else:
13318 result.extend((node, expname) for expname in nres.payload)
13320 return result
13323 class LUBackupPrepare(NoHooksLU):
13324 """Prepares an instance for an export and returns useful information.
13329 def ExpandNames(self):
13330 self._ExpandAndLockInstance()
13332 def CheckPrereq(self):
13333 """Check prerequisites.
13336 instance_name = self.op.instance_name
13338 self.instance = self.cfg.GetInstanceInfo(instance_name)
13339 assert self.instance is not None, \
13340 "Cannot retrieve locked instance %s" % self.op.instance_name
13341 _CheckNodeOnline(self, self.instance.primary_node)
13343 self._cds = _GetClusterDomainSecret()
13345 def Exec(self, feedback_fn):
13346 """Prepares an instance for an export.
13349 instance = self.instance
13351 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13352 salt = utils.GenerateSecret(8)
13354 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13355 result = self.rpc.call_x509_cert_create(instance.primary_node,
13356 constants.RIE_CERT_VALIDITY)
13357 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13359 (name, cert_pem) = result.payload
13361 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13362 cert_pem)
13364 return {
13365 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13366 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13367 salt),
13368 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13369 }
13371 return None
13374 class LUBackupExport(LogicalUnit):
13375 """Export an instance to an image in the cluster.
13378 HPATH = "instance-export"
13379 HTYPE = constants.HTYPE_INSTANCE
13382 def CheckArguments(self):
13383 """Check the arguments.
13386 self.x509_key_name = self.op.x509_key_name
13387 self.dest_x509_ca_pem = self.op.destination_x509_ca
13389 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13390 if not self.x509_key_name:
13391 raise errors.OpPrereqError("Missing X509 key name for encryption",
13392 errors.ECODE_INVAL)
13394 if not self.dest_x509_ca_pem:
13395 raise errors.OpPrereqError("Missing destination X509 CA",
13396 errors.ECODE_INVAL)
13398 def ExpandNames(self):
13399 self._ExpandAndLockInstance()
13401 # Lock all nodes for local exports
13402 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13403 # FIXME: lock only instance primary and destination node
13405 # Sad but true, for now we have to lock all nodes, as we don't know where
13406 # the previous export might be, and in this LU we search for it and
13407 # remove it from its current node. In the future we could fix this by:
13408 # - making a tasklet to search (share-lock all), then create the
13409 # new one, then one to remove, after
13410 # - removing the removal operation altogether
13411 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13413 def DeclareLocks(self, level):
13414 """Last minute lock declaration."""
13415 # All nodes are locked anyway, so nothing to do here.
13417 def BuildHooksEnv(self):
13418 """Build hooks env.
13420 This will run on the master, primary node and target node.
13422 """
13423 env = {
13424 "EXPORT_MODE": self.op.mode,
13425 "EXPORT_NODE": self.op.target_node,
13426 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13427 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13428 # TODO: Generic function for boolean env variables
13429 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13432 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13436 def BuildHooksNodes(self):
13437 """Build hooks nodes.
13440 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13442 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13443 nl.append(self.op.target_node)
13447 def CheckPrereq(self):
13448 """Check prerequisites.
13450 This checks that the instance and node names are valid.
13453 instance_name = self.op.instance_name
13455 self.instance = self.cfg.GetInstanceInfo(instance_name)
13456 assert self.instance is not None, \
13457 "Cannot retrieve locked instance %s" % self.op.instance_name
13458 _CheckNodeOnline(self, self.instance.primary_node)
13460 if (self.op.remove_instance and
13461 self.instance.admin_state == constants.ADMINST_UP and
13462 not self.op.shutdown):
13463 raise errors.OpPrereqError("Can not remove instance without shutting it"
13466 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13467 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13468 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13469 assert self.dst_node is not None
13471 _CheckNodeOnline(self, self.dst_node.name)
13472 _CheckNodeNotDrained(self, self.dst_node.name)
13475 self.dest_disk_info = None
13476 self.dest_x509_ca = None
13478 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13479 self.dst_node = None
13481 if len(self.op.target_node) != len(self.instance.disks):
13482 raise errors.OpPrereqError(("Received destination information for %s"
13483 " disks, but instance %s has %s disks") %
13484 (len(self.op.target_node), instance_name,
13485 len(self.instance.disks)),
13486 errors.ECODE_INVAL)
13488 cds = _GetClusterDomainSecret()
13490 # Check X509 key name
13492 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13493 except (TypeError, ValueError), err:
13494 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13496 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13497 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13498 errors.ECODE_INVAL)
13500 # Load and verify CA
13502 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13503 except OpenSSL.crypto.Error, err:
13504 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13505 (err, ), errors.ECODE_INVAL)
13507 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13508 if errcode is not None:
13509 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13510 (msg, ), errors.ECODE_INVAL)
13512 self.dest_x509_ca = cert
13514 # Verify target information
13515 disk_info = []
13516 for idx, disk_data in enumerate(self.op.target_node):
13517 try:
13518 (host, port, magic) = \
13519 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13520 except errors.GenericError, err:
13521 raise errors.OpPrereqError("Target info for disk %s: %s" %
13522 (idx, err), errors.ECODE_INVAL)
13524 disk_info.append((host, port, magic))
13526 assert len(disk_info) == len(self.op.target_node)
13527 self.dest_disk_info = disk_info
13529 else:
13530 raise errors.ProgrammerError("Unhandled export mode %r" %
13531 self.op.mode)
13533 # instance disk type verification
13534 # TODO: Implement export support for file-based disks
13535 for disk in self.instance.disks:
13536 if disk.dev_type == constants.LD_FILE:
13537 raise errors.OpPrereqError("Export not supported for instances with"
13538 " file-based disks", errors.ECODE_INVAL)
13540 def _CleanupExports(self, feedback_fn):
13541 """Removes exports of current instance from all other nodes.
13543 If an instance in a cluster with nodes A..D was exported to node C, its
13544 exports will be removed from the nodes A, B and D.
13547 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13549 nodelist = self.cfg.GetNodeList()
13550 nodelist.remove(self.dst_node.name)
13552 # on one-node clusters nodelist will be empty after the removal
13553 # if we proceed the backup would be removed because OpBackupQuery
13554 # substitutes an empty list with the full cluster node list.
13555 iname = self.instance.name
13557 feedback_fn("Removing old exports for instance %s" % iname)
13558 exportlist = self.rpc.call_export_list(nodelist)
13559 for node in exportlist:
13560 if exportlist[node].fail_msg:
13561 continue
13562 if iname in exportlist[node].payload:
13563 msg = self.rpc.call_export_remove(node, iname).fail_msg
13564 if msg:
13565 self.LogWarning("Could not remove older export for instance %s"
13566 " on node %s: %s", iname, node, msg)
13568 def Exec(self, feedback_fn):
13569 """Export an instance to an image in the cluster.
13572 assert self.op.mode in constants.EXPORT_MODES
13574 instance = self.instance
13575 src_node = instance.primary_node
13577 if self.op.shutdown:
13578 # shutdown the instance, but not the disks
13579 feedback_fn("Shutting down instance %s" % instance.name)
13580 result = self.rpc.call_instance_shutdown(src_node, instance,
13581 self.op.shutdown_timeout)
13582 # TODO: Maybe ignore failures if ignore_remove_failures is set
13583 result.Raise("Could not shutdown instance %s on"
13584 " node %s" % (instance.name, src_node))
13586 # set the disks ID correctly since call_instance_start needs the
13587 # correct drbd minor to create the symlinks
13588 for disk in instance.disks:
13589 self.cfg.SetDiskID(disk, src_node)
13591 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13592 if activate_disks:
13594 # Activate the instance disks if we're exporting a stopped instance
13595 feedback_fn("Activating disks for %s" % instance.name)
13596 _StartInstanceDisks(self, instance, None)
13599 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13602 helper.CreateSnapshots()
13604 if (self.op.shutdown and
13605 instance.admin_state == constants.ADMINST_UP and
13606 not self.op.remove_instance):
13607 assert not activate_disks
13608 feedback_fn("Starting instance %s" % instance.name)
13609 result = self.rpc.call_instance_start(src_node,
13610 (instance, None, None), False)
13611 msg = result.fail_msg
13612 if msg:
13613 feedback_fn("Failed to start instance: %s" % msg)
13614 _ShutdownInstanceDisks(self, instance)
13615 raise errors.OpExecError("Could not start instance: %s" % msg)
13617 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13618 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13619 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13620 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13621 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13623 (key_name, _, _) = self.x509_key_name
13626 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13629 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13630 key_name, dest_ca_pem,
13635 # Check for backwards compatibility
13636 assert len(dresults) == len(instance.disks)
13637 assert compat.all(isinstance(i, bool) for i in dresults), \
13638 "Not all results are boolean: %r" % dresults
13642 feedback_fn("Deactivating disks for %s" % instance.name)
13643 _ShutdownInstanceDisks(self, instance)
13645 if not (compat.all(dresults) and fin_resu):
13646 failures = []
13647 if not fin_resu:
13648 failures.append("export finalization")
13649 if not compat.all(dresults):
13650 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13651 if not dsk)
13652 failures.append("disk export: disk(s) %s" % fdsk)
13654 raise errors.OpExecError("Export failed, errors in %s" %
13655 utils.CommaJoin(failures))
13657 # At this point, the export was successful, we can cleanup/finish
13659 # Remove instance if requested
13660 if self.op.remove_instance:
13661 feedback_fn("Removing instance %s" % instance.name)
13662 _RemoveInstance(self, feedback_fn, instance,
13663 self.op.ignore_remove_failures)
13665 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13666 self._CleanupExports(feedback_fn)
13668 return fin_resu, dresults
13671 class LUBackupRemove(NoHooksLU):
13672 """Remove exports related to the named instance.
13677 def ExpandNames(self):
13678 self.needed_locks = {}
13679 # We need all nodes to be locked in order for RemoveExport to work, but we
13680 # don't need to lock the instance itself, as nothing will happen to it (and
13681 # we can remove exports also for a removed instance)
13682 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13684 def Exec(self, feedback_fn):
13685 """Remove any export.
13688 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13689 # If the instance was not found we'll try with the name that was passed in.
13690 # This will only work if it was an FQDN, though.
13692 if not instance_name:
13694 instance_name = self.op.instance_name
13696 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13697 exportlist = self.rpc.call_export_list(locked_nodes)
13698 found = False
13699 for node in exportlist:
13700 msg = exportlist[node].fail_msg
13701 if msg:
13702 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13703 continue
13704 if instance_name in exportlist[node].payload:
13705 found = True
13706 result = self.rpc.call_export_remove(node, instance_name)
13707 msg = result.fail_msg
13708 if msg:
13709 logging.error("Could not remove export for instance %s"
13710 " on node %s: %s", instance_name, node, msg)
13712 if fqdn_warn and not found:
13713 feedback_fn("Export not found. If trying to remove an export belonging"
13714 " to a deleted instance please use its Fully Qualified"
13718 class LUGroupAdd(LogicalUnit):
13719 """Logical unit for creating node groups.
13722 HPATH = "group-add"
13723 HTYPE = constants.HTYPE_GROUP
13726 def ExpandNames(self):
13727 # We need the new group's UUID here so that we can create and acquire the
13728 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13729 # that it should not check whether the UUID exists in the configuration.
13730 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13731 self.needed_locks = {}
13732 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13734 def CheckPrereq(self):
13735 """Check prerequisites.
13737 This checks that the given group name is not an existing node group
13738 already.
13740 """
13741 try:
13742 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13743 except errors.OpPrereqError:
13744 pass
13745 else:
13746 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13747 " node group (UUID: %s)" %
13748 (self.op.group_name, existing_uuid),
13749 errors.ECODE_EXISTS)
13751 if self.op.ndparams:
13752 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13754 if self.op.hv_state:
13755 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13757 self.new_hv_state = None
13759 if self.op.disk_state:
13760 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13762 self.new_disk_state = None
13764 if self.op.diskparams:
13765 for templ in constants.DISK_TEMPLATES:
13766 if templ in self.op.diskparams:
13767 utils.ForceDictType(self.op.diskparams[templ],
13768 constants.DISK_DT_TYPES)
13769 self.new_diskparams = self.op.diskparams
13771 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
13772 except errors.OpPrereqError, err:
13773 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
13774 errors.ECODE_INVAL)
13776 self.new_diskparams = {}
13778 if self.op.ipolicy:
13779 cluster = self.cfg.GetClusterInfo()
13780 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13782 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
13783 except errors.ConfigurationError, err:
13784 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13785 errors.ECODE_INVAL)
13787 def BuildHooksEnv(self):
13788 """Build hooks env.
13792 "GROUP_NAME": self.op.group_name,
13795 def BuildHooksNodes(self):
13796 """Build hooks nodes.
13799 mn = self.cfg.GetMasterNode()
13800 return ([mn], [mn])
13802 def Exec(self, feedback_fn):
13803 """Add the node group to the cluster.
13806 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13807 uuid=self.group_uuid,
13808 alloc_policy=self.op.alloc_policy,
13809 ndparams=self.op.ndparams,
13810 diskparams=self.new_diskparams,
13811 ipolicy=self.op.ipolicy,
13812 hv_state_static=self.new_hv_state,
13813 disk_state_static=self.new_disk_state)
13815 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13816 del self.remove_locks[locking.LEVEL_NODEGROUP]
13819 class LUGroupAssignNodes(NoHooksLU):
13820 """Logical unit for assigning nodes to groups.
13825 def ExpandNames(self):
13826 # These raise errors.OpPrereqError on their own:
13827 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13828 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13830 # We want to lock all the affected nodes and groups. We have readily
13831 # available the list of nodes, and the *destination* group. To gather the
13832 # list of "source" groups, we need to fetch node information later on.
13833 self.needed_locks = {
13834 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13835 locking.LEVEL_NODE: self.op.nodes,
13838 def DeclareLocks(self, level):
13839 if level == locking.LEVEL_NODEGROUP:
13840 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13842 # Try to get all affected nodes' groups without having the group or node
13843 # lock yet. Needs verification later in the code flow.
13844 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13846 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13848 def CheckPrereq(self):
13849 """Check prerequisites.
13852 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13853 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13854 frozenset(self.op.nodes))
13856 expected_locks = (set([self.group_uuid]) |
13857 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13858 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13859 if actual_locks != expected_locks:
13860 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13861 " current groups are '%s', used to be '%s'" %
13862 (utils.CommaJoin(expected_locks),
13863 utils.CommaJoin(actual_locks)))
13865 self.node_data = self.cfg.GetAllNodesInfo()
13866 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13867 instance_data = self.cfg.GetAllInstancesInfo()
13869 if self.group is None:
13870 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13871 (self.op.group_name, self.group_uuid))
13873 (new_splits, previous_splits) = \
13874 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13875 for node in self.op.nodes],
13876 self.node_data, instance_data)
13878 if new_splits:
13879 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13881 if not self.op.force:
13882 raise errors.OpExecError("The following instances get split by this"
13883 " change and --force was not given: %s" %
13884 fmt_new_splits)
13885 else:
13886 self.LogWarning("This operation will split the following instances: %s",
13887 fmt_new_splits)
13889 if previous_splits:
13890 self.LogWarning("In addition, these already-split instances continue"
13891 " to be split across groups: %s",
13892 utils.CommaJoin(utils.NiceSort(previous_splits)))
13894 def Exec(self, feedback_fn):
13895 """Assign nodes to a new group.
13898 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13900 self.cfg.AssignGroupNodes(mods)
13902 @staticmethod
13903 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13904 """Check for split instances after a node assignment.
13906 This method considers a series of node assignments as an atomic operation,
13907 and returns information about split instances after applying the set of
13908 changes.
13910 In particular, it returns information about newly split instances, and
13911 instances that were already split, and remain so after the change.
13913 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13914 considered.
13916 @type changes: list of (node_name, new_group_uuid) pairs.
13917 @param changes: list of node assignments to consider.
13918 @param node_data: a dict with data for all nodes
13919 @param instance_data: a dict with all instances to consider
13920 @rtype: a two-tuple
13921 @return: a list of instances that were previously okay and become split as a
13922 consequence of this change, and a list of instances that were previously
13923 split and that this change does not fix.
13925 """
13926 changed_nodes = dict((node, group) for node, group in changes
13927 if node_data[node].group != group)
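# Illustrative example (names are hypothetical): with nodes n1 and n2 both in
# group G1 and changes = [("n2", "G2")], a DRBD instance spanning n1 and n2
# ends up with its nodes in different groups and is reported as newly split;
# an instance that already straddled two groups and still does afterwards is
# reported in the second list instead.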
13929 all_split_instances = set()
13930 previously_split_instances = set()
13932 def InstanceNodes(instance):
13933 return [instance.primary_node] + list(instance.secondary_nodes)
13935 for inst in instance_data.values():
13936 if inst.disk_template not in constants.DTS_INT_MIRROR:
13937 continue
13939 instance_nodes = InstanceNodes(inst)
13941 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13942 previously_split_instances.add(inst.name)
13944 if len(set(changed_nodes.get(node, node_data[node].group)
13945 for node in instance_nodes)) > 1:
13946 all_split_instances.add(inst.name)
13948 return (list(all_split_instances - previously_split_instances),
13949 list(previously_split_instances & all_split_instances))
13952 class _GroupQuery(_QueryBase):
13953 FIELDS = query.GROUP_FIELDS
13955 def ExpandNames(self, lu):
13956 lu.needed_locks = {}
13958 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13959 self._cluster = lu.cfg.GetClusterInfo()
13960 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13962 if not self.names:
13963 self.wanted = [name_to_uuid[name]
13964 for name in utils.NiceSort(name_to_uuid.keys())]
13965 else:
13966 # Accept names to be either names or UUIDs.
13967 missing = []
13968 self.wanted = []
13969 all_uuid = frozenset(self._all_groups.keys())
13971 for name in self.names:
13972 if name in all_uuid:
13973 self.wanted.append(name)
13974 elif name in name_to_uuid:
13975 self.wanted.append(name_to_uuid[name])
13977 missing.append(name)
13980 raise errors.OpPrereqError("Some groups do not exist: %s" %
13981 utils.CommaJoin(missing),
13982 errors.ECODE_NOENT)
13984 def DeclareLocks(self, lu, level):
13987 def _GetQueryData(self, lu):
13988 """Computes the list of node groups and their attributes.
13991 do_nodes = query.GQ_NODE in self.requested_data
13992 do_instances = query.GQ_INST in self.requested_data
13994 group_to_nodes = None
13995 group_to_instances = None
13997 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13998 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13999 # latter GetAllInstancesInfo() is not enough, for we have to go through
14000 # instance->node. Hence, we will need to process nodes even if we only need
14001 # instance information.
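# Illustrative shapes of the two mappings (UUIDs and names are hypothetical):
#   group_to_nodes     = {"group-uuid-1": ["node1", "node2"], ...}
#   group_to_instances = {"group-uuid-1": ["inst1.example.com"], ...}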
14002 if do_nodes or do_instances:
14003 all_nodes = lu.cfg.GetAllNodesInfo()
14004 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14005 node_to_group = {}
14007 for node in all_nodes.values():
14008 if node.group in group_to_nodes:
14009 group_to_nodes[node.group].append(node.name)
14010 node_to_group[node.name] = node.group
14013 all_instances = lu.cfg.GetAllInstancesInfo()
14014 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14016 for instance in all_instances.values():
14017 node = instance.primary_node
14018 if node in node_to_group:
14019 group_to_instances[node_to_group[node]].append(instance.name)
14022 # Do not pass on node information if it was not requested.
14023 group_to_nodes = None
14025 return query.GroupQueryData(self._cluster,
14026 [self._all_groups[uuid]
14027 for uuid in self.wanted],
14028 group_to_nodes, group_to_instances,
14029 query.GQ_DISKPARAMS in self.requested_data)
14032 class LUGroupQuery(NoHooksLU):
14033 """Logical unit for querying node groups.
14038 def CheckArguments(self):
14039 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14040 self.op.output_fields, False)
14042 def ExpandNames(self):
14043 self.gq.ExpandNames(self)
14045 def DeclareLocks(self, level):
14046 self.gq.DeclareLocks(self, level)
14048 def Exec(self, feedback_fn):
14049 return self.gq.OldStyleQuery(self)
14052 class LUGroupSetParams(LogicalUnit):
14053 """Modifies the parameters of a node group.
14056 HPATH = "group-modify"
14057 HTYPE = constants.HTYPE_GROUP
14060 def CheckArguments(self):
14061 all_changes = [
14062 self.op.ndparams,
14063 self.op.diskparams,
14064 self.op.alloc_policy,
14065 self.op.hv_state,
14066 self.op.disk_state,
14067 self.op.ipolicy,
14068 ]
14070 if all_changes.count(None) == len(all_changes):
14071 raise errors.OpPrereqError("Please pass at least one modification",
14072 errors.ECODE_INVAL)
14074 def ExpandNames(self):
14075 # This raises errors.OpPrereqError on its own:
14076 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14078 self.needed_locks = {
14079 locking.LEVEL_INSTANCE: [],
14080 locking.LEVEL_NODEGROUP: [self.group_uuid],
14083 self.share_locks[locking.LEVEL_INSTANCE] = 1
14085 def DeclareLocks(self, level):
14086 if level == locking.LEVEL_INSTANCE:
14087 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14089 # Lock instances optimistically, needs verification once group lock has
14091 self.needed_locks[locking.LEVEL_INSTANCE] = \
14092 self.cfg.GetNodeGroupInstances(self.group_uuid)
14095 def _UpdateAndVerifyDiskParams(old, new):
14096 """Updates and verifies disk parameters.
14099 new_params = _GetUpdatedParams(old, new)
14100 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14103 def CheckPrereq(self):
14104 """Check prerequisites.
14107 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14109 # Check if locked instances are still correct
14110 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14112 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14113 cluster = self.cfg.GetClusterInfo()
14115 if self.group is None:
14116 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14117 (self.op.group_name, self.group_uuid))
14119 if self.op.ndparams:
14120 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14121 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14122 self.new_ndparams = new_ndparams
14124 if self.op.diskparams:
14125 diskparams = self.group.diskparams
14126 uavdp = self._UpdateAndVerifyDiskParams
14127 # For each disktemplate subdict update and verify the values
14128 new_diskparams = dict((dt,
14129 uavdp(diskparams.get(dt, {}),
14130 self.op.diskparams[dt]))
14131 for dt in constants.DISK_TEMPLATES
14132 if dt in self.op.diskparams)
14133 # As we've all subdicts of diskparams ready, lets merge the actual
14134 # dict with all updated subdicts
14135 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14137 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14138 except errors.OpPrereqError, err:
14139 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14140 errors.ECODE_INVAL)
14142 if self.op.hv_state:
14143 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14144 self.group.hv_state_static)
14146 if self.op.disk_state:
14147 self.new_disk_state = \
14148 _MergeAndVerifyDiskState(self.op.disk_state,
14149 self.group.disk_state_static)
14151 if self.op.ipolicy:
14152 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14153 self.op.ipolicy,
14154 group_policy=True)
14156 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14157 inst_filter = lambda inst: inst.name in owned_instances
14158 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14159 violations = \
14160 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14161 self.group),
14162 new_ipolicy, instances)
14164 if violations:
14165 self.LogWarning("After the ipolicy change the following instances"
14166 " violate them: %s",
14167 utils.CommaJoin(violations))
14169 def BuildHooksEnv(self):
14170 """Build hooks env.
14174 "GROUP_NAME": self.op.group_name,
14175 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14178 def BuildHooksNodes(self):
14179 """Build hooks nodes.
14182 mn = self.cfg.GetMasterNode()
14183 return ([mn], [mn])
14185 def Exec(self, feedback_fn):
14186 """Modifies the node group.
14191 if self.op.ndparams:
14192 self.group.ndparams = self.new_ndparams
14193 result.append(("ndparams", str(self.group.ndparams)))
14195 if self.op.diskparams:
14196 self.group.diskparams = self.new_diskparams
14197 result.append(("diskparams", str(self.group.diskparams)))
14199 if self.op.alloc_policy:
14200 self.group.alloc_policy = self.op.alloc_policy
14202 if self.op.hv_state:
14203 self.group.hv_state_static = self.new_hv_state
14205 if self.op.disk_state:
14206 self.group.disk_state_static = self.new_disk_state
14208 if self.op.ipolicy:
14209 self.group.ipolicy = self.new_ipolicy
14211 self.cfg.Update(self.group, feedback_fn)
14213 return result
14215 class LUGroupRemove(LogicalUnit):
14216 HPATH = "group-remove"
14217 HTYPE = constants.HTYPE_GROUP
14220 def ExpandNames(self):
14221 # This raises errors.OpPrereqError on its own:
14222 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14223 self.needed_locks = {
14224 locking.LEVEL_NODEGROUP: [self.group_uuid],
14227 def CheckPrereq(self):
14228 """Check prerequisites.
14230 This checks that the given group name exists as a node group, that is
14231 empty (i.e., contains no nodes), and that is not the last group of the
14235 # Verify that the group is empty.
14236 group_nodes = [node.name
14237 for node in self.cfg.GetAllNodesInfo().values()
14238 if node.group == self.group_uuid]
14240 if group_nodes:
14241 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14242 " nodes: %s" %
14243 (self.op.group_name,
14244 utils.CommaJoin(utils.NiceSort(group_nodes))),
14245 errors.ECODE_STATE)
14247 # Verify the cluster would not be left group-less.
14248 if len(self.cfg.GetNodeGroupList()) == 1:
14249 raise errors.OpPrereqError("Group '%s' is the only group,"
14250 " cannot be removed" %
14251 self.op.group_name,
14252 errors.ECODE_STATE)
14254 def BuildHooksEnv(self):
14255 """Build hooks env.
14259 "GROUP_NAME": self.op.group_name,
14262 def BuildHooksNodes(self):
14263 """Build hooks nodes.
14266 mn = self.cfg.GetMasterNode()
14267 return ([mn], [mn])
14269 def Exec(self, feedback_fn):
14270 """Remove the node group.
14274 self.cfg.RemoveNodeGroup(self.group_uuid)
14275 except errors.ConfigurationError:
14276 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14277 (self.op.group_name, self.group_uuid))
14279 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14282 class LUGroupRename(LogicalUnit):
14283 HPATH = "group-rename"
14284 HTYPE = constants.HTYPE_GROUP
14287 def ExpandNames(self):
14288 # This raises errors.OpPrereqError on its own:
14289 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14291 self.needed_locks = {
14292 locking.LEVEL_NODEGROUP: [self.group_uuid],
14295 def CheckPrereq(self):
14296 """Check prerequisites.
14298 Ensures requested new name is not yet used.
14302 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14303 except errors.OpPrereqError:
14306 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14307 " node group (UUID: %s)" %
14308 (self.op.new_name, new_name_uuid),
14309 errors.ECODE_EXISTS)
14311 def BuildHooksEnv(self):
14312 """Build hooks env.
14316 "OLD_NAME": self.op.group_name,
14317 "NEW_NAME": self.op.new_name,
14320 def BuildHooksNodes(self):
14321 """Build hooks nodes.
14324 mn = self.cfg.GetMasterNode()
14326 all_nodes = self.cfg.GetAllNodesInfo()
14327 all_nodes.pop(mn, None)
14330 run_nodes.extend(node.name for node in all_nodes.values()
14331 if node.group == self.group_uuid)
14333 return (run_nodes, run_nodes)
14335 def Exec(self, feedback_fn):
14336 """Rename the node group.
14339 group = self.cfg.GetNodeGroup(self.group_uuid)
14342 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14343 (self.op.group_name, self.group_uuid))
14345 group.name = self.op.new_name
14346 self.cfg.Update(group, feedback_fn)
14348 return self.op.new_name
14351 class LUGroupEvacuate(LogicalUnit):
14352 HPATH = "group-evacuate"
14353 HTYPE = constants.HTYPE_GROUP
14356 def ExpandNames(self):
14357 # This raises errors.OpPrereqError on its own:
14358 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14360 if self.op.target_groups:
14361 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14362 self.op.target_groups)
14363 else:
14364 self.req_target_uuids = []
14366 if self.group_uuid in self.req_target_uuids:
14367 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
14368 " as a target group (targets are %s)" %
14370 utils.CommaJoin(self.req_target_uuids)),
14371 errors.ECODE_INVAL)
14373 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14375 self.share_locks = _ShareAll()
14376 self.needed_locks = {
14377 locking.LEVEL_INSTANCE: [],
14378 locking.LEVEL_NODEGROUP: [],
14379 locking.LEVEL_NODE: [],
14382 def DeclareLocks(self, level):
14383 if level == locking.LEVEL_INSTANCE:
14384 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14386 # Lock instances optimistically, needs verification once node and group
14387 # locks have been acquired
14388 self.needed_locks[locking.LEVEL_INSTANCE] = \
14389 self.cfg.GetNodeGroupInstances(self.group_uuid)
14391 elif level == locking.LEVEL_NODEGROUP:
14392 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14394 if self.req_target_uuids:
14395 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14396 else:
14397 # Lock all groups used by instances optimistically; this requires going
14398 # via the node before it's locked, requiring verification later on
14399 lock_groups.update(group_uuid
14400 for instance_name in
14401 self.owned_locks(locking.LEVEL_INSTANCE)
14402 for group_uuid in
14403 self.cfg.GetInstanceNodeGroups(instance_name))
14404 else:
14405 # No target groups, need to lock all of them
14406 lock_groups = locking.ALL_SET
14408 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14410 elif level == locking.LEVEL_NODE:
14411 # This will only lock the nodes in the group to be evacuated which
14412 # contain actual instances
14413 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14414 self._LockInstancesNodes()
14416 # Lock all nodes in group to be evacuated and target groups
14417 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14418 assert self.group_uuid in owned_groups
14419 member_nodes = [node_name
14420 for group in owned_groups
14421 for node_name in self.cfg.GetNodeGroup(group).members]
14422 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14424 def CheckPrereq(self):
14425 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14426 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14427 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14429 assert owned_groups.issuperset(self.req_target_uuids)
14430 assert self.group_uuid in owned_groups
14432 # Check if locked instances are still correct
14433 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14435 # Get instance information
14436 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14438 # Check if node groups for locked instances are still correct
14439 _CheckInstancesNodeGroups(self.cfg, self.instances,
14440 owned_groups, owned_nodes, self.group_uuid)
14442 if self.req_target_uuids:
14443 # User requested specific target groups
14444 self.target_uuids = self.req_target_uuids
14445 else:
14446 # All groups except the one to be evacuated are potential targets
14447 self.target_uuids = [group_uuid for group_uuid in owned_groups
14448 if group_uuid != self.group_uuid]
14450 if not self.target_uuids:
14451 raise errors.OpPrereqError("There are no possible target groups",
14452 errors.ECODE_INVAL)
14454 def BuildHooksEnv(self):
14455 """Build hooks env.
14459 "GROUP_NAME": self.op.group_name,
14460 "TARGET_GROUPS": " ".join(self.target_uuids),
14463 def BuildHooksNodes(self):
14464 """Build hooks nodes.
14467 mn = self.cfg.GetMasterNode()
14469 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14471 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14473 return (run_nodes, run_nodes)
14475 def Exec(self, feedback_fn):
14476 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14478 assert self.group_uuid not in self.target_uuids
14480 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14481 instances=instances, target_groups=self.target_uuids)
14483 ial.Run(self.op.iallocator)
14485 if not ial.success:
14486 raise errors.OpPrereqError("Can't compute group evacuation using"
14487 " iallocator '%s': %s" %
14488 (self.op.iallocator, ial.info),
14489 errors.ECODE_NORES)
14491 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14493 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14494 len(jobs), self.op.group_name)
14496 return ResultWithJobs(jobs)
14499 class TagsLU(NoHooksLU): # pylint: disable=W0223
14500 """Generic tags LU.
14502 This is an abstract class which is the parent of all the other tags LUs.
14505 def ExpandNames(self):
14506 self.group_uuid = None
14507 self.needed_locks = {}
14509 if self.op.kind == constants.TAG_NODE:
14510 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14511 lock_level = locking.LEVEL_NODE
14512 lock_name = self.op.name
14513 elif self.op.kind == constants.TAG_INSTANCE:
14514 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14515 lock_level = locking.LEVEL_INSTANCE
14516 lock_name = self.op.name
14517 elif self.op.kind == constants.TAG_NODEGROUP:
14518 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14519 lock_level = locking.LEVEL_NODEGROUP
14520 lock_name = self.group_uuid
14521 else:
14522 lock_level = None
14523 lock_name = None
14525 if lock_level and getattr(self.op, "use_locking", True):
14526 self.needed_locks[lock_level] = lock_name
14528 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14529 # not possible to acquire the BGL based on opcode parameters)
14531 def CheckPrereq(self):
14532 """Check prerequisites.
14535 if self.op.kind == constants.TAG_CLUSTER:
14536 self.target = self.cfg.GetClusterInfo()
14537 elif self.op.kind == constants.TAG_NODE:
14538 self.target = self.cfg.GetNodeInfo(self.op.name)
14539 elif self.op.kind == constants.TAG_INSTANCE:
14540 self.target = self.cfg.GetInstanceInfo(self.op.name)
14541 elif self.op.kind == constants.TAG_NODEGROUP:
14542 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14544 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14545 str(self.op.kind), errors.ECODE_INVAL)
14548 class LUTagsGet(TagsLU):
14549 """Returns the tags of a given object.
14554 def ExpandNames(self):
14555 TagsLU.ExpandNames(self)
14557 # Share locks as this is only a read operation
14558 self.share_locks = _ShareAll()
14560 def Exec(self, feedback_fn):
14561 """Returns the tag list.
14564 return list(self.target.GetTags())
14567 class LUTagsSearch(NoHooksLU):
14568 """Searches the tags for a given pattern.
14573 def ExpandNames(self):
14574 self.needed_locks = {}
14576 def CheckPrereq(self):
14577 """Check prerequisites.
14579 This checks the pattern passed for validity by compiling it.
14583 self.re = re.compile(self.op.pattern)
14584 except re.error, err:
14585 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14586 (self.op.pattern, err), errors.ECODE_INVAL)
14588 def Exec(self, feedback_fn):
14589 """Returns the tag list.
14593 tgts = [("/cluster", cfg.GetClusterInfo())]
14594 ilist = cfg.GetAllInstancesInfo().values()
14595 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14596 nlist = cfg.GetAllNodesInfo().values()
14597 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14598 tgts.extend(("/nodegroup/%s" % n.name, n)
14599 for n in cfg.GetAllNodeGroupsInfo().values())
14600 results = []
14601 for path, target in tgts:
14602 for tag in target.GetTags():
14603 if self.re.search(tag):
14604 results.append((path, tag))
14605 return results
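# Example (illustrative): Exec returns (path, tag) pairs such as
#   [("/cluster", "mytag"), ("/instances/inst1.example.com", "mytag")]
# using the path prefixes built above ("/cluster", "/instances/", "/nodes/"
# and "/nodegroup/").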
14608 class LUTagsSet(TagsLU):
14609 """Sets a tag on a given object.
14614 def CheckPrereq(self):
14615 """Check prerequisites.
14617 This checks the type and length of the tag name and value.
14620 TagsLU.CheckPrereq(self)
14621 for tag in self.op.tags:
14622 objects.TaggableObject.ValidateTag(tag)
14624 def Exec(self, feedback_fn):
14628 try:
14629 for tag in self.op.tags:
14630 self.target.AddTag(tag)
14631 except errors.TagError, err:
14632 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14633 self.cfg.Update(self.target, feedback_fn)
14636 class LUTagsDel(TagsLU):
14637 """Delete a list of tags from a given object.
14642 def CheckPrereq(self):
14643 """Check prerequisites.
14645 This checks that we have the given tag.
14648 TagsLU.CheckPrereq(self)
14649 for tag in self.op.tags:
14650 objects.TaggableObject.ValidateTag(tag)
14651 del_tags = frozenset(self.op.tags)
14652 cur_tags = self.target.GetTags()
14654 diff_tags = del_tags - cur_tags
14655 if diff_tags:
14656 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14657 raise errors.OpPrereqError("Tag(s) %s not found" %
14658 (utils.CommaJoin(diff_names), ),
14659 errors.ECODE_NOENT)
14661 def Exec(self, feedback_fn):
14662 """Remove the tag from the object.
14665 for tag in self.op.tags:
14666 self.target.RemoveTag(tag)
14667 self.cfg.Update(self.target, feedback_fn)
14670 class LUTestDelay(NoHooksLU):
14671 """Sleep for a specified amount of time.
14673 This LU sleeps on the master and/or nodes for a specified amount of
14679 def ExpandNames(self):
14680 """Expand names and set required locks.
14682 This expands the node list, if any.
14685 self.needed_locks = {}
14686 if self.op.on_nodes:
14687 # _GetWantedNodes can be used here, but is not always appropriate to use
14688 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14689 # more information.
14690 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14691 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14693 def _TestDelay(self):
14694 """Do the actual sleep.
14697 if self.op.on_master:
14698 if not utils.TestDelay(self.op.duration):
14699 raise errors.OpExecError("Error during master delay test")
14700 if self.op.on_nodes:
14701 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14702 for node, node_result in result.items():
14703 node_result.Raise("Failure during rpc call to node %s" % node)
14705 def Exec(self, feedback_fn):
14706 """Execute the test delay opcode, with the wanted repetitions.
14709 if self.op.repeat == 0:
14710 self._TestDelay()
14711 else:
14712 top_value = self.op.repeat - 1
14713 for i in range(self.op.repeat):
14714 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14718 class LUTestJqueue(NoHooksLU):
14719 """Utility LU to test some aspects of the job queue.
14724 # Must be lower than default timeout for WaitForJobChange to see whether it
14725 # notices changed jobs
14726 _CLIENT_CONNECT_TIMEOUT = 20.0
14727 _CLIENT_CONFIRM_TIMEOUT = 60.0
14730 def _NotifyUsingSocket(cls, cb, errcls):
14731 """Opens a Unix socket and waits for another program to connect.
14734 @param cb: Callback to send socket name to client
14735 @type errcls: class
14736 @param errcls: Exception class to use for errors
14739 # Using a temporary directory as there's no easy way to create temporary
14740 # sockets without writing a custom loop around tempfile.mktemp and
14742 tmpdir = tempfile.mkdtemp()
14743 try:
14744 tmpsock = utils.PathJoin(tmpdir, "sock")
14746 logging.debug("Creating temporary socket at %s", tmpsock)
14747 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14752 # Send details to client
14755 # Wait for client to connect before continuing
14756 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14757 try:
14758 (conn, _) = sock.accept()
14759 except socket.error, err:
14760 raise errcls("Client didn't connect in time (%s)" % err)
14764 # Remove as soon as client is connected
14765 shutil.rmtree(tmpdir)
14767 # Wait for client to close
14770 # pylint: disable=E1101
14771 # Instance of '_socketobject' has no ... member
14772 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14773 conn.recv(1)
14774 except socket.error, err:
14775 raise errcls("Client failed to confirm notification (%s)" % err)
14779 def _SendNotification(self, test, arg, sockname):
14780 """Sends a notification to the client.
14783 @param test: Test name
14784 @param arg: Test argument (depends on test)
14785 @type sockname: string
14786 @param sockname: Socket path
14789 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14791 def _Notify(self, prereq, test, arg):
14792 """Notifies the client of a test.
14795 @param prereq: Whether this is a prereq-phase test
14797 @param test: Test name
14798 @param arg: Test argument (depends on test)
14802 errcls = errors.OpPrereqError
14804 errcls = errors.OpExecError
14806 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14810 def CheckArguments(self):
14811 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14812 self.expandnames_calls = 0
14814 def ExpandNames(self):
14815 checkargs_calls = getattr(self, "checkargs_calls", 0)
14816 if checkargs_calls < 1:
14817 raise errors.ProgrammerError("CheckArguments was not called")
14819 self.expandnames_calls += 1
14821 if self.op.notify_waitlock:
14822 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14824 self.LogInfo("Expanding names")
14826 # Get lock on master node (just to get a lock, not for a particular reason)
14827 self.needed_locks = {
14828 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14831 def Exec(self, feedback_fn):
14832 if self.expandnames_calls < 1:
14833 raise errors.ProgrammerError("ExpandNames was not called")
14835 if self.op.notify_exec:
14836 self._Notify(False, constants.JQT_EXEC, None)
14838 self.LogInfo("Executing")
14840 if self.op.log_messages:
14841 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14842 for idx, msg in enumerate(self.op.log_messages):
14843 self.LogInfo("Sending log message %s", idx + 1)
14844 feedback_fn(constants.JQT_MSGPREFIX + msg)
14845 # Report how many test messages have been sent
14846 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14849 raise errors.OpExecError("Opcode failure was requested")
14854 class IAllocator(object):
14855 """IAllocator framework.
14857 An IAllocator instance has four sets of attributes:
14858 - cfg that is needed to query the cluster
14859 - input data (all members of the _KEYS class attribute are required)
14860 - four buffer attributes (in|out_data|text), that represent the
14861 input (to the external script) in text and data structure format,
14862 and the output from it, again in two formats
14863 - the result variables from the script (success, info, nodes) for
14867 # pylint: disable=R0902
14868 # lots of instance attributes
14870 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14871 self.cfg = cfg
14872 self.rpc = rpc_runner
14873 # init buffer variables
14874 self.in_text = self.out_text = self.in_data = self.out_data = None
14875 # init all input fields so that pylint is happy
14876 self.mode = mode
14877 self.memory = self.disks = self.disk_template = self.spindle_use = None
14878 self.os = self.tags = self.nics = self.vcpus = None
14879 self.hypervisor = None
14880 self.relocate_from = None
14882 self.instances = None
14883 self.evac_mode = None
14884 self.target_groups = []
14886 self.required_nodes = None
14887 # init result fields
14888 self.success = self.info = self.result = None
14890 try:
14891 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14892 except KeyError:
14893 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14894 " IAllocator" % self.mode)
14896 keyset = [n for (n, _) in keydata]
14898 for key in kwargs:
14899 if key not in keyset:
14900 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14901 " IAllocator" % key)
14902 setattr(self, key, kwargs[key])
14904 for key in keyset:
14905 if key not in kwargs:
14906 raise errors.ProgrammerError("Missing input parameter '%s' to"
14907 " IAllocator" % key)
14908 self._BuildInputData(compat.partial(fn, self), keydata)
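# Example (illustrative, hypothetical values): the keyword arguments must
# match the keydata list of the chosen mode exactly (see _MODE_DATA below);
# e.g. a relocation request accepts only "name" and "relocate_from":
#   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])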
14910 def _ComputeClusterData(self):
14911 """Compute the generic allocator input data.
14913 This is the data that is independent of the actual operation.
14916 cfg = self.cfg
14917 cluster_info = cfg.GetClusterInfo()
14920 "version": constants.IALLOCATOR_VERSION,
14921 "cluster_name": cfg.GetClusterName(),
14922 "cluster_tags": list(cluster_info.GetTags()),
14923 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14924 "ipolicy": cluster_info.ipolicy,
14926 ninfo = cfg.GetAllNodesInfo()
14927 iinfo = cfg.GetAllInstancesInfo().values()
14928 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14931 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14933 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14934 hypervisor_name = self.hypervisor
14935 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14936 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14937 else:
14938 hypervisor_name = cluster_info.primary_hypervisor
14940 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14943 self.rpc.call_all_instances_info(node_list,
14944 cluster_info.enabled_hypervisors)
14946 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14948 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14949 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14950 i_list, config_ndata)
14951 assert len(data["nodes"]) == len(ninfo), \
14952 "Incomplete node data computed"
14954 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14956 self.in_data = data
14959 def _ComputeNodeGroupData(cfg):
14960 """Compute node groups data.
14963 cluster = cfg.GetClusterInfo()
14964 ng = dict((guuid, {
14965 "name": gdata.name,
14966 "alloc_policy": gdata.alloc_policy,
14967 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14969 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14974 def _ComputeBasicNodeData(cfg, node_cfg):
14975 """Compute global node data.
14978 @returns: a dict of name: (node dict, node config)
14981 # fill in static (config-based) values
14982 node_results = dict((ninfo.name, {
14983 "tags": list(ninfo.GetTags()),
14984 "primary_ip": ninfo.primary_ip,
14985 "secondary_ip": ninfo.secondary_ip,
14986 "offline": ninfo.offline,
14987 "drained": ninfo.drained,
14988 "master_candidate": ninfo.master_candidate,
14989 "group": ninfo.group,
14990 "master_capable": ninfo.master_capable,
14991 "vm_capable": ninfo.vm_capable,
14992 "ndparams": cfg.GetNdParams(ninfo),
14994 for ninfo in node_cfg.values())
14996 return node_results
14999 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
15001 """Compute global node data.
15003 @param node_results: the basic node structures as filled from the config
15006 #TODO(dynmem): compute the right data on MAX and MIN memory
15007 # make a copy of the current dict
15008 node_results = dict(node_results)
15009 for nname, nresult in node_data.items():
15010 assert nname in node_results, "Missing basic data for node %s" % nname
15011 ninfo = node_cfg[nname]
15013 if not (ninfo.offline or ninfo.drained):
15014 nresult.Raise("Can't get data for node %s" % nname)
15015 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
15017 remote_info = _MakeLegacyNodeInfo(nresult.payload)
15019 for attr in ["memory_total", "memory_free", "memory_dom0",
15020 "vg_size", "vg_free", "cpu_total"]:
15021 if attr not in remote_info:
15022 raise errors.OpExecError("Node '%s' didn't return attribute"
15023 " '%s'" % (nname, attr))
15024 if not isinstance(remote_info[attr], int):
15025 raise errors.OpExecError("Node '%s' returned invalid value"
15027 (nname, attr, remote_info[attr]))
15028 # compute memory used by primary instances
15029 i_p_mem = i_p_up_mem = 0
15030 for iinfo, beinfo in i_list:
15031 if iinfo.primary_node == nname:
15032 i_p_mem += beinfo[constants.BE_MAXMEM]
15033 if iinfo.name not in node_iinfo[nname].payload:
15034 i_used_mem = 0
15035 else:
15036 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
15037 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
15038 remote_info["memory_free"] -= max(0, i_mem_diff)
15040 if iinfo.admin_state == constants.ADMINST_UP:
15041 i_p_up_mem += beinfo[constants.BE_MAXMEM]
15043 # compute memory used by instances
15045 "total_memory": remote_info["memory_total"],
15046 "reserved_memory": remote_info["memory_dom0"],
15047 "free_memory": remote_info["memory_free"],
15048 "total_disk": remote_info["vg_size"],
15049 "free_disk": remote_info["vg_free"],
15050 "total_cpus": remote_info["cpu_total"],
15051 "i_pri_memory": i_p_mem,
15052 "i_pri_up_memory": i_p_up_mem,
15054 pnr_dyn.update(node_results[nname])
15055 node_results[nname] = pnr_dyn
15057 return node_results
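# Worked example for the "memory_free" adjustment above (hypothetical
# numbers): an instance with BE_MAXMEM of 1024 MiB that currently uses only
# 256 MiB gives i_mem_diff = 768, so the node's reported free memory is
# lowered by 768 MiB to account for memory the instance may grow back into.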
15060 def _ComputeInstanceData(cluster_info, i_list):
15061 """Compute global instance data.
15064 instance_data = {}
15065 for iinfo, beinfo in i_list:
15066 nic_data = []
15067 for nic in iinfo.nics:
15068 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15072 "mode": filled_params[constants.NIC_MODE],
15073 "link": filled_params[constants.NIC_LINK],
15074 "network": nic.network,
15076 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15077 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15078 nic_data.append(nic_dict)
15080 "tags": list(iinfo.GetTags()),
15081 "admin_state": iinfo.admin_state,
15082 "vcpus": beinfo[constants.BE_VCPUS],
15083 "memory": beinfo[constants.BE_MAXMEM],
15084 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15086 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15088 "disks": [{constants.IDISK_SIZE: dsk.size,
15089 constants.IDISK_MODE: dsk.mode}
15090 for dsk in iinfo.disks],
15091 "disk_template": iinfo.disk_template,
15092 "hypervisor": iinfo.hypervisor,
15094 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15096 instance_data[iinfo.name] = pir
15098 return instance_data
15100 def _AddNewInstance(self):
15101 """Add new instance data to allocator structure.
15103 This in combination with _ComputeClusterData will create the
15104 correct structure needed as input for the allocator.
15106 The checks for the completeness of the opcode must have already been
15110 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15112 if self.disk_template in constants.DTS_INT_MIRROR:
15113 self.required_nodes = 2
15114 else:
15115 self.required_nodes = 1
15119 "disk_template": self.disk_template,
15122 "vcpus": self.vcpus,
15123 "memory": self.memory,
15124 "spindle_use": self.spindle_use,
15125 "disks": self.disks,
15126 "disk_space_total": disk_space,
15128 "required_nodes": self.required_nodes,
15129 "hypervisor": self.hypervisor,
15134 def _AddRelocateInstance(self):
15135 """Add relocate instance data to allocator structure.
15137 This in combination with _ComputeClusterData will create the
15138 correct structure needed as input for the allocator.
15140 The checks for the completeness of the opcode must have already been
15144 instance = self.cfg.GetInstanceInfo(self.name)
15145 if instance is None:
15146 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15147 " IAllocator" % self.name)
15149 if instance.disk_template not in constants.DTS_MIRRORED:
15150 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15151 errors.ECODE_INVAL)
15153 if instance.disk_template in constants.DTS_INT_MIRROR and \
15154 len(instance.secondary_nodes) != 1:
15155 raise errors.OpPrereqError("Instance has not exactly one secondary node",
15156 errors.ECODE_STATE)
15158 self.required_nodes = 1
15159 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15160 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15164 "disk_space_total": disk_space,
15165 "required_nodes": self.required_nodes,
15166 "relocate_from": self.relocate_from,
15170 def _AddNodeEvacuate(self):
15171 """Get data for node-evacuate requests.
15175 "instances": self.instances,
15176 "evac_mode": self.evac_mode,
15179 def _AddChangeGroup(self):
15180 """Get data for node-evacuate requests.
15184 "instances": self.instances,
15185 "target_groups": self.target_groups,
15188 def _BuildInputData(self, fn, keydata):
15189 """Build input data structures.
15192 self._ComputeClusterData()
15195 request["type"] = self.mode
15196 for keyname, keytype in keydata:
15197 if keyname not in request:
15198 raise errors.ProgrammerError("Request parameter %s is missing" %
15200 val = request[keyname]
15201 if not keytype(val):
15202 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15203 " validation, value %s, expected"
15204 " type %s" % (keyname, val, keytype))
15205 self.in_data["request"] = request
15207 self.in_text = serializer.Dump(self.in_data)
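# Example (illustrative, hypothetical values): after _BuildInputData,
# self.in_data combines the cluster-wide keys filled by _ComputeClusterData
# ("version", "cluster_name", "cluster_tags", "enabled_hypervisors",
# "ipolicy", "nodegroups", "nodes", "instances") with a "request" entry, e.g.:
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "cluster.example.com",
#    ...,
#    "request": {"type": constants.IALLOCATOR_MODE_RELOC,
#                "name": "inst1.example.com",
#                "relocate_from": ["node2.example.com"],
#                ...}}
# and self.in_text is that structure serialized with serializer.Dump.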
15209 _STRING_LIST = ht.TListOf(ht.TString)
15210 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15211 # pylint: disable=E1101
15212 # Class '...' has no 'OP_ID' member
15213 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15214 opcodes.OpInstanceMigrate.OP_ID,
15215 opcodes.OpInstanceReplaceDisks.OP_ID])
15218 _NEVAC_MOVED = \
15219 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15220 ht.TItems([ht.TNonEmptyString,
15221 ht.TNonEmptyString,
15222 ht.TListOf(ht.TNonEmptyString),
15224 _NEVAC_FAILED = \
15225 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15226 ht.TItems([ht.TNonEmptyString,
15229 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15230 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15232 _MODE_DATA = {
15233 constants.IALLOCATOR_MODE_ALLOC:
15234 (_AddNewInstance,
15235 [
15236 ("name", ht.TString),
15237 ("memory", ht.TInt),
15238 ("spindle_use", ht.TInt),
15239 ("disks", ht.TListOf(ht.TDict)),
15240 ("disk_template", ht.TString),
15241 ("os", ht.TString),
15242 ("tags", _STRING_LIST),
15243 ("nics", ht.TListOf(ht.TDict)),
15244 ("vcpus", ht.TInt),
15245 ("hypervisor", ht.TString),
15247 constants.IALLOCATOR_MODE_RELOC:
15248 (_AddRelocateInstance,
15249 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15251 constants.IALLOCATOR_MODE_NODE_EVAC:
15252 (_AddNodeEvacuate, [
15253 ("instances", _STRING_LIST),
15254 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15256 constants.IALLOCATOR_MODE_CHG_GROUP:
15257 (_AddChangeGroup, [
15258 ("instances", _STRING_LIST),
15259 ("target_groups", _STRING_LIST),
15263 def Run(self, name, validate=True, call_fn=None):
15264 """Run an instance allocator and return the results.
15267 if call_fn is None:
15268 call_fn = self.rpc.call_iallocator_runner
15270 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15271 result.Raise("Failure while running the iallocator script")
15273 self.out_text = result.payload
15275 self._ValidateResult()
15277 def _ValidateResult(self):
15278 """Process the allocator results.
15280 This will process and if successful save the result in
15281 self.out_data and the other parameters.
15285 rdict = serializer.Load(self.out_text)
15286 except Exception, err:
15287 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15289 if not isinstance(rdict, dict):
15290 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15292 # TODO: remove backwards compatibility in later versions
15293 if "nodes" in rdict and "result" not in rdict:
15294 rdict["result"] = rdict["nodes"]
15297 for key in "success", "info", "result":
15298 if key not in rdict:
15299 raise errors.OpExecError("Can't parse iallocator results:"
15300 " missing key '%s'" % key)
15301 setattr(self, key, rdict[key])
15303 if not self._result_check(self.result):
15304 raise errors.OpExecError("Iallocator returned invalid result,"
15305 " expected %s, got %s" %
15306 (self._result_check, self.result),
15307 errors.ECODE_INVAL)
15309 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15310 assert self.relocate_from is not None
15311 assert self.required_nodes == 1
15313 node2group = dict((name, ndata["group"])
15314 for (name, ndata) in self.in_data["nodes"].items())
15316 fn = compat.partial(self._NodesToGroups, node2group,
15317 self.in_data["nodegroups"])
15319 instance = self.cfg.GetInstanceInfo(self.name)
15320 request_groups = fn(self.relocate_from + [instance.primary_node])
15321 result_groups = fn(rdict["result"] + [instance.primary_node])
15323 if self.success and not set(result_groups).issubset(request_groups):
15324 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15325 " differ from original groups (%s)" %
15326 (utils.CommaJoin(result_groups),
15327 utils.CommaJoin(request_groups)))
15329 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15330 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15332 self.out_data = rdict
15335 def _NodesToGroups(node2group, groups, nodes):
15336 """Returns a list of unique group names for a list of nodes.
15338 @type node2group: dict
15339 @param node2group: Map from node name to group UUID
15341 @param groups: Group information
15343 @param nodes: Node names
15350 group_uuid = node2group[node]
15352 # Ignore unknown node
15356 group = groups[group_uuid]
15358 # Can't find group, let's use UUID
15359 group_name = group_uuid
15361 group_name = group["name"]
15363 result.add(group_name)
15365 return sorted(result)
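# Example (illustrative, hypothetical data): _NodesToGroups returns the
# sorted, de-duplicated group names, skipping unknown nodes and falling back
# to the UUID for unknown groups:
#   node2group = {"node1": "uuid-a", "node2": "uuid-a", "node3": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
#   IAllocator._NodesToGroups(node2group, groups, ["node1", "node3", "nodeX"])
#   => ["default", "uuid-b"]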
15368 class LUTestAllocator(NoHooksLU):
15369 """Run allocator tests.
15371 This LU runs the allocator tests
15374 def CheckPrereq(self):
15375 """Check prerequisites.
15377 This checks the opcode parameters depending on the direction and mode of the test.
15380 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15381 for attr in ["memory", "disks", "disk_template",
15382 "os", "tags", "nics", "vcpus"]:
15383 if not hasattr(self.op, attr):
15384 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15385 attr, errors.ECODE_INVAL)
15386 iname = self.cfg.ExpandInstanceName(self.op.name)
15387 if iname is not None:
15388 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15389 iname, errors.ECODE_EXISTS)
15390 if not isinstance(self.op.nics, list):
15391 raise errors.OpPrereqError("Invalid parameter 'nics'",
15392 errors.ECODE_INVAL)
15393 if not isinstance(self.op.disks, list):
15394 raise errors.OpPrereqError("Invalid parameter 'disks'",
15395 errors.ECODE_INVAL)
15396 for row in self.op.disks:
15397 if (not isinstance(row, dict) or
15398 constants.IDISK_SIZE not in row or
15399 not isinstance(row[constants.IDISK_SIZE], int) or
15400 constants.IDISK_MODE not in row or
15401 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15402 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15403 " parameter", errors.ECODE_INVAL)
15404 if self.op.hypervisor is None:
15405 self.op.hypervisor = self.cfg.GetHypervisorType()
15406 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15407 fname = _ExpandInstanceName(self.cfg, self.op.name)
15408 self.op.name = fname
15409 self.relocate_from = \
15410 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15411 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15412 constants.IALLOCATOR_MODE_NODE_EVAC):
15413 if not self.op.instances:
15414 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15415 self.op.instances = _GetWantedInstances(self, self.op.instances)
15417 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15418 self.op.mode, errors.ECODE_INVAL)
15420 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15421 if self.op.allocator is None:
15422 raise errors.OpPrereqError("Missing allocator name",
15423 errors.ECODE_INVAL)
15424 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15425 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15426 self.op.direction, errors.ECODE_INVAL)
15428 def Exec(self, feedback_fn):
15429 """Run the allocator test.
15432 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15433 ial = IAllocator(self.cfg, self.rpc,
15436 memory=self.op.memory,
15437 disks=self.op.disks,
15438 disk_template=self.op.disk_template,
15442 vcpus=self.op.vcpus,
15443 hypervisor=self.op.hypervisor,
15445 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15446 ial = IAllocator(self.cfg, self.rpc,
15449 relocate_from=list(self.relocate_from),
15451 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15452 ial = IAllocator(self.cfg, self.rpc,
15454 instances=self.op.instances,
15455 target_groups=self.op.target_groups)
15456 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15457 ial = IAllocator(self.cfg, self.rpc,
15459 instances=self.op.instances,
15460 evac_mode=self.op.evac_mode)
15462 raise errors.ProgrammerError("Uncatched mode %s in"
15463 " LUTestAllocator.Exec", self.op.mode)
15465 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15466 result = ial.in_text
15467 else:
15468 ial.Run(self.op.allocator, validate=False)
15469 result = ial.out_text
15473 class LUNetworkAdd(LogicalUnit):
15474 """Logical unit for creating networks.
15477 HPATH = "network-add"
15478 HTYPE = constants.HTYPE_NETWORK
15481 def BuildHooksNodes(self):
15482 """Build hooks nodes.
15485 mn = self.cfg.GetMasterNode()
15486 return ([mn], [mn])
15488 def ExpandNames(self):
15489 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15490 self.needed_locks = {}
15491 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15493 def CheckPrereq(self):
15494 """Check prerequisites.
15496 This checks that the network (subnet) was given and that no network with the same name is already defined.
15500 if self.op.network is None:
15501 raise errors.OpPrereqError("Network must be given",
15502 errors.ECODE_INVAL)
15504 uuid = self.cfg.LookupNetwork(self.op.network_name)
15507 raise errors.OpPrereqError("Network '%s' already defined" %
15508 self.op.network, errors.ECODE_EXISTS)
15511 def BuildHooksEnv(self):
15512 """Build hooks env.
15516 "NETWORK_NAME": self.op.network_name,
15517 "NETWORK_SUBNET": self.op.network,
15518 "NETWORK_GATEWAY": self.op.gateway,
15519 "NETWORK_SUBNET6": self.op.network6,
15520 "NETWORK_GATEWAY6": self.op.gateway6,
15521 "NETWORK_MAC_PREFIX": self.op.mac_prefix,
15522 "NETWORK_TYPE": self.op.network_type,
15526 def Exec(self, feedback_fn):
15527 """Add the ip pool to the cluster.
15530 nobj = objects.Network(name=self.op.network_name,
15531 network=self.op.network,
15532 gateway=self.op.gateway,
15533 network6=self.op.network6,
15534 gateway6=self.op.gateway6,
15535 mac_prefix=self.op.mac_prefix,
15536 network_type=self.op.network_type,
15537 uuid=self.network_uuid,
15539 # Initialize the associated address pool
15541 pool = network.AddressPool.InitializeNetwork(nobj)
15542 except errors.AddressPoolError, e:
15543 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15545 # Check if we need to reserve the nodes and the cluster master IP
15546 # These may not be allocated to any instances in routed mode, as
15547 # they wouldn't function anyway.
15548 for node in self.cfg.GetAllNodesInfo().values():
15549 for ip in [node.primary_ip, node.secondary_ip]:
15552 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
15554 except errors.AddressPoolError:
15555 pass
15557 master_ip = self.cfg.GetClusterInfo().master_ip
15558 try:
15559 pool.Reserve(master_ip)
15560 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
15561 except errors.AddressPoolError:
15562 pass
15564 if self.op.add_reserved_ips:
15565 for ip in self.op.add_reserved_ips:
15566 try:
15567 pool.Reserve(ip, external=True)
15568 except errors.AddressPoolError, e:
15569 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15571 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15572 del self.remove_locks[locking.LEVEL_NETWORK]
15575 class LUNetworkRemove(LogicalUnit):
15576 HPATH = "network-remove"
15577 HTYPE = constants.HTYPE_NETWORK
15580 def ExpandNames(self):
15581 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15583 self.needed_locks = {
15584 locking.LEVEL_NETWORK: [self.network_uuid],
15588 def CheckPrereq(self):
15589 """Check prerequisites.
15591 This checks that the given network name exists as a network and that it
15592 is not still connected to any node group.
15596 if not self.network_uuid:
15597 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
15598 errors.ECODE_INVAL)
15600 # Verify that the network is not connected to any node group.
15601 node_groups = [group.name
15602 for group in self.cfg.GetAllNodeGroupsInfo().values()
15603 for network in group.networks.keys()
15604 if network == self.network_uuid]
15607 self.LogWarning("Nework '%s' is connected to the following"
15608 " node groups: %s" % (self.op.network_name,
15609 utils.CommaJoin(utils.NiceSort(node_groups))))
15610 raise errors.OpPrereqError("Network still connected",
15611 errors.ECODE_STATE)
15613 def BuildHooksEnv(self):
15614 """Build hooks env.
15618 "NETWORK_NAME": self.op.network_name,
15621 def BuildHooksNodes(self):
15622 """Build hooks nodes.
15625 mn = self.cfg.GetMasterNode()
15626 return ([mn], [mn])
15628 def Exec(self, feedback_fn):
15629 """Remove the network.
15633 self.cfg.RemoveNetwork(self.network_uuid)
15634 except errors.ConfigurationError:
15635 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15636 (self.op.network_name, self.network_uuid))
15639 class LUNetworkSetParams(LogicalUnit):
15640 """Modifies the parameters of a network.
15643 HPATH = "network-modify"
15644 HTYPE = constants.HTYPE_NETWORK
15647 def CheckArguments(self):
15648 if (self.op.gateway and
15649 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15650 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15651 " at once", errors.ECODE_INVAL)
15654 def ExpandNames(self):
15655 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15656 self.network = self.cfg.GetNetwork(self.network_uuid)
15657 self.needed_locks = {
15658 locking.LEVEL_NETWORK: [self.network_uuid],
15662 if self.network is None:
15663 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
15664 (self.op.network_name, self.network_uuid),
15665 errors.ECODE_INVAL)
15667 def CheckPrereq(self):
15668 """Check prerequisites.
15671 self.gateway = self.network.gateway
15672 self.network_type = self.network.network_type
15673 self.mac_prefix = self.network.mac_prefix
15674 self.network6 = self.network.network6
15675 self.gateway6 = self.network.gateway6
15677 self.pool = network.AddressPool(self.network)
15679 if self.op.gateway:
15680 if self.op.gateway == constants.VALUE_NONE:
15681 self.gateway = None
15683 self.gateway = self.op.gateway
15684 if self.pool.IsReserved(self.gateway):
15685 raise errors.OpPrereqError("%s is already reserved" %
15686 self.gateway, errors.ECODE_INVAL)
15688 if self.op.network_type:
15689 if self.op.network_type == constants.VALUE_NONE:
15690 self.network_type = None
15692 self.network_type = self.op.network_type
15694 if self.op.mac_prefix:
15695 if self.op.mac_prefix == constants.VALUE_NONE:
15696 self.mac_prefix = None
15698 self.mac_prefix = self.op.mac_prefix
15700 if self.op.gateway6:
15701 if self.op.gateway6 == constants.VALUE_NONE:
15702 self.gateway6 = None
15704 self.gateway6 = self.op.gateway6
15706 if self.op.network6:
15707 if self.op.network6 == constants.VALUE_NONE:
15708 self.network6 = None
15710 self.network6 = self.op.network6
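# Example (illustrative): constants.VALUE_NONE acts as a "clear this field"
# sentinel in the checks above, e.g.:
#   op = opcodes.OpNetworkSetParams(network_name="net1",
#                                   gateway=constants.VALUE_NONE)
#   # => self.gateway becomes None; any other non-empty value replaces it.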
15714 def BuildHooksEnv(self):
15715 """Build hooks env.
15719 "NETWORK_NAME": self.op.network_name,
15720 "NETWORK_SUBNET": self.network.network,
15721 "NETWORK_GATEWAY": self.gateway,
15722 "NETWORK_SUBNET6": self.network6,
15723 "NETWORK_GATEWAY6": self.gateway6,
15724 "NETWORK_MAC_PREFIX": self.mac_prefix,
15725 "NETWORK_TYPE": self.network_type,
15729 def BuildHooksNodes(self):
15730 """Build hooks nodes.
15733 mn = self.cfg.GetMasterNode()
15734 return ([mn], [mn])
15736 def Exec(self, feedback_fn):
15737 """Modifies the network.
15740 #TODO: reserve/release via temporary reservation manager
15741 # extend cfg.ReserveIp/ReleaseIp with the external flag
15742 if self.op.gateway:
15743 if self.gateway == self.network.gateway:
15744 self.LogWarning("Gateway is already %s" % self.gateway)
15747 self.pool.Reserve(self.gateway, external=True)
15748 if self.network.gateway:
15749 self.pool.Release(self.network.gateway, external=True)
15750 self.network.gateway = self.gateway
15752 if self.op.add_reserved_ips:
15753 for ip in self.op.add_reserved_ips:
15754 try:
15755 if self.pool.IsReserved(ip):
15756 self.LogWarning("IP %s is already reserved" % ip)
15757 else:
15758 self.pool.Reserve(ip, external=True)
15759 except errors.AddressPoolError, e:
15760 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
15762 if self.op.remove_reserved_ips:
15763 for ip in self.op.remove_reserved_ips:
15764 if ip == self.network.gateway:
15765 self.LogWarning("Cannot unreserve Gateway's IP")
15766 continue
15767 try:
15768 if not self.pool.IsReserved(ip):
15769 self.LogWarning("IP %s is already unreserved" % ip)
15770 else:
15771 self.pool.Release(ip, external=True)
15772 except errors.AddressPoolError, e:
15773 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
15775 if self.op.mac_prefix:
15776 self.network.mac_prefix = self.mac_prefix
15778 if self.op.network6:
15779 self.network.network6 = self.network6
15781 if self.op.gateway6:
15782 self.network.gateway6 = self.gateway6
15784 if self.op.network_type:
15785 self.network.network_type = self.network_type
15787 self.pool.Validate()
15789 self.cfg.Update(self.network, feedback_fn)
15792 class _NetworkQuery(_QueryBase):
15793 FIELDS = query.NETWORK_FIELDS
15795 def ExpandNames(self, lu):
15796 lu.needed_locks = {}
15798 self._all_networks = lu.cfg.GetAllNetworksInfo()
15799 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
15801 if not self.names:
15802 self.wanted = [name_to_uuid[name]
15803 for name in utils.NiceSort(name_to_uuid.keys())]
15804 else:
15805 # Accept names to be either names or UUIDs.
15806 missing = []
15807 self.wanted = []
15808 all_uuid = frozenset(self._all_networks.keys())
15810 for name in self.names:
15811 if name in all_uuid:
15812 self.wanted.append(name)
15813 elif name in name_to_uuid:
15814 self.wanted.append(name_to_uuid[name])
15816 missing.append(name)
15819 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
15820 errors.ECODE_NOENT)
15822 def DeclareLocks(self, lu, level):
15823 pass
15825 def _GetQueryData(self, lu):
15826 """Computes the list of networks and their attributes.
15829 do_instances = query.NETQ_INST in self.requested_data
15830 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
15831 do_stats = query.NETQ_STATS in self.requested_data
15832 cluster = lu.cfg.GetClusterInfo()
15834 network_to_groups = None
15835 network_to_instances = None
15838 # For NETQ_GROUP, we need to map network->[groups]
15840 all_groups = lu.cfg.GetAllNodeGroupsInfo()
15841 network_to_groups = dict((uuid, []) for uuid in self.wanted)
15842 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
15845 all_instances = lu.cfg.GetAllInstancesInfo()
15846 all_nodes = lu.cfg.GetAllNodesInfo()
15847 network_to_instances = dict((uuid, []) for uuid in self.wanted)
15850 for group in all_groups.values():
15852 group_nodes = [node.name for node in all_nodes.values() if
15853 node.group == group.uuid]
15854 group_instances = [instance for instance in all_instances.values()
15855 if instance.primary_node in group_nodes]
15857 for net_uuid in group.networks.keys():
15858 if net_uuid in network_to_groups:
15859 netparams = group.networks[net_uuid]
15860 mode = netparams[constants.NIC_MODE]
15861 link = netparams[constants.NIC_LINK]
15862 info = group.name + '(' + mode + ', ' + link + ')'
15863 network_to_groups[net_uuid].append(info)
15866 for instance in group_instances:
15867 for nic in instance.nics:
15868 if nic.network == self._all_networks[net_uuid].name:
15869 network_to_instances[net_uuid].append(instance.name)
15874 for uuid, net in self._all_networks.items():
15875 if uuid in self.wanted:
15876 pool = network.AddressPool(net)
15878 "free_count": pool.GetFreeCount(),
15879 "reserved_count": pool.GetReservedCount(),
15880 "map": pool.GetMap(),
15881 "external_reservations": ", ".join(pool.GetExternalReservations()),
15884 return query.NetworkQueryData([self._all_networks[uuid]
15885 for uuid in self.wanted],
15886 network_to_groups,
15887 network_to_instances,
15891 class LUNetworkQuery(NoHooksLU):
15892 """Logical unit for querying networks.
15897 def CheckArguments(self):
15898 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
15899 self.op.output_fields, False)
15901 def ExpandNames(self):
15902 self.nq.ExpandNames(self)
15904 def Exec(self, feedback_fn):
15905 return self.nq.OldStyleQuery(self)
15909 class LUNetworkConnect(LogicalUnit):
15910 """Connect a network to a nodegroup
15913 HPATH = "network-connect"
15914 HTYPE = constants.HTYPE_NETWORK
15917 def ExpandNames(self):
15918 self.network_name = self.op.network_name
15919 self.group_name = self.op.group_name
15920 self.network_mode = self.op.network_mode
15921 self.network_link = self.op.network_link
15923 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
15924 self.network = self.cfg.GetNetwork(self.network_uuid)
15925 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
15926 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15928 self.needed_locks = {
15929 locking.LEVEL_INSTANCE: [],
15930 locking.LEVEL_NODEGROUP: [self.group_uuid],
15932 self.share_locks[locking.LEVEL_INSTANCE] = 1
15934 def DeclareLocks(self, level):
15935 if level == locking.LEVEL_INSTANCE:
15936 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15938 # Lock instances optimistically, needs verification once group lock has
15940 self.needed_locks[locking.LEVEL_INSTANCE] = \
15941 self.cfg.GetNodeGroupInstances(self.group_uuid)
15943 def BuildHooksEnv(self):
15945 ret["GROUP_NAME"] = self.group_name
15946 ret["GROUP_NETWORK_NAME"] = self.network_name
15947 ret["GROUP_NETWORK_MODE"] = self.network_mode
15948 ret["GROUP_NETWORK_LINK"] = self.network_link
15951 def BuildHooksNodes(self):
15952 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
15953 return (nodes, nodes)
15956 def CheckPrereq(self):
15957 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
15960 if self.network is None:
15961 raise errors.OpPrereqError("Network %s does not exist" %
15962 self.network_name, errors.ECODE_INVAL)
15964 self.netparams = dict()
15965 self.netparams[constants.NIC_MODE] = self.network_mode
15966 self.netparams[constants.NIC_LINK] = self.network_link
15967 objects.NIC.CheckParameterSyntax(self.netparams)
15969 #if self.network_mode == constants.NIC_MODE_BRIDGED:
15970 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
15971 self.connected = False
15972 if self.network_uuid in self.group.networks:
15973 self.LogWarning("Network '%s' is already mapped to group '%s'" %
15974 (self.network_name, self.group.name))
15975 self.connected = True
15978 pool = network.AddressPool(self.network)
15979 if self.op.conflicts_check:
15980 groupinstances = []
15981 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
15982 groupinstances.append(self.cfg.GetInstanceInfo(n))
15983 instances = [(instance.name, idx, nic.ip)
15984 for instance in groupinstances
15985 for idx, nic in enumerate(instance.nics)
15986 if (not nic.network and pool._Contains(nic.ip))]
15988 self.LogWarning("Following occurences use IPs from network %s"
15989 " that is about to connect to nodegroup %s: %s" %
15990 (self.network_name, self.group.name,
15992 raise errors.OpPrereqError("Conflicting IPs found."
15993 " Please remove/modify"
15994 " corresponding NICs",
15995 errors.ECODE_INVAL)
15997 def Exec(self, feedback_fn):
15998 if self.connected:
15999 return
16001 self.group.networks[self.network_uuid] = self.netparams
16002 self.cfg.Update(self.group, feedback_fn)
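# Example (illustrative, hypothetical values): the per-group mapping stored
# above is simply the NIC parameter dict built in CheckPrereq, e.g.:
#   self.group.networks[self.network_uuid] == {
#     constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
#     constants.NIC_LINK: "br0",
#     }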
16005 class LUNetworkDisconnect(LogicalUnit):
16006 """Disconnect a network to a nodegroup
16009 HPATH = "network-disconnect"
16010 HTYPE = constants.HTYPE_NETWORK
16013 def ExpandNames(self):
16014 self.network_name = self.op.network_name
16015 self.group_name = self.op.group_name
16017 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16018 self.network = self.cfg.GetNetwork(self.network_uuid)
16019 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16020 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16022 self.needed_locks = {
16023 locking.LEVEL_INSTANCE: [],
16024 locking.LEVEL_NODEGROUP: [self.group_uuid],
16026 self.share_locks[locking.LEVEL_INSTANCE] = 1
16028 def DeclareLocks(self, level):
16029 if level == locking.LEVEL_INSTANCE:
16030 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16032 # Lock instances optimistically, needs verification once group lock has
16034 self.needed_locks[locking.LEVEL_INSTANCE] = \
16035 self.cfg.GetNodeGroupInstances(self.group_uuid)
16037 def BuildHooksEnv(self):
16039 ret["GROUP_NAME"] = self.group_name
16040 ret["GROUP_NETWORK_NAME"] = self.network_name
16043 def BuildHooksNodes(self):
16044 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16045 return (nodes, nodes)
16048 def CheckPrereq(self):
16049 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16052 self.connected = True
16053 if self.network_uuid not in self.group.networks:
16054 self.LogWarning("Network '%s' is"
16055 " not mapped to group '%s'" %
16056 (self.network_name, self.group.name))
16057 self.connected = False
16060 if self.op.conflicts_check:
16061 groupinstances = []
16062 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16063 groupinstances.append(self.cfg.GetInstanceInfo(n))
16064 instances = [(instance.name, idx, nic.ip)
16065 for instance in groupinstances
16066 for idx, nic in enumerate(instance.nics)
16067 if nic.network == self.network_name]
16069 self.LogWarning("Following occurences use IPs from network %s"
16070 " that is about to disconnected from the nodegroup"
16072 (self.network_name, self.group.name,
16074 raise errors.OpPrereqError("Conflicting IPs."
16075 " Please remove/modify"
16076 " corresponding NICS",
16077 errors.ECODE_INVAL)
16079 def Exec(self, feedback_fn):
16080 if not self.connected:
16081 return
16083 del self.group.networks[self.network_uuid]
16084 self.cfg.Update(self.group, feedback_fn)
16087 #: Query type implementations
16089 constants.QR_CLUSTER: _ClusterQuery,
16090 constants.QR_INSTANCE: _InstanceQuery,
16091 constants.QR_NODE: _NodeQuery,
16092 constants.QR_GROUP: _GroupQuery,
16093 constants.QR_NETWORK: _NetworkQuery,
16094 constants.QR_OS: _OsQuery,
16095 constants.QR_EXPORT: _ExportQuery,
16098 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16101 def _GetQueryImplementation(name):
16102 """Returns the implemtnation for a query type.
16104 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16107 try:
16108 return _QUERY_IMPL[name]
16109 except KeyError:
16110 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16111 errors.ECODE_INVAL)
16113 def _CheckForConflictingIp(lu, ip, node):
16114 """In case of conflicting ip raise error.
16117 @param ip: ip address
16119 @param node: node name
16122 (conf_net, conf_netparams) = lu.cfg.CheckIPInNodeGroup(ip, node)
16123 if conf_net is not None:
16124 raise errors.OpPrereqError("Conflicting IP found:"
16125 " %s <> %s." % (ip, conf_net),
16126 errors.ECODE_INVAL)
16128 return (None, None)