4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95   @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176     This method is for doing a simple syntactic check and ensuring the
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179     CheckPrereq, doing them separately is better because:
181       - ExpandNames is left as purely a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186     later methods need no longer worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322       hook should run after the execution. If there are no nodes, an
323       empty list should be returned (not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336     previous result is passed back unchanged, but any LU can override it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342     @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349     # API must be kept, thus we ignore the unused-argument and
350     # could-be-a-function warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388     It should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401     # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
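# A minimal locking sketch (illustrative only, not a real LU of this module):
# how a concrete unit would typically combine _ExpandAndLockInstance and
# _LockInstancesNodes, declaring the instance lock in ExpandNames and
# deferring the node-level locks to DeclareLocks as described above.
class _LUExampleInstanceNoop(NoHooksLU):
  """Example LU that only locks an instance and its nodes."""
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks cannot be computed yet; request a recalculation once the
    # instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  # CheckPrereq is inherited unchanged; a real LU would verify the instance
  def Exec(self, feedback_fn):
    feedback_fn("Instance %s and its nodes are locked" %
                self.op.instance_name)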
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600   @returns: The annotated disk copies
601   @see: L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " are '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _CopyLockList(names):
701 """Makes a copy of a list of lock names.
703 Handles L{locking.ALL_SET} correctly.
706 if names == locking.ALL_SET:
707 return locking.ALL_SET
712 def _GetWantedNodes(lu, nodes):
713 """Returns list of checked and expanded node names.
715 @type lu: L{LogicalUnit}
716 @param lu: the logical unit on whose behalf we execute
718 @param nodes: list of node names or None for all nodes
720 @return: the list of nodes, sorted
721 @raise errors.ProgrammerError: if the nodes parameter is wrong type
725 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
727 return utils.NiceSort(lu.cfg.GetNodeList())
730 def _GetWantedInstances(lu, instances):
731 """Returns list of checked and expanded instance names.
733 @type lu: L{LogicalUnit}
734 @param lu: the logical unit on whose behalf we execute
735 @type instances: list
736 @param instances: list of instance names or None for all instances
738 @return: the list of instances, sorted
739 @raise errors.OpPrereqError: if the instances parameter is wrong type
740 @raise errors.OpPrereqError: if any of the passed instances is not found
744 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
746 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
750 def _GetUpdatedParams(old_params, update_dict,
751 use_default=True, use_none=False):
752 """Return the new version of a parameter dictionary.
754 @type old_params: dict
755 @param old_params: old parameters
756 @type update_dict: dict
757 @param update_dict: dict containing new parameter values, or
758 constants.VALUE_DEFAULT to reset the parameter to its default
760   @type use_default: boolean
761   @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
762       values as 'to be deleted' values
763   @type use_none: boolean
764   @param use_none: whether to recognise C{None} values as 'to be
767 @return: the new parameter dictionary
770 params_copy = copy.deepcopy(old_params)
771 for key, val in update_dict.iteritems():
772 if ((use_default and val == constants.VALUE_DEFAULT) or
773 (use_none and val is None)):
779 params_copy[key] = val
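# Usage sketch for _GetUpdatedParams (illustrative helper, not called from
# this module): constants.VALUE_DEFAULT drops the key so the cluster-level
# default becomes effective again, while other values simply override.
def _ExampleGetUpdatedParams():
  old = {"kernel_path": "/boot/vmlinuz", "serial_console": False}
  updates = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
  # Result: {"serial_console": True}; "kernel_path" reverts to its default
  return _GetUpdatedParams(old, updates)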
783 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
784 """Return the new version of a instance policy.
786 @param group_policy: whether this policy applies to a group and thus
787 we should support removal of policy entries
790 use_none = use_default = group_policy
791 ipolicy = copy.deepcopy(old_ipolicy)
792 for key, value in new_ipolicy.items():
793 if key not in constants.IPOLICY_ALL_KEYS:
794 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
796 if key in constants.IPOLICY_ISPECS:
797 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
798 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
800 use_default=use_default)
802 if (not value or value == [constants.VALUE_DEFAULT] or
803 value == constants.VALUE_DEFAULT):
807 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
808 " on the cluster'" % key,
811 if key in constants.IPOLICY_PARAMETERS:
812 # FIXME: we assume all such values are float
814 ipolicy[key] = float(value)
815 except (TypeError, ValueError), err:
816 raise errors.OpPrereqError("Invalid value for attribute"
817 " '%s': '%s', error: %s" %
818 (key, value, err), errors.ECODE_INVAL)
820 # FIXME: we assume all others are lists; this should be redone
822 ipolicy[key] = list(value)
824 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
825 except errors.ConfigurationError, err:
826 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
831 def _UpdateAndVerifySubDict(base, updates, type_check):
832 """Updates and verifies a dict with sub dicts of the same type.
834 @param base: The dict with the old data
835 @param updates: The dict with the new data
836   @param type_check: Dict suitable for ForceDictType to verify correct types
837 @returns: A new dict with updated and verified values
841 new = _GetUpdatedParams(old, value)
842 utils.ForceDictType(new, type_check)
845 ret = copy.deepcopy(base)
846 ret.update(dict((key, fn(base.get(key, {}), value))
847 for key, value in updates.items()))
851 def _MergeAndVerifyHvState(op_input, obj_input):
852 """Combines the hv state from an opcode with the one of the object
854 @param op_input: The input dict from the opcode
855 @param obj_input: The input dict from the objects
856 @return: The verified and updated dict
860 invalid_hvs = set(op_input) - constants.HYPER_TYPES
862 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
863 " %s" % utils.CommaJoin(invalid_hvs),
865 if obj_input is None:
867 type_check = constants.HVSTS_PARAMETER_TYPES
868 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
873 def _MergeAndVerifyDiskState(op_input, obj_input):
874 """Combines the disk state from an opcode with the one of the object
876 @param op_input: The input dict from the opcode
877 @param obj_input: The input dict from the objects
878 @return: The verified and updated dict
881 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
883 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
884 utils.CommaJoin(invalid_dst),
886 type_check = constants.DSS_PARAMETER_TYPES
887 if obj_input is None:
889 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
891 for key, value in op_input.items())
896 def _ReleaseLocks(lu, level, names=None, keep=None):
897 """Releases locks owned by an LU.
899 @type lu: L{LogicalUnit}
900 @param level: Lock level
901 @type names: list or None
902 @param names: Names of locks to release
903 @type keep: list or None
904 @param keep: Names of locks to retain
907 assert not (keep is not None and names is not None), \
908 "Only one of the 'names' and the 'keep' parameters can be given"
910 if names is not None:
911 should_release = names.__contains__
913 should_release = lambda name: name not in keep
915 should_release = None
917 owned = lu.owned_locks(level)
919 # Not owning any lock at this level, do nothing
926 # Determine which locks to release
928 if should_release(name):
933 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
935 # Release just some locks
936 lu.glm.release(level, names=release)
938 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
941 lu.glm.release(level)
943 assert not lu.glm.is_owned(level), "No locks should be owned"
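# Usage sketch for _ReleaseLocks (illustrative; the opcode slot name below is
# an example): once an LU has narrowed its work down to a single node it can
# drop the other node locks it acquired in ExpandNames, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# or release everything at that level once the node-level work is finished:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE)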
946 def _MapInstanceDisksToNodes(instances):
947 """Creates a map from (node, volume) to instance name.
949 @type instances: list of L{objects.Instance}
950 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
953 return dict(((node, vol), inst.name)
954 for inst in instances
955 for (node, vols) in inst.MapLVsByNode().items()
959 def _RunPostHook(lu, node_name):
960 """Runs the post-hook for an opcode on a single node.
963 hm = lu.proc.BuildHooksManager(lu)
965 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
966 except Exception, err: # pylint: disable=W0703
967 lu.LogWarning("Errors occurred running hooks on %s: %s",
971 def _CheckOutputFields(static, dynamic, selected):
972 """Checks whether all selected fields are valid.
974 @type static: L{utils.FieldSet}
975 @param static: static fields set
976 @type dynamic: L{utils.FieldSet}
977 @param dynamic: dynamic fields set
984 delta = f.NonMatching(selected)
986 raise errors.OpPrereqError("Unknown output fields selected: %s"
987 % ",".join(delta), errors.ECODE_INVAL)
990 def _CheckGlobalHvParams(params):
991 """Validates that given hypervisor params are not global ones.
993 This will ensure that instances don't get customised versions of
997 used_globals = constants.HVC_GLOBALS.intersection(params)
999 msg = ("The following hypervisor parameters are global and cannot"
1000 " be customized at instance level, please modify them at"
1001 " cluster level: %s" % utils.CommaJoin(used_globals))
1002 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1005 def _CheckNodeOnline(lu, node, msg=None):
1006 """Ensure that a given node is online.
1008 @param lu: the LU on behalf of which we make the check
1009 @param node: the node to check
1010 @param msg: if passed, should be a message to replace the default one
1011 @raise errors.OpPrereqError: if the node is offline
1015 msg = "Can't use offline node"
1016 if lu.cfg.GetNodeInfo(node).offline:
1017 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1020 def _CheckNodeNotDrained(lu, node):
1021 """Ensure that a given node is not drained.
1023 @param lu: the LU on behalf of which we make the check
1024 @param node: the node to check
1025 @raise errors.OpPrereqError: if the node is drained
1028 if lu.cfg.GetNodeInfo(node).drained:
1029 raise errors.OpPrereqError("Can't use drained node %s" % node,
1033 def _CheckNodeVmCapable(lu, node):
1034 """Ensure that a given node is vm capable.
1036 @param lu: the LU on behalf of which we make the check
1037 @param node: the node to check
1038 @raise errors.OpPrereqError: if the node is not vm capable
1041 if not lu.cfg.GetNodeInfo(node).vm_capable:
1042 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1046 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1047 """Ensure that a node supports a given OS.
1049 @param lu: the LU on behalf of which we make the check
1050 @param node: the node to check
1051 @param os_name: the OS to query about
1052 @param force_variant: whether to ignore variant errors
1053   @raise errors.OpPrereqError: if the node does not support the OS
1056 result = lu.rpc.call_os_get(node, os_name)
1057 result.Raise("OS '%s' not in supported OS list for node %s" %
1059 prereq=True, ecode=errors.ECODE_INVAL)
1060 if not force_variant:
1061 _CheckOSVariant(result.payload, os_name)
1064 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1065 """Ensure that a node has the given secondary ip.
1067 @type lu: L{LogicalUnit}
1068 @param lu: the LU on behalf of which we make the check
1070 @param node: the node to check
1071 @type secondary_ip: string
1072 @param secondary_ip: the ip to check
1073 @type prereq: boolean
1074 @param prereq: whether to throw a prerequisite or an execute error
1075 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1076 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1079 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1080 result.Raise("Failure checking secondary ip on node %s" % node,
1081 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1082 if not result.payload:
1083 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1084 " please fix and re-run this command" % secondary_ip)
1086 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1088 raise errors.OpExecError(msg)
1091 def _GetClusterDomainSecret():
1092 """Reads the cluster domain secret.
1095 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1099 def _CheckInstanceState(lu, instance, req_states, msg=None):
1100 """Ensure that an instance is in one of the required states.
1102 @param lu: the LU on behalf of which we make the check
1103 @param instance: the instance to check
1104 @param msg: if passed, should be a message to replace the default one
1105 @raise errors.OpPrereqError: if the instance is not in the required state
1109 msg = ("can't use instance from outside %s states" %
1110 utils.CommaJoin(req_states))
1111 if instance.admin_state not in req_states:
1112 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1113 (instance.name, instance.admin_state, msg),
1116 if constants.ADMINST_UP not in req_states:
1117 pnode = instance.primary_node
1118 if not lu.cfg.GetNodeInfo(pnode).offline:
1119 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1120 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1121 prereq=True, ecode=errors.ECODE_ENVIRON)
1122 if instance.name in ins_l.payload:
1123 raise errors.OpPrereqError("Instance %s is running, %s" %
1124 (instance.name, msg), errors.ECODE_STATE)
1126 lu.LogWarning("Primary node offline, ignoring check that instance"
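# Usage sketch for _CheckInstanceState (illustrative): an LU that must not
# touch a running instance would typically check, in CheckPrereq:
#
#   _CheckInstanceState(self, instance, INSTANCE_DOWN,
#                       msg="cannot operate on a running instance")
#
# using one of the admin-state lists defined at the top of this module.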
1130 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1131 """Computes if value is in the desired range.
1133 @param name: name of the parameter for which we perform the check
1134 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1136 @param ipolicy: dictionary containing min, max and std values
1137 @param value: actual value that we want to use
1138   @return: None if the value is within the range, an error message otherwise
1142 if value in [None, constants.VALUE_AUTO]:
1144 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1145 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1146 if value > max_v or min_v > value:
1148 fqn = "%s/%s" % (name, qualifier)
1151 return ("%s value %s is not in range [%s, %s]" %
1152 (fqn, value, min_v, max_v))
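# Usage sketch for _ComputeMinMaxSpec (illustrative, not called from this
# module): checking a single value against hypothetical ipolicy bounds; None
# means the value is within range, otherwise an error message is returned.
def _ExampleComputeMinMaxSpec():
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
    }
  # 8192 MB exceeds the maximum of 4096, so a violation message is returned
  return _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)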
1156 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1157 nic_count, disk_sizes, spindle_use,
1158 _compute_fn=_ComputeMinMaxSpec):
1159 """Verifies ipolicy against provided specs.
1162 @param ipolicy: The ipolicy
1164 @param mem_size: The memory size
1165 @type cpu_count: int
1166 @param cpu_count: Used cpu cores
1167 @type disk_count: int
1168 @param disk_count: Number of disks used
1169 @type nic_count: int
1170 @param nic_count: Number of nics used
1171 @type disk_sizes: list of ints
1172 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1173 @type spindle_use: int
1174 @param spindle_use: The number of spindles this instance uses
1175 @param _compute_fn: The compute function (unittest only)
1176   @return: A list of violations, or an empty list if no violations are found
1179 assert disk_count == len(disk_sizes)
1182 (constants.ISPEC_MEM_SIZE, "", mem_size),
1183 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1184 (constants.ISPEC_DISK_COUNT, "", disk_count),
1185 (constants.ISPEC_NIC_COUNT, "", nic_count),
1186 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1187 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1188 for idx, d in enumerate(disk_sizes)]
1191 (_compute_fn(name, qualifier, ipolicy, value)
1192 for (name, qualifier, value) in test_settings))
1195 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1196 _compute_fn=_ComputeIPolicySpecViolation):
1197 """Compute if instance meets the specs of ipolicy.
1200 @param ipolicy: The ipolicy to verify against
1201 @type instance: L{objects.Instance}
1202 @param instance: The instance to verify
1203 @param _compute_fn: The function to verify ipolicy (unittest only)
1204 @see: L{_ComputeIPolicySpecViolation}
1207 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1208 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1209 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1210 disk_count = len(instance.disks)
1211 disk_sizes = [disk.size for disk in instance.disks]
1212 nic_count = len(instance.nics)
1214 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1215 disk_sizes, spindle_use)
1218 def _ComputeIPolicyInstanceSpecViolation(
1219 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1220 """Compute if instance specs meets the specs of ipolicy.
1223 @param ipolicy: The ipolicy to verify against
1224   @type instance_spec: dict
1225 @param instance_spec: The instance spec to verify
1226 @param _compute_fn: The function to verify ipolicy (unittest only)
1227 @see: L{_ComputeIPolicySpecViolation}
1230 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1231 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1232 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1233 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1234 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1235 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1237 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1238 disk_sizes, spindle_use)
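# Usage sketch for _ComputeIPolicyInstanceSpecViolation (illustrative): an
# instance creation path can validate a bare spec dict against an ipolicy
# before any L{objects.Instance} exists.
def _ExampleIPolicySpecCheck(ipolicy):
  spec = {
    constants.ISPEC_MEM_SIZE: 512,
    constants.ISPEC_CPU_COUNT: 2,
    constants.ISPEC_DISK_COUNT: 1,
    constants.ISPEC_DISK_SIZE: [10240],
    constants.ISPEC_NIC_COUNT: 1,
    constants.ISPEC_SPINDLE_USE: 1,
    }
  # Returns a (possibly empty) list of human-readable violation messages
  return _ComputeIPolicyInstanceSpecViolation(ipolicy, spec)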
1241 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1243 _compute_fn=_ComputeIPolicyInstanceViolation):
1244 """Compute if instance meets the specs of the new target group.
1246 @param ipolicy: The ipolicy to verify
1247 @param instance: The instance object to verify
1248 @param current_group: The current group of the instance
1249 @param target_group: The new group of the instance
1250 @param _compute_fn: The function to verify ipolicy (unittest only)
1251 @see: L{_ComputeIPolicySpecViolation}
1254 if current_group == target_group:
1257 return _compute_fn(ipolicy, instance)
1260 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1261 _compute_fn=_ComputeIPolicyNodeViolation):
1262 """Checks that the target node is correct in terms of instance policy.
1264 @param ipolicy: The ipolicy to verify
1265 @param instance: The instance object to verify
1266 @param node: The new node to relocate
1267 @param ignore: Ignore violations of the ipolicy
1268 @param _compute_fn: The function to verify ipolicy (unittest only)
1269 @see: L{_ComputeIPolicySpecViolation}
1272 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1273 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1276 msg = ("Instance does not meet target node group's (%s) instance"
1277 " policy: %s") % (node.group, utils.CommaJoin(res))
1281 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1284 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1285 """Computes a set of any instances that would violate the new ipolicy.
1287 @param old_ipolicy: The current (still in-place) ipolicy
1288 @param new_ipolicy: The new (to become) ipolicy
1289 @param instances: List of instances to verify
1290   @return: A list of instances which violate the new ipolicy but
1294 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1295 _ComputeViolatingInstances(old_ipolicy, instances))
1298 def _ExpandItemName(fn, name, kind):
1299 """Expand an item name.
1301 @param fn: the function to use for expansion
1302 @param name: requested item name
1303 @param kind: text description ('Node' or 'Instance')
1304 @return: the resolved (full) name
1305 @raise errors.OpPrereqError: if the item is not found
1308 full_name = fn(name)
1309 if full_name is None:
1310 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1315 def _ExpandNodeName(cfg, name):
1316 """Wrapper over L{_ExpandItemName} for nodes."""
1317 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1320 def _ExpandInstanceName(cfg, name):
1321 """Wrapper over L{_ExpandItemName} for instance."""
1322 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1325 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1326 network_type, mac_prefix, tags):
1327 """Builds network related env variables for hooks
1329 This builds the hook environment from individual variables.
1332 @param name: the name of the network
1333 @type subnet: string
1334 @param subnet: the ipv4 subnet
1335 @type gateway: string
1336 @param gateway: the ipv4 gateway
1337 @type network6: string
1338 @param network6: the ipv6 subnet
1339 @type gateway6: string
1340 @param gateway6: the ipv6 gateway
1341 @type network_type: string
1342 @param network_type: the type of the network
1343 @type mac_prefix: string
1344 @param mac_prefix: the mac_prefix
1346 @param tags: the tags of the network
1351 env["NETWORK_NAME"] = name
1353 env["NETWORK_SUBNET"] = subnet
1355 env["NETWORK_GATEWAY"] = gateway
1357 env["NETWORK_SUBNET6"] = network6
1359 env["NETWORK_GATEWAY6"] = gateway6
1361 env["NETWORK_MAC_PREFIX"] = mac_prefix
1363 env["NETWORK_TYPE"] = network_type
1365 env["NETWORK_TAGS"] = " ".join(tags)
1370 def _BuildNetworkHookEnvByObject(net):
1371 """Builds network related env varliables for hooks
1373 @type net: L{objects.Network}
1374 @param net: the network object
1379 "subnet": net.network,
1380 "gateway": net.gateway,
1381 "network6": net.network6,
1382 "gateway6": net.gateway6,
1383 "network_type": net.network_type,
1384 "mac_prefix": net.mac_prefix,
1388 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
1391 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1392 minmem, maxmem, vcpus, nics, disk_template, disks,
1393 bep, hvp, hypervisor_name, tags):
1394 """Builds instance related env variables for hooks
1396 This builds the hook environment from individual variables.
1399 @param name: the name of the instance
1400 @type primary_node: string
1401 @param primary_node: the name of the instance's primary node
1402 @type secondary_nodes: list
1403 @param secondary_nodes: list of secondary nodes as strings
1404 @type os_type: string
1405 @param os_type: the name of the instance's OS
1406 @type status: string
1407 @param status: the desired status of the instance
1408 @type minmem: string
1409 @param minmem: the minimum memory size of the instance
1410 @type maxmem: string
1411 @param maxmem: the maximum memory size of the instance
1413 @param vcpus: the count of VCPUs the instance has
1415   @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1416 the NICs the instance has
1417 @type disk_template: string
1418 @param disk_template: the disk template of the instance
1420 @param disks: the list of (size, mode) pairs
1422 @param bep: the backend parameters for the instance
1424 @param hvp: the hypervisor parameters for the instance
1425 @type hypervisor_name: string
1426 @param hypervisor_name: the hypervisor for the instance
1428 @param tags: list of instance tags as strings
1430 @return: the hook environment for this instance
1435 "INSTANCE_NAME": name,
1436 "INSTANCE_PRIMARY": primary_node,
1437 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1438 "INSTANCE_OS_TYPE": os_type,
1439 "INSTANCE_STATUS": status,
1440 "INSTANCE_MINMEM": minmem,
1441 "INSTANCE_MAXMEM": maxmem,
1442 # TODO(2.7) remove deprecated "memory" value
1443 "INSTANCE_MEMORY": maxmem,
1444 "INSTANCE_VCPUS": vcpus,
1445 "INSTANCE_DISK_TEMPLATE": disk_template,
1446 "INSTANCE_HYPERVISOR": hypervisor_name,
1449 nic_count = len(nics)
1450 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1453 env["INSTANCE_NIC%d_IP" % idx] = ip
1454 env["INSTANCE_NIC%d_MAC" % idx] = mac
1455 env["INSTANCE_NIC%d_MODE" % idx] = mode
1456 env["INSTANCE_NIC%d_LINK" % idx] = link
1458 env["INSTANCE_NIC%d_NETWORK" % idx] = net
1460 nobj = objects.Network.FromDict(netinfo)
1462 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1464 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1466 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1468 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1470 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1471 if nobj.network_type:
1472 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1474 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1475 if mode == constants.NIC_MODE_BRIDGED:
1476 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1480 env["INSTANCE_NIC_COUNT"] = nic_count
1483 disk_count = len(disks)
1484 for idx, (size, mode) in enumerate(disks):
1485 env["INSTANCE_DISK%d_SIZE" % idx] = size
1486 env["INSTANCE_DISK%d_MODE" % idx] = mode
1490 env["INSTANCE_DISK_COUNT"] = disk_count
1495 env["INSTANCE_TAGS"] = " ".join(tags)
1497 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1498 for key, value in source.items():
1499 env["INSTANCE_%s_%s" % (kind, key)] = value
1504 def _NICToTuple(lu, nic):
1505 """Build a tupple of nic information.
1507 @type lu: L{LogicalUnit}
1508 @param lu: the logical unit on whose behalf we execute
1509 @type nic: L{objects.NIC}
1510 @param nic: nic to convert to hooks tuple
1515 cluster = lu.cfg.GetClusterInfo()
1516 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1517 mode = filled_params[constants.NIC_MODE]
1518 link = filled_params[constants.NIC_LINK]
1522 net_uuid = lu.cfg.LookupNetwork(net)
1524 nobj = lu.cfg.GetNetwork(net_uuid)
1525 netinfo = objects.Network.ToDict(nobj)
1526 return (ip, mac, mode, link, net, netinfo)
1529 def _NICListToTuple(lu, nics):
1530 """Build a list of nic information tuples.
1532 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1533 value in LUInstanceQueryData.
1535 @type lu: L{LogicalUnit}
1536 @param lu: the logical unit on whose behalf we execute
1537 @type nics: list of L{objects.NIC}
1538 @param nics: list of nics to convert to hooks tuples
1543 hooks_nics.append(_NICToTuple(lu, nic))
1547 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1548 """Builds instance related env variables for hooks from an object.
1550 @type lu: L{LogicalUnit}
1551 @param lu: the logical unit on whose behalf we execute
1552 @type instance: L{objects.Instance}
1553 @param instance: the instance for which we should build the
1555 @type override: dict
1556 @param override: dictionary with key/values that will override
1559 @return: the hook environment dictionary
1562 cluster = lu.cfg.GetClusterInfo()
1563 bep = cluster.FillBE(instance)
1564 hvp = cluster.FillHV(instance)
1566 "name": instance.name,
1567 "primary_node": instance.primary_node,
1568 "secondary_nodes": instance.secondary_nodes,
1569 "os_type": instance.os,
1570 "status": instance.admin_state,
1571 "maxmem": bep[constants.BE_MAXMEM],
1572 "minmem": bep[constants.BE_MINMEM],
1573 "vcpus": bep[constants.BE_VCPUS],
1574 "nics": _NICListToTuple(lu, instance.nics),
1575 "disk_template": instance.disk_template,
1576 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1579 "hypervisor_name": instance.hypervisor,
1580 "tags": instance.tags,
1583 args.update(override)
1584 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1587 def _AdjustCandidatePool(lu, exceptions):
1588 """Adjust the candidate pool after node operations.
1591 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1593 lu.LogInfo("Promoted nodes to master candidate role: %s",
1594 utils.CommaJoin(node.name for node in mod_list))
1595 for name in mod_list:
1596 lu.context.ReaddNode(name)
1597 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1599 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1603 def _DecideSelfPromotion(lu, exceptions=None):
1604 """Decide whether I should promote myself as a master candidate.
1607 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1608 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1609   # the new node will increase mc_max by one, so:
1610 mc_should = min(mc_should + 1, cp_size)
1611 return mc_now < mc_should
1614 def _ComputeViolatingInstances(ipolicy, instances):
1615 """Computes a set of instances who violates given ipolicy.
1617 @param ipolicy: The ipolicy to verify
1618   @type instances: list of L{objects.Instance}
1619 @param instances: List of instances to verify
1620 @return: A frozenset of instance names violating the ipolicy
1623 return frozenset([inst.name for inst in instances
1624 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1627 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1628 """Check that the brigdes needed by a list of nics exist.
1631 cluster = lu.cfg.GetClusterInfo()
1632 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1633 brlist = [params[constants.NIC_LINK] for params in paramslist
1634 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1636 result = lu.rpc.call_bridges_exist(target_node, brlist)
1637 result.Raise("Error checking bridges on destination node '%s'" %
1638 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1641 def _CheckInstanceBridgesExist(lu, instance, node=None):
1642 """Check that the brigdes needed by an instance exist.
1646 node = instance.primary_node
1647 _CheckNicsBridgesExist(lu, instance.nics, node)
1650 def _CheckOSVariant(os_obj, name):
1651 """Check whether an OS name conforms to the os variants specification.
1653 @type os_obj: L{objects.OS}
1654 @param os_obj: OS object to check
1656 @param name: OS name passed by the user, to check for validity
1659 variant = objects.OS.GetVariant(name)
1660 if not os_obj.supported_variants:
1662 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1663 " passed)" % (os_obj.name, variant),
1667 raise errors.OpPrereqError("OS name must include a variant",
1670 if variant not in os_obj.supported_variants:
1671 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1674 def _GetNodeInstancesInner(cfg, fn):
1675 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1678 def _GetNodeInstances(cfg, node_name):
1679 """Returns a list of all primary and secondary instances on a node.
1683 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1686 def _GetNodePrimaryInstances(cfg, node_name):
1687 """Returns primary instances on a node.
1690 return _GetNodeInstancesInner(cfg,
1691 lambda inst: node_name == inst.primary_node)
1694 def _GetNodeSecondaryInstances(cfg, node_name):
1695 """Returns secondary instances on a node.
1698 return _GetNodeInstancesInner(cfg,
1699 lambda inst: node_name in inst.secondary_nodes)
1702 def _GetStorageTypeArgs(cfg, storage_type):
1703 """Returns the arguments for a storage type.
1706 # Special case for file storage
1707 if storage_type == constants.ST_FILE:
1708 # storage.FileStorage wants a list of storage directories
1709 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1714 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1717 for dev in instance.disks:
1718 cfg.SetDiskID(dev, node_name)
1720 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1722 result.Raise("Failed to get disk status from node %s" % node_name,
1723 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1725 for idx, bdev_status in enumerate(result.payload):
1726 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1732 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1733 """Check the sanity of iallocator and node arguments and use the
1734 cluster-wide iallocator if appropriate.
1736 Check that at most one of (iallocator, node) is specified. If none is
1737 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1738 then the LU's opcode's iallocator slot is filled with the cluster-wide
1741 @type iallocator_slot: string
1742 @param iallocator_slot: the name of the opcode iallocator slot
1743 @type node_slot: string
1744 @param node_slot: the name of the opcode target node slot
1747 node = getattr(lu.op, node_slot, None)
1748 ialloc = getattr(lu.op, iallocator_slot, None)
1752 if node is not None and ialloc is not None:
1753 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1755 elif ((node is None and ialloc is None) or
1756 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1757 default_iallocator = lu.cfg.GetDefaultIAllocator()
1758 if default_iallocator:
1759 setattr(lu.op, iallocator_slot, default_iallocator)
1761 raise errors.OpPrereqError("No iallocator or node given and no"
1762 " cluster-wide default iallocator found;"
1763 " please specify either an iallocator or a"
1764 " node, or set a cluster-wide default"
1765 " iallocator", errors.ECODE_INVAL)
1768 def _GetDefaultIAllocator(cfg, ialloc):
1769 """Decides on which iallocator to use.
1771 @type cfg: L{config.ConfigWriter}
1772 @param cfg: Cluster configuration object
1773 @type ialloc: string or None
1774 @param ialloc: Iallocator specified in opcode
1776 @return: Iallocator name
1780 # Use default iallocator
1781 ialloc = cfg.GetDefaultIAllocator()
1784 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1785 " opcode nor as a cluster-wide default",
1791 def _CheckHostnameSane(lu, name):
1792 """Ensures that a given hostname resolves to a 'sane' name.
1794 The given name is required to be a prefix of the resolved hostname,
1795 to prevent accidental mismatches.
1797 @param lu: the logical unit on behalf of which we're checking
1798 @param name: the name we should resolve and check
1799 @return: the resolved hostname object
1802 hostname = netutils.GetHostname(name=name)
1803 if hostname.name != name:
1804 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1805 if not utils.MatchNameComponent(name, [hostname.name]):
1806 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1807 " same as given hostname '%s'") %
1808 (hostname.name, name), errors.ECODE_INVAL)
1812 class LUClusterPostInit(LogicalUnit):
1813 """Logical unit for running hooks after cluster initialization.
1816 HPATH = "cluster-init"
1817 HTYPE = constants.HTYPE_CLUSTER
1819 def BuildHooksEnv(self):
1824 "OP_TARGET": self.cfg.GetClusterName(),
1827 def BuildHooksNodes(self):
1828 """Build hooks nodes.
1831 return ([], [self.cfg.GetMasterNode()])
1833 def Exec(self, feedback_fn):
1840 class LUClusterDestroy(LogicalUnit):
1841 """Logical unit for destroying the cluster.
1844 HPATH = "cluster-destroy"
1845 HTYPE = constants.HTYPE_CLUSTER
1847 def BuildHooksEnv(self):
1852 "OP_TARGET": self.cfg.GetClusterName(),
1855 def BuildHooksNodes(self):
1856 """Build hooks nodes.
1861 def CheckPrereq(self):
1862 """Check prerequisites.
1864 This checks whether the cluster is empty.
1866 Any errors are signaled by raising errors.OpPrereqError.
1869 master = self.cfg.GetMasterNode()
1871 nodelist = self.cfg.GetNodeList()
1872 if len(nodelist) != 1 or nodelist[0] != master:
1873 raise errors.OpPrereqError("There are still %d node(s) in"
1874 " this cluster." % (len(nodelist) - 1),
1876 instancelist = self.cfg.GetInstanceList()
1878 raise errors.OpPrereqError("There are still %d instance(s) in"
1879 " this cluster." % len(instancelist),
1882 def Exec(self, feedback_fn):
1883 """Destroys the cluster.
1886 master_params = self.cfg.GetMasterNetworkParameters()
1888 # Run post hooks on master node before it's removed
1889 _RunPostHook(self, master_params.name)
1891 ems = self.cfg.GetUseExternalMipScript()
1892 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1895 self.LogWarning("Error disabling the master IP address: %s",
1898 return master_params.name
1901 def _VerifyCertificate(filename):
1902 """Verifies a certificate for L{LUClusterVerifyConfig}.
1904 @type filename: string
1905 @param filename: Path to PEM file
1909 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1910 utils.ReadFile(filename))
1911 except Exception, err: # pylint: disable=W0703
1912 return (LUClusterVerifyConfig.ETYPE_ERROR,
1913 "Failed to load X509 certificate %s: %s" % (filename, err))
1916 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1917 constants.SSL_CERT_EXPIRATION_ERROR)
1920 fnamemsg = "While verifying %s: %s" % (filename, msg)
1925 return (None, fnamemsg)
1926 elif errcode == utils.CERT_WARNING:
1927 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1928 elif errcode == utils.CERT_ERROR:
1929 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1931 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1934 def _GetAllHypervisorParameters(cluster, instances):
1935 """Compute the set of all hypervisor parameters.
1937 @type cluster: L{objects.Cluster}
1938 @param cluster: the cluster object
1939   @type instances: list of L{objects.Instance}
1940 @param instances: additional instances from which to obtain parameters
1941 @rtype: list of (origin, hypervisor, parameters)
1942 @return: a list with all parameters found, indicating the hypervisor they
1943 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1948 for hv_name in cluster.enabled_hypervisors:
1949 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1951 for os_name, os_hvp in cluster.os_hvp.items():
1952 for hv_name, hv_params in os_hvp.items():
1954 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1955 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1957 # TODO: collapse identical parameter values in a single one
1958 for instance in instances:
1959 if instance.hvparams:
1960 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1961 cluster.FillHV(instance)))
1966 class _VerifyErrors(object):
1967 """Mix-in for cluster/group verify LUs.
1969 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1970 self.op and self._feedback_fn to be available.)
1974 ETYPE_FIELD = "code"
1975 ETYPE_ERROR = "ERROR"
1976 ETYPE_WARNING = "WARNING"
1978 def _Error(self, ecode, item, msg, *args, **kwargs):
1979 """Format an error message.
1981 Based on the opcode's error_codes parameter, either format a
1982 parseable error code, or a simpler error string.
1984 This must be called only from Exec and functions called from Exec.
1987 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1988 itype, etxt, _ = ecode
1989 # first complete the msg
1992 # then format the whole message
1993 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1994 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2000 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2001 # and finally report it via the feedback_fn
2002 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
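# Rough sketch of the two message styles (error code and names invented):
#   with op.error_codes:    "ERROR:ENODESSH:node:node1:ssh problem"
#   without op.error_codes: "ERROR: node node1: ssh problem"
# i.e. a machine-parseable colon-separated form versus a human-readable one.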
2004 def _ErrorIf(self, cond, ecode, *args, **kwargs):
2005 """Log an error message if the passed condition is True.
2009 or self.op.debug_simulate_errors) # pylint: disable=E1101
2011 # If the error code is in the list of ignored errors, demote the error to a warning
2013 (_, etxt, _) = ecode
2014 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2015 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2018 self._Error(ecode, *args, **kwargs)
2020 # do not mark the operation as failed for WARN cases only
2021 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2022 self.bad = self.bad or cond
2025 class LUClusterVerify(NoHooksLU):
2026 """Submits all jobs necessary to verify the cluster.
2031 def ExpandNames(self):
2032 self.needed_locks = {}
2034 def Exec(self, feedback_fn):
2037 if self.op.group_name:
2038 groups = [self.op.group_name]
2039 depends_fn = lambda: None
2041 groups = self.cfg.GetNodeGroupList()
2043 # Verify global configuration
2045 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2048 # Always depend on global verification
2049 depends_fn = lambda: [(-len(jobs), [])]
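# Note: the negative value appears to be a relative job dependency, making
# every per-group verification job submitted below wait for the
# config-verification job submitted earlier in this same batch.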
2052 [opcodes.OpClusterVerifyGroup(group_name=group,
2053 ignore_errors=self.op.ignore_errors,
2054 depends=depends_fn())]
2055 for group in groups)
2057 # Fix up all parameters
2058 for op in itertools.chain(*jobs): # pylint: disable=W0142
2059 op.debug_simulate_errors = self.op.debug_simulate_errors
2060 op.verbose = self.op.verbose
2061 op.error_codes = self.op.error_codes
2063 op.skip_checks = self.op.skip_checks
2064 except AttributeError:
2065 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2067 return ResultWithJobs(jobs)
2070 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2071 """Verifies the cluster config.
2076 def _VerifyHVP(self, hvp_data):
2077 """Verifies locally the syntax of the hypervisor parameters.
2080 for item, hv_name, hv_params in hvp_data:
2081 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2084 hv_class = hypervisor.GetHypervisor(hv_name)
2085 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2086 hv_class.CheckParameterSyntax(hv_params)
2087 except errors.GenericError, err:
2088 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2090 def ExpandNames(self):
2091 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2092 self.share_locks = _ShareAll()
2094 def CheckPrereq(self):
2095 """Check prerequisites.
2098 # Retrieve all information
2099 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2100 self.all_node_info = self.cfg.GetAllNodesInfo()
2101 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2103 def Exec(self, feedback_fn):
2104 """Verify integrity of cluster, performing various test on nodes.
2108 self._feedback_fn = feedback_fn
2110 feedback_fn("* Verifying cluster config")
2112 for msg in self.cfg.VerifyConfig():
2113 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2115 feedback_fn("* Verifying cluster certificate files")
2117 for cert_filename in pathutils.ALL_CERT_FILES:
2118 (errcode, msg) = _VerifyCertificate(cert_filename)
2119 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2121 feedback_fn("* Verifying hypervisor parameters")
2123 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2124 self.all_inst_info.values()))
2126 feedback_fn("* Verifying all nodes belong to an existing group")
2128 # We do this verification here because, should this bogus circumstance
2129 # occur, it would never be caught by VerifyGroup, which only acts on
2130 # nodes/instances reachable from existing node groups.
2132 dangling_nodes = set(node.name for node in self.all_node_info.values()
2133 if node.group not in self.all_group_info)
2135 dangling_instances = {}
2136 no_node_instances = []
2138 for inst in self.all_inst_info.values():
2139 if inst.primary_node in dangling_nodes:
2140 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2141 elif inst.primary_node not in self.all_node_info:
2142 no_node_instances.append(inst.name)
2147 utils.CommaJoin(dangling_instances.get(node.name,
2149 for node in dangling_nodes]
2151 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2153 "the following nodes (and their instances) belong to a non"
2154 " existing group: %s", utils.CommaJoin(pretty_dangling))
2156 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2158 "the following instances have a non-existing primary-node:"
2159 " %s", utils.CommaJoin(no_node_instances))
2164 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2165 """Verifies the status of a node group.
2168 HPATH = "cluster-verify"
2169 HTYPE = constants.HTYPE_CLUSTER
2172 _HOOKS_INDENT_RE = re.compile("^", re.M)
2174 class NodeImage(object):
2175 """A class representing the logical and physical status of a node.
2178 @ivar name: the node name to which this object refers
2179 @ivar volumes: a structure as returned from
2180 L{ganeti.backend.GetVolumeList} (runtime)
2181 @ivar instances: a list of running instances (runtime)
2182 @ivar pinst: list of configured primary instances (config)
2183 @ivar sinst: list of configured secondary instances (config)
2184 @ivar sbp: dictionary of {primary-node: list of instances} for all
2185 instances for which this node is secondary (config)
2186 @ivar mfree: free memory, as reported by hypervisor (runtime)
2187 @ivar dfree: free disk, as reported by the node (runtime)
2188 @ivar offline: the offline status (config)
2189 @type rpc_fail: boolean
2190 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2191 not whether the individual keys were correct) (runtime)
2192 @type lvm_fail: boolean
2193 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2194 @type hyp_fail: boolean
2195 @ivar hyp_fail: whether the RPC call didn't return the instance list
2196 @type ghost: boolean
2197 @ivar ghost: whether this is a known node or not (config)
2198 @type os_fail: boolean
2199 @ivar os_fail: whether the RPC call didn't return valid OS data
2201 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2202 @type vm_capable: boolean
2203 @ivar vm_capable: whether the node can host instances
2206 def __init__(self, offline=False, name=None, vm_capable=True):
2215 self.offline = offline
2216 self.vm_capable = vm_capable
2217 self.rpc_fail = False
2218 self.lvm_fail = False
2219 self.hyp_fail = False
2221 self.os_fail = False
2224 def ExpandNames(self):
2225 # This raises errors.OpPrereqError on its own:
2226 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2228 # Get instances in node group; this is unsafe and needs verification later
2230 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2232 self.needed_locks = {
2233 locking.LEVEL_INSTANCE: inst_names,
2234 locking.LEVEL_NODEGROUP: [self.group_uuid],
2235 locking.LEVEL_NODE: [],
2237 # This opcode is run by watcher every five minutes and acquires all nodes
2238 # for a group. It doesn't run for a long time, so it's better to acquire
2239 # the node allocation lock as well.
2240 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2243 self.share_locks = _ShareAll()
2245 def DeclareLocks(self, level):
2246 if level == locking.LEVEL_NODE:
2247 # Get members of node group; this is unsafe and needs verification later
2248 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2250 all_inst_info = self.cfg.GetAllInstancesInfo()
2252 # In Exec(), we warn about mirrored instances that have primary and
2253 # secondary living in separate node groups. To fully verify that
2254 # volumes for these instances are healthy, we will need to do an
2255 # extra call to their secondaries. We ensure here those nodes will be locked.
2257 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2258 # Important: access only the instances whose lock is owned
2259 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2260 nodes.update(all_inst_info[inst].secondary_nodes)
2262 self.needed_locks[locking.LEVEL_NODE] = nodes
2264 def CheckPrereq(self):
2265 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2266 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2268 group_nodes = set(self.group_info.members)
2270 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2273 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2275 unlocked_instances = \
2276 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2279 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2280 utils.CommaJoin(unlocked_nodes),
2283 if unlocked_instances:
2284 raise errors.OpPrereqError("Missing lock for instances: %s" %
2285 utils.CommaJoin(unlocked_instances),
2288 self.all_node_info = self.cfg.GetAllNodesInfo()
2289 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2291 self.my_node_names = utils.NiceSort(group_nodes)
2292 self.my_inst_names = utils.NiceSort(group_instances)
2294 self.my_node_info = dict((name, self.all_node_info[name])
2295 for name in self.my_node_names)
2297 self.my_inst_info = dict((name, self.all_inst_info[name])
2298 for name in self.my_inst_names)
2300 # We detect here the nodes that will need the extra RPC calls for verifying
2301 # split LV volumes; they should be locked.
2302 extra_lv_nodes = set()
2304 for inst in self.my_inst_info.values():
2305 if inst.disk_template in constants.DTS_INT_MIRROR:
2306 for nname in inst.all_nodes:
2307 if self.all_node_info[nname].group != self.group_uuid:
2308 extra_lv_nodes.add(nname)
2310 unlocked_lv_nodes = \
2311 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2313 if unlocked_lv_nodes:
2314 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2315 utils.CommaJoin(unlocked_lv_nodes),
2317 self.extra_lv_nodes = list(extra_lv_nodes)
2319 def _VerifyNode(self, ninfo, nresult):
2320 """Perform some basic validation on data returned from a node.
2322 - check the result data structure is well formed and has all the mandatory fields
2324 - check ganeti version
2326 @type ninfo: L{objects.Node}
2327 @param ninfo: the node to check
2328 @param nresult: the results from the node
2330 @return: whether overall this call was successful (and we can expect
2331 reasonable values in the response)
2335 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2337 # main result, nresult should be a non-empty dict
2338 test = not nresult or not isinstance(nresult, dict)
2339 _ErrorIf(test, constants.CV_ENODERPC, node,
2340 "unable to verify node: no data returned")
2344 # compares ganeti version
2345 local_version = constants.PROTOCOL_VERSION
2346 remote_version = nresult.get("version", None)
2347 test = not (remote_version and
2348 isinstance(remote_version, (list, tuple)) and
2349 len(remote_version) == 2)
2350 _ErrorIf(test, constants.CV_ENODERPC, node,
2351 "connection to node returned invalid data")
2355 test = local_version != remote_version[0]
2356 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2357 "incompatible protocol versions: master %s,"
2358 " node %s", local_version, remote_version[0])
2362 # node seems compatible, we can actually try to look into its results
2364 # full package version
2365 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2366 constants.CV_ENODEVERSION, node,
2367 "software version mismatch: master %s, node %s",
2368 constants.RELEASE_VERSION, remote_version[1],
2369 code=self.ETYPE_WARNING)
2371 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2372 if ninfo.vm_capable and isinstance(hyp_result, dict):
2373 for hv_name, hv_result in hyp_result.iteritems():
2374 test = hv_result is not None
2375 _ErrorIf(test, constants.CV_ENODEHV, node,
2376 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2378 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2379 if ninfo.vm_capable and isinstance(hvp_result, list):
2380 for item, hv_name, hv_result in hvp_result:
2381 _ErrorIf(True, constants.CV_ENODEHV, node,
2382 "hypervisor %s parameter verify failure (source %s): %s",
2383 hv_name, item, hv_result)
2385 test = nresult.get(constants.NV_NODESETUP,
2386 ["Missing NODESETUP results"])
2387 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2392 def _VerifyNodeTime(self, ninfo, nresult,
2393 nvinfo_starttime, nvinfo_endtime):
2394 """Check the node time.
2396 @type ninfo: L{objects.Node}
2397 @param ninfo: the node to check
2398 @param nresult: the remote results for the node
2399 @param nvinfo_starttime: the start time of the RPC call
2400 @param nvinfo_endtime: the end time of the RPC call
2404 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2406 ntime = nresult.get(constants.NV_TIME, None)
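# The node is expected to report its time as a (seconds, microseconds)
# pair which MergeTime folds into a single float timestamp; this is an
# assumption inferred from the ValueError/TypeError handling below.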
2408 ntime_merged = utils.MergeTime(ntime)
2409 except (ValueError, TypeError):
2410 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2413 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2414 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2415 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2416 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2420 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2421 "Node time diverges by at least %s from master node time",
2424 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2425 """Check the node LVM results.
2427 @type ninfo: L{objects.Node}
2428 @param ninfo: the node to check
2429 @param nresult: the remote results for the node
2430 @param vg_name: the configured VG name
2437 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2439 # checks vg existence and size > 20G
2440 vglist = nresult.get(constants.NV_VGLIST, None)
2441 test = vglist is None
2442 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2444 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2445 constants.MIN_VG_SIZE)
2446 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2449 pvlist = nresult.get(constants.NV_PVLIST, None)
2450 test = pvlist is None
2451 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2453 # check that ':' is not present in PV names, since it's a
2454 # special character for lvcreate (denotes the range of PEs to use on the PV)
2456 for _, pvname, owner_vg in pvlist:
2457 test = ":" in pvname
2458 _ErrorIf(test, constants.CV_ENODELVM, node,
2459 "Invalid character ':' in PV '%s' of VG '%s'",
2462 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2463 """Check the node bridges.
2465 @type ninfo: L{objects.Node}
2466 @param ninfo: the node to check
2467 @param nresult: the remote results for the node
2468 @param bridges: the expected list of bridges
2475 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2477 missing = nresult.get(constants.NV_BRIDGES, None)
2478 test = not isinstance(missing, list)
2479 _ErrorIf(test, constants.CV_ENODENET, node,
2480 "did not return valid bridge information")
2482 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2483 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2485 def _VerifyNodeUserScripts(self, ninfo, nresult):
2486 """Check the results of user scripts presence and executability on the node
2488 @type ninfo: L{objects.Node}
2489 @param ninfo: the node to check
2490 @param nresult: the remote results for the node
2495 test = not constants.NV_USERSCRIPTS in nresult
2496 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2497 "did not return user scripts information")
2499 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2501 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2502 "user scripts not present or not executable: %s" %
2503 utils.CommaJoin(sorted(broken_scripts)))
2505 def _VerifyNodeNetwork(self, ninfo, nresult):
2506 """Check the node network connectivity results.
2508 @type ninfo: L{objects.Node}
2509 @param ninfo: the node to check
2510 @param nresult: the remote results for the node
2514 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2516 test = constants.NV_NODELIST not in nresult
2517 _ErrorIf(test, constants.CV_ENODESSH, node,
2518 "node hasn't returned node ssh connectivity data")
2520 if nresult[constants.NV_NODELIST]:
2521 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2522 _ErrorIf(True, constants.CV_ENODESSH, node,
2523 "ssh communication with node '%s': %s", a_node, a_msg)
2525 test = constants.NV_NODENETTEST not in nresult
2526 _ErrorIf(test, constants.CV_ENODENET, node,
2527 "node hasn't returned node tcp connectivity data")
2529 if nresult[constants.NV_NODENETTEST]:
2530 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2532 _ErrorIf(True, constants.CV_ENODENET, node,
2533 "tcp communication with node '%s': %s",
2534 anode, nresult[constants.NV_NODENETTEST][anode])
2536 test = constants.NV_MASTERIP not in nresult
2537 _ErrorIf(test, constants.CV_ENODENET, node,
2538 "node hasn't returned node master IP reachability data")
2540 if not nresult[constants.NV_MASTERIP]:
2541 if node == self.master_node:
2542 msg = "the master node cannot reach the master IP (not configured?)"
2544 msg = "cannot reach the master IP"
2545 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2547 def _VerifyInstance(self, instance, instanceconfig, node_image,
2549 """Verify an instance.
2551 This function checks to see if the required block devices are
2552 available on the instance's node.
2555 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2556 node_current = instanceconfig.primary_node
2558 node_vol_should = {}
2559 instanceconfig.MapLVsByNode(node_vol_should)
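# node_vol_should now maps node names to the LV names this instance should
# have on them, roughly {"node1": ["lv_a", "lv_b"]} (names invented); the
# per-node loop below checks each of these against the node image.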
2561 cluster = self.cfg.GetClusterInfo()
2562 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2564 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2565 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2566 code=self.ETYPE_WARNING)
2568 for node in node_vol_should:
2569 n_img = node_image[node]
2570 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2571 # ignore missing volumes on offline or broken nodes
2573 for volume in node_vol_should[node]:
2574 test = volume not in n_img.volumes
2575 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2576 "volume %s missing on node %s", volume, node)
2578 if instanceconfig.admin_state == constants.ADMINST_UP:
2579 pri_img = node_image[node_current]
2580 test = instance not in pri_img.instances and not pri_img.offline
2581 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2582 "instance not running on its primary node %s",
2585 diskdata = [(nname, success, status, idx)
2586 for (nname, disks) in diskstatus.items()
2587 for idx, (success, status) in enumerate(disks)]
2589 for nname, success, bdev_status, idx in diskdata:
2590 # the 'ghost node' construction in Exec() ensures that we have a node entry here
2592 snode = node_image[nname]
2593 bad_snode = snode.ghost or snode.offline
2594 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2595 not success and not bad_snode,
2596 constants.CV_EINSTANCEFAULTYDISK, instance,
2597 "couldn't retrieve status for disk/%s on %s: %s",
2598 idx, nname, bdev_status)
2599 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2600 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2601 constants.CV_EINSTANCEFAULTYDISK, instance,
2602 "disk/%s on %s is faulty", idx, nname)
2604 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2605 """Verify if there are any unknown volumes in the cluster.
2607 The .os, .swap and backup volumes are ignored. All other volumes are
2608 reported as unknown.
2610 @type reserved: L{ganeti.utils.FieldSet}
2611 @param reserved: a FieldSet of reserved volume names
2614 for node, n_img in node_image.items():
2615 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2616 self.all_node_info[node].group != self.group_uuid):
2617 # skip non-healthy nodes
2619 for volume in n_img.volumes:
2620 test = ((node not in node_vol_should or
2621 volume not in node_vol_should[node]) and
2622 not reserved.Matches(volume))
2623 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2624 "volume %s is unknown", volume)
2626 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2627 """Verify N+1 Memory Resilience.
2629 Check that if one single node dies we can still start all the
2630 instances it was primary for.
2633 cluster_info = self.cfg.GetClusterInfo()
2634 for node, n_img in node_image.items():
2635 # This code checks that every node which is now listed as
2636 # secondary has enough memory to host all instances it is
2637 # supposed to, should a single other node in the cluster fail.
2638 # FIXME: not ready for failover to an arbitrary node
2639 # FIXME: does not support file-backed instances
2640 # WARNING: we currently take into account down instances as well
2641 # as up ones, considering that even if they're down someone
2642 # might want to start them even in the event of a node failure.
2643 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2644 # we're skipping nodes marked offline and nodes in other groups from
2645 # the N+1 warning, since most likely we don't have good memory
2646 # information from them; we already list instances living on such
2647 # nodes, and that's enough warning
2649 #TODO(dynmem): also consider ballooning out other instances
2650 for prinode, instances in n_img.sbp.items():
2652 for instance in instances:
2653 bep = cluster_info.FillBE(instance_cfg[instance])
2654 if bep[constants.BE_AUTO_BALANCE]:
2655 needed_mem += bep[constants.BE_MINMEM]
2656 test = n_img.mfree < needed_mem
2657 self._ErrorIf(test, constants.CV_ENODEN1, node,
2658 "not enough memory to accomodate instance failovers"
2659 " should node %s fail (%dMiB needed, %dMiB available)",
2660 prinode, needed_mem, n_img.mfree)
2663 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2664 (files_all, files_opt, files_mc, files_vm)):
2665 """Verifies file checksums collected from all nodes.
2667 @param errorif: Callback for reporting errors
2668 @param nodeinfo: List of L{objects.Node} objects
2669 @param master_node: Name of master node
2670 @param all_nvinfo: RPC results
2673 # Define functions determining which nodes to consider for a file
2676 (files_mc, lambda node: (node.master_candidate or
2677 node.name == master_node)),
2678 (files_vm, lambda node: node.vm_capable),
2681 # Build mapping from filename to list of nodes which should have the file
2683 for (files, fn) in files2nodefn:
2685 filenodes = nodeinfo
2687 filenodes = filter(fn, nodeinfo)
2688 nodefiles.update((filename,
2689 frozenset(map(operator.attrgetter("name"), filenodes)))
2690 for filename in files)
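# Sketch of the resulting mapping (file and node names invented):
#   nodefiles = {"/some/config/file": frozenset(["node1", "node2"])}
# i.e. for every checked file, the set of node names expected to hold it.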
2692 assert set(nodefiles) == (files_all | files_mc | files_vm)
2694 fileinfo = dict((filename, {}) for filename in nodefiles)
2695 ignore_nodes = set()
2697 for node in nodeinfo:
2699 ignore_nodes.add(node.name)
2702 nresult = all_nvinfo[node.name]
2704 if nresult.fail_msg or not nresult.payload:
2707 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2708 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2709 for (key, value) in fingerprints.items())
2712 test = not (node_files and isinstance(node_files, dict))
2713 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2714 "Node did not return file checksum data")
2716 ignore_nodes.add(node.name)
2719 # Build per-checksum mapping from filename to nodes having it
2720 for (filename, checksum) in node_files.items():
2721 assert filename in nodefiles
2722 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2724 for (filename, checksums) in fileinfo.items():
2725 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2727 # Nodes having the file
2728 with_file = frozenset(node_name
2729 for nodes in fileinfo[filename].values()
2730 for node_name in nodes) - ignore_nodes
2732 expected_nodes = nodefiles[filename] - ignore_nodes
2734 # Nodes missing file
2735 missing_file = expected_nodes - with_file
2737 if filename in files_opt:
2739 errorif(missing_file and missing_file != expected_nodes,
2740 constants.CV_ECLUSTERFILECHECK, None,
2741 "File %s is optional, but it must exist on all or no"
2742 " nodes (not found on %s)",
2743 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2745 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2746 "File %s is missing from node(s) %s", filename,
2747 utils.CommaJoin(utils.NiceSort(missing_file)))
2749 # Warn if a node has a file it shouldn't
2750 unexpected = with_file - expected_nodes
2752 constants.CV_ECLUSTERFILECHECK, None,
2753 "File %s should not exist on node(s) %s",
2754 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2756 # See if there are multiple versions of the file
2757 test = len(checksums) > 1
2759 variants = ["variant %s on %s" %
2760 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2761 for (idx, (checksum, nodes)) in
2762 enumerate(sorted(checksums.items()))]
2766 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2767 "File %s found with %s different checksums (%s)",
2768 filename, len(checksums), "; ".join(variants))
2770 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2772 """Verifies and the node DRBD status.
2774 @type ninfo: L{objects.Node}
2775 @param ninfo: the node to check
2776 @param nresult: the remote results for the node
2777 @param instanceinfo: the dict of instances
2778 @param drbd_helper: the configured DRBD usermode helper
2779 @param drbd_map: the DRBD map as returned by
2780 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2784 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2787 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2788 test = (helper_result is None)
2789 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2790 "no drbd usermode helper returned")
2792 status, payload = helper_result
2794 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2795 "drbd usermode helper check unsuccessful: %s", payload)
2796 test = status and (payload != drbd_helper)
2797 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2798 "wrong drbd usermode helper: %s", payload)
2800 # compute the DRBD minors
2802 for minor, instance in drbd_map[node].items():
2803 test = instance not in instanceinfo
2804 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2805 "ghost instance '%s' in temporary DRBD map", instance)
2806 # ghost instance should not be running, but otherwise we
2807 # don't give double warnings (both ghost instance and
2808 # unallocated minor in use)
2810 node_drbd[minor] = (instance, False)
2812 instance = instanceinfo[instance]
2813 node_drbd[minor] = (instance.name,
2814 instance.admin_state == constants.ADMINST_UP)
2816 # and now check them
2817 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2818 test = not isinstance(used_minors, (tuple, list))
2819 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2820 "cannot parse drbd status file: %s", str(used_minors))
2822 # we cannot check drbd status
2825 for minor, (iname, must_exist) in node_drbd.items():
2826 test = minor not in used_minors and must_exist
2827 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2828 "drbd minor %d of instance %s is not active", minor, iname)
2829 for minor in used_minors:
2830 test = minor not in node_drbd
2831 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2832 "unallocated drbd minor %d is in use", minor)
2834 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2835 """Builds the node OS structures.
2837 @type ninfo: L{objects.Node}
2838 @param ninfo: the node to check
2839 @param nresult: the remote results for the node
2840 @param nimg: the node image object
2844 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2846 remote_os = nresult.get(constants.NV_OSLIST, None)
2847 test = (not isinstance(remote_os, list) or
2848 not compat.all(isinstance(v, list) and len(v) == 7
2849 for v in remote_os))
2851 _ErrorIf(test, constants.CV_ENODEOS, node,
2852 "node hasn't returned valid OS data")
2861 for (name, os_path, status, diagnose,
2862 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2864 if name not in os_dict:
2867 # parameters is a list of lists instead of list of tuples due to
2868 # JSON lacking a real tuple type, fix it:
2869 parameters = [tuple(v) for v in parameters]
2870 os_dict[name].append((os_path, status, diagnose,
2871 set(variants), set(parameters), set(api_ver)))
2873 nimg.oslist = os_dict
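# nimg.oslist now maps each OS name to a list of
#   (path, status, diagnose, set(variants), set(parameters), set(api_versions))
# tuples, one per location the OS was found in; _VerifyNodeOS below treats
# the first entry as authoritative and flags any additional ones.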
2875 def _VerifyNodeOS(self, ninfo, nimg, base):
2876 """Verifies the node OS list.
2878 @type ninfo: L{objects.Node}
2879 @param ninfo: the node to check
2880 @param nimg: the node image object
2881 @param base: the 'template' node we match against (e.g. from the master)
2885 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2887 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2889 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2890 for os_name, os_data in nimg.oslist.items():
2891 assert os_data, "Empty OS status for OS %s?!" % os_name
2892 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2893 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2894 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2895 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2896 "OS '%s' has multiple entries (first one shadows the rest): %s",
2897 os_name, utils.CommaJoin([v[0] for v in os_data]))
2898 # comparisons with the 'base' image
2899 test = os_name not in base.oslist
2900 _ErrorIf(test, constants.CV_ENODEOS, node,
2901 "Extra OS %s not present on reference node (%s)",
2905 assert base.oslist[os_name], "Base node has empty OS status?"
2906 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2908 # base OS is invalid, skipping
2910 for kind, a, b in [("API version", f_api, b_api),
2911 ("variants list", f_var, b_var),
2912 ("parameters", beautify_params(f_param),
2913 beautify_params(b_param))]:
2914 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2915 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2916 kind, os_name, base.name,
2917 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2919 # check any missing OSes
2920 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2921 _ErrorIf(missing, constants.CV_ENODEOS, node,
2922 "OSes present on reference node %s but missing on this node: %s",
2923 base.name, utils.CommaJoin(missing))
2925 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2926 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2928 @type ninfo: L{objects.Node}
2929 @param ninfo: the node to check
2930 @param nresult: the remote results for the node
2931 @type is_master: bool
2932 @param is_master: Whether node is the master node
2938 (constants.ENABLE_FILE_STORAGE or
2939 constants.ENABLE_SHARED_FILE_STORAGE)):
2941 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2943 # This should never happen
2944 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2945 "Node did not return forbidden file storage paths")
2947 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2948 "Found forbidden file storage paths: %s",
2949 utils.CommaJoin(fspaths))
2951 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2952 constants.CV_ENODEFILESTORAGEPATHS, node,
2953 "Node should not have returned forbidden file storage"
2956 def _VerifyOob(self, ninfo, nresult):
2957 """Verifies out of band functionality of a node.
2959 @type ninfo: L{objects.Node}
2960 @param ninfo: the node to check
2961 @param nresult: the remote results for the node
2965 # We just have to verify the paths on master and/or master candidates
2966 # as the oob helper is invoked on the master
2967 if ((ninfo.master_candidate or ninfo.master_capable) and
2968 constants.NV_OOB_PATHS in nresult):
2969 for path_result in nresult[constants.NV_OOB_PATHS]:
2970 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2972 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2973 """Verifies and updates the node volume data.
2975 This function will update a L{NodeImage}'s internal structures
2976 with data from the remote call.
2978 @type ninfo: L{objects.Node}
2979 @param ninfo: the node to check
2980 @param nresult: the remote results for the node
2981 @param nimg: the node image object
2982 @param vg_name: the configured VG name
2986 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2988 nimg.lvm_fail = True
2989 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2992 elif isinstance(lvdata, basestring):
2993 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2994 utils.SafeEncode(lvdata))
2995 elif not isinstance(lvdata, dict):
2996 _ErrorIf(True, constants.CV_ENODELVM, node,
2997 "rpc call to node failed (lvlist)")
2999 nimg.volumes = lvdata
3000 nimg.lvm_fail = False
3002 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3003 """Verifies and updates the node instance list.
3005 If the listing was successful, then updates this node's instance
3006 list. Otherwise, it marks the RPC call as failed for the instance list key.
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nresult: the remote results for the node
3012 @param nimg: the node image object
3015 idata = nresult.get(constants.NV_INSTANCELIST, None)
3016 test = not isinstance(idata, list)
3017 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3018 "rpc call to node failed (instancelist): %s",
3019 utils.SafeEncode(str(idata)))
3021 nimg.hyp_fail = True
3023 nimg.instances = idata
3025 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3026 """Verifies and computes a node information map
3028 @type ninfo: L{objects.Node}
3029 @param ninfo: the node to check
3030 @param nresult: the remote results for the node
3031 @param nimg: the node image object
3032 @param vg_name: the configured VG name
3036 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3038 # try to read free memory (from the hypervisor)
3039 hv_info = nresult.get(constants.NV_HVINFO, None)
3040 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3041 _ErrorIf(test, constants.CV_ENODEHV, node,
3042 "rpc call to node failed (hvinfo)")
3045 nimg.mfree = int(hv_info["memory_free"])
3046 except (ValueError, TypeError):
3047 _ErrorIf(True, constants.CV_ENODERPC, node,
3048 "node returned invalid nodeinfo, check hypervisor")
3050 # FIXME: devise a free space model for file based instances as well
3051 if vg_name is not None:
3052 test = (constants.NV_VGLIST not in nresult or
3053 vg_name not in nresult[constants.NV_VGLIST])
3054 _ErrorIf(test, constants.CV_ENODELVM, node,
3055 "node didn't return data for the volume group '%s'"
3056 " - it is either missing or broken", vg_name)
3059 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3060 except (ValueError, TypeError):
3061 _ErrorIf(True, constants.CV_ENODERPC, node,
3062 "node returned invalid LVM info, check LVM status")
3064 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3065 """Gets per-disk status information for all instances.
3067 @type nodelist: list of strings
3068 @param nodelist: Node names
3069 @type node_image: dict of (name, L{objects.Node})
3070 @param node_image: Node objects
3071 @type instanceinfo: dict of (name, L{objects.Instance})
3072 @param instanceinfo: Instance objects
3073 @rtype: {instance: {node: [(success, payload)]}}
3074 @return: a dictionary of per-instance dictionaries with nodes as
3075 keys and disk information as values; the disk information is a
3076 list of tuples (success, payload)
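# Illustrative shape of the result (instance and node names invented):
#   {"inst1": {"node1": [(True, <status>), (False, "node offline")]}}
# i.e. one (success, payload) tuple per disk, per node of each instance.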
3079 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3082 node_disks_devonly = {}
3083 diskless_instances = set()
3084 diskless = constants.DT_DISKLESS
3086 for nname in nodelist:
3087 node_instances = list(itertools.chain(node_image[nname].pinst,
3088 node_image[nname].sinst))
3089 diskless_instances.update(inst for inst in node_instances
3090 if instanceinfo[inst].disk_template == diskless)
3091 disks = [(inst, disk)
3092 for inst in node_instances
3093 for disk in instanceinfo[inst].disks]
3096 # No need to collect data
3099 node_disks[nname] = disks
3101 # _AnnotateDiskParams already makes copies of the disks
3103 for (inst, dev) in disks:
3104 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3105 self.cfg.SetDiskID(anno_disk, nname)
3106 devonly.append(anno_disk)
3108 node_disks_devonly[nname] = devonly
3110 assert len(node_disks) == len(node_disks_devonly)
3112 # Collect data from all nodes with disks
3113 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3116 assert len(result) == len(node_disks)
3120 for (nname, nres) in result.items():
3121 disks = node_disks[nname]
3124 # No data from this node
3125 data = len(disks) * [(False, "node offline")]
3128 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3129 "while getting disk information: %s", msg)
3131 # No data from this node
3132 data = len(disks) * [(False, msg)]
3135 for idx, i in enumerate(nres.payload):
3136 if isinstance(i, (tuple, list)) and len(i) == 2:
3139 logging.warning("Invalid result from node %s, entry %d: %s",
3141 data.append((False, "Invalid result from the remote node"))
3143 for ((inst, _), status) in zip(disks, data):
3144 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3146 # Add empty entries for diskless instances.
3147 for inst in diskless_instances:
3148 assert inst not in instdisk
3151 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3152 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3153 compat.all(isinstance(s, (tuple, list)) and
3154 len(s) == 2 for s in statuses)
3155 for inst, nnames in instdisk.items()
3156 for nname, statuses in nnames.items())
3157 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3162 def _SshNodeSelector(group_uuid, all_nodes):
3163 """Create endless iterators for all potential SSH check hosts.
3166 nodes = [node for node in all_nodes
3167 if (node.group != group_uuid and not node.offline)]
3169 keyfunc = operator.attrgetter("group")
3171 return map(itertools.cycle,
3172 [sorted(map(operator.attrgetter("name"), names))
3173 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3177 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3178 """Choose which nodes should talk to which other nodes.
3180 We will make nodes contact all nodes in their group, and one node from every other group.
3183 @warning: This algorithm has a known issue if one node group is much
3184 smaller than others (e.g. just one node). In such a case all other
3185 nodes will talk to the single node.
3188 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3189 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3191 return (online_nodes,
3192 dict((name, sorted([i.next() for i in sel]))
3193 for name in online_nodes))
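# Sketch of the return value (node names invented):
#   (["node1", "node2"],                        # online nodes of this group
#    {"node1": ["peer-a", "peer-b"], ...})      # SSH targets per online node
# where each online node is assigned one peer drawn from every other group.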
3195 def BuildHooksEnv(self):
3198 Cluster-Verify hooks run only in the post phase; if they fail, their
3199 output is logged in the verify output and the verification itself fails.
3203 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3206 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3207 for node in self.my_node_info.values())
3211 def BuildHooksNodes(self):
3212 """Build hooks nodes.
3215 return ([], self.my_node_names)
3217 def Exec(self, feedback_fn):
3218 """Verify integrity of the node group, performing various test on nodes.
3221 # This method has too many local variables. pylint: disable=R0914
3222 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3224 if not self.my_node_names:
3226 feedback_fn("* Empty node group, skipping verification")
3230 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3231 verbose = self.op.verbose
3232 self._feedback_fn = feedback_fn
3234 vg_name = self.cfg.GetVGName()
3235 drbd_helper = self.cfg.GetDRBDHelper()
3236 cluster = self.cfg.GetClusterInfo()
3237 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3238 hypervisors = cluster.enabled_hypervisors
3239 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3241 i_non_redundant = [] # Non redundant instances
3242 i_non_a_balanced = [] # Non auto-balanced instances
3243 i_offline = 0 # Count of offline instances
3244 n_offline = 0 # Count of offline nodes
3245 n_drained = 0 # Count of nodes being drained
3246 node_vol_should = {}
3248 # FIXME: verify OS list
3251 filemap = _ComputeAncillaryFiles(cluster, False)
3253 # do local checksums
3254 master_node = self.master_node = self.cfg.GetMasterNode()
3255 master_ip = self.cfg.GetMasterIP()
3257 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3260 if self.cfg.GetUseExternalMipScript():
3261 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3263 node_verify_param = {
3264 constants.NV_FILELIST:
3265 map(vcluster.MakeVirtualPath,
3266 utils.UniqueSequence(filename
3267 for files in filemap
3268 for filename in files)),
3269 constants.NV_NODELIST:
3270 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3271 self.all_node_info.values()),
3272 constants.NV_HYPERVISOR: hypervisors,
3273 constants.NV_HVPARAMS:
3274 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3275 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3276 for node in node_data_list
3277 if not node.offline],
3278 constants.NV_INSTANCELIST: hypervisors,
3279 constants.NV_VERSION: None,
3280 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3281 constants.NV_NODESETUP: None,
3282 constants.NV_TIME: None,
3283 constants.NV_MASTERIP: (master_node, master_ip),
3284 constants.NV_OSLIST: None,
3285 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3286 constants.NV_USERSCRIPTS: user_scripts,
3289 if vg_name is not None:
3290 node_verify_param[constants.NV_VGLIST] = None
3291 node_verify_param[constants.NV_LVLIST] = vg_name
3292 node_verify_param[constants.NV_PVLIST] = [vg_name]
3295 node_verify_param[constants.NV_DRBDLIST] = None
3296 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3298 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3299 # Load file storage paths only from master node
3300 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3303 # FIXME: this needs to be changed per node-group, not cluster-wide
3305 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3306 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3307 bridges.add(default_nicpp[constants.NIC_LINK])
3308 for instance in self.my_inst_info.values():
3309 for nic in instance.nics:
3310 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3311 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3312 bridges.add(full_nic[constants.NIC_LINK])
3315 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3317 # Build our expected cluster state
3318 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3320 vm_capable=node.vm_capable))
3321 for node in node_data_list)
3325 for node in self.all_node_info.values():
3326 path = _SupportsOob(self.cfg, node)
3327 if path and path not in oob_paths:
3328 oob_paths.append(path)
3331 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3333 for instance in self.my_inst_names:
3334 inst_config = self.my_inst_info[instance]
3335 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3338 for nname in inst_config.all_nodes:
3339 if nname not in node_image:
3340 gnode = self.NodeImage(name=nname)
3341 gnode.ghost = (nname not in self.all_node_info)
3342 node_image[nname] = gnode
3344 inst_config.MapLVsByNode(node_vol_should)
3346 pnode = inst_config.primary_node
3347 node_image[pnode].pinst.append(instance)
3349 for snode in inst_config.secondary_nodes:
3350 nimg = node_image[snode]
3351 nimg.sinst.append(instance)
3352 if pnode not in nimg.sbp:
3353 nimg.sbp[pnode] = []
3354 nimg.sbp[pnode].append(instance)
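# At this point nimg.sbp maps each primary node name to the instances for
# which this node acts as secondary, matching the NodeImage.sbp description
# above.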
3356 # At this point, we have the in-memory data structures complete,
3357 # except for the runtime information, which we'll gather next
3359 # Due to the way our RPC system works, exact response times cannot be
3360 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3361 # time before and after executing the request, we can at least have a time window.
3363 nvinfo_starttime = time.time()
3364 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3366 self.cfg.GetClusterName())
3367 nvinfo_endtime = time.time()
3369 if self.extra_lv_nodes and vg_name is not None:
3371 self.rpc.call_node_verify(self.extra_lv_nodes,
3372 {constants.NV_LVLIST: vg_name},
3373 self.cfg.GetClusterName())
3375 extra_lv_nvinfo = {}
3377 all_drbd_map = self.cfg.ComputeDRBDMap()
3379 feedback_fn("* Gathering disk information (%s nodes)" %
3380 len(self.my_node_names))
3381 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3384 feedback_fn("* Verifying configuration file consistency")
3386 # If not all nodes are being checked, we need to make sure the master node
3387 # and a non-checked vm_capable node are in the list.
3388 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3390 vf_nvinfo = all_nvinfo.copy()
3391 vf_node_info = list(self.my_node_info.values())
3392 additional_nodes = []
3393 if master_node not in self.my_node_info:
3394 additional_nodes.append(master_node)
3395 vf_node_info.append(self.all_node_info[master_node])
3396 # Add the first vm_capable node we find which is not included,
3397 # excluding the master node (which we already have)
3398 for node in absent_nodes:
3399 nodeinfo = self.all_node_info[node]
3400 if (nodeinfo.vm_capable and not nodeinfo.offline and
3401 node != master_node):
3402 additional_nodes.append(node)
3403 vf_node_info.append(self.all_node_info[node])
3405 key = constants.NV_FILELIST
3406 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3407 {key: node_verify_param[key]},
3408 self.cfg.GetClusterName()))
3410 vf_nvinfo = all_nvinfo
3411 vf_node_info = self.my_node_info.values()
3413 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3415 feedback_fn("* Verifying node status")
3419 for node_i in node_data_list:
3421 nimg = node_image[node]
3425 feedback_fn("* Skipping offline node %s" % (node,))
3429 if node == master_node:
3431 elif node_i.master_candidate:
3432 ntype = "master candidate"
3433 elif node_i.drained:
3439 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3441 msg = all_nvinfo[node].fail_msg
3442 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3445 nimg.rpc_fail = True
3448 nresult = all_nvinfo[node].payload
3450 nimg.call_ok = self._VerifyNode(node_i, nresult)
3451 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3452 self._VerifyNodeNetwork(node_i, nresult)
3453 self._VerifyNodeUserScripts(node_i, nresult)
3454 self._VerifyOob(node_i, nresult)
3455 self._VerifyFileStoragePaths(node_i, nresult,
3456 node == master_node)
3459 self._VerifyNodeLVM(node_i, nresult, vg_name)
3460 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3463 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3464 self._UpdateNodeInstances(node_i, nresult, nimg)
3465 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3466 self._UpdateNodeOS(node_i, nresult, nimg)
3468 if not nimg.os_fail:
3469 if refos_img is None:
3471 self._VerifyNodeOS(node_i, nimg, refos_img)
3472 self._VerifyNodeBridges(node_i, nresult, bridges)
3474 # Check whether all running instances are primary for the node. (This
3475 # can no longer be done from _VerifyInstance below, since some of the
3476 # wrong instances could be from other node groups.)
3477 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3479 for inst in non_primary_inst:
3480 test = inst in self.all_inst_info
3481 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3482 "instance should not run on node %s", node_i.name)
3483 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3484 "node is running unknown instance %s", inst)
3486 for node, result in extra_lv_nvinfo.items():
3487 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3488 node_image[node], vg_name)
3490 feedback_fn("* Verifying instance status")
3491 for instance in self.my_inst_names:
3493 feedback_fn("* Verifying instance %s" % instance)
3494 inst_config = self.my_inst_info[instance]
3495 self._VerifyInstance(instance, inst_config, node_image,
3497 inst_nodes_offline = []
3499 pnode = inst_config.primary_node
3500 pnode_img = node_image[pnode]
3501 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3502 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3503 " primary node failed", instance)
3505 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3507 constants.CV_EINSTANCEBADNODE, instance,
3508 "instance is marked as running and lives on offline node %s",
3509 inst_config.primary_node)
3511 # If the instance is non-redundant we cannot survive losing its primary
3512 # node, so we are not N+1 compliant.
3513 if inst_config.disk_template not in constants.DTS_MIRRORED:
3514 i_non_redundant.append(instance)
3516 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3517 constants.CV_EINSTANCELAYOUT,
3518 instance, "instance has multiple secondary nodes: %s",
3519 utils.CommaJoin(inst_config.secondary_nodes),
3520 code=self.ETYPE_WARNING)
3522 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3523 pnode = inst_config.primary_node
3524 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3525 instance_groups = {}
3527 for node in instance_nodes:
3528 instance_groups.setdefault(self.all_node_info[node].group,
3532 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3533 # Sort so that we always list the primary node first.
3534 for group, nodes in sorted(instance_groups.items(),
3535 key=lambda (_, nodes): pnode in nodes,
3538 self._ErrorIf(len(instance_groups) > 1,
3539 constants.CV_EINSTANCESPLITGROUPS,
3540 instance, "instance has primary and secondary nodes in"
3541 " different groups: %s", utils.CommaJoin(pretty_list),
3542 code=self.ETYPE_WARNING)
3544 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3545 i_non_a_balanced.append(instance)
3547 for snode in inst_config.secondary_nodes:
3548 s_img = node_image[snode]
3549 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3550 snode, "instance %s, connection to secondary node failed",
3554 inst_nodes_offline.append(snode)
3556 # warn that the instance lives on offline nodes
3557 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3558 "instance has offline secondary node(s) %s",
3559 utils.CommaJoin(inst_nodes_offline))
3560 # ... or ghost/non-vm_capable nodes
3561 for node in inst_config.all_nodes:
3562 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3563 instance, "instance lives on ghost node %s", node)
3564 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3565 instance, "instance lives on non-vm_capable node %s", node)
3567 feedback_fn("* Verifying orphan volumes")
3568 reserved = utils.FieldSet(*cluster.reserved_lvs)
3570 # We will get spurious "unknown volume" warnings if any node of this group
3571 # is secondary for an instance whose primary is in another group. To avoid
3572 # them, we find these instances and add their volumes to node_vol_should.
3573 for inst in self.all_inst_info.values():
3574 for secondary in inst.secondary_nodes:
3575 if (secondary in self.my_node_info
3576 and inst.name not in self.my_inst_info):
3577 inst.MapLVsByNode(node_vol_should)
3580 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3582 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3583 feedback_fn("* Verifying N+1 Memory redundancy")
3584 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3586 feedback_fn("* Other Notes")
3588 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3589 % len(i_non_redundant))
3591 if i_non_a_balanced:
3592 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3593 % len(i_non_a_balanced))
3596 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3599 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3602 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3606 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3607 """Analyze the post-hooks' result
3609 This method analyses the hook result, handles it, and sends some
3610 nicely-formatted feedback back to the user.
3612 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3613 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3614 @param hooks_results: the results of the multi-node hooks rpc call
3615 @param feedback_fn: function used to send feedback back to the caller
3616 @param lu_result: previous Exec result
3617 @return: the new Exec result, based on the previous result
3621 # We only really run POST phase hooks, only for non-empty groups,
3622 # and are only interested in their results
3623 if not self.my_node_names:
3626 elif phase == constants.HOOKS_PHASE_POST:
3627 # Used to change hooks' output to proper indentation
3628 feedback_fn("* Hooks Results")
3629 assert hooks_results, "invalid result from hooks"
3631 for node_name in hooks_results:
3632 res = hooks_results[node_name]
3634 test = msg and not res.offline
3635 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3636 "Communication failure in hooks execution: %s", msg)
3637 if res.offline or msg:
3638 # No need to investigate payload if node is offline or gave an error
3641 for script, hkr, output in res.payload:
3642 test = hkr == constants.HKR_FAIL
3643 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3644 "Script %s failed, output:", script)
3646 output = self._HOOKS_INDENT_RE.sub(" ", output)
3647 feedback_fn("%s" % output)
3653 class LUClusterVerifyDisks(NoHooksLU):
3654 """Verifies the cluster disks status.
3659 def ExpandNames(self):
3660 self.share_locks = _ShareAll()
3661 self.needed_locks = {
3662 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3665 def Exec(self, feedback_fn):
3666 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3668 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3669 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3670 for group in group_names])
3673 class LUGroupVerifyDisks(NoHooksLU):
3674 """Verifies the status of all disks in a node group.
3679 def ExpandNames(self):
3680 # Raises errors.OpPrereqError on its own if group can't be found
3681 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3683 self.share_locks = _ShareAll()
3684 self.needed_locks = {
3685 locking.LEVEL_INSTANCE: [],
3686 locking.LEVEL_NODEGROUP: [],
3687 locking.LEVEL_NODE: [],
3689 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3690 # starts one instance of this opcode for every group, which means all
3691 # nodes will be locked for a short amount of time, so it's better to
3692 # acquire the node allocation lock as well.
3693 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3696 def DeclareLocks(self, level):
3697 if level == locking.LEVEL_INSTANCE:
3698 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3700 # Lock instances optimistically, needs verification once node and group
3701 # locks have been acquired
3702 self.needed_locks[locking.LEVEL_INSTANCE] = \
3703 self.cfg.GetNodeGroupInstances(self.group_uuid)
3705 elif level == locking.LEVEL_NODEGROUP:
3706 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3708 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3709 set([self.group_uuid] +
3710 # Lock all groups used by instances optimistically; this requires
3711 # going via the node before it's locked, requiring verification
3712 # later on
3713 [group_uuid
3714 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3715 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3717 elif level == locking.LEVEL_NODE:
3718 # This will only lock the nodes in the group to be verified which contain actual instances
3720 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3721 self._LockInstancesNodes()
3723 # Lock all nodes in group to be verified
3724 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3725 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3726 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3728 def CheckPrereq(self):
3729 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3730 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3731 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3733 assert self.group_uuid in owned_groups
3735 # Check if locked instances are still correct
3736 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3738 # Get instance information
3739 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3741 # Check if node groups for locked instances are still correct
3742 _CheckInstancesNodeGroups(self.cfg, self.instances,
3743 owned_groups, owned_nodes, self.group_uuid)
3745 def Exec(self, feedback_fn):
3746 """Verify integrity of cluster disks.
3748 @rtype: tuple of three items
3749 @return: a tuple of (dict of node-to-node_error, list of instances
3750 which need activate-disks, dict of instance: (node, volume) for
3755 res_instances = set()
3758 nv_dict = _MapInstanceDisksToNodes(
3759 [inst for inst in self.instances.values()
3760 if inst.admin_state == constants.ADMINST_UP])
3763 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3764 set(self.cfg.GetVmCapableNodeList()))
3766 node_lvs = self.rpc.call_lv_list(nodes, [])
3768 for (node, node_res) in node_lvs.items():
3769 if node_res.offline:
3772 msg = node_res.fail_msg
3774 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3775 res_nodes[node] = msg
3778 for lv_name, (_, _, lv_online) in node_res.payload.items():
3779 inst = nv_dict.pop((node, lv_name), None)
3780 if not (lv_online or inst is None):
3781 res_instances.add(inst)
3783 # any leftover items in nv_dict are missing LVs, let's arrange the data
3785 for key, inst in nv_dict.iteritems():
3786 res_missing.setdefault(inst, []).append(list(key))
3788 return (res_nodes, list(res_instances), res_missing)
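# For illustration only: a hedged sketch of how a caller might read the
# three-element result returned above.  Only the tuple layout (node errors,
# instances needing activate-disks, missing LVs per instance) comes from the
# code above; the helper name and formatting are hypothetical.
def _ExampleFormatGroupVerifyDisks(result):
  """Renders the (res_nodes, res_instances, res_missing) tuple as text.

  """
  (res_nodes, res_instances, res_missing) = result
  lines = []
  for node, err in res_nodes.items():
    lines.append("node %s: LV enumeration failed: %s" % (node, err))
  for inst in res_instances:
    lines.append("instance %s: needs activate-disks" % inst)
  for inst, lvs in res_missing.items():
    lines.append("instance %s: missing LVs: %s" % (inst, lvs))
  return "\n".join(lines)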
3791 class LUClusterRepairDiskSizes(NoHooksLU):
3792 """Verifies the cluster disks sizes.
3797 def ExpandNames(self):
3798 if self.op.instances:
3799 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3800 # Not getting the node allocation lock as only a specific set of
3801 # instances (and their nodes) is going to be acquired
3802 self.needed_locks = {
3803 locking.LEVEL_NODE_RES: [],
3804 locking.LEVEL_INSTANCE: self.wanted_names,
3806 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3808 self.wanted_names = None
3809 self.needed_locks = {
3810 locking.LEVEL_NODE_RES: locking.ALL_SET,
3811 locking.LEVEL_INSTANCE: locking.ALL_SET,
3813 # This opcode acquires the node locks for all instances
3814 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3817 self.share_locks = {
3818 locking.LEVEL_NODE_RES: 1,
3819 locking.LEVEL_INSTANCE: 0,
3820 locking.LEVEL_NODE_ALLOC: 1,
3823 def DeclareLocks(self, level):
3824 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3825 self._LockInstancesNodes(primary_only=True, level=level)
3827 def CheckPrereq(self):
3828 """Check prerequisites.
3830 This only checks the optional instance list against the existing names.
3833 if self.wanted_names is None:
3834 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3836 self.wanted_instances = \
3837 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3839 def _EnsureChildSizes(self, disk):
3840 """Ensure children of the disk have the needed disk size.
3842 This is valid mainly for DRBD8 and fixes an issue where the
3843 children have a smaller disk size.
3845 @param disk: an L{ganeti.objects.Disk} object
3848 if disk.dev_type == constants.LD_DRBD8:
3849 assert disk.children, "Empty children for DRBD8?"
3850 fchild = disk.children[0]
3851 mismatch = fchild.size < disk.size
3853 self.LogInfo("Child disk has size %d, parent %d, fixing",
3854 fchild.size, disk.size)
3855 fchild.size = disk.size
3857 # and we recurse on this child only, not on the metadev
3858 return self._EnsureChildSizes(fchild) or mismatch
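# For illustration only: a self-contained, worked example of the size fix
# applied by _EnsureChildSizes above.  _FakeDisk is a hypothetical stand-in
# for objects.Disk; only the "grow the data child to the parent's size"
# behaviour mirrors the method above.
def _ExampleChildSizeFix():
  class _FakeDisk(object):
    def __init__(self, size, children=None):
      self.size = size
      self.children = children or []
  # a DRBD8 disk recorded as 1024 MiB whose first (data) child is 1000 MiB
  child = _FakeDisk(1000)
  parent = _FakeDisk(1024, children=[child])
  mismatch = child.size < parent.size
  if mismatch:
    child.size = parent.size # the same correction the LU performs
  return (mismatch, child.size) # (True, 1024) -> configuration needs updating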
3862 def Exec(self, feedback_fn):
3863 """Verify the size of cluster disks.
3866 # TODO: check child disks too
3867 # TODO: check differences in size between primary/secondary nodes
3869 for instance in self.wanted_instances:
3870 pnode = instance.primary_node
3871 if pnode not in per_node_disks:
3872 per_node_disks[pnode] = []
3873 for idx, disk in enumerate(instance.disks):
3874 per_node_disks[pnode].append((instance, idx, disk))
3876 assert not (frozenset(per_node_disks.keys()) -
3877 self.owned_locks(locking.LEVEL_NODE_RES)), \
3878 "Not owning correct locks"
3879 assert not self.owned_locks(locking.LEVEL_NODE)
3882 for node, dskl in per_node_disks.items():
3883 newl = [v[2].Copy() for v in dskl]
3885 self.cfg.SetDiskID(dsk, node)
3886 result = self.rpc.call_blockdev_getsize(node, newl)
3888 self.LogWarning("Failure in blockdev_getsize call to node"
3889 " %s, ignoring", node)
3891 if len(result.payload) != len(dskl):
3892 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3893 " result.payload=%s", node, len(dskl), result.payload)
3894 self.LogWarning("Invalid result from node %s, ignoring node results",
3897 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3899 self.LogWarning("Disk %d of instance %s did not return size"
3900 " information, ignoring", idx, instance.name)
3902 if not isinstance(size, (int, long)):
3903 self.LogWarning("Disk %d of instance %s did not return valid"
3904 " size information, ignoring", idx, instance.name)
3907 if size != disk.size:
3908 self.LogInfo("Disk %d of instance %s has mismatched size,"
3909 " correcting: recorded %d, actual %d", idx,
3910 instance.name, disk.size, size)
3912 self.cfg.Update(instance, feedback_fn)
3913 changed.append((instance.name, idx, size))
3914 if self._EnsureChildSizes(disk):
3915 self.cfg.Update(instance, feedback_fn)
3916 changed.append((instance.name, idx, disk.size))
3920 class LUClusterRename(LogicalUnit):
3921 """Rename the cluster.
3924 HPATH = "cluster-rename"
3925 HTYPE = constants.HTYPE_CLUSTER
3927 def BuildHooksEnv(self):
3932 "OP_TARGET": self.cfg.GetClusterName(),
3933 "NEW_NAME": self.op.name,
3936 def BuildHooksNodes(self):
3937 """Build hooks nodes.
3940 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3942 def CheckPrereq(self):
3943 """Verify that the passed name is a valid one.
3946 hostname = netutils.GetHostname(name=self.op.name,
3947 family=self.cfg.GetPrimaryIPFamily())
3949 new_name = hostname.name
3950 self.ip = new_ip = hostname.ip
3951 old_name = self.cfg.GetClusterName()
3952 old_ip = self.cfg.GetMasterIP()
3953 if new_name == old_name and new_ip == old_ip:
3954 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3955 " cluster has changed",
3957 if new_ip != old_ip:
3958 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3959 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3960 " reachable on the network" %
3961 new_ip, errors.ECODE_NOTUNIQUE)
3963 self.op.name = new_name
3965 def Exec(self, feedback_fn):
3966 """Rename the cluster.
3969 clustername = self.op.name
3972 # shutdown the master IP
3973 master_params = self.cfg.GetMasterNetworkParameters()
3974 ems = self.cfg.GetUseExternalMipScript()
3975 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3977 result.Raise("Could not disable the master role")
3980 cluster = self.cfg.GetClusterInfo()
3981 cluster.cluster_name = clustername
3982 cluster.master_ip = new_ip
3983 self.cfg.Update(cluster, feedback_fn)
3985 # update the known hosts file
3986 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
3987 node_list = self.cfg.GetOnlineNodeList()
3989 node_list.remove(master_params.name)
3992 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
3994 master_params.ip = new_ip
3995 result = self.rpc.call_node_activate_master_ip(master_params.name,
3997 msg = result.fail_msg
3999 self.LogWarning("Could not re-enable the master role on"
4000 " the master, please restart manually: %s", msg)
4005 def _ValidateNetmask(cfg, netmask):
4006 """Checks if a netmask is valid.
4008 @type cfg: L{config.ConfigWriter}
4009 @param cfg: The cluster configuration
4011 @param netmask: the netmask to be verified
4012 @raise errors.OpPrereqError: if the validation fails
4015 ip_family = cfg.GetPrimaryIPFamily()
4017 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4018 except errors.ProgrammerError:
4019 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4020 ip_family, errors.ECODE_INVAL)
4021 if not ipcls.ValidateNetmask(netmask):
4022 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4023 (netmask), errors.ECODE_INVAL)
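# For illustration only: a minimal, self-contained sketch of the kind of check
# that _ValidateNetmask delegates to ipcls.ValidateNetmask.  The helper below
# is hypothetical and assumes the netmask is given as a CIDR prefix length.
def _ExampleValidPrefixLength(prefix, ip_version=4):
  """Returns True if prefix is a plausible CIDR prefix length.

  """
  max_len = {4: 32, 6: 128}[ip_version]
  return isinstance(prefix, (int, long)) and 0 < prefix <= max_len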
4026 class LUClusterSetParams(LogicalUnit):
4027 """Change the parameters of the cluster.
4030 HPATH = "cluster-modify"
4031 HTYPE = constants.HTYPE_CLUSTER
4034 def CheckArguments(self):
4038 if self.op.uid_pool:
4039 uidpool.CheckUidPool(self.op.uid_pool)
4041 if self.op.add_uids:
4042 uidpool.CheckUidPool(self.op.add_uids)
4044 if self.op.remove_uids:
4045 uidpool.CheckUidPool(self.op.remove_uids)
4047 if self.op.master_netmask is not None:
4048 _ValidateNetmask(self.cfg, self.op.master_netmask)
4050 if self.op.diskparams:
4051 for dt_params in self.op.diskparams.values():
4052 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4054 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4055 except errors.OpPrereqError, err:
4056 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4059 def ExpandNames(self):
4060 # FIXME: in the future maybe other cluster params won't require checking on
4061 # all nodes to be modified.
4062 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4063 # resource locks the right thing, shouldn't it be the BGL instead?
4064 self.needed_locks = {
4065 locking.LEVEL_NODE: locking.ALL_SET,
4066 locking.LEVEL_INSTANCE: locking.ALL_SET,
4067 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4068 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4070 self.share_locks = _ShareAll()
4072 def BuildHooksEnv(self):
4077 "OP_TARGET": self.cfg.GetClusterName(),
4078 "NEW_VG_NAME": self.op.vg_name,
4081 def BuildHooksNodes(self):
4082 """Build hooks nodes.
4085 mn = self.cfg.GetMasterNode()
4088 def CheckPrereq(self):
4089 """Check prerequisites.
4091 This checks whether the given params don't conflict and
4092 if the given volume group is valid.
4095 if self.op.vg_name is not None and not self.op.vg_name:
4096 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4097 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4098 " instances exist", errors.ECODE_INVAL)
4100 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4101 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4102 raise errors.OpPrereqError("Cannot disable drbd helper while"
4103 " drbd-based instances exist",
4106 node_list = self.owned_locks(locking.LEVEL_NODE)
4108 # if vg_name is not None, check the given volume group on all nodes
4110 vglist = self.rpc.call_vg_list(node_list)
4111 for node in node_list:
4112 msg = vglist[node].fail_msg
4114 # ignoring down node
4115 self.LogWarning("Error while gathering data on node %s"
4116 " (ignoring node): %s", node, msg)
4118 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4120 constants.MIN_VG_SIZE)
4122 raise errors.OpPrereqError("Error on node '%s': %s" %
4123 (node, vgstatus), errors.ECODE_ENVIRON)
4125 if self.op.drbd_helper:
4126 # checks given drbd helper on all nodes
4127 helpers = self.rpc.call_drbd_helper(node_list)
4128 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4130 self.LogInfo("Not checking drbd helper on offline node %s", node)
4132 msg = helpers[node].fail_msg
4134 raise errors.OpPrereqError("Error checking drbd helper on node"
4135 " '%s': %s" % (node, msg),
4136 errors.ECODE_ENVIRON)
4137 node_helper = helpers[node].payload
4138 if node_helper != self.op.drbd_helper:
4139 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4140 (node, node_helper), errors.ECODE_ENVIRON)
4142 self.cluster = cluster = self.cfg.GetClusterInfo()
4143 # validate params changes
4144 if self.op.beparams:
4145 objects.UpgradeBeParams(self.op.beparams)
4146 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4147 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4149 if self.op.ndparams:
4150 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4151 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4153 # TODO: we need a more general way to handle resetting
4154 # cluster-level parameters to default values
4155 if self.new_ndparams["oob_program"] == "":
4156 self.new_ndparams["oob_program"] = \
4157 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4159 if self.op.hv_state:
4160 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4161 self.cluster.hv_state_static)
4162 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4163 for hv, values in new_hv_state.items())
4165 if self.op.disk_state:
4166 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4167 self.cluster.disk_state_static)
4168 self.new_disk_state = \
4169 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4170 for name, values in svalues.items()))
4171 for storage, svalues in new_disk_state.items())
4174 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4177 all_instances = self.cfg.GetAllInstancesInfo().values()
4179 for group in self.cfg.GetAllNodeGroupsInfo().values():
4180 instances = frozenset([inst for inst in all_instances
4181 if compat.any(node in group.members
4182 for node in inst.all_nodes)])
4183 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4184 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4185 new = _ComputeNewInstanceViolations(ipol,
4186 new_ipolicy, instances)
4188 violations.update(new)
4191 self.LogWarning("After the ipolicy change the following instances"
4192 " violate them: %s",
4193 utils.CommaJoin(utils.NiceSort(violations)))
4195 if self.op.nicparams:
4196 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4197 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4198 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4201 # check all instances for consistency
4202 for instance in self.cfg.GetAllInstancesInfo().values():
4203 for nic_idx, nic in enumerate(instance.nics):
4204 params_copy = copy.deepcopy(nic.nicparams)
4205 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4207 # check parameter syntax
4209 objects.NIC.CheckParameterSyntax(params_filled)
4210 except errors.ConfigurationError, err:
4211 nic_errors.append("Instance %s, nic/%d: %s" %
4212 (instance.name, nic_idx, err))
4214 # if we're moving instances to routed, check that they have an ip
4215 target_mode = params_filled[constants.NIC_MODE]
4216 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4217 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4218 " address" % (instance.name, nic_idx))
4220 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4221 "\n".join(nic_errors), errors.ECODE_INVAL)
4223 # hypervisor list/parameters
4224 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4225 if self.op.hvparams:
4226 for hv_name, hv_dict in self.op.hvparams.items():
4227 if hv_name not in self.new_hvparams:
4228 self.new_hvparams[hv_name] = hv_dict
4230 self.new_hvparams[hv_name].update(hv_dict)
4232 # disk template parameters
4233 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4234 if self.op.diskparams:
4235 for dt_name, dt_params in self.op.diskparams.items():
4236 if dt_name not in self.new_diskparams:
4237 self.new_diskparams[dt_name] = dt_params
4239 self.new_diskparams[dt_name].update(dt_params)
4241 # os hypervisor parameters
4242 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4244 for os_name, hvs in self.op.os_hvp.items():
4245 if os_name not in self.new_os_hvp:
4246 self.new_os_hvp[os_name] = hvs
4248 for hv_name, hv_dict in hvs.items():
4249 if hv_name not in self.new_os_hvp[os_name]:
4250 self.new_os_hvp[os_name][hv_name] = hv_dict
4252 self.new_os_hvp[os_name][hv_name].update(hv_dict)
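# For illustration only: a self-contained version of the two-level merge
# performed above for os_hvp.  _fill_dict mimics what objects.FillDict is used
# for here (copy the defaults, then overlay custom values), but is a local
# stand-in.  E.g. merging {"debian": {"kvm": {"acpi": True}}} with
# {"debian": {"kvm": {"serial_console": False}}} yields
# {"debian": {"kvm": {"acpi": True, "serial_console": False}}}.
def _ExampleMergeOsHvp(current, updates):
  def _fill_dict(defaults, custom):
    ret = defaults.copy()
    ret.update(custom)
    return ret
  new_os_hvp = _fill_dict(current, {})
  for os_name, hvs in updates.items():
    if os_name not in new_os_hvp:
      new_os_hvp[os_name] = hvs
    else:
      for hv_name, hv_dict in hvs.items():
        if hv_name not in new_os_hvp[os_name]:
          new_os_hvp[os_name][hv_name] = hv_dict
        else:
          new_os_hvp[os_name][hv_name].update(hv_dict)
  return new_os_hvp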
4255 self.new_osp = objects.FillDict(cluster.osparams, {})
4256 if self.op.osparams:
4257 for os_name, osp in self.op.osparams.items():
4258 if os_name not in self.new_osp:
4259 self.new_osp[os_name] = {}
4261 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4264 if not self.new_osp[os_name]:
4265 # we removed all parameters
4266 del self.new_osp[os_name]
4268 # check the parameter validity (remote check)
4269 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4270 os_name, self.new_osp[os_name])
4272 # changes to the hypervisor list
4273 if self.op.enabled_hypervisors is not None:
4274 self.hv_list = self.op.enabled_hypervisors
4275 for hv in self.hv_list:
4276 # if the hypervisor doesn't already exist in the cluster
4277 # hvparams, we initialize it to empty, and then (in both
4278 # cases) we make sure to fill the defaults, as we might not
4279 # have a complete defaults list if the hypervisor wasn't
4281 if hv not in new_hvp:
4283 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4284 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4286 self.hv_list = cluster.enabled_hypervisors
4288 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4289 # either the enabled list has changed, or the parameters have, validate
4290 for hv_name, hv_params in self.new_hvparams.items():
4291 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4292 (self.op.enabled_hypervisors and
4293 hv_name in self.op.enabled_hypervisors)):
4294 # either this is a new hypervisor, or its parameters have changed
4295 hv_class = hypervisor.GetHypervisor(hv_name)
4296 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4297 hv_class.CheckParameterSyntax(hv_params)
4298 _CheckHVParams(self, node_list, hv_name, hv_params)
4301 # no need to check any newly-enabled hypervisors, since the
4302 # defaults have already been checked in the above code-block
4303 for os_name, os_hvp in self.new_os_hvp.items():
4304 for hv_name, hv_params in os_hvp.items():
4305 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4306 # we need to fill in the new os_hvp on top of the actual hv_p
4307 cluster_defaults = self.new_hvparams.get(hv_name, {})
4308 new_osp = objects.FillDict(cluster_defaults, hv_params)
4309 hv_class = hypervisor.GetHypervisor(hv_name)
4310 hv_class.CheckParameterSyntax(new_osp)
4311 _CheckHVParams(self, node_list, hv_name, new_osp)
4313 if self.op.default_iallocator:
4314 alloc_script = utils.FindFile(self.op.default_iallocator,
4315 constants.IALLOCATOR_SEARCH_PATH,
4317 if alloc_script is None:
4318 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4319 " specified" % self.op.default_iallocator,
4322 def Exec(self, feedback_fn):
4323 """Change the parameters of the cluster.
4326 if self.op.vg_name is not None:
4327 new_volume = self.op.vg_name
4330 if new_volume != self.cfg.GetVGName():
4331 self.cfg.SetVGName(new_volume)
4333 feedback_fn("Cluster LVM configuration already in desired"
4334 " state, not changing")
4335 if self.op.drbd_helper is not None:
4336 new_helper = self.op.drbd_helper
4339 if new_helper != self.cfg.GetDRBDHelper():
4340 self.cfg.SetDRBDHelper(new_helper)
4342 feedback_fn("Cluster DRBD helper already in desired state,"
4344 if self.op.hvparams:
4345 self.cluster.hvparams = self.new_hvparams
4347 self.cluster.os_hvp = self.new_os_hvp
4348 if self.op.enabled_hypervisors is not None:
4349 self.cluster.hvparams = self.new_hvparams
4350 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4351 if self.op.beparams:
4352 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4353 if self.op.nicparams:
4354 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4356 self.cluster.ipolicy = self.new_ipolicy
4357 if self.op.osparams:
4358 self.cluster.osparams = self.new_osp
4359 if self.op.ndparams:
4360 self.cluster.ndparams = self.new_ndparams
4361 if self.op.diskparams:
4362 self.cluster.diskparams = self.new_diskparams
4363 if self.op.hv_state:
4364 self.cluster.hv_state_static = self.new_hv_state
4365 if self.op.disk_state:
4366 self.cluster.disk_state_static = self.new_disk_state
4368 if self.op.candidate_pool_size is not None:
4369 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4370 # we need to update the pool size here, otherwise the save will fail
4371 _AdjustCandidatePool(self, [])
4373 if self.op.maintain_node_health is not None:
4374 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4375 feedback_fn("Note: CONFD was disabled at build time, node health"
4376 " maintenance is not useful (still enabling it)")
4377 self.cluster.maintain_node_health = self.op.maintain_node_health
4379 if self.op.prealloc_wipe_disks is not None:
4380 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4382 if self.op.add_uids is not None:
4383 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4385 if self.op.remove_uids is not None:
4386 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4388 if self.op.uid_pool is not None:
4389 self.cluster.uid_pool = self.op.uid_pool
4391 if self.op.default_iallocator is not None:
4392 self.cluster.default_iallocator = self.op.default_iallocator
4394 if self.op.reserved_lvs is not None:
4395 self.cluster.reserved_lvs = self.op.reserved_lvs
4397 if self.op.use_external_mip_script is not None:
4398 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4400 def helper_os(aname, mods, desc):
4402 lst = getattr(self.cluster, aname)
4403 for key, val in mods:
4404 if key == constants.DDM_ADD:
4406 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4409 elif key == constants.DDM_REMOVE:
4413 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4415 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4417 if self.op.hidden_os:
4418 helper_os("hidden_os", self.op.hidden_os, "hidden")
4420 if self.op.blacklisted_os:
4421 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4423 if self.op.master_netdev:
4424 master_params = self.cfg.GetMasterNetworkParameters()
4425 ems = self.cfg.GetUseExternalMipScript()
4426 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4427 self.cluster.master_netdev)
4428 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4430 result.Raise("Could not disable the master ip")
4431 feedback_fn("Changing master_netdev from %s to %s" %
4432 (master_params.netdev, self.op.master_netdev))
4433 self.cluster.master_netdev = self.op.master_netdev
4435 if self.op.master_netmask:
4436 master_params = self.cfg.GetMasterNetworkParameters()
4437 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4438 result = self.rpc.call_node_change_master_netmask(master_params.name,
4439 master_params.netmask,
4440 self.op.master_netmask,
4442 master_params.netdev)
4444 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4447 self.cluster.master_netmask = self.op.master_netmask
4449 self.cfg.Update(self.cluster, feedback_fn)
4451 if self.op.master_netdev:
4452 master_params = self.cfg.GetMasterNetworkParameters()
4453 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4454 self.op.master_netdev)
4455 ems = self.cfg.GetUseExternalMipScript()
4456 result = self.rpc.call_node_activate_master_ip(master_params.name,
4459 self.LogWarning("Could not re-enable the master ip on"
4460 " the master, please restart manually: %s",
4464 def _UploadHelper(lu, nodes, fname):
4465 """Helper for uploading a file and showing warnings.
4468 if os.path.exists(fname):
4469 result = lu.rpc.call_upload_file(nodes, fname)
4470 for to_node, to_result in result.items():
4471 msg = to_result.fail_msg
4473 msg = ("Copy of file %s to node %s failed: %s" %
4474 (fname, to_node, msg))
4478 def _ComputeAncillaryFiles(cluster, redist):
4479 """Compute files external to Ganeti which need to be consistent.
4481 @type redist: boolean
4482 @param redist: Whether to include files which need to be redistributed
4485 # Compute files for all nodes
4487 pathutils.SSH_KNOWN_HOSTS_FILE,
4488 pathutils.CONFD_HMAC_KEY,
4489 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4490 pathutils.SPICE_CERT_FILE,
4491 pathutils.SPICE_CACERT_FILE,
4492 pathutils.RAPI_USERS_FILE,
4496 # we need to ship at least the RAPI certificate
4497 files_all.add(pathutils.RAPI_CERT_FILE)
4499 files_all.update(pathutils.ALL_CERT_FILES)
4500 files_all.update(ssconf.SimpleStore().GetFileList())
4502 if cluster.modify_etc_hosts:
4503 files_all.add(pathutils.ETC_HOSTS)
4505 if cluster.use_external_mip_script:
4506 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4508 # Files which are optional; these must:
4509 # - be present in one other category as well
4510 # - either exist or not exist on all nodes of that category (mc, vm all)
4512 pathutils.RAPI_USERS_FILE,
4515 # Files which should only be on master candidates
4519 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4523 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4524 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4525 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4527 # Files which should only be on VM-capable nodes
4530 for hv_name in cluster.enabled_hypervisors
4531 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4535 for hv_name in cluster.enabled_hypervisors
4536 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4538 # Filenames in each category must be unique
4539 all_files_set = files_all | files_mc | files_vm
4540 assert (len(all_files_set) ==
4541 sum(map(len, [files_all, files_mc, files_vm]))), \
4542 "Found file listed in more than one file list"
4544 # Optional files must be present in one other category
4545 assert all_files_set.issuperset(files_opt), \
4546 "Optional file not in a different required list"
4548 # This one file should never ever be re-distributed via RPC
4549 assert not (redist and
4550 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4552 return (files_all, files_opt, files_mc, files_vm)
4555 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4556 """Distribute additional files which are part of the cluster configuration.
4558 ConfigWriter takes care of distributing the config and ssconf files, but
4559 there are more files which should be distributed to all nodes. This function
4560 makes sure those are copied.
4562 @param lu: calling logical unit
4563 @param additional_nodes: list of nodes not in the config to distribute to
4564 @type additional_vm: boolean
4565 @param additional_vm: whether the additional nodes are vm-capable or not
4568 # Gather target nodes
4569 cluster = lu.cfg.GetClusterInfo()
4570 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4572 online_nodes = lu.cfg.GetOnlineNodeList()
4573 online_set = frozenset(online_nodes)
4574 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4576 if additional_nodes is not None:
4577 online_nodes.extend(additional_nodes)
4579 vm_nodes.extend(additional_nodes)
4581 # Never distribute to master node
4582 for nodelist in [online_nodes, vm_nodes]:
4583 if master_info.name in nodelist:
4584 nodelist.remove(master_info.name)
4587 (files_all, _, files_mc, files_vm) = \
4588 _ComputeAncillaryFiles(cluster, True)
4590 # Never re-distribute configuration file from here
4591 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4592 pathutils.CLUSTER_CONF_FILE in files_vm)
4593 assert not files_mc, "Master candidates not handled in this function"
4596 (online_nodes, files_all),
4597 (vm_nodes, files_vm),
4601 for (node_list, files) in filemap:
4603 _UploadHelper(lu, node_list, fname)
4606 class LUClusterRedistConf(NoHooksLU):
4607 """Force the redistribution of cluster configuration.
4609 This is a very simple LU.
4614 def ExpandNames(self):
4615 self.needed_locks = {
4616 locking.LEVEL_NODE: locking.ALL_SET,
4617 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4619 self.share_locks = _ShareAll()
4621 def Exec(self, feedback_fn):
4622 """Redistribute the configuration.
4625 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4626 _RedistributeAncillaryFiles(self)
4629 class LUClusterActivateMasterIp(NoHooksLU):
4630 """Activate the master IP on the master node.
4633 def Exec(self, feedback_fn):
4634 """Activate the master IP.
4637 master_params = self.cfg.GetMasterNetworkParameters()
4638 ems = self.cfg.GetUseExternalMipScript()
4639 result = self.rpc.call_node_activate_master_ip(master_params.name,
4641 result.Raise("Could not activate the master IP")
4644 class LUClusterDeactivateMasterIp(NoHooksLU):
4645 """Deactivate the master IP on the master node.
4648 def Exec(self, feedback_fn):
4649 """Deactivate the master IP.
4652 master_params = self.cfg.GetMasterNetworkParameters()
4653 ems = self.cfg.GetUseExternalMipScript()
4654 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4656 result.Raise("Could not deactivate the master IP")
4659 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4660 """Sleep and poll for an instance's disk to sync.
4663 if not instance.disks or disks is not None and not disks:
4666 disks = _ExpandCheckDisks(instance, disks)
4669 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4671 node = instance.primary_node
4674 lu.cfg.SetDiskID(dev, node)
4676 # TODO: Convert to utils.Retry
4679 degr_retries = 10 # in seconds, as we sleep 1 second each time
4683 cumul_degraded = False
4684 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4685 msg = rstats.fail_msg
4687 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4690 raise errors.RemoteError("Can't contact node %s for mirror data,"
4691 " aborting." % node)
4694 rstats = rstats.payload
4696 for i, mstat in enumerate(rstats):
4698 lu.LogWarning("Can't compute data for node %s/%s",
4699 node, disks[i].iv_name)
4702 cumul_degraded = (cumul_degraded or
4703 (mstat.is_degraded and mstat.sync_percent is None))
4704 if mstat.sync_percent is not None:
4706 if mstat.estimated_time is not None:
4707 rem_time = ("%s remaining (estimated)" %
4708 utils.FormatSeconds(mstat.estimated_time))
4709 max_time = mstat.estimated_time
4711 rem_time = "no time estimate"
4712 lu.LogInfo("- device %s: %5.2f%% done, %s",
4713 disks[i].iv_name, mstat.sync_percent, rem_time)
4715 # if we're done but degraded, let's do a few small retries, to
4716 # make sure we see a stable and not transient situation; therefore
4717 # we force a restart of the loop
4718 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4719 logging.info("Degraded disks found, %d retries left", degr_retries)
4727 time.sleep(min(60, max_time))
4730 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4732 return not cumul_degraded
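# For illustration only: a hedged, self-contained outline of the polling
# strategy used by _WaitForSync above: poll a status source, sleep between
# rounds, and allow a handful of extra rounds when the mirrors report "done
# but degraded" so that transient states can settle.  get_status and sleep_fn
# are hypothetical callbacks; only the retry idea mirrors the loop above.
def _ExamplePollUntilSynced(get_status, sleep_fn, max_rounds=100):
  """get_status() returns (done, degraded); True means disks ended up clean.

  """
  degr_retries = 10 # extra rounds allowed when done-but-degraded
  for _ in range(max_rounds):
    (done, degraded) = get_status()
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(60)
  return False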
4735 def _BlockdevFind(lu, node, dev, instance):
4736 """Wrapper around call_blockdev_find to annotate diskparams.
4738 @param lu: A reference to the lu object
4739 @param node: The node to call out
4740 @param dev: The device to find
4741 @param instance: The instance object the device belongs to
4742 @returns The result of the rpc call
4745 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4746 return lu.rpc.call_blockdev_find(node, disk)
4749 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4750 """Wrapper around L{_CheckDiskConsistencyInner}.
4753 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4754 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4758 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4760 """Check that mirrors are not degraded.
4762 @attention: The device has to be annotated already.
4764 The ldisk parameter, if True, will change the test from the
4765 is_degraded attribute (which represents overall non-ok status for
4766 the device(s)) to the ldisk (representing the local storage status).
4769 lu.cfg.SetDiskID(dev, node)
4773 if on_primary or dev.AssembleOnSecondary():
4774 rstats = lu.rpc.call_blockdev_find(node, dev)
4775 msg = rstats.fail_msg
4777 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4779 elif not rstats.payload:
4780 lu.LogWarning("Can't find disk on node %s", node)
4784 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4786 result = result and not rstats.payload.is_degraded
4789 for child in dev.children:
4790 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4796 class LUOobCommand(NoHooksLU):
4797 """Logical unit for OOB handling.
4801 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4803 def ExpandNames(self):
4804 """Gather locks we need.
4807 if self.op.node_names:
4808 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4809 lock_names = self.op.node_names
4811 lock_names = locking.ALL_SET
4813 self.needed_locks = {
4814 locking.LEVEL_NODE: lock_names,
4817 if not self.op.node_names:
4818 # Acquire node allocation lock only if all nodes are affected
4819 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4820 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4822 def CheckPrereq(self):
4823 """Check prerequisites.
4826 - the node exists in the configuration
4829 Any errors are signaled by raising errors.OpPrereqError.
4833 self.master_node = self.cfg.GetMasterNode()
4835 assert self.op.power_delay >= 0.0
4837 if self.op.node_names:
4838 if (self.op.command in self._SKIP_MASTER and
4839 self.master_node in self.op.node_names):
4840 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4841 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4843 if master_oob_handler:
4844 additional_text = ("run '%s %s %s' if you want to operate on the"
4845 " master regardless") % (master_oob_handler,
4849 additional_text = "it does not support out-of-band operations"
4851 raise errors.OpPrereqError(("Operating on the master node %s is not"
4852 " allowed for %s; %s") %
4853 (self.master_node, self.op.command,
4854 additional_text), errors.ECODE_INVAL)
4856 self.op.node_names = self.cfg.GetNodeList()
4857 if self.op.command in self._SKIP_MASTER:
4858 self.op.node_names.remove(self.master_node)
4860 if self.op.command in self._SKIP_MASTER:
4861 assert self.master_node not in self.op.node_names
4863 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4865 raise errors.OpPrereqError("Node %s not found" % node_name,
4868 self.nodes.append(node)
4870 if (not self.op.ignore_status and
4871 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4872 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4873 " not marked offline") % node_name,
4876 def Exec(self, feedback_fn):
4877 """Execute OOB and return result if we expect any.
4880 master_node = self.master_node
4883 for idx, node in enumerate(utils.NiceSort(self.nodes,
4884 key=lambda node: node.name)):
4885 node_entry = [(constants.RS_NORMAL, node.name)]
4886 ret.append(node_entry)
4888 oob_program = _SupportsOob(self.cfg, node)
4891 node_entry.append((constants.RS_UNAVAIL, None))
4894 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4895 self.op.command, oob_program, node.name)
4896 result = self.rpc.call_run_oob(master_node, oob_program,
4897 self.op.command, node.name,
4901 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4902 node.name, result.fail_msg)
4903 node_entry.append((constants.RS_NODATA, None))
4906 self._CheckPayload(result)
4907 except errors.OpExecError, err:
4908 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4910 node_entry.append((constants.RS_NODATA, None))
4912 if self.op.command == constants.OOB_HEALTH:
4913 # For health we should log important events
4914 for item, status in result.payload:
4915 if status in [constants.OOB_STATUS_WARNING,
4916 constants.OOB_STATUS_CRITICAL]:
4917 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4918 item, node.name, status)
4920 if self.op.command == constants.OOB_POWER_ON:
4922 elif self.op.command == constants.OOB_POWER_OFF:
4923 node.powered = False
4924 elif self.op.command == constants.OOB_POWER_STATUS:
4925 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4926 if powered != node.powered:
4927 logging.warning(("Recorded power state (%s) of node '%s' does not"
4928 " match actual power state (%s)"), node.powered,
4931 # For configuration changing commands we should update the node
4932 if self.op.command in (constants.OOB_POWER_ON,
4933 constants.OOB_POWER_OFF):
4934 self.cfg.Update(node, feedback_fn)
4936 node_entry.append((constants.RS_NORMAL, result.payload))
4938 if (self.op.command == constants.OOB_POWER_ON and
4939 idx < len(self.nodes) - 1):
4940 time.sleep(self.op.power_delay)
4944 def _CheckPayload(self, result):
4945 """Checks if the payload is valid.
4947 @param result: RPC result
4948 @raises errors.OpExecError: If payload is not valid
4952 if self.op.command == constants.OOB_HEALTH:
4953 if not isinstance(result.payload, list):
4954 errs.append("command 'health' is expected to return a list but got %s" %
4955 type(result.payload))
4957 for item, status in result.payload:
4958 if status not in constants.OOB_STATUSES:
4959 errs.append("health item '%s' has invalid status '%s'" %
4962 if self.op.command == constants.OOB_POWER_STATUS:
4963 if not isinstance(result.payload, dict):
4964 errs.append("power-status is expected to return a dict but got %s" %
4965 type(result.payload))
4967 if self.op.command in [
4968 constants.OOB_POWER_ON,
4969 constants.OOB_POWER_OFF,
4970 constants.OOB_POWER_CYCLE,
4972 if result.payload is not None:
4973 errs.append("%s is expected to not return payload but got '%s'" %
4974 (self.op.command, result.payload))
4977 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4978 utils.CommaJoin(errs))
4981 class _OsQuery(_QueryBase):
4982 FIELDS = query.OS_FIELDS
4984 def ExpandNames(self, lu):
4985 # Lock all nodes in shared mode
4986 # Temporary removal of locks, should be reverted later
4987 # TODO: reintroduce locks when they are lighter-weight
4988 lu.needed_locks = {}
4989 #self.share_locks[locking.LEVEL_NODE] = 1
4990 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4992 # The following variables interact with _QueryBase._GetNames
4994 self.wanted = self.names
4996 self.wanted = locking.ALL_SET
4998 self.do_locking = self.use_locking
5000 def DeclareLocks(self, lu, level):
5004 def _DiagnoseByOS(rlist):
5005 """Remaps a per-node return list into an a per-os per-node dictionary
5007 @param rlist: a map with node names as keys and OS objects as values
5010 @return: a dictionary with osnames as keys and as value another
5011 map, with nodes as keys and tuples of (path, status, diagnose,
5012 variants, parameters, api_versions) as values, eg::
5014 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5015 (/srv/..., False, "invalid api")],
5016 "node2": [(/srv/..., True, "", [], [])]}
5021 # we build here the list of nodes that didn't fail the RPC (at RPC
5022 # level), so that nodes with a non-responding node daemon don't
5023 # make all OSes invalid
5024 good_nodes = [node_name for node_name in rlist
5025 if not rlist[node_name].fail_msg]
5026 for node_name, nr in rlist.items():
5027 if nr.fail_msg or not nr.payload:
5029 for (name, path, status, diagnose, variants,
5030 params, api_versions) in nr.payload:
5031 if name not in all_os:
5032 # build a list of nodes for this os containing empty lists
5033 # for each node in node_list
5035 for nname in good_nodes:
5036 all_os[name][nname] = []
5037 # convert params from [name, help] to (name, help)
5038 params = [tuple(v) for v in params]
5039 all_os[name][node_name].append((path, status, diagnose,
5040 variants, params, api_versions))
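# For illustration only: a self-contained run of the remapping done by
# _DiagnoseByOS above, using a namedtuple as a stand-in for the RPC results
# (only the fail_msg/payload attributes used above are modelled); the node
# and OS names are made up.
def _ExampleDiagnoseByOs():
  import collections
  _FakeRpcResult = collections.namedtuple("FakeRpcResult",
                                          ["fail_msg", "payload"])
  rlist = {
    "node1": _FakeRpcResult(None, [("debian-etch", "/usr/lib/os/debian-etch",
                                    True, "", [], [], [10, 20])]),
    "node2": _FakeRpcResult("node daemon not responding", None),
    }
  all_os = {}
  good_nodes = [name for name in rlist if not rlist[name].fail_msg]
  for node_name, nr in rlist.items():
    if nr.fail_msg or not nr.payload:
      continue
    for (name, path, status, diagnose, variants,
         params, api_versions) in nr.payload:
      all_os.setdefault(name, dict((n, []) for n in good_nodes))
      all_os[name][node_name].append((path, status, diagnose,
                                      variants, params, api_versions))
  # all_os == {"debian-etch": {"node1": [("/usr/lib/os/debian-etch", True,
  #                                       "", [], [], [10, 20])]}}
  return all_os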
5043 def _GetQueryData(self, lu):
5044 """Computes the list of nodes and their attributes.
5047 # Locking is not used
5048 assert not (compat.any(lu.glm.is_owned(level)
5049 for level in locking.LEVELS
5050 if level != locking.LEVEL_CLUSTER) or
5051 self.do_locking or self.use_locking)
5053 valid_nodes = [node.name
5054 for node in lu.cfg.GetAllNodesInfo().values()
5055 if not node.offline and node.vm_capable]
5056 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5057 cluster = lu.cfg.GetClusterInfo()
5061 for (os_name, os_data) in pol.items():
5062 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5063 hidden=(os_name in cluster.hidden_os),
5064 blacklisted=(os_name in cluster.blacklisted_os))
5068 api_versions = set()
5070 for idx, osl in enumerate(os_data.values()):
5071 info.valid = bool(info.valid and osl and osl[0][1])
5075 (node_variants, node_params, node_api) = osl[0][3:6]
5078 variants.update(node_variants)
5079 parameters.update(node_params)
5080 api_versions.update(node_api)
5082 # Filter out inconsistent values
5083 variants.intersection_update(node_variants)
5084 parameters.intersection_update(node_params)
5085 api_versions.intersection_update(node_api)
5087 info.variants = list(variants)
5088 info.parameters = list(parameters)
5089 info.api_versions = list(api_versions)
5091 data[os_name] = info
5093 # Prepare data in requested order
5094 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5098 class LUOsDiagnose(NoHooksLU):
5099 """Logical unit for OS diagnose/query.
5105 def _BuildFilter(fields, names):
5106 """Builds a filter for querying OSes.
5109 name_filter = qlang.MakeSimpleFilter("name", names)
5111 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5112 # respective field is not requested
5113 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5114 for fname in ["hidden", "blacklisted"]
5115 if fname not in fields]
5116 if "valid" not in fields:
5117 status_filter.append([qlang.OP_TRUE, "valid"])
5120 status_filter.insert(0, qlang.OP_AND)
5122 status_filter = None
5124 if name_filter and status_filter:
5125 return [qlang.OP_AND, name_filter, status_filter]
5129 return status_filter
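# For illustration only: a worked example of the filter built above for
# fields=["name", "variants"] and names=None, assuming qlang.MakeSimpleFilter
# returns a false value when no names are given.  Since "hidden",
# "blacklisted" and "valid" are not among the requested fields, only the
# status filter is returned:
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]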
5131 def CheckArguments(self):
5132 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5133 self.op.output_fields, False)
5135 def ExpandNames(self):
5136 self.oq.ExpandNames(self)
5138 def Exec(self, feedback_fn):
5139 return self.oq.OldStyleQuery(self)
5142 class LUNodeRemove(LogicalUnit):
5143 """Logical unit for removing a node.
5146 HPATH = "node-remove"
5147 HTYPE = constants.HTYPE_NODE
5149 def BuildHooksEnv(self):
5154 "OP_TARGET": self.op.node_name,
5155 "NODE_NAME": self.op.node_name,
5158 def BuildHooksNodes(self):
5159 """Build hooks nodes.
5161 This doesn't run on the target node in the pre phase as a failed
5162 node would then be impossible to remove.
5165 all_nodes = self.cfg.GetNodeList()
5167 all_nodes.remove(self.op.node_name)
5170 return (all_nodes, all_nodes)
5172 def CheckPrereq(self):
5173 """Check prerequisites.
5176 - the node exists in the configuration
5177 - it does not have primary or secondary instances
5178 - it's not the master
5180 Any errors are signaled by raising errors.OpPrereqError.
5183 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5184 node = self.cfg.GetNodeInfo(self.op.node_name)
5185 assert node is not None
5187 masternode = self.cfg.GetMasterNode()
5188 if node.name == masternode:
5189 raise errors.OpPrereqError("Node is the master node, failover to another"
5190 " node is required", errors.ECODE_INVAL)
5192 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5193 if node.name in instance.all_nodes:
5194 raise errors.OpPrereqError("Instance %s is still running on the node,"
5195 " please remove first" % instance_name,
5197 self.op.node_name = node.name
5200 def Exec(self, feedback_fn):
5201 """Removes the node from the cluster.
5205 logging.info("Stopping the node daemon and removing configs from node %s",
5208 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5210 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5213 # Promote nodes to master candidate as needed
5214 _AdjustCandidatePool(self, exceptions=[node.name])
5215 self.context.RemoveNode(node.name)
5217 # Run post hooks on the node before it's removed
5218 _RunPostHook(self, node.name)
5220 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5221 msg = result.fail_msg
5223 self.LogWarning("Errors encountered on the remote node while leaving"
5224 " the cluster: %s", msg)
5226 # Remove node from our /etc/hosts
5227 if self.cfg.GetClusterInfo().modify_etc_hosts:
5228 master_node = self.cfg.GetMasterNode()
5229 result = self.rpc.call_etc_hosts_modify(master_node,
5230 constants.ETC_HOSTS_REMOVE,
5232 result.Raise("Can't update hosts file with new host data")
5233 _RedistributeAncillaryFiles(self)
5236 class _NodeQuery(_QueryBase):
5237 FIELDS = query.NODE_FIELDS
5239 def ExpandNames(self, lu):
5240 lu.needed_locks = {}
5241 lu.share_locks = _ShareAll()
5244 self.wanted = _GetWantedNodes(lu, self.names)
5246 self.wanted = locking.ALL_SET
5248 self.do_locking = (self.use_locking and
5249 query.NQ_LIVE in self.requested_data)
5252 # If any non-static field is requested we need to lock the nodes
5253 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5254 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5256 def DeclareLocks(self, lu, level):
5259 def _GetQueryData(self, lu):
5260 """Computes the list of nodes and their attributes.
5263 all_info = lu.cfg.GetAllNodesInfo()
5265 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5267 # Gather data as requested
5268 if query.NQ_LIVE in self.requested_data:
5269 # filter out non-vm_capable nodes
5270 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5272 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5273 [lu.cfg.GetHypervisorType()])
5274 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5275 for (name, nresult) in node_data.items()
5276 if not nresult.fail_msg and nresult.payload)
5280 if query.NQ_INST in self.requested_data:
5281 node_to_primary = dict([(name, set()) for name in nodenames])
5282 node_to_secondary = dict([(name, set()) for name in nodenames])
5284 inst_data = lu.cfg.GetAllInstancesInfo()
5286 for inst in inst_data.values():
5287 if inst.primary_node in node_to_primary:
5288 node_to_primary[inst.primary_node].add(inst.name)
5289 for secnode in inst.secondary_nodes:
5290 if secnode in node_to_secondary:
5291 node_to_secondary[secnode].add(inst.name)
5293 node_to_primary = None
5294 node_to_secondary = None
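# For illustration only: a self-contained version of the reverse mapping built
# above, using (name, primary, secondaries) tuples instead of instance
# objects; only the shape of node_to_primary/node_to_secondary mirrors the
# code above.
def _ExampleNodeToInstanceMaps(nodenames, instances):
  """instances: iterable of (instance_name, primary_node, secondary_nodes).

  """
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for (inst_name, pnode, snodes) in instances:
    if pnode in node_to_primary:
      node_to_primary[pnode].add(inst_name)
    for snode in snodes:
      if snode in node_to_secondary:
        node_to_secondary[snode].add(inst_name)
  return (node_to_primary, node_to_secondary)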
5296 if query.NQ_OOB in self.requested_data:
5297 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5298 for name, node in all_info.iteritems())
5302 if query.NQ_GROUP in self.requested_data:
5303 groups = lu.cfg.GetAllNodeGroupsInfo()
5307 return query.NodeQueryData([all_info[name] for name in nodenames],
5308 live_data, lu.cfg.GetMasterNode(),
5309 node_to_primary, node_to_secondary, groups,
5310 oob_support, lu.cfg.GetClusterInfo())
5313 class LUNodeQuery(NoHooksLU):
5314 """Logical unit for querying nodes.
5317 # pylint: disable=W0142
5320 def CheckArguments(self):
5321 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5322 self.op.output_fields, self.op.use_locking)
5324 def ExpandNames(self):
5325 self.nq.ExpandNames(self)
5327 def DeclareLocks(self, level):
5328 self.nq.DeclareLocks(self, level)
5330 def Exec(self, feedback_fn):
5331 return self.nq.OldStyleQuery(self)
5334 class LUNodeQueryvols(NoHooksLU):
5335 """Logical unit for getting volumes on node(s).
5339 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5340 _FIELDS_STATIC = utils.FieldSet("node")
5342 def CheckArguments(self):
5343 _CheckOutputFields(static=self._FIELDS_STATIC,
5344 dynamic=self._FIELDS_DYNAMIC,
5345 selected=self.op.output_fields)
5347 def ExpandNames(self):
5348 self.share_locks = _ShareAll()
5351 self.needed_locks = {
5352 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5355 self.needed_locks = {
5356 locking.LEVEL_NODE: locking.ALL_SET,
5357 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5360 def Exec(self, feedback_fn):
5361 """Computes the list of nodes and their attributes.
5364 nodenames = self.owned_locks(locking.LEVEL_NODE)
5365 volumes = self.rpc.call_node_volumes(nodenames)
5367 ilist = self.cfg.GetAllInstancesInfo()
5368 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5371 for node in nodenames:
5372 nresult = volumes[node]
5375 msg = nresult.fail_msg
5377 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5380 node_vols = sorted(nresult.payload,
5381 key=operator.itemgetter("dev"))
5383 for vol in node_vols:
5385 for field in self.op.output_fields:
5388 elif field == "phys":
5392 elif field == "name":
5394 elif field == "size":
5395 val = int(float(vol["size"]))
5396 elif field == "instance":
5397 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5399 raise errors.ParameterError(field)
5400 node_output.append(str(val))
5402 output.append(node_output)
5407 class LUNodeQueryStorage(NoHooksLU):
5408 """Logical unit for getting information on storage units on node(s).
5411 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5414 def CheckArguments(self):
5415 _CheckOutputFields(static=self._FIELDS_STATIC,
5416 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5417 selected=self.op.output_fields)
5419 def ExpandNames(self):
5420 self.share_locks = _ShareAll()
5423 self.needed_locks = {
5424 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5427 self.needed_locks = {
5428 locking.LEVEL_NODE: locking.ALL_SET,
5429 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5432 def Exec(self, feedback_fn):
5433 """Computes the list of nodes and their attributes.
5436 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5438 # Always get name to sort by
5439 if constants.SF_NAME in self.op.output_fields:
5440 fields = self.op.output_fields[:]
5442 fields = [constants.SF_NAME] + self.op.output_fields
5444 # Never ask for node or type as it's only known to the LU
5445 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5446 while extra in fields:
5447 fields.remove(extra)
5449 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5450 name_idx = field_idx[constants.SF_NAME]
5452 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5453 data = self.rpc.call_storage_list(self.nodes,
5454 self.op.storage_type, st_args,
5455 self.op.name, fields)
5459 for node in utils.NiceSort(self.nodes):
5460 nresult = data[node]
5464 msg = nresult.fail_msg
5466 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5469 rows = dict([(row[name_idx], row) for row in nresult.payload])
5471 for name in utils.NiceSort(rows.keys()):
5476 for field in self.op.output_fields:
5477 if field == constants.SF_NODE:
5479 elif field == constants.SF_TYPE:
5480 val = self.op.storage_type
5481 elif field in field_idx:
5482 val = row[field_idx[field]]
5484 raise errors.ParameterError(field)
5493 class _InstanceQuery(_QueryBase):
5494 FIELDS = query.INSTANCE_FIELDS
5496 def ExpandNames(self, lu):
5497 lu.needed_locks = {}
5498 lu.share_locks = _ShareAll()
5501 self.wanted = _GetWantedInstances(lu, self.names)
5503 self.wanted = locking.ALL_SET
5505 self.do_locking = (self.use_locking and
5506 query.IQ_LIVE in self.requested_data)
5508 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5509 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5510 lu.needed_locks[locking.LEVEL_NODE] = []
5511 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5513 self.do_grouplocks = (self.do_locking and
5514 query.IQ_NODES in self.requested_data)
5516 def DeclareLocks(self, lu, level):
5518 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5519 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5521 # Lock all groups used by instances optimistically; this requires going
5522 # via the node before it's locked, requiring verification later on
5523 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5525 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5526 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5527 elif level == locking.LEVEL_NODE:
5528 lu._LockInstancesNodes() # pylint: disable=W0212
5531 def _CheckGroupLocks(lu):
5532 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5533 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5535 # Check if node groups for locked instances are still correct
5536 for instance_name in owned_instances:
5537 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5539 def _GetQueryData(self, lu):
5540 """Computes the list of instances and their attributes.
5543 if self.do_grouplocks:
5544 self._CheckGroupLocks(lu)
5546 cluster = lu.cfg.GetClusterInfo()
5547 all_info = lu.cfg.GetAllInstancesInfo()
5549 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5551 instance_list = [all_info[name] for name in instance_names]
5552 nodes = frozenset(itertools.chain(*(inst.all_nodes
5553 for inst in instance_list)))
5554 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5557 wrongnode_inst = set()
5559 # Gather data as requested
5560 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5562 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5564 result = node_data[name]
5566 # offline nodes will be in both lists
5567 assert result.fail_msg
5568 offline_nodes.append(name)
5570 bad_nodes.append(name)
5571 elif result.payload:
5572 for inst in result.payload:
5573 if inst in all_info:
5574 if all_info[inst].primary_node == name:
5575 live_data.update(result.payload)
5577 wrongnode_inst.add(inst)
5579 # orphan instance; we don't list it here as we don't
5580 # handle this case yet in the output of instance listing
5581 logging.warning("Orphan instance '%s' found on node %s",
5583 # else no instance is alive
5587 if query.IQ_DISKUSAGE in self.requested_data:
5588 gmi = ganeti.masterd.instance
5589 disk_usage = dict((inst.name,
5590 gmi.ComputeDiskSize(inst.disk_template,
5591 [{constants.IDISK_SIZE: disk.size}
5592 for disk in inst.disks]))
5593 for inst in instance_list)
5597 if query.IQ_CONSOLE in self.requested_data:
5599 for inst in instance_list:
5600 if inst.name in live_data:
5601 # Instance is running
5602 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5604 consinfo[inst.name] = None
5605 assert set(consinfo.keys()) == set(instance_names)
5609 if query.IQ_NODES in self.requested_data:
5610 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5612 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5613 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5614 for uuid in set(map(operator.attrgetter("group"),
5620 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5621 disk_usage, offline_nodes, bad_nodes,
5622 live_data, wrongnode_inst, consinfo,
5626 class LUQuery(NoHooksLU):
5627 """Query for resources/items of a certain kind.
5630 # pylint: disable=W0142
5633 def CheckArguments(self):
5634 qcls = _GetQueryImplementation(self.op.what)
5636 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5638 def ExpandNames(self):
5639 self.impl.ExpandNames(self)
5641 def DeclareLocks(self, level):
5642 self.impl.DeclareLocks(self, level)
5644 def Exec(self, feedback_fn):
5645 return self.impl.NewStyleQuery(self)
5648 class LUQueryFields(NoHooksLU):
5649 """Query for resources/items of a certain kind.
5652 # pylint: disable=W0142
5655 def CheckArguments(self):
5656 self.qcls = _GetQueryImplementation(self.op.what)
5658 def ExpandNames(self):
5659 self.needed_locks = {}
5661 def Exec(self, feedback_fn):
5662 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5665 class LUNodeModifyStorage(NoHooksLU):
5666 """Logical unit for modifying a storage volume on a node.
5671 def CheckArguments(self):
5672 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5674 storage_type = self.op.storage_type
    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)
5690 def ExpandNames(self):
5691 self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }
5695 def Exec(self, feedback_fn):
5696 """Computes the list of nodes and their attributes.
5699 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5700 result = self.rpc.call_storage_modify(self.op.node_name,
5701 self.op.storage_type, st_args,
5702 self.op.name, self.op.changes)
5703 result.Raise("Failed to modify storage unit '%s' on %s" %
5704 (self.op.name, self.op.node_name))
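# Illustrative sketch (assumed values): for LVM physical-volume storage the
# typical modifiable field is the allocatable flag, so an opcode would carry
# something like changes={constants.SF_ALLOCATABLE: False} to exclude a PV
# from new allocations.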
5707 class LUNodeAdd(LogicalUnit):
5708 """Logical unit for adding node to the cluster.
5712 HTYPE = constants.HTYPE_NODE
5713 _NFLAGS = ["master_capable", "vm_capable"]
5715 def CheckArguments(self):
5716 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5717 # validate/normalize the node name
5718 self.hostname = netutils.GetHostname(name=self.op.node_name,
5719 family=self.primary_ip_family)
5720 self.op.node_name = self.hostname.name
5722 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)
5726 if self.op.readd and self.op.group:
5727 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5728 " being readded", errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
5745 def BuildHooksNodes(self):
5746 """Build hooks nodes.
5749 # Exclude added node
5750 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5751 post_nodes = pre_nodes + [self.op.node_name, ]
5753 return (pre_nodes, post_nodes)
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip
5777 secondary_ip = self.op.secondary_ip
5778 if not netutils.IP4Address.IsValid(secondary_ip):
5779 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5780 " address" % secondary_ip, errors.ECODE_INVAL)
5782 node_list = cfg.GetNodeList()
5783 if not self.op.readd and node in node_list:
5784 raise errors.OpPrereqError("Node %s is already in the configuration" %
5785 node, errors.ECODE_EXISTS)
5786 elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)
5790 self.changed_primary_ip = False
5792 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5793 if self.op.readd and node == existing_node_name:
5794 if existing_node.secondary_ip != secondary_ip:
5795 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5796 " address configuration as before",
5798 if existing_node.primary_ip != primary_ip:
5799 self.changed_primary_ip = True
5803 if (existing_node.primary_ip == primary_ip or
5804 existing_node.secondary_ip == primary_ip or
5805 existing_node.primary_ip == secondary_ip or
5806 existing_node.secondary_ip == secondary_ip):
5807 raise errors.OpPrereqError("New node ip address(es) conflict with"
5808 " existing node %s" % existing_node.name,
5809 errors.ECODE_NOTUNIQUE)
    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)
5824 if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)
5832 # check that the type of the node (single versus dual homed) is the
5833 # same as for the master
5834 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5835 master_singlehomed = myself.secondary_ip == myself.primary_ip
5836 newbie_singlehomed = secondary_ip == primary_ip
5837 if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)
5847 # checks reachability
5848 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5849 raise errors.OpPrereqError("Node not reachable by ping",
5850 errors.ECODE_ENVIRON)
5852 if not newbie_singlehomed:
5853 # check reachability from my secondary ip to newbie's secondary ip
5854 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5855 source=myself.secondary_ip):
5856 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5857 " based ping to node daemon port",
5858 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group, ndparams={})
5881 if self.op.ndparams:
5882 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5884 if self.op.hv_state:
5885 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5887 if self.op.disk_state:
5888 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5890 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5891 # it a property on the base class.
5892 result = rpc.DnsOnlyRunner().call_version([node])[node]
5893 result.Raise("Can't get version information from node %s" % node)
5894 if constants.PROTOCOL_VERSION == result.payload:
5895 logging.info("Communication to node %s fine, sw version %s match",
5896 node, result.payload)
5898 raise errors.OpPrereqError("Version mismatch master version %s,"
5899 " node version %s" %
5900 (constants.PROTOCOL_VERSION, result.payload),
5901 errors.ECODE_ENVIRON)
5903 def Exec(self, feedback_fn):
5904 """Adds the new node to the cluster.
5907 new_node = self.new_node
5908 node = new_node.name
5910 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
    # We're adding a new node, so we assume it's powered
5914 new_node.powered = True
5916 # for re-adds, reset the offline/drained/master-candidate flags;
5917 # we need to reset here, otherwise offline would prevent RPC calls
5918 # later in the procedure; this also means that if the re-add
5919 # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False  # pylint: disable=W0201
5922 self.LogInfo("Readding a node, the offline/drained flags were reset")
5923 # if we demote the node, we do cleanup later in the procedure
5924 new_node.master_candidate = self.master_candidate
5925 if self.changed_primary_ip:
5926 new_node.primary_ip = self.op.primary_ip
5928 # copy the master/vm_capable flags
5929 for attr in self._NFLAGS:
5930 setattr(new_node, attr, getattr(self.op, attr))
5932 # notify the user about any possible mc promotion
5933 if new_node.master_candidate:
5934 self.LogInfo("Node will be a master candidate")
5936 if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}
5941 if self.op.hv_state:
5942 new_node.hv_state_static = self.new_hv_state
5944 if self.op.disk_state:
5945 new_node.disk_state_static = self.new_disk_state
5947 # Add node to our /etc/hosts, and add key to known_hosts
5948 if self.cfg.GetClusterInfo().modify_etc_hosts:
5949 master_node = self.cfg.GetMasterNode()
5950 result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")
5956 if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)
5960 node_verify_list = [self.cfg.GetMasterNode()]
5961 node_verify_param = {
5962 constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")
    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
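# Illustrative sketch for LUNodeAdd above (assumed values): the LU is driven
# by an OpNodeAdd opcode, roughly
#   op = opcodes.OpNodeAdd(node_name="node2.example.com", readd=False)
# with primary_ip/secondary_ip filled in by CheckArguments/CheckPrereq.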
5996 class LUNodeSetParams(LogicalUnit):
5997 """Modifies the parameters of a node.
5999 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6000 to the node role (as _ROLE_*)
6001 @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
6016 _FLAGS = ["master_candidate", "drained", "offline"]
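  # Illustrative example: a plain master candidate corresponds to the flag
  # tuple (True, False, False), i.e. _F2R[(True, False, False)] is
  # _ROLE_CANDIDATE, and _R2F[_ROLE_DRAINED] gives back (False, True, False).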
6018 def CheckArguments(self):
6019 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6020 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6021 self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)
6032 # Boolean value that tells us whether we might be demoting from MC
6033 self.might_demote = (self.op.master_candidate is False or
6034 self.op.offline is True or
6035 self.op.drained is True or
6036 self.op.master_capable is False)
6038 if self.op.secondary_ip:
6039 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6040 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6041 " address" % self.op.secondary_ip,
6044 self.lock_all = self.op.auto_promote and self.might_demote
6045 self.lock_instances = self.op.secondary_ip is not None
6047 def _InstanceFilter(self, instance):
6048 """Filter for getting affected instances.
6051 return (instance.disk_template in constants.DTS_INT_MIRROR and
6052 self.op.node_name in instance.all_nodes)
  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,

        # Block allocations when all nodes are locked
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: self.op.node_name,
        }
6067 # Since modifying a node can have severe effects on currently running
6068 # operations the resource lock is at least acquired in shared mode
6069 self.needed_locks[locking.LEVEL_NODE_RES] = \
6070 self.needed_locks[locking.LEVEL_NODE]
6072 # Get all locks except nodes in shared mode; they are not used for anything
6073 # but read-only access
6074 self.share_locks = _ShareAll()
6075 self.share_locks[locking.LEVEL_NODE] = 0
6076 self.share_locks[locking.LEVEL_NODE_RES] = 0
6077 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6079 if self.lock_instances:
6080 self.needed_locks[locking.LEVEL_INSTANCE] = \
6081 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
6098 def BuildHooksNodes(self):
6099 """Build hooks nodes.
6102 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6105 def CheckPrereq(self):
6106 """Check prerequisites.
6108 This only checks the instance list against the existing names.
6111 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6113 if self.lock_instances:
6114 affected_instances = \
6115 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6117 # Verify instance locks
6118 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6119 wanted_instances = frozenset(affected_instances.keys())
6120 if wanted_instances - owned_instances:
6121 raise errors.OpPrereqError("Instances affected by changing node %s's"
6122 " secondary IP address have changed since"
6123 " locks were acquired, wanted '%s', have"
6124 " '%s'; retry the operation" %
6126 utils.CommaJoin(wanted_instances),
6127 utils.CommaJoin(owned_instances)),
6130 affected_instances = None
6132 if (self.op.master_candidate is not None or
6133 self.op.drained is not None or
6134 self.op.offline is not None):
6135 # we can't change the master's node flags
6136 if self.op.node_name == self.cfg.GetMasterNode():
6137 raise errors.OpPrereqError("The master role can be changed"
6138 " only via master-failover",
6141 if self.op.master_candidate and not node.master_capable:
6142 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6143 " it a master candidate" % node.name,
6146 if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)
6153 if node.master_candidate and self.might_demote and not self.lock_all:
6154 assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
6157 (mc_remaining, mc_should, _) = \
6158 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6159 if mc_remaining < mc_should:
6160 raise errors.OpPrereqError("Not enough master candidates, please"
6161 " pass auto promote option to allow"
6162 " promotion (--auto-promote or RAPI"
6163 " auto_promote=True)", errors.ECODE_STATE)
6165 self.old_flags = old_flags = (node.master_candidate,
6166 node.drained, node.offline)
6167 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6168 self.old_role = old_role = self._F2R[old_flags]
6170 # Check for ineffective changes
6171 for attr in self._FLAGS:
6172 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6173 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6174 setattr(self.op, attr, None)
6176 # Past this point, any flag change to False means a transition
6177 # away from the respective state, as only real changes are kept
6179 # TODO: We might query the real power state if it supports OOB
6180 if _SupportsOob(self.cfg, node):
6181 if self.op.offline is False and not (node.powered or
6182 self.op.powered is True):
6183 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6184 " offline status can be reset") %
6185 self.op.node_name, errors.ECODE_STATE)
6186 elif self.op.powered is not None:
6187 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6188 " as it does not support out-of-band"
6189 " handling") % self.op.node_name,
6192 # If we're being deofflined/drained, we'll MC ourself if needed
6193 if (self.op.drained is False or self.op.offline is False or
6194 (self.op.master_capable and not node.master_capable)):
6195 if _DecideSelfPromotion(self):
6196 self.op.master_candidate = True
6197 self.LogInfo("Auto-promoting node to master candidate")
6199 # If we're no longer master capable, we'll demote ourselves from MC
6200 if self.op.master_capable is False and node.master_candidate:
6201 self.LogInfo("Demoting from master candidate")
6202 self.op.master_candidate = False
6205 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6206 if self.op.master_candidate:
6207 new_role = self._ROLE_CANDIDATE
6208 elif self.op.drained:
6209 new_role = self._ROLE_DRAINED
6210 elif self.op.offline:
6211 new_role = self._ROLE_OFFLINE
6212 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role
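    # Illustrative example (assumed invocation): "gnt-node modify -C yes" sets
    # op.master_candidate=True and thus _ROLE_CANDIDATE, while un-setting the
    # only True flag of a drained node falls back to _ROLE_REGULAR.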
6221 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6222 # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")
6234 # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    # restrictions.
6237 if self.op.secondary_ip:
6238 # Ok even without locking, because this can't be changed by any LU
6239 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6240 master_singlehomed = master.secondary_ip == master.primary_ip
6241 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6242 if self.op.force and node.name == master.name:
6243 self.LogWarning("Transitioning from single-homed to multi-homed"
6244 " cluster; all nodes will require a secondary IP"
6247 raise errors.OpPrereqError("Changing the secondary ip on a"
6248 " single-homed cluster requires the"
6249 " --force option to be passed, and the"
6250 " target node to be the master",
6252 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6253 if self.op.force and node.name == master.name:
6254 self.LogWarning("Transitioning from multi-homed to single-homed"
6255 " cluster; secondary IP addresses will have to be"
6258 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6259 " same as the primary IP on a multi-homed"
6260 " cluster, unless the --force option is"
6261 " passed, and the target node is the"
6262 " master", errors.ECODE_INVAL)
6264 assert not (frozenset(affected_instances) -
6265 self.owned_locks(locking.LEVEL_INSTANCE))
      if node.offline:
        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")
6280 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6281 if master.name != node.name:
6282 # check reachability from master secondary ip to new secondary ip
6283 if not netutils.TcpPing(self.op.secondary_ip,
6284 constants.DEFAULT_NODED_PORT,
6285 source=master.secondary_ip):
6286 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6287 " based ping to node daemon port",
6288 errors.ECODE_ENVIRON)
6290 if self.op.ndparams:
6291 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6292 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6293 self.new_ndparams = new_ndparams
6295 if self.op.hv_state:
6296 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6297 self.node.hv_state_static)
6299 if self.op.disk_state:
6300 self.new_disk_state = \
6301 _MergeAndVerifyDiskState(self.op.disk_state,
6302 self.node.disk_state_static)
  def Exec(self, feedback_fn):
    """Modifies a node."""
    node = self.node
    old_role = self.old_role
    new_role = self.new_role
    result = []

    if self.op.ndparams:
6315 node.ndparams = self.new_ndparams
6317 if self.op.powered is not None:
6318 node.powered = self.op.powered
6320 if self.op.hv_state:
6321 node.hv_state_static = self.new_hv_state
6323 if self.op.disk_state:
6324 node.disk_state_static = self.new_disk_state
6326 for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))
6332 if new_role != old_role:
6333 # Tell the node to demote itself, if no longer MC and not offline
6334 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
6343 (node.master_candidate, node.drained, node.offline) = new_flags
6345 # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])
6349 if self.op.secondary_ip:
6350 node.secondary_ip = self.op.secondary_ip
6351 result.append(("secondary_ip", self.op.secondary_ip))
6353 # this will trigger configuration file update, if needed
6354 self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
6364 class LUNodePowercycle(NoHooksLU):
6365 """Powercycles a node.
6370 def CheckArguments(self):
6371 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6372 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6373 raise errors.OpPrereqError("The node is the master and the force"
6374 " parameter was not set",
6377 def ExpandNames(self):
6378 """Locking for PowercycleNode.
6380 This is a last-resort option and shouldn't block on other
6381 jobs. Therefore, we grab no locks.
6384 self.needed_locks = {}
6386 def Exec(self, feedback_fn):
6390 result = self.rpc.call_node_powercycle(self.op.node_name,
6391 self.cfg.GetHypervisorType())
6392 result.Raise("Failed to schedule the reboot")
6393 return result.payload
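# Illustrative sketch (assumed values): this LU is driven by an
# OpNodePowercycle opcode, e.g.
#   op = opcodes.OpNodePowercycle(node_name="node3.example.com", force=False)
# and returns whatever status message the node daemon reports in its payload.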
6396 class LUClusterQuery(NoHooksLU):
6397 """Query cluster configuration.
6402 def ExpandNames(self):
6403 self.needed_locks = {}
6405 def Exec(self, feedback_fn):
6406 """Return cluster config.
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
6413 for os_name, hv_dict in cluster.os_hvp.items():
6414 os_hvp[os_name] = {}
6415 for hv_name, hv_params in hv_dict.items():
6416 if hv_name in cluster.enabled_hypervisors:
6417 os_hvp[os_name][hv_name] = hv_params
6419 # Convert ip_family to ip_version
6420 primary_ip_version = constants.IP4_VERSION
6421 if cluster.primary_ip_family == netutils.IP6Address.family:
6422 primary_ip_version = constants.IP6_VERSION
6425 "software_version": constants.RELEASE_VERSION,
6426 "protocol_version": constants.PROTOCOL_VERSION,
6427 "config_version": constants.CONFIG_VERSION,
6428 "os_api_version": max(constants.OS_API_VERSIONS),
6429 "export_version": constants.EXPORT_VERSION,
6430 "architecture": runtime.GetArchInfo(),
6431 "name": cluster.cluster_name,
6432 "master": cluster.master_node,
6433 "default_hypervisor": cluster.primary_hypervisor,
6434 "enabled_hypervisors": cluster.enabled_hypervisors,
6435 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
6438 "beparams": cluster.beparams,
6439 "osparams": cluster.osparams,
6440 "ipolicy": cluster.ipolicy,
6441 "nicparams": cluster.nicparams,
6442 "ndparams": cluster.ndparams,
6443 "diskparams": cluster.diskparams,
6444 "candidate_pool_size": cluster.candidate_pool_size,
6445 "master_netdev": cluster.master_netdev,
6446 "master_netmask": cluster.master_netmask,
6447 "use_external_mip_script": cluster.use_external_mip_script,
6448 "volume_group_name": cluster.volume_group_name,
6449 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6450 "file_storage_dir": cluster.file_storage_dir,
6451 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6452 "maintain_node_health": cluster.maintain_node_health,
6453 "ctime": cluster.ctime,
6454 "mtime": cluster.mtime,
6455 "uuid": cluster.uuid,
6456 "tags": list(cluster.GetTags()),
6457 "uid_pool": cluster.uid_pool,
6458 "default_iallocator": cluster.default_iallocator,
6459 "reserved_lvs": cluster.reserved_lvs,
6460 "primary_ip_version": primary_ip_version,
6461 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6462 "hidden_os": cluster.hidden_os,
6463 "blacklisted_os": cluster.blacklisted_os,
6469 class LUClusterConfigQuery(NoHooksLU):
6470 """Return configuration values.
6475 def CheckArguments(self):
6476 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6478 def ExpandNames(self):
6479 self.cq.ExpandNames(self)
6481 def DeclareLocks(self, level):
6482 self.cq.DeclareLocks(self, level)
6484 def Exec(self, feedback_fn):
6485 result = self.cq.OldStyleQuery(self)
    assert len(result) == 1

    return result[0]
6492 class _ClusterQuery(_QueryBase):
6493 FIELDS = query.CLUSTER_FIELDS
  #: Do not sort (there is only one item)
  SORT_FIELD = None
6498 def ExpandNames(self, lu):
6499 lu.needed_locks = {}
6501 # The following variables interact with _QueryBase._GetNames
6502 self.wanted = locking.ALL_SET
6503 self.do_locking = self.use_locking
    if self.do_locking:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)
  def DeclareLocks(self, lu, level):
    pass
6512 def _GetQueryData(self, lu):
6513 """Computes the list of nodes and their attributes.
6516 # Locking is not used
6517 assert not (compat.any(lu.glm.is_owned(level)
6518 for level in locking.LEVELS
6519 if level != locking.LEVEL_CLUSTER) or
6520 self.do_locking or self.use_locking)
    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
    else:
      watcher_pause = NotImplemented
6537 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6540 class LUInstanceActivateDisks(NoHooksLU):
6541 """Bring up an instance's disks.
6546 def ExpandNames(self):
6547 self._ExpandAndLockInstance()
6548 self.needed_locks[locking.LEVEL_NODE] = []
6549 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6551 def DeclareLocks(self, level):
6552 if level == locking.LEVEL_NODE:
6553 self._LockInstancesNodes()
6555 def CheckPrereq(self):
6556 """Check prerequisites.
6558 This checks that the instance is in the cluster.
6561 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6562 assert self.instance is not None, \
6563 "Cannot retrieve locked instance %s" % self.op.instance_name
6564 _CheckNodeOnline(self, self.instance.primary_node)
6566 def Exec(self, feedback_fn):
6567 """Activate the disks.
6570 disks_ok, disks_info = \
6571 _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync:
      if not _WaitForSync(self, self.instance):
        raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
6585 """Prepare the block devices for an instance.
6587 This sets up the block devices on all nodes.
6589 @type lu: L{LogicalUnit}
6590 @param lu: the logical unit on whose behalf we execute
6591 @type instance: L{objects.Instance}
6592 @param instance: the instance for whose disks we assemble
6593 @type disks: list of L{objects.Disk} or None
6594 @param disks: which disks to assemble (or all, if None)
6595 @type ignore_secondaries: boolean
6596 @param ignore_secondaries: if true, errors on secondary nodes
6597 won't result in an error return from the function
6598 @type ignore_size: boolean
6599 @param ignore_size: if true, the current known size of the disk
6600 will not be used during the disk activation, useful for cases
6601 when the size is wrong
6602 @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)
6612 # With the two passes mechanism we try to reduce the window of
6613 # opportunity for the race condition of switching DRBD to primary
6614 # before handshaking occured, but we do not eliminate it
6616 # The proper fix would be to wait (with some limits) until the
6617 # connection has been made and drbd transitions from WFConnection
6618 # into any other network-connected state (Connected, SyncTarget,
6621 # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=False, pass=1): %s",
                      inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False
6640 # FIXME: race condition on drbd migration to primary
6642 # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=True, pass=2): %s",
                      inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6666 # leave the disks configured for the primary node
6667 # this is a workaround that would be fixed better by
6668 # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)
6672 return disks_ok, device_info
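# Illustrative example of the return value (assumed names and device paths):
# for a healthy single-disk DRBD instance this would look roughly like
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# i.e. disks_ok plus one (host, instance-visible name, node device) tuple.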
6675 def _StartInstanceDisks(lu, instance, force):
6676 """Start the disks of an instance.
6679 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6680 ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.LogWarning("",
                    hint=("If the message above refers to a secondary node,"
                          " you can retry the operation using '--force'"))
    raise errors.OpExecError("Disk consistency error")
6690 class LUInstanceDeactivateDisks(NoHooksLU):
6691 """Shutdown an instance's disks.
6696 def ExpandNames(self):
6697 self._ExpandAndLockInstance()
6698 self.needed_locks[locking.LEVEL_NODE] = []
6699 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6701 def DeclareLocks(self, level):
6702 if level == locking.LEVEL_NODE:
6703 self._LockInstancesNodes()
6705 def CheckPrereq(self):
6706 """Check prerequisites.
6708 This checks that the instance is in the cluster.
6711 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6712 assert self.instance is not None, \
6713 "Cannot retrieve locked instance %s" % self.op.instance_name
6715 def Exec(self, feedback_fn):
6716 """Deactivate the disks
6719 instance = self.instance
6721 _ShutdownInstanceDisks(self, instance)
6723 _SafeShutdownInstanceDisks(self, instance)
6726 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6727 """Shutdown block devices of an instance.
6729 This function checks if an instance is running, before calling
6730 _ShutdownInstanceDisks.
6733 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6734 _ShutdownInstanceDisks(lu, instance, disks=disks)
6737 def _ExpandCheckDisks(instance, disks):
6738 """Return the instance disks selected by the disks list
6740 @type disks: list of L{objects.Disk} or None
6741 @param disks: selected disks
6742 @rtype: list of L{objects.Disk}
6743 @return: selected instance disks to act on
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
6755 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6756 """Shutdown block devices of an instance.
6758 This does the shutdown on all nodes of the instance.
  If the ignore_primary is false, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
6781 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6782 """Checks if a node has enough free memory.
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6789 @type lu: C{LogicalUnit}
6790 @param lu: a logical unit from which we get configuration data
6792 @param node: the node to check
6793 @type reason: C{str}
6794 @param reason: string to use in the error message
6795 @type requested: C{int}
6796 @param requested: the amount of memory in MiB to check for
6797 @type hypervisor_name: C{str}
6798 @param hypervisor_name: the hypervisor to ask for memory stats
6800 @return: node current free memory
6801 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6802 we cannot check the node
6805 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6806 nodeinfo[node].Raise("Can't get data from node %s" % node,
6807 prereq=True, ecode=errors.ECODE_ENVIRON)
6808 (_, _, (hv_info, )) = nodeinfo[node].payload
6810 free_mem = hv_info.get("memory_free", None)
6811 if not isinstance(free_mem, int):
6812 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6813 " was '%s'" % (node, free_mem),
6814 errors.ECODE_ENVIRON)
6815 if requested > free_mem:
6816 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6817 " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
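# Example use (see LUInstanceStartup.CheckPrereq further below): the memory
# check is made against the instance's minimum memory, e.g.
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)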
6823 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6824 """Checks if nodes have enough free disk space in the all VGs.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6831 @type lu: C{LogicalUnit}
6832 @param lu: a logical unit from which we get configuration data
6833 @type nodenames: C{list}
6834 @param nodenames: the list of node names to check
6835 @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
6838 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6839 or we cannot check the node
6842 for vg, req_size in req_sizes.items():
6843 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
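# Illustrative example (assumed values): req_sizes maps volume groups to the
# total space needed in them, e.g. {"xenvg": 20480} for 20 GiB in "xenvg";
# each entry is then verified on every node via _CheckNodesFreeDiskOnVG.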
6846 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6847 """Checks if nodes have enough free disk space in the specified VG.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6854 @type lu: C{LogicalUnit}
6855 @param lu: a logical unit from which we get configuration data
6856 @type nodenames: C{list}
6857 @param nodenames: the list of node names to check
6859 @param vg: the volume group to check
6860 @type requested: C{int}
6861 @param requested: the amount of disk in MiB to check for
6862 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6863 or we cannot check the node
6866 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6867 for node in nodenames:
6868 info = nodeinfo[node]
6869 info.Raise("Cannot get current information from node %s" % node,
6870 prereq=True, ecode=errors.ECODE_ENVIRON)
6871 (_, (vg_info, ), _) = info.payload
6872 vg_free = vg_info.get("vg_free", None)
6873 if not isinstance(vg_free, int):
6874 raise errors.OpPrereqError("Can't compute free disk space on node"
6875 " %s for vg %s, result was '%s'" %
6876 (node, vg, vg_free), errors.ECODE_ENVIRON)
6877 if requested > vg_free:
6878 raise errors.OpPrereqError("Not enough disk space on target node %s"
6879 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
6884 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6885 """Checks if nodes have enough physical CPUs
6887 This function checks if all given nodes have the needed number of
6888 physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6892 @type lu: C{LogicalUnit}
6893 @param lu: a logical unit from which we get configuration data
6894 @type nodenames: C{list}
6895 @param nodenames: the list of node names to check
6896 @type requested: C{int}
6897 @param requested: the minimum acceptable number of physical CPUs
6898 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6899 or we cannot check the node
6902 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6903 for node in nodenames:
6904 info = nodeinfo[node]
6905 info.Raise("Cannot get current information from node %s" % node,
6906 prereq=True, ecode=errors.ECODE_ENVIRON)
6907 (_, _, (hv_info, )) = info.payload
6908 num_cpus = hv_info.get("cpu_total", None)
6909 if not isinstance(num_cpus, int):
6910 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6911 " on node %s, result was '%s'" %
6912 (node, num_cpus), errors.ECODE_ENVIRON)
6913 if requested > num_cpus:
6914 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6915 "required" % (node, num_cpus, requested),
6919 class LUInstanceStartup(LogicalUnit):
6920 """Starts an instance.
6923 HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
6930 # fill the beparams dict
6931 objects.UpgradeBeParams(self.op.beparams)
6932 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6934 def ExpandNames(self):
6935 self._ExpandAndLockInstance()
6936 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6938 def DeclareLocks(self, level):
6939 if level == locking.LEVEL_NODE_RES:
6940 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env
6956 def BuildHooksNodes(self):
6957 """Build hooks nodes.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
6963 def CheckPrereq(self):
6964 """Check prerequisites.
6966 This checks that the instance is in the cluster.
6969 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6970 assert self.instance is not None, \
6971 "Cannot retrieve locked instance %s" % self.op.instance_name
6974 if self.op.hvparams:
6975 # check hypervisor parameter syntax (locally)
6976 cluster = self.cfg.GetClusterInfo()
6977 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6978 filled_hvp = cluster.FillHV(instance)
6979 filled_hvp.update(self.op.hvparams)
6980 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6981 hv_type.CheckParameterSyntax(filled_hvp)
6982 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6984 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6986 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6988 if self.primary_offline and self.op.ignore_offline_nodes:
6989 self.LogWarning("Ignoring offline primary node")
6991 if self.op.hvparams or self.op.beparams:
6992 self.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)
6996 bep = self.cfg.GetClusterInfo().FillBE(instance)
6997 bep.update(self.op.beparams)
6999 # check bridges existence
7000 _CheckInstanceBridgesExist(self, instance)
7002 remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
7005 remote_info.Raise("Error checking node %s" % instance.primary_node,
7006 prereq=True, ecode=errors.ECODE_ENVIRON)
7007 if not remote_info.payload: # not running already
7008 _CheckNodeFreeMemory(self, instance.primary_node,
7009 "starting instance %s" % instance.name,
7010 bep[constants.BE_MINMEM], instance.hypervisor)
7012 def Exec(self, feedback_fn):
7013 """Start the instance.
7016 instance = self.instance
7017 force = self.op.force
7019 if not self.op.no_remember:
7020 self.cfg.MarkInstanceUp(instance.name)
7022 if self.primary_offline:
7023 assert self.op.ignore_offline_nodes
7024 self.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
7041 class LUInstanceReboot(LogicalUnit):
7042 """Reboot an instance.
7045 HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7049 def ExpandNames(self):
7050 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env
7068 def BuildHooksNodes(self):
7069 """Build hooks nodes.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
7075 def CheckPrereq(self):
7076 """Check prerequisites.
7078 This checks that the instance is in the cluster.
7081 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7082 assert self.instance is not None, \
7083 "Cannot retrieve locked instance %s" % self.op.instance_name
7084 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7085 _CheckNodeOnline(self, instance.primary_node)
7087 # check bridges existence
7088 _CheckInstanceBridgesExist(self, instance)
7090 def Exec(self, feedback_fn):
7091 """Reboot the instance.
7094 instance = self.instance
7095 ignore_secondaries = self.op.ignore_secondaries
7096 reboot_type = self.op.reboot_type
7098 remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
7101 remote_info.Raise("Error checking node %s" % instance.primary_node)
7102 instance_running = bool(remote_info.payload)
7104 node_current = instance.primary_node
7106 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7107 constants.INSTANCE_REBOOT_HARD]:
7108 for disk in instance.disks:
7109 self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)
7132 self.cfg.MarkInstanceUp(instance.name)
7135 class LUInstanceShutdown(LogicalUnit):
7136 """Shutdown an instance.
7139 HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7143 def ExpandNames(self):
7144 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env
7156 def BuildHooksNodes(self):
7157 """Build hooks nodes.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
7163 def CheckPrereq(self):
7164 """Check prerequisites.
7166 This checks that the instance is in the cluster.
7169 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7170 assert self.instance is not None, \
7171 "Cannot retrieve locked instance %s" % self.op.instance_name
7173 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7175 self.primary_offline = \
7176 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7178 if self.primary_offline and self.op.ignore_offline_nodes:
7179 self.LogWarning("Ignoring offline primary node")
7181 _CheckNodeOnline(self, self.instance.primary_node)
7183 def Exec(self, feedback_fn):
7184 """Shutdown the instance.
7187 instance = self.instance
7188 node_current = instance.primary_node
7189 timeout = self.op.timeout
7191 if not self.op.no_remember:
7192 self.cfg.MarkInstanceDown(instance.name)
7194 if self.primary_offline:
7195 assert self.op.ignore_offline_nodes
7196 self.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not shutdown instance: %s", msg)

      _ShutdownInstanceDisks(self, instance)
7206 class LUInstanceReinstall(LogicalUnit):
7207 """Reinstall an instance.
7210 HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7214 def ExpandNames(self):
7215 self._ExpandAndLockInstance()
7217 def BuildHooksEnv(self):
7220 This runs on master, primary and secondary nodes of the instance.
7223 return _BuildInstanceHookEnvByObject(self, self.instance)
7225 def BuildHooksNodes(self):
7226 """Build hooks nodes.
7229 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7232 def CheckPrereq(self):
7233 """Check prerequisites.
7235 This checks that the instance is in the cluster and is not running.
7238 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7239 assert instance is not None, \
7240 "Cannot retrieve locked instance %s" % self.op.instance_name
7241 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7242 " offline, cannot reinstall")
7244 if instance.disk_template == constants.DT_DISKLESS:
7245 raise errors.OpPrereqError("Instance '%s' has no disks" %
7246 self.op.instance_name,
7248 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7250 if self.op.os_type is not None:
7252 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7253 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7254 instance_os = self.op.os_type
    else:
      instance_os = instance.os
7258 nodelist = list(instance.all_nodes)
7260 if self.op.osparams:
7261 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance
7269 def Exec(self, feedback_fn):
7270 """Reinstall the instance.
7273 inst = self.instance
7275 if self.op.os_type is not None:
7276 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7277 inst.os = self.op.os_type
7278 # Write to configuration
7279 self.cfg.Update(inst, feedback_fn)
7281 _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
7294 class LUInstanceRecreateDisks(LogicalUnit):
7295 """Recreate an instance's missing disks.
7298 HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7302 _MODIFYABLE = frozenset([
7303 constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])
7307 # New or changed disk parameters may have different semantics
7308 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7309 constants.IDISK_ADOPT,
7311 # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    ])), "Not all disk parameters are modifiable"
7316 def _RunAllocator(self):
7317 """Run the allocator based on input opcode.
7320 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7323 # The allocator should actually run in "relocate" mode, but current
7324 # allocators don't support relocating all the nodes of an instance at
7325 # the same time. As a workaround we use "allocate" mode, but this is
7326 # suboptimal for two reasons:
7327 # - The instance name passed to the allocator is present in the list of
7328 # existing instances, so there could be a conflict within the
7329 # internal structures of the allocator. This doesn't happen with the
7330 # current allocators, but it's a liability.
7331 # - The allocator counts the resources used by the instance twice: once
7332 # because the instance exists already, and once because it tries to
7333 # allocate a new instance.
7334 # The allocator could choose some of the nodes on which the instance is
7335 # running, but that's not a problem. If the instance nodes are broken,
7336 # they should be already be marked as drained or offline, and hence
7337 # skipped by the allocator. If instance disks have been lost for other
7338 # reasons, then recreating the disks on the same nodes should be fine.
7339 disk_template = self.instance.disk_template
7340 spindle_use = be_full[constants.BE_SPINDLE_USE]
7341 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7342 disk_template=disk_template,
7343 tags=list(self.instance.GetTags()),
7344 os=self.instance.os,
7346 vcpus=be_full[constants.BE_VCPUS],
7347 memory=be_full[constants.BE_MAXMEM],
7348 spindle_use=spindle_use,
7349 disks=[{constants.IDISK_SIZE: d.size,
7350 constants.IDISK_MODE: d.mode}
7351 for d in self.instance.disks],
7352 hypervisor=self.instance.hypervisor)
7353 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7355 ial.Run(self.op.iallocator)
7357 assert req.RequiredNodes() == len(self.instance.all_nodes)
    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
7364 self.op.nodes = ial.result
7365 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7366 self.op.instance_name, self.op.iallocator,
7367 utils.CommaJoin(ial.result))
7369 def CheckArguments(self):
7370 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7371 # Normalize and convert deprecated list of disk indices
7372 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7374 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)
7380 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7381 # when neither iallocator nor nodes are specified
7382 if self.op.iallocator or self.op.nodes:
7383 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7385 for (idx, params) in self.op.disks:
7386 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7387 unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)
7394 def ExpandNames(self):
7395 self._ExpandAndLockInstance()
7396 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
7403 if self.op.iallocator:
7404 # iallocator will select a new node in the same group
7405 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7406 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7408 self.needed_locks[locking.LEVEL_NODE_RES] = []
7410 def DeclareLocks(self, level):
7411 if level == locking.LEVEL_NODEGROUP:
7412 assert self.op.iallocator is not None
7413 assert not self.op.nodes
7414 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7415 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7416 # Lock the primary group used by the instance optimistically; this
7417 # requires going via the node before it's locked, requiring
7418 # verification later on
7419 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7420 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7422 elif level == locking.LEVEL_NODE:
7423 # If an allocator is used, then we lock all the nodes in the current
7424 # instance group, as we don't know yet which ones will be selected;
7425 # if we replace the nodes without using an allocator, locks are
7426 # already declared in ExpandNames; otherwise, we need to lock all the
7427 # instance nodes for disk re-creation
7428 if self.op.iallocator:
7429 assert not self.op.nodes
7430 assert not self.needed_locks[locking.LEVEL_NODE]
7431 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7433 # Lock member nodes of the group of the primary node
7434 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7435 self.needed_locks[locking.LEVEL_NODE].extend(
7436 self.cfg.GetNodeGroup(group_uuid).members)
7438 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7439 elif not self.op.nodes:
7440 self._LockInstancesNodes(primary_only=False)
7441 elif level == locking.LEVEL_NODE_RES:
7443 self.needed_locks[locking.LEVEL_NODE_RES] = \
7444 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7446 def BuildHooksEnv(self):
7449 This runs on master, primary and secondary nodes of the instance.
7452 return _BuildInstanceHookEnvByObject(self, self.instance)
7454 def BuildHooksNodes(self):
7455 """Build hooks nodes.
7458 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7461 def CheckPrereq(self):
7462 """Check prerequisites.
7464 This checks that the instance is in the cluster and is not running.
7467 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7468 assert instance is not None, \
7469 "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
             len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
             len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
7484 if not self.op.iallocator:
7485 _CheckNodeOnline(self, primary_node)
7487 if instance.disk_template == constants.DT_DISKLESS:
7488 raise errors.OpPrereqError("Instance '%s' has no disks" %
7489 self.op.instance_name, errors.ECODE_INVAL)
7491 # Verify if node group locks are still correct
7492 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7494 # Node group locks are acquired only for the primary node (and only
7495 # when the allocator is used)
7496 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7499 # if we replace nodes *and* the old primary is offline, we don't
7500 # check the instance state
7501 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7502 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7503 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7504 msg="cannot recreate disks")
7507 self.disks = dict(self.op.disks)
7509 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7511 maxidx = max(self.disks.keys())
7512 if maxidx >= len(instance.disks):
7513 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7516 if ((self.op.nodes or self.op.iallocator) and
7517 sorted(self.disks.keys()) != range(len(instance.disks))):
7518 raise errors.OpPrereqError("Can't recreate disks partially and"
7519 " change the nodes at the same time",
7522 self.instance = instance
7524 if self.op.iallocator:
7525 self._RunAllocator()
7526 # Release unneeded node and node resource locks
7527 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7528 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7529 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7531 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7533 def Exec(self, feedback_fn):
7534 """Recreate the disks.
7537 instance = self.instance
7539 assert (self.owned_locks(locking.LEVEL_NODE) ==
7540 self.owned_locks(locking.LEVEL_NODE_RES))
7543 mods = [] # keeps track of needed changes
7545 for idx, disk in enumerate(instance.disks):
7547 changes = self.disks[idx]
7549 # Disk should not be recreated
7553 # update secondaries for disks, if needed
7554 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7555 # need to update the nodes and minors
7556 assert len(self.op.nodes) == 2
7557 assert len(disk.logical_id) == 6 # otherwise disk internals
7559 (_, _, old_port, _, _, old_secret) = disk.logical_id
7560 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7561 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7562 new_minors[0], new_minors[1], old_secret)
7563 assert len(disk.logical_id) == len(new_id)
7567 mods.append((idx, new_id, changes))
7569 # now that we have passed all asserts above, we can apply the mods
7570 # in a single run (to avoid partial changes)
7571 for idx, new_id, changes in mods:
7572 disk = instance.disks[idx]
7573 if new_id is not None:
7574 assert disk.dev_type == constants.LD_DRBD8
7575 disk.logical_id = new_id
7577 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7578 mode=changes.get(constants.IDISK_MODE, None))
7580 # change primary node, if needed
7582 instance.primary_node = self.op.nodes[0]
7583 self.LogWarning("Changing the instance's nodes, you will have to"
7584 " remove any disks left on the older nodes manually")
7587 self.cfg.Update(instance, feedback_fn)
7589 # All touched nodes must be locked
7590 mylocks = self.owned_locks(locking.LEVEL_NODE)
7591 assert mylocks.issuperset(frozenset(instance.all_nodes))
7592 _CreateDisks(self, instance, to_skip=to_skip)
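# Shape of the "mods" entries collected in Exec() above (node names, port,
# minors and sizes below are hypothetical): each entry is a
# (disk_index, new_logical_id_or_None, changes) tuple, e.g.
#
#   (0,
#    ("node3.example.com", "node4.example.com", 11000, 0, 1, old_secret),
#    {constants.IDISK_SIZE: 20480})
#
# Only DRBD disks get a new logical_id when nodes are changed; other disks
# keep new_id as None and only the size/mode changes from self.op.disks apply.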
7595 class LUInstanceRename(LogicalUnit):
7596 """Rename an instance.
7599 HPATH = "instance-rename"
7600 HTYPE = constants.HTYPE_INSTANCE
7602 def CheckArguments(self):
7606 if self.op.ip_check and not self.op.name_check:
7607 # TODO: make the ip check more flexible and not depend on the name check
7608 raise errors.OpPrereqError("IP address check requires a name check",
7611 def BuildHooksEnv(self):
7614 This runs on master, primary and secondary nodes of the instance.
7617 env = _BuildInstanceHookEnvByObject(self, self.instance)
7618 env["INSTANCE_NEW_NAME"] = self.op.new_name
7621 def BuildHooksNodes(self):
7622 """Build hooks nodes.
7625 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7628 def CheckPrereq(self):
7629 """Check prerequisites.
7631 This checks that the instance is in the cluster and is not running.
7634 self.op.instance_name = _ExpandInstanceName(self.cfg,
7635 self.op.instance_name)
7636 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7637 assert instance is not None
7638 _CheckNodeOnline(self, instance.primary_node)
7639 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7640 msg="cannot rename")
7641 self.instance = instance
7643 new_name = self.op.new_name
7644 if self.op.name_check:
7645 hostname = _CheckHostnameSane(self, new_name)
7646 new_name = self.op.new_name = hostname.name
7647 if (self.op.ip_check and
7648 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7649 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7650 (hostname.ip, new_name),
7651 errors.ECODE_NOTUNIQUE)
7653 instance_list = self.cfg.GetInstanceList()
7654 if new_name in instance_list and new_name != instance.name:
7655 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7656 new_name, errors.ECODE_EXISTS)
7658 def Exec(self, feedback_fn):
7659 """Rename the instance.
7662 inst = self.instance
7663 old_name = inst.name
7665 rename_file_storage = False
7666 if (inst.disk_template in constants.DTS_FILEBASED and
7667 self.op.new_name != inst.name):
7668 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7669 rename_file_storage = True
7671 self.cfg.RenameInstance(inst.name, self.op.new_name)
7672 # Change the instance lock. This is definitely safe while we hold the BGL.
7673 # Otherwise the new lock would have to be added in acquired mode.
7675 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7676 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7677 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7679 # re-read the instance from the configuration after rename
7680 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7682 if rename_file_storage:
7683 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7684 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7685 old_file_storage_dir,
7686 new_file_storage_dir)
7687 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7688 " (but the instance has been renamed in Ganeti)" %
7689 (inst.primary_node, old_file_storage_dir,
7690 new_file_storage_dir))
7692 _StartInstanceDisks(self, inst, None)
7693 # update info on disks
7694 info = _GetInstanceInfoText(inst)
7695 for (idx, disk) in enumerate(inst.disks):
7696 for node in inst.all_nodes:
7697 self.cfg.SetDiskID(disk, node)
7698 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7700 self.LogWarning("Error setting info on node %s for disk %s: %s",
7701 node, idx, result.fail_msg)
7703 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7704 old_name, self.op.debug_level)
7705 msg = result.fail_msg
7707 msg = ("Could not run OS rename script for instance %s on node %s"
7708 " (but the instance has been renamed in Ganeti): %s" %
7709 (inst.name, inst.primary_node, msg))
7710 self.LogWarning(msg)
7712 _ShutdownInstanceDisks(self, inst)
7717 class LUInstanceRemove(LogicalUnit):
7718 """Remove an instance.
7721 HPATH = "instance-remove"
7722 HTYPE = constants.HTYPE_INSTANCE
7725 def ExpandNames(self):
7726 self._ExpandAndLockInstance()
7727 self.needed_locks[locking.LEVEL_NODE] = []
7728 self.needed_locks[locking.LEVEL_NODE_RES] = []
7729 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7731 def DeclareLocks(self, level):
7732 if level == locking.LEVEL_NODE:
7733 self._LockInstancesNodes()
7734 elif level == locking.LEVEL_NODE_RES:
7736 self.needed_locks[locking.LEVEL_NODE_RES] = \
7737 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7739 def BuildHooksEnv(self):
7742 This runs on master, primary and secondary nodes of the instance.
7745 env = _BuildInstanceHookEnvByObject(self, self.instance)
7746 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7749 def BuildHooksNodes(self):
7750 """Build hooks nodes.
7753 nl = [self.cfg.GetMasterNode()]
7754 nl_post = list(self.instance.all_nodes) + nl
7755 return (nl, nl_post)
7757 def CheckPrereq(self):
7758 """Check prerequisites.
7760 This checks that the instance is in the cluster.
7763 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7764 assert self.instance is not None, \
7765 "Cannot retrieve locked instance %s" % self.op.instance_name
7767 def Exec(self, feedback_fn):
7768 """Remove the instance.
7771 instance = self.instance
7772 logging.info("Shutting down instance %s on node %s",
7773 instance.name, instance.primary_node)
7775 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7776 self.op.shutdown_timeout)
7777 msg = result.fail_msg
7779 if self.op.ignore_failures:
7780 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7782 raise errors.OpExecError("Could not shutdown instance %s on"
7784 (instance.name, instance.primary_node, msg))
7786 assert (self.owned_locks(locking.LEVEL_NODE) ==
7787 self.owned_locks(locking.LEVEL_NODE_RES))
7788 assert not (set(instance.all_nodes) -
7789 self.owned_locks(locking.LEVEL_NODE)), \
7790 "Not owning correct locks"
7792 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7795 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7796 """Utility function to remove an instance.
7799 logging.info("Removing block devices for instance %s", instance.name)
7801 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7802 if not ignore_failures:
7803 raise errors.OpExecError("Can't remove instance's disks")
7804 feedback_fn("Warning: can't remove instance's disks")
7806 logging.info("Removing instance %s out of cluster config", instance.name)
7808 lu.cfg.RemoveInstance(instance.name)
7810 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7811 "Instance lock removal conflict"
7813 # Remove lock for the instance
7814 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7817 class LUInstanceQuery(NoHooksLU):
7818 """Logical unit for querying instances.
7821 # pylint: disable=W0142
7824 def CheckArguments(self):
7825 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7826 self.op.output_fields, self.op.use_locking)
7828 def ExpandNames(self):
7829 self.iq.ExpandNames(self)
7831 def DeclareLocks(self, level):
7832 self.iq.DeclareLocks(self, level)
7834 def Exec(self, feedback_fn):
7835 return self.iq.OldStyleQuery(self)
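# Rough sketch of the filter built in CheckArguments() above, assuming the
# usual list-based qlang syntax (instance names below are hypothetical):
#
#   qlang.MakeSimpleFilter("name", ["inst1.example.com", "inst2.example.com"])
#   # -> [qlang.OP_OR, [qlang.OP_EQUAL, "name", "inst1.example.com"],
#   #                  [qlang.OP_EQUAL, "name", "inst2.example.com"]]
#
# With an empty name list the filter is None and all instances are returned.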
7838 def _ExpandNamesForMigration(lu):
7839 """Expands names for use with L{TLMigrateInstance}.
7841 @type lu: L{LogicalUnit}
7844 if lu.op.target_node is not None:
7845 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
7847 lu.needed_locks[locking.LEVEL_NODE] = []
7848 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7850 lu.needed_locks[locking.LEVEL_NODE_RES] = []
7851 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7853 # The node allocation lock is actually only needed for replicated instances
7854 # (e.g. DRBD8) and if an iallocator is used.
7855 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
7858 def _DeclareLocksForMigration(lu, level):
7859 """Declares locks for L{TLMigrateInstance}.
7861 @type lu: L{LogicalUnit}
7862 @param level: Lock level
7865 if level == locking.LEVEL_NODE_ALLOC:
7866 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
7868 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
7870 if instance.disk_template in constants.DTS_EXT_MIRROR:
7871 if lu.op.target_node is None:
7872 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7873 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7875 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7877 del lu.recalculate_locks[locking.LEVEL_NODE]
7879 lu._LockInstancesNodes() # pylint: disable=W0212
7881 elif level == locking.LEVEL_NODE:
7882 # Node locks are declared together with the node allocation lock
7883 assert lu.needed_locks[locking.LEVEL_NODE]
7885 elif level == locking.LEVEL_NODE_RES:
7887 lu.needed_locks[locking.LEVEL_NODE_RES] = \
7888 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
7891 class LUInstanceFailover(LogicalUnit):
7892 """Failover an instance.
7895 HPATH = "instance-failover"
7896 HTYPE = constants.HTYPE_INSTANCE
7899 def CheckArguments(self):
7900 """Check the arguments.
7903 self.iallocator = getattr(self.op, "iallocator", None)
7904 self.target_node = getattr(self.op, "target_node", None)
7906 def ExpandNames(self):
7907 self._ExpandAndLockInstance()
7908 _ExpandNamesForMigration(self)
7911 TLMigrateInstance(self, self.op.instance_name, False, True, False,
7912 self.op.ignore_consistency, True,
7913 self.op.shutdown_timeout, self.op.ignore_ipolicy)
7915 self.tasklets = [self._migrater]
7917 def DeclareLocks(self, level):
7918 _DeclareLocksForMigration(self, level)
7920 def BuildHooksEnv(self):
7923 This runs on master, primary and secondary nodes of the instance.
7926 instance = self._migrater.instance
7927 source_node = instance.primary_node
7928 target_node = self.op.target_node
7930 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7931 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7932 "OLD_PRIMARY": source_node,
7933 "NEW_PRIMARY": target_node,
7936 if instance.disk_template in constants.DTS_INT_MIRROR:
7937 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7938 env["NEW_SECONDARY"] = source_node
7940 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7942 env.update(_BuildInstanceHookEnvByObject(self, instance))
7946 def BuildHooksNodes(self):
7947 """Build hooks nodes.
7950 instance = self._migrater.instance
7951 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7952 return (nl, nl + [instance.primary_node])
7955 class LUInstanceMigrate(LogicalUnit):
7956 """Migrate an instance.
7958 This is migration without shutting down, compared to the failover,
7959 which is done with shutdown.
7962 HPATH = "instance-migrate"
7963 HTYPE = constants.HTYPE_INSTANCE
7966 def ExpandNames(self):
7967 self._ExpandAndLockInstance()
7968 _ExpandNamesForMigration(self)
7971 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
7972 False, self.op.allow_failover, False,
7973 self.op.allow_runtime_changes,
7974 constants.DEFAULT_SHUTDOWN_TIMEOUT,
7975 self.op.ignore_ipolicy)
7977 self.tasklets = [self._migrater]
7979 def DeclareLocks(self, level):
7980 _DeclareLocksForMigration(self, level)
7982 def BuildHooksEnv(self):
7985 This runs on master, primary and secondary nodes of the instance.
7988 instance = self._migrater.instance
7989 source_node = instance.primary_node
7990 target_node = self.op.target_node
7991 env = _BuildInstanceHookEnvByObject(self, instance)
7993 "MIGRATE_LIVE": self._migrater.live,
7994 "MIGRATE_CLEANUP": self.op.cleanup,
7995 "OLD_PRIMARY": source_node,
7996 "NEW_PRIMARY": target_node,
7997 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8000 if instance.disk_template in constants.DTS_INT_MIRROR:
8001 env["OLD_SECONDARY"] = target_node
8002 env["NEW_SECONDARY"] = source_node
8004 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8008 def BuildHooksNodes(self):
8009 """Build hooks nodes.
8012 instance = self._migrater.instance
8013 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8014 return (nl, nl + [instance.primary_node])
8017 class LUInstanceMove(LogicalUnit):
8018 """Move an instance by data-copying.
8021 HPATH = "instance-move"
8022 HTYPE = constants.HTYPE_INSTANCE
8025 def ExpandNames(self):
8026 self._ExpandAndLockInstance()
8027 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8028 self.op.target_node = target_node
8029 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8030 self.needed_locks[locking.LEVEL_NODE_RES] = []
8031 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8033 def DeclareLocks(self, level):
8034 if level == locking.LEVEL_NODE:
8035 self._LockInstancesNodes(primary_only=True)
8036 elif level == locking.LEVEL_NODE_RES:
8038 self.needed_locks[locking.LEVEL_NODE_RES] = \
8039 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8041 def BuildHooksEnv(self):
8044 This runs on master, primary and secondary nodes of the instance.
8048 "TARGET_NODE": self.op.target_node,
8049 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8051 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8054 def BuildHooksNodes(self):
8055 """Build hooks nodes.
8059 self.cfg.GetMasterNode(),
8060 self.instance.primary_node,
8061 self.op.target_node,
8065 def CheckPrereq(self):
8066 """Check prerequisites.
8068 This checks that the instance is in the cluster.
8071 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8072 assert self.instance is not None, \
8073 "Cannot retrieve locked instance %s" % self.op.instance_name
8075 node = self.cfg.GetNodeInfo(self.op.target_node)
8076 assert node is not None, \
8077 "Cannot retrieve locked node %s" % self.op.target_node
8079 self.target_node = target_node = node.name
8081 if target_node == instance.primary_node:
8082 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8083 (instance.name, target_node),
8086 bep = self.cfg.GetClusterInfo().FillBE(instance)
8088 for idx, dsk in enumerate(instance.disks):
8089 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8090 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8091 " cannot copy" % idx, errors.ECODE_STATE)
8093 _CheckNodeOnline(self, target_node)
8094 _CheckNodeNotDrained(self, target_node)
8095 _CheckNodeVmCapable(self, target_node)
8096 cluster = self.cfg.GetClusterInfo()
8097 group_info = self.cfg.GetNodeGroup(node.group)
8098 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8099 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8100 ignore=self.op.ignore_ipolicy)
8102 if instance.admin_state == constants.ADMINST_UP:
8103 # check memory requirements on the secondary node
8104 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8105 instance.name, bep[constants.BE_MAXMEM],
8106 instance.hypervisor)
8108 self.LogInfo("Not checking memory on the secondary node as"
8109 " instance will not be started")
8111 # check bridge existence
8112 _CheckInstanceBridgesExist(self, instance, node=target_node)
8114 def Exec(self, feedback_fn):
8115 """Move an instance.
8117 The move is done by shutting it down on its present node, copying
8118 the data over (slow) and starting it on the new node.
8121 instance = self.instance
8123 source_node = instance.primary_node
8124 target_node = self.target_node
8126 self.LogInfo("Shutting down instance %s on source node %s",
8127 instance.name, source_node)
8129 assert (self.owned_locks(locking.LEVEL_NODE) ==
8130 self.owned_locks(locking.LEVEL_NODE_RES))
8132 result = self.rpc.call_instance_shutdown(source_node, instance,
8133 self.op.shutdown_timeout)
8134 msg = result.fail_msg
8136 if self.op.ignore_consistency:
8137 self.LogWarning("Could not shutdown instance %s on node %s."
8138 " Proceeding anyway. Please make sure node"
8139 " %s is down. Error details: %s",
8140 instance.name, source_node, source_node, msg)
8142 raise errors.OpExecError("Could not shutdown instance %s on"
8144 (instance.name, source_node, msg))
8146 # create the target disks
8148 _CreateDisks(self, instance, target_node=target_node)
8149 except errors.OpExecError:
8150 self.LogWarning("Device creation failed, reverting...")
8152 _RemoveDisks(self, instance, target_node=target_node)
8154 self.cfg.ReleaseDRBDMinors(instance.name)
8157 cluster_name = self.cfg.GetClusterInfo().cluster_name
8160 # activate, get path, copy the data over
8161 for idx, disk in enumerate(instance.disks):
8162 self.LogInfo("Copying data for disk %d", idx)
8163 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8164 instance.name, True, idx)
8166 self.LogWarning("Can't assemble newly created disk %d: %s",
8167 idx, result.fail_msg)
8168 errs.append(result.fail_msg)
8170 dev_path = result.payload
8171 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8172 target_node, dev_path,
8175 self.LogWarning("Can't copy data over for disk %d: %s",
8176 idx, result.fail_msg)
8177 errs.append(result.fail_msg)
8181 self.LogWarning("Some disks failed to copy, aborting")
8183 _RemoveDisks(self, instance, target_node=target_node)
8185 self.cfg.ReleaseDRBDMinors(instance.name)
8186 raise errors.OpExecError("Errors during disk copy: %s" %
8189 instance.primary_node = target_node
8190 self.cfg.Update(instance, feedback_fn)
8192 self.LogInfo("Removing the disks on the original node")
8193 _RemoveDisks(self, instance, target_node=source_node)
8195 # Only start the instance if it's marked as up
8196 if instance.admin_state == constants.ADMINST_UP:
8197 self.LogInfo("Starting instance %s on node %s",
8198 instance.name, target_node)
8200 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8201 ignore_secondaries=True)
8203 _ShutdownInstanceDisks(self, instance)
8204 raise errors.OpExecError("Can't activate the instance's disks")
8206 result = self.rpc.call_instance_start(target_node,
8207 (instance, None, None), False)
8208 msg = result.fail_msg
8210 _ShutdownInstanceDisks(self, instance)
8211 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8212 (instance.name, target_node, msg))
8215 class LUNodeMigrate(LogicalUnit):
8216 """Migrate all instances from a node.
8219 HPATH = "node-migrate"
8220 HTYPE = constants.HTYPE_NODE
8223 def CheckArguments(self):
8226 def ExpandNames(self):
8227 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8229 self.share_locks = _ShareAll()
8230 self.needed_locks = {
8231 locking.LEVEL_NODE: [self.op.node_name],
8234 def BuildHooksEnv(self):
8237 This runs on the master, the primary and all the secondaries.
8241 "NODE_NAME": self.op.node_name,
8242 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8245 def BuildHooksNodes(self):
8246 """Build hooks nodes.
8249 nl = [self.cfg.GetMasterNode()]
8252 def CheckPrereq(self):
8255 def Exec(self, feedback_fn):
8256 # Prepare jobs for migration instances
8257 allow_runtime_changes = self.op.allow_runtime_changes
8259 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8262 iallocator=self.op.iallocator,
8263 target_node=self.op.target_node,
8264 allow_runtime_changes=allow_runtime_changes,
8265 ignore_ipolicy=self.op.ignore_ipolicy)]
8266 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8268 # TODO: Run iallocator in this opcode and pass correct placement options to
8269 # OpInstanceMigrate. Since other jobs can modify the cluster between
8270 # running the iallocator and the actual migration, a good consistency model
8271 # will have to be found.
8273 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8274 frozenset([self.op.node_name]))
8276 return ResultWithJobs(jobs)
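# For illustration (instance names hypothetical): a node with two primary
# instances yields one single-opcode job per instance, so the ResultWithJobs
# returned above wraps a structure along the lines of
#
#   [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)]]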
8279 class TLMigrateInstance(Tasklet):
8280 """Tasklet class for instance migration.
8283 @ivar live: whether the migration will be done live or non-live;
8284 this variable is initialized only after CheckPrereq has run
8285 @type cleanup: boolean
8286 @ivar cleanup: Whether we clean up from a failed migration
8287 @type iallocator: string
8288 @ivar iallocator: The iallocator used to determine target_node
8289 @type target_node: string
8290 @ivar target_node: If given, the target_node to reallocate the instance to
8291 @type failover: boolean
8292 @ivar failover: Whether operation results in failover or migration
8293 @type fallback: boolean
8294 @ivar fallback: Whether fallback to failover is allowed if migration not
8296 @type ignore_consistency: boolean
8297 @ivar ignore_consistency: Whether we should ignore consistency between source
8299 @type shutdown_timeout: int
8300 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8301 @type ignore_ipolicy: bool
8302 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8307 _MIGRATION_POLL_INTERVAL = 1 # seconds
8308 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8310 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8311 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8313 """Initializes this class.
8316 Tasklet.__init__(self, lu)
8319 self.instance_name = instance_name
8320 self.cleanup = cleanup
8321 self.live = False # will be overridden later
8322 self.failover = failover
8323 self.fallback = fallback
8324 self.ignore_consistency = ignore_consistency
8325 self.shutdown_timeout = shutdown_timeout
8326 self.ignore_ipolicy = ignore_ipolicy
8327 self.allow_runtime_changes = allow_runtime_changes
8329 def CheckPrereq(self):
8330 """Check prerequisites.
8332 This checks that the instance is in the cluster.
8335 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8336 instance = self.cfg.GetInstanceInfo(instance_name)
8337 assert instance is not None
8338 self.instance = instance
8339 cluster = self.cfg.GetClusterInfo()
8341 if (not self.cleanup and
8342 not instance.admin_state == constants.ADMINST_UP and
8343 not self.failover and self.fallback):
8344 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8345 " switching to failover")
8346 self.failover = True
8348 if instance.disk_template not in constants.DTS_MIRRORED:
8353 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8354 " %s" % (instance.disk_template, text),
8357 if instance.disk_template in constants.DTS_EXT_MIRROR:
8358 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8360 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8362 if self.lu.op.iallocator:
8363 self._RunAllocator()
8365 # We set self.target_node as it is required by
8367 self.target_node = self.lu.op.target_node
8369 # Check that the target node is correct in terms of instance policy
8370 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8371 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8372 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8374 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8375 ignore=self.ignore_ipolicy)
8377 # self.target_node is already populated, either directly or by the
8379 target_node = self.target_node
8380 if self.target_node == instance.primary_node:
8381 raise errors.OpPrereqError("Cannot migrate instance %s"
8382 " to its primary (%s)" %
8383 (instance.name, instance.primary_node),
8386 if len(self.lu.tasklets) == 1:
8387 # It is safe to release locks only when we're the only tasklet
8389 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8390 keep=[instance.primary_node, self.target_node])
8391 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8394 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8396 secondary_nodes = instance.secondary_nodes
8397 if not secondary_nodes:
8398 raise errors.ConfigurationError("No secondary node but using"
8399 " %s disk template" %
8400 instance.disk_template)
8401 target_node = secondary_nodes[0]
8402 if self.lu.op.iallocator or (self.lu.op.target_node and
8403 self.lu.op.target_node != target_node):
8405 text = "failed over"
8408 raise errors.OpPrereqError("Instances with disk template %s cannot"
8409 " be %s to arbitrary nodes"
8410 " (neither an iallocator nor a target"
8411 " node can be passed)" %
8412 (instance.disk_template, text),
8414 nodeinfo = self.cfg.GetNodeInfo(target_node)
8415 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8416 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8418 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8419 ignore=self.ignore_ipolicy)
8421 i_be = cluster.FillBE(instance)
8423 # check memory requirements on the secondary node
8424 if (not self.cleanup and
8425 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8426 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8427 "migrating instance %s" %
8429 i_be[constants.BE_MINMEM],
8430 instance.hypervisor)
8432 self.lu.LogInfo("Not checking memory on the secondary node as"
8433 " instance will not be started")
8435 # check if failover must be forced instead of migration
8436 if (not self.cleanup and not self.failover and
8437 i_be[constants.BE_ALWAYS_FAILOVER]):
8438 self.lu.LogInfo("Instance configured to always failover; fallback"
8440 self.failover = True
8442 # check bridge existence
8443 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8445 if not self.cleanup:
8446 _CheckNodeNotDrained(self.lu, target_node)
8447 if not self.failover:
8448 result = self.rpc.call_instance_migratable(instance.primary_node,
8450 if result.fail_msg and self.fallback:
8451 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8453 self.failover = True
8455 result.Raise("Can't migrate, please use failover",
8456 prereq=True, ecode=errors.ECODE_STATE)
8458 assert not (self.failover and self.cleanup)
8460 if not self.failover:
8461 if self.lu.op.live is not None and self.lu.op.mode is not None:
8462 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8463 " parameters are accepted",
8465 if self.lu.op.live is not None:
8467 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8469 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8470 # reset the 'live' parameter to None so that repeated
8471 # invocations of CheckPrereq do not raise an exception
8472 self.lu.op.live = None
8473 elif self.lu.op.mode is None:
8474 # read the default value from the hypervisor
8475 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8476 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8478 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8480 # Failover is never live
8483 if not (self.failover or self.cleanup):
8484 remote_info = self.rpc.call_instance_info(instance.primary_node,
8486 instance.hypervisor)
8487 remote_info.Raise("Error checking instance on node %s" %
8488 instance.primary_node)
8489 instance_running = bool(remote_info.payload)
8490 if instance_running:
8491 self.current_mem = int(remote_info.payload["memory"])
8493 def _RunAllocator(self):
8494 """Run the allocator based on input opcode.
8497 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8499 # FIXME: add a self.ignore_ipolicy option
8500 req = iallocator.IAReqRelocate(name=self.instance_name,
8501 relocate_from=[self.instance.primary_node])
8502 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8504 ial.Run(self.lu.op.iallocator)
8507 raise errors.OpPrereqError("Can't compute nodes using"
8508 " iallocator '%s': %s" %
8509 (self.lu.op.iallocator, ial.info),
8511 self.target_node = ial.result[0]
8512 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8513 self.instance_name, self.lu.op.iallocator,
8514 utils.CommaJoin(ial.result))
8516 def _WaitUntilSync(self):
8517 """Poll with custom rpc for disk sync.
8519 This uses our own step-based rpc call.
8522 self.feedback_fn("* wait until resync is done")
8526 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8528 (self.instance.disks,
8531 for node, nres in result.items():
8532 nres.Raise("Cannot resync disks on node %s" % node)
8533 node_done, node_percent = nres.payload
8534 all_done = all_done and node_done
8535 if node_percent is not None:
8536 min_percent = min(min_percent, node_percent)
8538 if min_percent < 100:
8539 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8542 def _EnsureSecondary(self, node):
8543 """Demote a node to secondary.
8546 self.feedback_fn("* switching node %s to secondary mode" % node)
8548 for dev in self.instance.disks:
8549 self.cfg.SetDiskID(dev, node)
8551 result = self.rpc.call_blockdev_close(node, self.instance.name,
8552 self.instance.disks)
8553 result.Raise("Cannot change disk to secondary on node %s" % node)
8555 def _GoStandalone(self):
8556 """Disconnect from the network.
8559 self.feedback_fn("* changing into standalone mode")
8560 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8561 self.instance.disks)
8562 for node, nres in result.items():
8563 nres.Raise("Cannot disconnect disks node %s" % node)
8565 def _GoReconnect(self, multimaster):
8566 """Reconnect to the network.
8572 msg = "single-master"
8573 self.feedback_fn("* changing disks into %s mode" % msg)
8574 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8575 (self.instance.disks, self.instance),
8576 self.instance.name, multimaster)
8577 for node, nres in result.items():
8578 nres.Raise("Cannot change disks config on node %s" % node)
8580 def _ExecCleanup(self):
8581 """Try to cleanup after a failed migration.
8583 The cleanup is done by:
8584 - check that the instance is running only on one node
8585 (and update the config if needed)
8586 - change disks on its secondary node to secondary
8587 - wait until disks are fully synchronized
8588 - disconnect from the network
8589 - change disks into single-master mode
8590 - wait again until disks are fully synchronized
8593 instance = self.instance
8594 target_node = self.target_node
8595 source_node = self.source_node
8597 # check running on only one node
8598 self.feedback_fn("* checking where the instance actually runs"
8599 " (if this hangs, the hypervisor might be in"
8601 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8602 for node, result in ins_l.items():
8603 result.Raise("Can't contact node %s" % node)
8605 runningon_source = instance.name in ins_l[source_node].payload
8606 runningon_target = instance.name in ins_l[target_node].payload
8608 if runningon_source and runningon_target:
8609 raise errors.OpExecError("Instance seems to be running on two nodes,"
8610 " or the hypervisor is confused; you will have"
8611 " to ensure manually that it runs only on one"
8612 " and restart this operation")
8614 if not (runningon_source or runningon_target):
8615 raise errors.OpExecError("Instance does not seem to be running at all;"
8616 " in this case it's safer to repair by"
8617 " running 'gnt-instance stop' to ensure disk"
8618 " shutdown, and then restarting it")
8620 if runningon_target:
8621 # the migration has actually succeeded, we need to update the config
8622 self.feedback_fn("* instance running on secondary node (%s),"
8623 " updating config" % target_node)
8624 instance.primary_node = target_node
8625 self.cfg.Update(instance, self.feedback_fn)
8626 demoted_node = source_node
8628 self.feedback_fn("* instance confirmed to be running on its"
8629 " primary node (%s)" % source_node)
8630 demoted_node = target_node
8632 if instance.disk_template in constants.DTS_INT_MIRROR:
8633 self._EnsureSecondary(demoted_node)
8635 self._WaitUntilSync()
8636 except errors.OpExecError:
8637 # we ignore errors here, since if the device is standalone, it
8638 # won't be able to sync
8640 self._GoStandalone()
8641 self._GoReconnect(False)
8642 self._WaitUntilSync()
8644 self.feedback_fn("* done")
8646 def _RevertDiskStatus(self):
8647 """Try to revert the disk status after a failed migration.
8650 target_node = self.target_node
8651 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8655 self._EnsureSecondary(target_node)
8656 self._GoStandalone()
8657 self._GoReconnect(False)
8658 self._WaitUntilSync()
8659 except errors.OpExecError, err:
8660 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8661 " please try to recover the instance manually;"
8662 " error '%s'" % str(err))
8664 def _AbortMigration(self):
8665 """Call the hypervisor code to abort a started migration.
8668 instance = self.instance
8669 target_node = self.target_node
8670 source_node = self.source_node
8671 migration_info = self.migration_info
8673 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8677 abort_msg = abort_result.fail_msg
8679 logging.error("Aborting migration failed on target node %s: %s",
8680 target_node, abort_msg)
8681 # Don't raise an exception here, as we still have to try to revert the
8682 # disk status, even if this step failed.
8684 abort_result = self.rpc.call_instance_finalize_migration_src(
8685 source_node, instance, False, self.live)
8686 abort_msg = abort_result.fail_msg
8688 logging.error("Aborting migration failed on source node %s: %s",
8689 source_node, abort_msg)
8691 def _ExecMigration(self):
8692 """Migrate an instance.
8694 The migrate is done by:
8695 - change the disks into dual-master mode
8696 - wait until disks are fully synchronized again
8697 - migrate the instance
8698 - change disks on the new secondary node (the old primary) to secondary
8699 - wait until disks are fully synchronized
8700 - change disks into single-master mode
8703 instance = self.instance
8704 target_node = self.target_node
8705 source_node = self.source_node
8707 # Check for hypervisor version mismatch and warn the user.
8708 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8709 None, [self.instance.hypervisor])
8710 for ninfo in nodeinfo.values():
8711 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8713 (_, _, (src_info, )) = nodeinfo[source_node].payload
8714 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8716 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8717 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8718 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8719 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8720 if src_version != dst_version:
8721 self.feedback_fn("* warning: hypervisor version mismatch between"
8722 " source (%s) and target (%s) node" %
8723 (src_version, dst_version))
8725 self.feedback_fn("* checking disk consistency between source and target")
8726 for (idx, dev) in enumerate(instance.disks):
8727 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8728 raise errors.OpExecError("Disk %s is degraded or not fully"
8729 " synchronized on target node,"
8730 " aborting migration" % idx)
8732 if self.current_mem > self.tgt_free_mem:
8733 if not self.allow_runtime_changes:
8734 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8735 " free memory to fit instance %s on target"
8736 " node %s (have %dMB, need %dMB)" %
8737 (instance.name, target_node,
8738 self.tgt_free_mem, self.current_mem))
8739 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8740 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8743 rpcres.Raise("Cannot modify instance runtime memory")
8745 # First get the migration information from the remote node
8746 result = self.rpc.call_migration_info(source_node, instance)
8747 msg = result.fail_msg
8749 log_err = ("Failed fetching source migration information from %s: %s" %
8751 logging.error(log_err)
8752 raise errors.OpExecError(log_err)
8754 self.migration_info = migration_info = result.payload
8756 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8757 # Then switch the disks to master/master mode
8758 self._EnsureSecondary(target_node)
8759 self._GoStandalone()
8760 self._GoReconnect(True)
8761 self._WaitUntilSync()
8763 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8764 result = self.rpc.call_accept_instance(target_node,
8767 self.nodes_ip[target_node])
8769 msg = result.fail_msg
8771 logging.error("Instance pre-migration failed, trying to revert"
8772 " disk status: %s", msg)
8773 self.feedback_fn("Pre-migration failed, aborting")
8774 self._AbortMigration()
8775 self._RevertDiskStatus()
8776 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8777 (instance.name, msg))
8779 self.feedback_fn("* migrating instance to %s" % target_node)
8780 result = self.rpc.call_instance_migrate(source_node, instance,
8781 self.nodes_ip[target_node],
8783 msg = result.fail_msg
8785 logging.error("Instance migration failed, trying to revert"
8786 " disk status: %s", msg)
8787 self.feedback_fn("Migration failed, aborting")
8788 self._AbortMigration()
8789 self._RevertDiskStatus()
8790 raise errors.OpExecError("Could not migrate instance %s: %s" %
8791 (instance.name, msg))
8793 self.feedback_fn("* starting memory transfer")
8794 last_feedback = time.time()
8796 result = self.rpc.call_instance_get_migration_status(source_node,
8798 msg = result.fail_msg
8799 ms = result.payload # MigrationStatus instance
8800 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8801 logging.error("Instance migration failed, trying to revert"
8802 " disk status: %s", msg)
8803 self.feedback_fn("Migration failed, aborting")
8804 self._AbortMigration()
8805 self._RevertDiskStatus()
8807 msg = "hypervisor returned failure"
8808 raise errors.OpExecError("Could not migrate instance %s: %s" %
8809 (instance.name, msg))
8811 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8812 self.feedback_fn("* memory transfer complete")
8815 if (utils.TimeoutExpired(last_feedback,
8816 self._MIGRATION_FEEDBACK_INTERVAL) and
8817 ms.transferred_ram is not None):
8818 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8819 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8820 last_feedback = time.time()
8822 time.sleep(self._MIGRATION_POLL_INTERVAL)
8824 result = self.rpc.call_instance_finalize_migration_src(source_node,
8828 msg = result.fail_msg
8830 logging.error("Instance migration succeeded, but finalization failed"
8831 " on the source node: %s", msg)
8832 raise errors.OpExecError("Could not finalize instance migration: %s" %
8835 instance.primary_node = target_node
8837 # distribute new instance config to the other nodes
8838 self.cfg.Update(instance, self.feedback_fn)
8840 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8844 msg = result.fail_msg
8846 logging.error("Instance migration succeeded, but finalization failed"
8847 " on the target node: %s", msg)
8848 raise errors.OpExecError("Could not finalize instance migration: %s" %
8851 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8852 self._EnsureSecondary(source_node)
8853 self._WaitUntilSync()
8854 self._GoStandalone()
8855 self._GoReconnect(False)
8856 self._WaitUntilSync()
8858 # If the instance's disk template is `rbd' and there was a successful
8859 # migration, unmap the device from the source node.
8860 if self.instance.disk_template == constants.DT_RBD:
8861 disks = _ExpandCheckDisks(instance, instance.disks)
8862 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8864 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8865 msg = result.fail_msg
8867 logging.error("Migration was successful, but couldn't unmap the"
8868 " block device %s on source node %s: %s",
8869 disk.iv_name, source_node, msg)
8870 logging.error("You need to unmap the device %s manually on %s",
8871 disk.iv_name, source_node)
8873 self.feedback_fn("* done")
8875 def _ExecFailover(self):
8876 """Failover an instance.
8878 The failover is done by shutting it down on its present node and
8879 starting it on the secondary.
8882 instance = self.instance
8883 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8885 source_node = instance.primary_node
8886 target_node = self.target_node
8888 if instance.admin_state == constants.ADMINST_UP:
8889 self.feedback_fn("* checking disk consistency between source and target")
8890 for (idx, dev) in enumerate(instance.disks):
8891 # for drbd, these are drbd over lvm
8892 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8894 if primary_node.offline:
8895 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8897 (primary_node.name, idx, target_node))
8898 elif not self.ignore_consistency:
8899 raise errors.OpExecError("Disk %s is degraded on target node,"
8900 " aborting failover" % idx)
8902 self.feedback_fn("* not checking disk consistency as instance is not"
8905 self.feedback_fn("* shutting down instance on source node")
8906 logging.info("Shutting down instance %s on node %s",
8907 instance.name, source_node)
8909 result = self.rpc.call_instance_shutdown(source_node, instance,
8910 self.shutdown_timeout)
8911 msg = result.fail_msg
8913 if self.ignore_consistency or primary_node.offline:
8914 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8915 " proceeding anyway; please make sure node"
8916 " %s is down; error details: %s",
8917 instance.name, source_node, source_node, msg)
8919 raise errors.OpExecError("Could not shutdown instance %s on"
8921 (instance.name, source_node, msg))
8923 self.feedback_fn("* deactivating the instance's disks on source node")
8924 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8925 raise errors.OpExecError("Can't shut down the instance's disks")
8927 instance.primary_node = target_node
8928 # distribute new instance config to the other nodes
8929 self.cfg.Update(instance, self.feedback_fn)
8931 # Only start the instance if it's marked as up
8932 if instance.admin_state == constants.ADMINST_UP:
8933 self.feedback_fn("* activating the instance's disks on target node %s" %
8935 logging.info("Starting instance %s on node %s",
8936 instance.name, target_node)
8938 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8939 ignore_secondaries=True)
8941 _ShutdownInstanceDisks(self.lu, instance)
8942 raise errors.OpExecError("Can't activate the instance's disks")
8944 self.feedback_fn("* starting the instance on the target node %s" %
8946 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8948 msg = result.fail_msg
8950 _ShutdownInstanceDisks(self.lu, instance)
8951 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8952 (instance.name, target_node, msg))
8954 def Exec(self, feedback_fn):
8955 """Perform the migration.
8958 self.feedback_fn = feedback_fn
8959 self.source_node = self.instance.primary_node
8961 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8962 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8963 self.target_node = self.instance.secondary_nodes[0]
8964 # Otherwise self.target_node has been populated either
8965 # directly, or through an iallocator.
8967 self.all_nodes = [self.source_node, self.target_node]
8968 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8969 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8972 feedback_fn("Failover instance %s" % self.instance.name)
8973 self._ExecFailover()
8975 feedback_fn("Migrating instance %s" % self.instance.name)
8978 return self._ExecCleanup()
8980 return self._ExecMigration()
8983 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8985 """Wrapper around L{_CreateBlockDevInner}.
8987 This method annotates the root device first.
8990 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8991 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8995 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8997 """Create a tree of block devices on a given node.
8999 If this device type has to be created on secondaries, create it and
9002 If not, just recurse to children keeping the same 'force' value.
9004 @attention: The device has to be annotated already.
9006 @param lu: the lu on whose behalf we execute
9007 @param node: the node on which to create the device
9008 @type instance: L{objects.Instance}
9009 @param instance: the instance which owns the device
9010 @type device: L{objects.Disk}
9011 @param device: the device to create
9012 @type force_create: boolean
9013 @param force_create: whether to force creation of this device; this
9014 will be changed to True whenever we find a device which has
9015 CreateOnSecondary() attribute
9016 @param info: the extra 'metadata' we should attach to the device
9017 (this will be represented as a LVM tag)
9018 @type force_open: boolean
9019 @param force_open: this parameter will be passed to the
9020 L{backend.BlockdevCreate} function where it specifies
9021 whether we run on primary or not, and it affects both
9022 the child assembly and the device's own Open() execution
9025 if device.CreateOnSecondary():
9029 for child in device.children:
9030 _CreateBlockDevInner(lu, node, instance, child, force_create,
9033 if not force_create:
9036 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
9039 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
9040 """Create a single block device on a given node.
9042 This will not recurse over children of the device, so they must be
9045 @param lu: the lu on whose behalf we execute
9046 @param node: the node on which to create the device
9047 @type instance: L{objects.Instance}
9048 @param instance: the instance which owns the device
9049 @type device: L{objects.Disk}
9050 @param device: the device to create
9051 @param info: the extra 'metadata' we should attach to the device
9052 (this will be represented as a LVM tag)
9053 @type force_open: boolean
9054 @param force_open: this parameter will be passed to the
9055 L{backend.BlockdevCreate} function where it specifies
9056 whether we run on primary or not, and it affects both
9057 the child assembly and the device's own Open() execution
9060 lu.cfg.SetDiskID(device, node)
9061 result = lu.rpc.call_blockdev_create(node, device, device.size,
9062 instance.name, force_open, info)
9063 result.Raise("Can't create block device %s on"
9064 " node %s for instance %s" % (device, node, instance.name))
9065 if device.physical_id is None:
9066 device.physical_id = result.payload
9069 def _GenerateUniqueNames(lu, exts):
9070 """Generate a suitable LV name.
9072 This will generate a logical volume name for the given instance.
9077 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9078 results.append("%s%s" % (new_id, val))
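# Illustrative sketch (UUIDs below are hypothetical): calling
# _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"]) returns one freshly
# generated unique ID per extension, e.g.
#
#   ["d2a49c48-0f3e-4c22-9a5b-59ff34f808b5.disk0_data",
#    "7a1f2c3d-8e9b-4a5c-b6d7-0123456789ab.disk0_meta"]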
9082 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9083 iv_name, p_minor, s_minor):
9084 """Generate a drbd8 device complete with its children.
9087 assert len(vgnames) == len(names) == 2
9088 port = lu.cfg.AllocatePort()
9089 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9091 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9092 logical_id=(vgnames[0], names[0]),
9094 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9095 size=constants.DRBD_META_SIZE,
9096 logical_id=(vgnames[1], names[1]),
9098 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9099 logical_id=(primary, secondary, port,
9102 children=[dev_data, dev_meta],
9103 iv_name=iv_name, params={})
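# A rough sketch of the device tree built above (port and minors are
# hypothetical): the resulting DRBD8 disk wraps two LVs, one for data and one
# for metadata:
#
#   Disk(dev_type=LD_DRBD8, size=size,
#        logical_id=(primary, secondary, 11000, p_minor, s_minor, secret),
#        children=[Disk(dev_type=LD_LV, size=size,
#                       logical_id=(vgnames[0], names[0])),
#                  Disk(dev_type=LD_LV, size=constants.DRBD_META_SIZE,
#                       logical_id=(vgnames[1], names[1]))],
#        iv_name=iv_name)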
9107 _DISK_TEMPLATE_NAME_PREFIX = {
9108 constants.DT_PLAIN: "",
9109 constants.DT_RBD: ".rbd",
9113 _DISK_TEMPLATE_DEVICE_TYPE = {
9114 constants.DT_PLAIN: constants.LD_LV,
9115 constants.DT_FILE: constants.LD_FILE,
9116 constants.DT_SHARED_FILE: constants.LD_FILE,
9117 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9118 constants.DT_RBD: constants.LD_RBD,
9122 def _GenerateDiskTemplate(
9123 lu, template_name, instance_name, primary_node, secondary_nodes,
9124 disk_info, file_storage_dir, file_driver, base_index,
9125 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9126 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9127 """Generate the entire disk layout for a given template type.
9130 #TODO: compute space requirements
9132 vgname = lu.cfg.GetVGName()
9133 disk_count = len(disk_info)
9136 if template_name == constants.DT_DISKLESS:
9138 elif template_name == constants.DT_DRBD8:
9139 if len(secondary_nodes) != 1:
9140 raise errors.ProgrammerError("Wrong template configuration")
9141 remote_node = secondary_nodes[0]
9142 minors = lu.cfg.AllocateDRBDMinor(
9143 [primary_node, remote_node] * len(disk_info), instance_name)
9145 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9147 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9150 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9151 for i in range(disk_count)]):
9152 names.append(lv_prefix + "_data")
9153 names.append(lv_prefix + "_meta")
9154 for idx, disk in enumerate(disk_info):
9155 disk_index = idx + base_index
9156 data_vg = disk.get(constants.IDISK_VG, vgname)
9157 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9158 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9159 disk[constants.IDISK_SIZE],
9161 names[idx * 2:idx * 2 + 2],
9162 "disk/%d" % disk_index,
9163 minors[idx * 2], minors[idx * 2 + 1])
9164 disk_dev.mode = disk[constants.IDISK_MODE]
9165 disks.append(disk_dev)
9168 raise errors.ProgrammerError("Wrong template configuration")
9170 if template_name == constants.DT_FILE:
9172 elif template_name == constants.DT_SHARED_FILE:
9173 _req_shr_file_storage()
9175 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9176 if name_prefix is None:
9179 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9180 (name_prefix, base_index + i)
9181 for i in range(disk_count)])
9183 if template_name == constants.DT_PLAIN:
9185 def logical_id_fn(idx, _, disk):
9186 vg = disk.get(constants.IDISK_VG, vgname)
9187 return (vg, names[idx])
9189 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9191 lambda _, disk_index, disk: (file_driver,
9192 "%s/disk%d" % (file_storage_dir,
9194 elif template_name == constants.DT_BLOCK:
9196 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9197 disk[constants.IDISK_ADOPT])
9198 elif template_name == constants.DT_RBD:
9199 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9201 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9203 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9205 for idx, disk in enumerate(disk_info):
9206 disk_index = idx + base_index
9207 size = disk[constants.IDISK_SIZE]
9208 feedback_fn("* disk %s, size %s" %
9209 (disk_index, utils.FormatUnit(size, "h")))
9210 disks.append(objects.Disk(dev_type=dev_type, size=size,
9211 logical_id=logical_id_fn(idx, disk_index, disk),
9212 iv_name="disk/%d" % disk_index,
9213 mode=disk[constants.IDISK_MODE],
9219 def _GetInstanceInfoText(instance):
9220 """Compute the text that should be added to the disk's metadata.
9223 return "originstname+%s" % instance.name
9226 def _CalcEta(time_taken, written, total_size):
9227 """Calculates the ETA based on size written and total size.
9229 @param time_taken: The time taken so far
9230 @param written: amount written so far
9231 @param total_size: The total size of data to be written
9232 @return: The remaining time in seconds
9235 avg_time = time_taken / float(written)
9236 return (total_size - written) * avg_time
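# A minimal usage sketch for _CalcEta (values are made up for illustration):
# with 100 MiB written in 30 seconds out of a 400 MiB total, the average time
# per unit written is 0.3 s, so the estimated remaining time is
# (400 - 100) * 0.3:
#
#   >>> _CalcEta(30.0, 100, 400)
#   90.0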
9239 def _WipeDisks(lu, instance, disks=None):
9240 """Wipes instance disks.
9242 @type lu: L{LogicalUnit}
9243 @param lu: the logical unit on whose behalf we execute
9244 @type instance: L{objects.Instance}
9245 @param instance: the instance whose disks we should create
9246 @return: the success of the wipe
9249 node = instance.primary_node
9252 disks = [(idx, disk, 0)
9253 for (idx, disk) in enumerate(instance.disks)]
9255 for (_, device, _) in disks:
9256 lu.cfg.SetDiskID(device, node)
9258 logging.info("Pausing synchronization of disks of instance '%s'",
9260 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9261 (map(compat.snd, disks),
9264 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9266 for idx, success in enumerate(result.payload):
9268 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9269 " failed", idx, instance.name)
9272 for (idx, device, offset) in disks:
9273 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9274 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9276 int(min(constants.MAX_WIPE_CHUNK,
9277 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9281 start_time = time.time()
9286 info_text = (" (from %s to %s)" %
9287 (utils.FormatUnit(offset, "h"),
9288 utils.FormatUnit(size, "h")))
9290 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9292 logging.info("Wiping disk %d for instance %s on node %s using"
9293 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9295 while offset < size:
9296 wipe_size = min(wipe_chunk_size, size - offset)
9298 logging.debug("Wiping disk %d, offset %s, chunk %s",
9299 idx, offset, wipe_size)
9301 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9303 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9304 (idx, offset, wipe_size))
9308 if now - last_output >= 60:
9309 eta = _CalcEta(now - start_time, offset, size)
9310 lu.LogInfo(" - done: %.1f%% ETA: %s",
9311 offset / float(size) * 100, utils.FormatSeconds(eta))
9314 logging.info("Resuming synchronization of disks for instance '%s'",
9317 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9318 (map(compat.snd, disks),
9323 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9324 node, result.fail_msg)
9326 for idx, success in enumerate(result.payload):
9328 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9329 " failed", idx, instance.name)
9332 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9333 """Create all disks for an instance.
9335 This abstracts away some work from AddInstance.
9337 @type lu: L{LogicalUnit}
9338 @param lu: the logical unit on whose behalf we execute
9339 @type instance: L{objects.Instance}
9340 @param instance: the instance whose disks we should create
9342 @param to_skip: list of indices to skip
9343 @type target_node: string
9344 @param target_node: if passed, overrides the target node for creation
9346 @return: the success of the creation
9349 info = _GetInstanceInfoText(instance)
9350 if target_node is None:
9351 pnode = instance.primary_node
9352 all_nodes = instance.all_nodes
9357 if instance.disk_template in constants.DTS_FILEBASED:
9358 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9359 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9361 result.Raise("Failed to create directory '%s' on"
9362 " node %s" % (file_storage_dir, pnode))
9364 # Note: this needs to be kept in sync with adding of disks in
9365 # LUInstanceSetParams
9366 for idx, device in enumerate(instance.disks):
9367 if to_skip and idx in to_skip:
9369 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9371 for node in all_nodes:
9372 f_create = node == pnode
9373 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9376 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9377 """Remove all disks for an instance.
9379 This abstracts away some work from `AddInstance()` and
9380 `RemoveInstance()`. Note that in case some of the devices couldn't
9381 be removed, the removal will continue with the other ones (compare
9382 with `_CreateDisks()`).
9384 @type lu: L{LogicalUnit}
9385 @param lu: the logical unit on whose behalf we execute
9386 @type instance: L{objects.Instance}
9387 @param instance: the instance whose disks we should remove
9388 @type target_node: string
9389 @param target_node: used to override the node on which to remove the disks
9391 @return: the success of the removal
9394 logging.info("Removing block devices for instance %s", instance.name)
9397 ports_to_release = set()
9398 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9399 for (idx, device) in enumerate(anno_disks):
9401 edata = [(target_node, device)]
9403 edata = device.ComputeNodeTree(instance.primary_node)
9404 for node, disk in edata:
9405 lu.cfg.SetDiskID(disk, node)
9406 result = lu.rpc.call_blockdev_remove(node, disk)
9408 lu.LogWarning("Could not remove disk %s on node %s,"
9409 " continuing anyway: %s", idx, node, result.fail_msg)
9410 if not (result.offline and node != instance.primary_node):
9413 # if this is a DRBD disk, return its port to the pool
9414 if device.dev_type in constants.LDS_DRBD:
9415 ports_to_release.add(device.logical_id[2])
9417 if all_result or ignore_failures:
9418 for port in ports_to_release:
9419 lu.cfg.AddTcpUdpPort(port)
9421 if instance.disk_template in constants.DTS_FILEBASED:
9422 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9426 tgt = instance.primary_node
9427 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9429 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9430 file_storage_dir, instance.primary_node, result.fail_msg)
9436 def _ComputeDiskSizePerVG(disk_template, disks):
9437 """Compute disk size requirements in the volume group
9440 def _compute(disks, payload):
9441 """Universal algorithm.
9446 vgs[disk[constants.IDISK_VG]] = \
9447 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9451 # Required free disk space per volume group, as a function of the disk template
9453 constants.DT_DISKLESS: {},
9454 constants.DT_PLAIN: _compute(disks, 0),
9455 # 128 MB are added for drbd metadata for each disk
9456 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9457 constants.DT_FILE: {},
9458 constants.DT_SHARED_FILE: {},
9461 if disk_template not in req_size_dict:
9462 raise errors.ProgrammerError("Disk template '%s' size requirement"
9463 " is unknown" % disk_template)
9465 return req_size_dict[disk_template]
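  # Illustrative example (values assumed, not from the original source): for
  # two 1024 MiB disks in volume group "xenvg" and the DRBD template,
  # _compute() above yields {"xenvg": (1024 + DRBD_META_SIZE) +
  # (1024 + DRBD_META_SIZE)}, i.e. {"xenvg": 2304} if DRBD_META_SIZE is
  # 128 MiB.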
9468 def _FilterVmNodes(lu, nodenames):
9469 """Filters out non-vm_capable nodes from a list.
9471 @type lu: L{LogicalUnit}
9472 @param lu: the logical unit for which we check
9473 @type nodenames: list
9474 @param nodenames: the list of nodes on which we should check
9476 @return: the list of vm-capable nodes
9479 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
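  # Note: despite its name, vm_nodes holds the *non*-vm_capable nodes; the
  # list comprehension below keeps only the names absent from it, i.e. the
  # vm-capable ones.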
9480 return [name for name in nodenames if name not in vm_nodes]
9483 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9484 """Hypervisor parameter validation.
9486 This function abstracts the hypervisor parameter validation to be
9487 used in both instance create and instance modify.
9489 @type lu: L{LogicalUnit}
9490 @param lu: the logical unit for which we check
9491 @type nodenames: list
9492 @param nodenames: the list of nodes on which we should check
9493 @type hvname: string
9494 @param hvname: the name of the hypervisor we should use
9495 @type hvparams: dict
9496 @param hvparams: the parameters which we need to check
9497 @raise errors.OpPrereqError: if the parameters are not valid
9500 nodenames = _FilterVmNodes(lu, nodenames)
9502 cluster = lu.cfg.GetClusterInfo()
9503 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9505 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9506 for node in nodenames:
9510 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9513 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9514 """OS parameters validation.
9516 @type lu: L{LogicalUnit}
9517 @param lu: the logical unit for which we check
9518 @type required: boolean
9519 @param required: whether the validation should fail if the OS is not
9521 @type nodenames: list
9522 @param nodenames: the list of nodes on which we should check
9523 @type osname: string
9524 @param osname: the name of the OS we should use
9525 @type osparams: dict
9526 @param osparams: the parameters which we need to check
9527 @raise errors.OpPrereqError: if the parameters are not valid
9530 nodenames = _FilterVmNodes(lu, nodenames)
9531 result = lu.rpc.call_os_validate(nodenames, required, osname,
9532 [constants.OS_VALIDATE_PARAMETERS],
9534 for node, nres in result.items():
9535 # we don't check for offline cases since this should be run only
9536 # against the master node and/or an instance's nodes
9537 nres.Raise("OS Parameters validation failed on node %s" % node)
9538 if not nres.payload:
9539 lu.LogInfo("OS %s not found on node %s, validation skipped",
9543 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9544 """Wrapper around IAReqInstanceAlloc.
9546 @param op: The instance opcode
9547 @param disks: The computed disks
9548 @param nics: The computed nics
9549 @param beparams: The fully filled beparams
9550 @param node_whitelist: List of nodes which should appear as online to the
9551 allocator (unless the node is already marked offline)
9553 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9556 spindle_use = beparams[constants.BE_SPINDLE_USE]
9557 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9558 disk_template=op.disk_template,
9561 vcpus=beparams[constants.BE_VCPUS],
9562 memory=beparams[constants.BE_MAXMEM],
9563 spindle_use=spindle_use,
9565 nics=[n.ToDict() for n in nics],
9566 hypervisor=op.hypervisor,
9567 node_whitelist=node_whitelist)
9570 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9571 """Computes the nics.
9573 @param op: The instance opcode
9574 @param cluster: Cluster configuration object
9575 @param default_ip: The default ip to assign
9576 @param cfg: An instance of the configuration object
9577 @param ec_id: Execution context ID
9579 @returns: The built-up NICs
9584 nic_mode_req = nic.get(constants.INIC_MODE, None)
9585 nic_mode = nic_mode_req
9586 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9587 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9589 net = nic.get(constants.INIC_NETWORK, None)
9590 link = nic.get(constants.NIC_LINK, None)
9591 ip = nic.get(constants.INIC_IP, None)
9593 if net is None or net.lower() == constants.VALUE_NONE:
9596 if nic_mode_req is not None or link is not None:
9597 raise errors.OpPrereqError("If network is given, no mode or link"
9598 " is allowed to be passed",
9601 # ip validity checks
9602 if ip is None or ip.lower() == constants.VALUE_NONE:
9604 elif ip.lower() == constants.VALUE_AUTO:
9605 if not op.name_check:
9606 raise errors.OpPrereqError("IP address set to auto but name checks"
9607 " have been skipped",
9611 # We defer pool operations until later, so that the iallocator has
9612 # filled in the instance's node(s)
9613 if ip.lower() == constants.NIC_IP_POOL:
9615 raise errors.OpPrereqError("if ip=pool, parameter network"
9616 " must be passed too",
9619 elif not netutils.IPAddress.IsValid(ip):
9620 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9625 # TODO: check the ip address for uniqueness
9626 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9627 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9630 # MAC address verification
9631 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9632 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9633 mac = utils.NormalizeAndValidateMac(mac)
9636 # TODO: We need to factor this out
9637 cfg.ReserveMAC(mac, ec_id)
9638 except errors.ReservationError:
9639 raise errors.OpPrereqError("MAC address %s already in use"
9640 " in cluster" % mac,
9641 errors.ECODE_NOTUNIQUE)
9643 # Build nic parameters
9646 nicparams[constants.NIC_MODE] = nic_mode
9648 nicparams[constants.NIC_LINK] = link
9650 check_params = cluster.SimpleFillNIC(nicparams)
9651 objects.NIC.CheckParameterSyntax(check_params)
9652 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9653 network=net, nicparams=nicparams))
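    # Illustrative sketch (assumed, since not all branches are shown here): a
    # NIC spec of {constants.INIC_MAC: constants.VALUE_AUTO} with no network
    # and no explicit mode/link falls back to the cluster's default nic mode,
    # keeps mac == "auto" (resolved later in CheckPrereq), and ends up roughly
    # as objects.NIC(mac="auto", ip=None, network=None, nicparams={}); the
    # cluster defaults are only merged in for the SimpleFillNIC syntax check.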
9658 def _ComputeDisks(op, default_vg):
9659 """Computes the instance disks.
9661 @param op: The instance opcode
9662 @param default_vg: The default volume group to assume
9664 @return: The computed disks
9668 for disk in op.disks:
9669 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9670 if mode not in constants.DISK_ACCESS_SET:
9671 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9672 mode, errors.ECODE_INVAL)
9673 size = disk.get(constants.IDISK_SIZE, None)
9675 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9678 except (TypeError, ValueError):
9679 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9682 data_vg = disk.get(constants.IDISK_VG, default_vg)
9684 constants.IDISK_SIZE: size,
9685 constants.IDISK_MODE: mode,
9686 constants.IDISK_VG: data_vg,
9688 if constants.IDISK_METAVG in disk:
9689 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9690 if constants.IDISK_ADOPT in disk:
9691 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9692 disks.append(new_disk)
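    # Illustrative example (assumed input): op.disks = [{"size": 10240}] with
    # default_vg "xenvg" would produce
    #   [{IDISK_SIZE: 10240, IDISK_MODE: constants.DISK_RDWR, IDISK_VG: "xenvg"}]
    # since the mode defaults to read-write and the VG to the cluster default.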
9697 def _ComputeFullBeParams(op, cluster):
9698 """Computes the full beparams.
9700 @param op: The instance opcode
9701 @param cluster: The cluster config object
9703 @return: The fully filled beparams
9706 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9707 for param, value in op.beparams.iteritems():
9708 if value == constants.VALUE_AUTO:
9709 op.beparams[param] = default_beparams[param]
9710 objects.UpgradeBeParams(op.beparams)
9711 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
9712 return cluster.SimpleFillBE(op.beparams)
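  # The flow above, summarised: "auto" values are replaced by the cluster
  # defaults, legacy "memory" settings are upgraded to maxmem/minmem by
  # objects.UpgradeBeParams, the parameter types are enforced, and finally
  # cluster.SimpleFillBE merges the result with the cluster-wide defaults.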
9715 class LUInstanceCreate(LogicalUnit):
9716 """Create an instance.
9719 HPATH = "instance-add"
9720 HTYPE = constants.HTYPE_INSTANCE
9723 def CheckArguments(self):
9727 # do not require name_check to ease forward/backward compatibility
9729 if self.op.no_install and self.op.start:
9730 self.LogInfo("No-installation mode selected, disabling startup")
9731 self.op.start = False
9732 # validate/normalize the instance name
9733 self.op.instance_name = \
9734 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9736 if self.op.ip_check and not self.op.name_check:
9737 # TODO: make the ip check more flexible and not depend on the name check
9738 raise errors.OpPrereqError("Cannot do IP address check without a name"
9739 " check", errors.ECODE_INVAL)
9741 # check nics' parameter names
9742 for nic in self.op.nics:
9743 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9745 # check disks. parameter names and consistent adopt/no-adopt strategy
9746 has_adopt = has_no_adopt = False
9747 for disk in self.op.disks:
9748 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9749 if constants.IDISK_ADOPT in disk:
9753 if has_adopt and has_no_adopt:
9754 raise errors.OpPrereqError("Either all disks are adopted or none is",
9757 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9758 raise errors.OpPrereqError("Disk adoption is not supported for the"
9759 " '%s' disk template" %
9760 self.op.disk_template,
9762 if self.op.iallocator is not None:
9763 raise errors.OpPrereqError("Disk adoption not allowed with an"
9764 " iallocator script", errors.ECODE_INVAL)
9765 if self.op.mode == constants.INSTANCE_IMPORT:
9766 raise errors.OpPrereqError("Disk adoption not allowed for"
9767 " instance import", errors.ECODE_INVAL)
9769 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9770 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9771 " but no 'adopt' parameter given" %
9772 self.op.disk_template,
9775 self.adopt_disks = has_adopt
9777 # instance name verification
9778 if self.op.name_check:
9779 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9780 self.op.instance_name = self.hostname1.name
9781 # used in CheckPrereq for ip ping check
9782 self.check_ip = self.hostname1.ip
9784 self.check_ip = None
9786 # file storage checks
9787 if (self.op.file_driver and
9788 not self.op.file_driver in constants.FILE_DRIVER):
9789 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9790 self.op.file_driver, errors.ECODE_INVAL)
9792 if self.op.disk_template == constants.DT_FILE:
9793 opcodes.RequireFileStorage()
9794 elif self.op.disk_template == constants.DT_SHARED_FILE:
9795 opcodes.RequireSharedFileStorage()
9797 ### Node/iallocator related checks
9798 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9800 if self.op.pnode is not None:
9801 if self.op.disk_template in constants.DTS_INT_MIRROR:
9802 if self.op.snode is None:
9803 raise errors.OpPrereqError("The networked disk templates need"
9804 " a mirror node", errors.ECODE_INVAL)
9806 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9808 self.op.snode = None
9810 self._cds = _GetClusterDomainSecret()
9812 if self.op.mode == constants.INSTANCE_IMPORT:
9813 # On import force_variant must be True, because if we forced it at
9814 # initial install, our only chance when importing it back is that it
9816 self.op.force_variant = True
9818 if self.op.no_install:
9819 self.LogInfo("No-installation mode has no effect during import")
9821 elif self.op.mode == constants.INSTANCE_CREATE:
9822 if self.op.os_type is None:
9823 raise errors.OpPrereqError("No guest OS specified",
9825 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9826 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9827 " installation" % self.op.os_type,
9829 if self.op.disk_template is None:
9830 raise errors.OpPrereqError("No disk template specified",
9833 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9834 # Check handshake to ensure both clusters have the same domain secret
9835 src_handshake = self.op.source_handshake
9836 if not src_handshake:
9837 raise errors.OpPrereqError("Missing source handshake",
9840 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9843 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9846 # Load and check source CA
9847 self.source_x509_ca_pem = self.op.source_x509_ca
9848 if not self.source_x509_ca_pem:
9849 raise errors.OpPrereqError("Missing source X509 CA",
9853 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9855 except OpenSSL.crypto.Error, err:
9856 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9857 (err, ), errors.ECODE_INVAL)
9859 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9860 if errcode is not None:
9861 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9864 self.source_x509_ca = cert
9866 src_instance_name = self.op.source_instance_name
9867 if not src_instance_name:
9868 raise errors.OpPrereqError("Missing source instance name",
9871 self.source_instance_name = \
9872 netutils.GetHostname(name=src_instance_name).name
9875 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9876 self.op.mode, errors.ECODE_INVAL)
9878 def ExpandNames(self):
9879 """ExpandNames for CreateInstance.
9881 Figure out the right locks for instance creation.
9884 self.needed_locks = {}
9886 instance_name = self.op.instance_name
9887 # this is just a preventive check, but someone might still add this
9888 # instance in the meantime, and creation will fail at lock-add time
9889 if instance_name in self.cfg.GetInstanceList():
9890 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9891 instance_name, errors.ECODE_EXISTS)
9893 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9895 if self.op.iallocator:
9896 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9897 # specifying a group on instance creation and then selecting nodes from
9899 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9900 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9902 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9903 nodelist = [self.op.pnode]
9904 if self.op.snode is not None:
9905 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9906 nodelist.append(self.op.snode)
9907 self.needed_locks[locking.LEVEL_NODE] = nodelist
9909 # in case of import lock the source node too
9910 if self.op.mode == constants.INSTANCE_IMPORT:
9911 src_node = self.op.src_node
9912 src_path = self.op.src_path
9914 if src_path is None:
9915 self.op.src_path = src_path = self.op.instance_name
9917 if src_node is None:
9918 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9919 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
9920 self.op.src_node = None
9921 if os.path.isabs(src_path):
9922 raise errors.OpPrereqError("Importing an instance from a path"
9923 " requires a source node option",
9926 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9927 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9928 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9929 if not os.path.isabs(src_path):
9930 self.op.src_path = src_path = \
9931 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
9933 self.needed_locks[locking.LEVEL_NODE_RES] = \
9934 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9936 def _RunAllocator(self):
9937 """Run the allocator based on input opcode.
9940 #TODO Export network to iallocator so that it chooses a pnode
9941 # in a nodegroup that has the desired network connected to it
9942 req = _CreateInstanceAllocRequest(self.op, self.disks,
9943 self.nics, self.be_full,
9945 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
9947 ial.Run(self.op.iallocator)
9950 raise errors.OpPrereqError("Can't compute nodes using"
9951 " iallocator '%s': %s" %
9952 (self.op.iallocator, ial.info),
9954 self.op.pnode = ial.result[0]
9955 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9956 self.op.instance_name, self.op.iallocator,
9957 utils.CommaJoin(ial.result))
9959 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
9961 if req.RequiredNodes() == 2:
9962 self.op.snode = ial.result[1]
9964 def BuildHooksEnv(self):
9967 This runs on master, primary and secondary nodes of the instance.
9971 "ADD_MODE": self.op.mode,
9973 if self.op.mode == constants.INSTANCE_IMPORT:
9974 env["SRC_NODE"] = self.op.src_node
9975 env["SRC_PATH"] = self.op.src_path
9976 env["SRC_IMAGES"] = self.src_images
9978 env.update(_BuildInstanceHookEnv(
9979 name=self.op.instance_name,
9980 primary_node=self.op.pnode,
9981 secondary_nodes=self.secondaries,
9982 status=self.op.start,
9983 os_type=self.op.os_type,
9984 minmem=self.be_full[constants.BE_MINMEM],
9985 maxmem=self.be_full[constants.BE_MAXMEM],
9986 vcpus=self.be_full[constants.BE_VCPUS],
9987 nics=_NICListToTuple(self, self.nics),
9988 disk_template=self.op.disk_template,
9989 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9990 for d in self.disks],
9993 hypervisor_name=self.op.hypervisor,
9999 def BuildHooksNodes(self):
10000 """Build hooks nodes.
10003 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10006 def _ReadExportInfo(self):
10007 """Reads the export information from disk.
10009 It will override the opcode source node and path with the actual
10010 information, if these two were not specified before.
10012 @return: the export information
10015 assert self.op.mode == constants.INSTANCE_IMPORT
10017 src_node = self.op.src_node
10018 src_path = self.op.src_path
10020 if src_node is None:
10021 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10022 exp_list = self.rpc.call_export_list(locked_nodes)
10024 for node in exp_list:
10025 if exp_list[node].fail_msg:
10027 if src_path in exp_list[node].payload:
10029 self.op.src_node = src_node = node
10030 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10034 raise errors.OpPrereqError("No export found for relative path %s" %
10035 src_path, errors.ECODE_INVAL)
10037 _CheckNodeOnline(self, src_node)
10038 result = self.rpc.call_export_info(src_node, src_path)
10039 result.Raise("No export or invalid export found in dir %s" % src_path)
10041 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10042 if not export_info.has_section(constants.INISECT_EXP):
10043 raise errors.ProgrammerError("Corrupted export config",
10044 errors.ECODE_ENVIRON)
10046 ei_version = export_info.get(constants.INISECT_EXP, "version")
10047 if (int(ei_version) != constants.EXPORT_VERSION):
10048 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10049 (ei_version, constants.EXPORT_VERSION),
10050 errors.ECODE_ENVIRON)
10053 def _ReadExportParams(self, einfo):
10054 """Use export parameters as defaults.
10056 In case the opcode doesn't specify (i.e. override) some instance
10057 parameters, try to use them from the export information, if it
10058 declares them.
10061 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10063 if self.op.disk_template is None:
10064 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10065 self.op.disk_template = einfo.get(constants.INISECT_INS,
10067 if self.op.disk_template not in constants.DISK_TEMPLATES:
10068 raise errors.OpPrereqError("Disk template specified in configuration"
10069 " file is not one of the allowed values:"
10071 " ".join(constants.DISK_TEMPLATES),
10072 errors.ECODE_INVAL)
10074 raise errors.OpPrereqError("No disk template specified and the export"
10075 " is missing the disk_template information",
10076 errors.ECODE_INVAL)
10078 if not self.op.disks:
10080 # TODO: import the disk iv_name too
10081 for idx in range(constants.MAX_DISKS):
10082 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10083 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10084 disks.append({constants.IDISK_SIZE: disk_sz})
10085 self.op.disks = disks
10086 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10087 raise errors.OpPrereqError("No disk info specified and the export"
10088 " is missing the disk information",
10089 errors.ECODE_INVAL)
10091 if not self.op.nics:
10093 for idx in range(constants.MAX_NICS):
10094 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10096 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10097 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10102 self.op.nics = nics
10104 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10105 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10107 if (self.op.hypervisor is None and
10108 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10109 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10111 if einfo.has_section(constants.INISECT_HYP):
10112 # use the export parameters but do not override the ones
10113 # specified by the user
10114 for name, value in einfo.items(constants.INISECT_HYP):
10115 if name not in self.op.hvparams:
10116 self.op.hvparams[name] = value
10118 if einfo.has_section(constants.INISECT_BEP):
10119 # use the parameters, without overriding
10120 for name, value in einfo.items(constants.INISECT_BEP):
10121 if name not in self.op.beparams:
10122 self.op.beparams[name] = value
10123 # Compatibility for the old "memory" be param
10124 if name == constants.BE_MEMORY:
10125 if constants.BE_MAXMEM not in self.op.beparams:
10126 self.op.beparams[constants.BE_MAXMEM] = value
10127 if constants.BE_MINMEM not in self.op.beparams:
10128 self.op.beparams[constants.BE_MINMEM] = value
10130 # try to read the parameters old style, from the main section
10131 for name in constants.BES_PARAMETERS:
10132 if (name not in self.op.beparams and
10133 einfo.has_option(constants.INISECT_INS, name)):
10134 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10136 if einfo.has_section(constants.INISECT_OSP):
10137 # use the parameters, without overriding
10138 for name, value in einfo.items(constants.INISECT_OSP):
10139 if name not in self.op.osparams:
10140 self.op.osparams[name] = value
10142 def _RevertToDefaults(self, cluster):
10143 """Revert the instance parameters to the default values.
10147 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10148 for name in self.op.hvparams.keys():
10149 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10150 del self.op.hvparams[name]
10152 be_defs = cluster.SimpleFillBE({})
10153 for name in self.op.beparams.keys():
10154 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10155 del self.op.beparams[name]
10157 nic_defs = cluster.SimpleFillNIC({})
10158 for nic in self.op.nics:
10159 for name in constants.NICS_PARAMETERS:
10160 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10163 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10164 for name in self.op.osparams.keys():
10165 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10166 del self.op.osparams[name]
10168 def _CalculateFileStorageDir(self):
10169 """Calculate final instance file storage dir.
10172 # file storage dir calculation/check
10173 self.instance_file_storage_dir = None
10174 if self.op.disk_template in constants.DTS_FILEBASED:
10175 # build the full file storage dir path
10178 if self.op.disk_template == constants.DT_SHARED_FILE:
10179 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10181 get_fsd_fn = self.cfg.GetFileStorageDir
10183 cfg_storagedir = get_fsd_fn()
10184 if not cfg_storagedir:
10185 raise errors.OpPrereqError("Cluster file storage dir not defined",
10186 errors.ECODE_STATE)
10187 joinargs.append(cfg_storagedir)
10189 if self.op.file_storage_dir is not None:
10190 joinargs.append(self.op.file_storage_dir)
10192 joinargs.append(self.op.instance_name)
10194 # pylint: disable=W0142
10195 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
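      # Illustrative example (paths assumed, not taken from the original
      # source): with a cluster file storage dir of
      # "/srv/ganeti/file-storage", op.file_storage_dir "mydir" and instance
      # "inst1.example.com", the resulting path would be
      # "/srv/ganeti/file-storage/mydir/inst1.example.com".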
10197 def CheckPrereq(self): # pylint: disable=R0914
10198 """Check prerequisites.
10201 self._CalculateFileStorageDir()
10203 if self.op.mode == constants.INSTANCE_IMPORT:
10204 export_info = self._ReadExportInfo()
10205 self._ReadExportParams(export_info)
10206 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10208 self._old_instance_name = None
10210 if (not self.cfg.GetVGName() and
10211 self.op.disk_template not in constants.DTS_NOT_LVM):
10212 raise errors.OpPrereqError("Cluster does not support lvm-based"
10213 " instances", errors.ECODE_STATE)
10215 if (self.op.hypervisor is None or
10216 self.op.hypervisor == constants.VALUE_AUTO):
10217 self.op.hypervisor = self.cfg.GetHypervisorType()
10219 cluster = self.cfg.GetClusterInfo()
10220 enabled_hvs = cluster.enabled_hypervisors
10221 if self.op.hypervisor not in enabled_hvs:
10222 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10224 (self.op.hypervisor, ",".join(enabled_hvs)),
10225 errors.ECODE_STATE)
10227 # Check tag validity
10228 for tag in self.op.tags:
10229 objects.TaggableObject.ValidateTag(tag)
10231 # check hypervisor parameter syntax (locally)
10232 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10233 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10235 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10236 hv_type.CheckParameterSyntax(filled_hvp)
10237 self.hv_full = filled_hvp
10238 # check that we don't specify global parameters on an instance
10239 _CheckGlobalHvParams(self.op.hvparams)
10241 # fill and remember the beparams dict
10242 self.be_full = _ComputeFullBeParams(self.op, cluster)
10244 # build os parameters
10245 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10247 # now that hvp/bep are in final format, let's reset to defaults,
10249 if self.op.identify_defaults:
10250 self._RevertToDefaults(cluster)
10253 self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,
10254 self.proc.GetECId())
10256 # disk checks/pre-build
10257 default_vg = self.cfg.GetVGName()
10258 self.disks = _ComputeDisks(self.op, default_vg)
10260 if self.op.mode == constants.INSTANCE_IMPORT:
10262 for idx in range(len(self.disks)):
10263 option = "disk%d_dump" % idx
10264 if export_info.has_option(constants.INISECT_INS, option):
10265 # FIXME: are the old os-es, disk sizes, etc. useful?
10266 export_name = export_info.get(constants.INISECT_INS, option)
10267 image = utils.PathJoin(self.op.src_path, export_name)
10268 disk_images.append(image)
10270 disk_images.append(False)
10272 self.src_images = disk_images
10274 if self.op.instance_name == self._old_instance_name:
10275 for idx, nic in enumerate(self.nics):
10276 if nic.mac == constants.VALUE_AUTO:
10277 nic_mac_ini = "nic%d_mac" % idx
10278 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10280 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10282 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10283 if self.op.ip_check:
10284 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10285 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10286 (self.check_ip, self.op.instance_name),
10287 errors.ECODE_NOTUNIQUE)
10289 #### mac address generation
10290 # By generating here the mac address both the allocator and the hooks get
10291 # the real final mac address rather than the 'auto' or 'generate' value.
10292 # There is a race condition between the generation and the instance object
10293 # creation, which means that we know the mac is valid now, but we're not
10294 # sure it will be when we actually add the instance. If things go bad
10295 # adding the instance will abort because of a duplicate mac, and the
10296 # creation job will fail.
10297 for nic in self.nics:
10298 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10299 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10303 if self.op.iallocator is not None:
10304 self._RunAllocator()
10306 # Release all unneeded node locks
10307 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10308 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10309 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10310 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10312 assert (self.owned_locks(locking.LEVEL_NODE) ==
10313 self.owned_locks(locking.LEVEL_NODE_RES)), \
10314 "Node locks differ from node resource locks"
10316 #### node related checks
10318 # check primary node
10319 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10320 assert self.pnode is not None, \
10321 "Cannot retrieve locked node %s" % self.op.pnode
10323 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10324 pnode.name, errors.ECODE_STATE)
10326 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10327 pnode.name, errors.ECODE_STATE)
10328 if not pnode.vm_capable:
10329 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10330 " '%s'" % pnode.name, errors.ECODE_STATE)
10332 self.secondaries = []
10334 # Fill in any IPs from IP pools. This must happen here, because we need to
10335 # know the nic's primary node, as specified by the iallocator
10336 for idx, nic in enumerate(self.nics):
10338 if net is not None:
10339 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10340 if netparams is None:
10341 raise errors.OpPrereqError("No netparams found for network"
10342 " %s. Probably not connected to"
10343 " node %s's nodegroup" %
10344 (net, self.pnode.name),
10345 errors.ECODE_INVAL)
10346 self.LogInfo("NIC/%d inherits netparams %s" %
10347 (idx, netparams.values()))
10348 nic.nicparams = dict(netparams)
10349 if nic.ip is not None:
10350 if nic.ip.lower() == constants.NIC_IP_POOL:
10352 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10353 except errors.ReservationError:
10354 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10355 " from the address pool" % idx,
10356 errors.ECODE_STATE)
10357 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10360 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10361 except errors.ReservationError:
10362 raise errors.OpPrereqError("IP address %s already in use"
10363 " or does not belong to network %s" %
10365 errors.ECODE_NOTUNIQUE)
10367 # net is None, ip None or given
10368 if self.op.conflicts_check:
10369 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10371 # mirror node verification
10372 if self.op.disk_template in constants.DTS_INT_MIRROR:
10373 if self.op.snode == pnode.name:
10374 raise errors.OpPrereqError("The secondary node cannot be the"
10375 " primary node", errors.ECODE_INVAL)
10376 _CheckNodeOnline(self, self.op.snode)
10377 _CheckNodeNotDrained(self, self.op.snode)
10378 _CheckNodeVmCapable(self, self.op.snode)
10379 self.secondaries.append(self.op.snode)
10381 snode = self.cfg.GetNodeInfo(self.op.snode)
10382 if pnode.group != snode.group:
10383 self.LogWarning("The primary and secondary nodes are in two"
10384 " different node groups; the disk parameters"
10385 " from the first disk's node group will be"
10388 nodenames = [pnode.name] + self.secondaries
10390 # Verify instance specs
10391 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10393 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10394 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10395 constants.ISPEC_DISK_COUNT: len(self.disks),
10396 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10397 constants.ISPEC_NIC_COUNT: len(self.nics),
10398 constants.ISPEC_SPINDLE_USE: spindle_use,
10401 group_info = self.cfg.GetNodeGroup(pnode.group)
10402 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10403 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10404 if not self.op.ignore_ipolicy and res:
10405 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10406 (pnode.group, group_info.name, utils.CommaJoin(res)))
10407 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10409 if not self.adopt_disks:
10410 if self.op.disk_template == constants.DT_RBD:
10411 # _CheckRADOSFreeSpace() is just a placeholder.
10412 # Any function that checks prerequisites can be placed here.
10413 # Check if there is enough space on the RADOS cluster.
10414 _CheckRADOSFreeSpace()
10416 # Check lv size requirements, if not adopting
10417 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10418 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10420 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10421 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10422 disk[constants.IDISK_ADOPT])
10423 for disk in self.disks])
10424 if len(all_lvs) != len(self.disks):
10425 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10426 errors.ECODE_INVAL)
10427 for lv_name in all_lvs:
10429 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10430 # to ReserveLV uses the same syntax
10431 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10432 except errors.ReservationError:
10433 raise errors.OpPrereqError("LV named %s used by another instance" %
10434 lv_name, errors.ECODE_NOTUNIQUE)
10436 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10437 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10439 node_lvs = self.rpc.call_lv_list([pnode.name],
10440 vg_names.payload.keys())[pnode.name]
10441 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10442 node_lvs = node_lvs.payload
10444 delta = all_lvs.difference(node_lvs.keys())
10446 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10447 utils.CommaJoin(delta),
10448 errors.ECODE_INVAL)
10449 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10451 raise errors.OpPrereqError("Online logical volumes found, cannot"
10452 " adopt: %s" % utils.CommaJoin(online_lvs),
10453 errors.ECODE_STATE)
10454 # update the size of disk based on what is found
10455 for dsk in self.disks:
10456 dsk[constants.IDISK_SIZE] = \
10457 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10458 dsk[constants.IDISK_ADOPT])][0]))
10460 elif self.op.disk_template == constants.DT_BLOCK:
10461 # Normalize and de-duplicate device paths
10462 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10463 for disk in self.disks])
10464 if len(all_disks) != len(self.disks):
10465 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10466 errors.ECODE_INVAL)
10467 baddisks = [d for d in all_disks
10468 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10470 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10471 " cannot be adopted" %
10472 (utils.CommaJoin(baddisks),
10473 constants.ADOPTABLE_BLOCKDEV_ROOT),
10474 errors.ECODE_INVAL)
10476 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10477 list(all_disks))[pnode.name]
10478 node_disks.Raise("Cannot get block device information from node %s" %
10480 node_disks = node_disks.payload
10481 delta = all_disks.difference(node_disks.keys())
10483 raise errors.OpPrereqError("Missing block device(s): %s" %
10484 utils.CommaJoin(delta),
10485 errors.ECODE_INVAL)
10486 for dsk in self.disks:
10487 dsk[constants.IDISK_SIZE] = \
10488 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10490 # Verify instance specs
10491 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10493 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10494 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10495 constants.ISPEC_DISK_COUNT: len(self.disks),
10496 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10497 for disk in self.disks],
10498 constants.ISPEC_NIC_COUNT: len(self.nics),
10499 constants.ISPEC_SPINDLE_USE: spindle_use,
10502 group_info = self.cfg.GetNodeGroup(pnode.group)
10503 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10504 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10505 if not self.op.ignore_ipolicy and res:
10506 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10507 " policy: %s") % (pnode.group,
10508 utils.CommaJoin(res)),
10509 errors.ECODE_INVAL)
10511 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10513 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10514 # check OS parameters (remotely)
10515 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10517 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10519 # memory check on primary node
10520 #TODO(dynmem): use MINMEM for checking
10522 _CheckNodeFreeMemory(self, self.pnode.name,
10523 "creating instance %s" % self.op.instance_name,
10524 self.be_full[constants.BE_MAXMEM],
10525 self.op.hypervisor)
10527 self.dry_run_result = list(nodenames)
10529 def Exec(self, feedback_fn):
10530 """Create and add the instance to the cluster.
10533 instance = self.op.instance_name
10534 pnode_name = self.pnode.name
10536 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10537 self.owned_locks(locking.LEVEL_NODE)), \
10538 "Node locks differ from node resource locks"
10539 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10541 ht_kind = self.op.hypervisor
10542 if ht_kind in constants.HTS_REQ_PORT:
10543 network_port = self.cfg.AllocatePort()
10545 network_port = None
10547 # This is ugly, but we have a chicken-and-egg problem here.
10548 # We can only take the group disk parameters, as the instance
10549 # has no disks yet (we are generating them right here).
10550 node = self.cfg.GetNodeInfo(pnode_name)
10551 nodegroup = self.cfg.GetNodeGroup(node.group)
10552 disks = _GenerateDiskTemplate(self,
10553 self.op.disk_template,
10554 instance, pnode_name,
10557 self.instance_file_storage_dir,
10558 self.op.file_driver,
10561 self.cfg.GetGroupDiskParams(nodegroup))
10563 iobj = objects.Instance(name=instance, os=self.op.os_type,
10564 primary_node=pnode_name,
10565 nics=self.nics, disks=disks,
10566 disk_template=self.op.disk_template,
10567 admin_state=constants.ADMINST_DOWN,
10568 network_port=network_port,
10569 beparams=self.op.beparams,
10570 hvparams=self.op.hvparams,
10571 hypervisor=self.op.hypervisor,
10572 osparams=self.op.osparams,
10576 for tag in self.op.tags:
10579 if self.adopt_disks:
10580 if self.op.disk_template == constants.DT_PLAIN:
10581 # rename LVs to the newly-generated names; we need to construct
10582 # 'fake' LV disks with the old data, plus the new unique_id
10583 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10585 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10586 rename_to.append(t_dsk.logical_id)
10587 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10588 self.cfg.SetDiskID(t_dsk, pnode_name)
10589 result = self.rpc.call_blockdev_rename(pnode_name,
10590 zip(tmp_disks, rename_to))
10591 result.Raise("Failed to rename adopted LVs")
10593 feedback_fn("* creating instance disks...")
10595 _CreateDisks(self, iobj)
10596 except errors.OpExecError:
10597 self.LogWarning("Device creation failed, reverting...")
10599 _RemoveDisks(self, iobj)
10601 self.cfg.ReleaseDRBDMinors(instance)
10604 feedback_fn("adding instance %s to cluster config" % instance)
10606 self.cfg.AddInstance(iobj, self.proc.GetECId())
10608 # Declare that we don't want to remove the instance lock anymore, as we've
10609 # added the instance to the config
10610 del self.remove_locks[locking.LEVEL_INSTANCE]
10612 if self.op.mode == constants.INSTANCE_IMPORT:
10613 # Release unused nodes
10614 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10616 # Release all nodes
10617 _ReleaseLocks(self, locking.LEVEL_NODE)
10620 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10621 feedback_fn("* wiping instance disks...")
10623 _WipeDisks(self, iobj)
10624 except errors.OpExecError, err:
10625 logging.exception("Wiping disks failed")
10626 self.LogWarning("Wiping instance disks failed (%s)", err)
10630 # Something is already wrong with the disks, don't do anything else
10632 elif self.op.wait_for_sync:
10633 disk_abort = not _WaitForSync(self, iobj)
10634 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10635 # make sure the disks are not degraded (still sync-ing is ok)
10636 feedback_fn("* checking mirrors status")
10637 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10642 _RemoveDisks(self, iobj)
10643 self.cfg.RemoveInstance(iobj.name)
10644 # Make sure the instance lock gets removed
10645 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10646 raise errors.OpExecError("There are some degraded disks for"
10649 # Release all node resource locks
10650 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10652 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10653 # we need to set the disks ID to the primary node, since the
10654 # preceding code might or might not have done it, depending on
10655 # disk template and other options
10656 for disk in iobj.disks:
10657 self.cfg.SetDiskID(disk, pnode_name)
10658 if self.op.mode == constants.INSTANCE_CREATE:
10659 if not self.op.no_install:
10660 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10661 not self.op.wait_for_sync)
10663 feedback_fn("* pausing disk sync to install instance OS")
10664 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10667 for idx, success in enumerate(result.payload):
10669 logging.warn("pause-sync of instance %s for disk %d failed",
10672 feedback_fn("* running the instance OS create scripts...")
10673 # FIXME: pass debug option from opcode to backend
10675 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10676 self.op.debug_level)
10678 feedback_fn("* resuming disk sync")
10679 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10682 for idx, success in enumerate(result.payload):
10684 logging.warn("resume-sync of instance %s for disk %d failed",
10687 os_add_result.Raise("Could not add os for instance %s"
10688 " on node %s" % (instance, pnode_name))
10691 if self.op.mode == constants.INSTANCE_IMPORT:
10692 feedback_fn("* running the instance OS import scripts...")
10696 for idx, image in enumerate(self.src_images):
10700 # FIXME: pass debug option from opcode to backend
10701 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10702 constants.IEIO_FILE, (image, ),
10703 constants.IEIO_SCRIPT,
10704 (iobj.disks[idx], idx),
10706 transfers.append(dt)
10709 masterd.instance.TransferInstanceData(self, feedback_fn,
10710 self.op.src_node, pnode_name,
10711 self.pnode.secondary_ip,
10713 if not compat.all(import_result):
10714 self.LogWarning("Some disks for instance %s on node %s were not"
10715 " imported successfully" % (instance, pnode_name))
10717 rename_from = self._old_instance_name
10719 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10720 feedback_fn("* preparing remote import...")
10721 # The source cluster will stop the instance before attempting to make
10722 # a connection. In some cases stopping an instance can take a long
10723 # time, hence the shutdown timeout is added to the connection
10725 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10726 self.op.source_shutdown_timeout)
10727 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10729 assert iobj.primary_node == self.pnode.name
10731 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10732 self.source_x509_ca,
10733 self._cds, timeouts)
10734 if not compat.all(disk_results):
10735 # TODO: Should the instance still be started, even if some disks
10736 # failed to import (valid for local imports, too)?
10737 self.LogWarning("Some disks for instance %s on node %s were not"
10738 " imported successfully" % (instance, pnode_name))
10740 rename_from = self.source_instance_name
10743 # also checked in the prereq part
10744 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10747 # Run rename script on newly imported instance
10748 assert iobj.name == instance
10749 feedback_fn("Running rename script for %s" % instance)
10750 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10752 self.op.debug_level)
10753 if result.fail_msg:
10754 self.LogWarning("Failed to run rename script for %s on node"
10755 " %s: %s" % (instance, pnode_name, result.fail_msg))
10757 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10760 iobj.admin_state = constants.ADMINST_UP
10761 self.cfg.Update(iobj, feedback_fn)
10762 logging.info("Starting instance %s on node %s", instance, pnode_name)
10763 feedback_fn("* starting instance...")
10764 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10766 result.Raise("Could not start instance")
10768 return list(iobj.all_nodes)
10771 class LUInstanceMultiAlloc(NoHooksLU):
10772 """Allocates multiple instances at the same time.
10777 def CheckArguments(self):
10778 """Check arguments.
10782 for inst in self.op.instances:
10783 if inst.iallocator is not None:
10784 raise errors.OpPrereqError("an iallocator is not allowed to be set on"
10785 " instance objects", errors.ECODE_INVAL)
10786 nodes.append(bool(inst.pnode))
10787 if inst.disk_template in constants.DTS_INT_MIRROR:
10788 nodes.append(bool(inst.snode))
10790 has_nodes = compat.any(nodes)
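    # all(nodes) XOR any(nodes) is true exactly when *some but not all*
    # instances specify their nodes, which is the inconsistent case rejected
    # below.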
10791 if compat.all(nodes) ^ has_nodes:
10792 raise errors.OpPrereqError("There are instance objects providing"
10793 " pnode/snode while others do not",
10794 errors.ECODE_INVAL)
10796 if self.op.iallocator is None:
10797 default_iallocator = self.cfg.GetDefaultIAllocator()
10798 if default_iallocator and has_nodes:
10799 self.op.iallocator = default_iallocator
10801 raise errors.OpPrereqError("No iallocator or nodes on the instances"
10802 " given and no cluster-wide default"
10803 " iallocator found; please specify either"
10804 " an iallocator or nodes on the instances"
10805 " or set a cluster-wide default iallocator",
10806 errors.ECODE_INVAL)
10808 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
10810 raise errors.OpPrereqError("There are duplicate instance names: %s" %
10811 utils.CommaJoin(dups), errors.ECODE_INVAL)
10813 def ExpandNames(self):
10814 """Calculate the locks.
10817 self.share_locks = _ShareAll()
10818 self.needed_locks = {
10819 # iallocator will select nodes and even if no iallocator is used,
10820 # collisions with LUInstanceCreate should be avoided
10821 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10824 if self.op.iallocator:
10825 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10826 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
10829 for inst in self.op.instances:
10830 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
10831 nodeslist.append(inst.pnode)
10832 if inst.snode is not None:
10833 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
10834 nodeslist.append(inst.snode)
10836 self.needed_locks[locking.LEVEL_NODE] = nodeslist
10837 # Lock resources of instance's primary and secondary nodes (copy to
10838 # prevent accidental modification)
10839 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
10841 def CheckPrereq(self):
10842 """Check prerequisite.
10845 cluster = self.cfg.GetClusterInfo()
10846 default_vg = self.cfg.GetVGName()
10847 ec_id = self.proc.GetECId()
10849 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
10850 _ComputeNics(op, cluster, None,
10852 _ComputeFullBeParams(op, cluster),
10854 for op in self.op.instances]
10856 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
10857 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10859 ial.Run(self.op.iallocator)
10861 if not ial.success:
10862 raise errors.OpPrereqError("Can't compute nodes using"
10863 " iallocator '%s': %s" %
10864 (self.op.iallocator, ial.info),
10865 errors.ECODE_NORES)
10867 self.ia_result = ial.result
10869 if self.op.dry_run:
10870 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
10871 constants.JOB_IDS_KEY: [],
10874 def _ConstructPartialResult(self):
10875 """Constructs the partial result.
10878 (allocatable, failed) = self.ia_result
10880 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
10881 map(compat.fst, allocatable),
10882 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
10885 def Exec(self, feedback_fn):
10886 """Executes the opcode.
10889 op2inst = dict((op.instance_name, op) for op in self.op.instances)
10890 (allocatable, failed) = self.ia_result
10893 for (name, nodes) in allocatable:
10894 op = op2inst.pop(name)
10897 (op.pnode, op.snode) = nodes
10899 (op.pnode,) = nodes
10903 missing = set(op2inst.keys()) - set(failed)
10904 assert not missing, \
10905 "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)
10907 return ResultWithJobs(jobs, **self._ConstructPartialResult())
10910 def _CheckRADOSFreeSpace():
10911 """Compute disk size requirements inside the RADOS cluster.
10914 # For the RADOS cluster we assume there is always enough space.
10918 class LUInstanceConsole(NoHooksLU):
10919 """Connect to an instance's console.
10921 This is somewhat special in that it returns the command line that
10922 you need to run on the master node in order to connect to the
10928 def ExpandNames(self):
10929 self.share_locks = _ShareAll()
10930 self._ExpandAndLockInstance()
10932 def CheckPrereq(self):
10933 """Check prerequisites.
10935 This checks that the instance is in the cluster.
10938 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10939 assert self.instance is not None, \
10940 "Cannot retrieve locked instance %s" % self.op.instance_name
10941 _CheckNodeOnline(self, self.instance.primary_node)
10943 def Exec(self, feedback_fn):
10944 """Connect to the console of an instance
10947 instance = self.instance
10948 node = instance.primary_node
10950 node_insts = self.rpc.call_instance_list([node],
10951 [instance.hypervisor])[node]
10952 node_insts.Raise("Can't get node information from %s" % node)
10954 if instance.name not in node_insts.payload:
10955 if instance.admin_state == constants.ADMINST_UP:
10956 state = constants.INSTST_ERRORDOWN
10957 elif instance.admin_state == constants.ADMINST_DOWN:
10958 state = constants.INSTST_ADMINDOWN
10960 state = constants.INSTST_ADMINOFFLINE
10961 raise errors.OpExecError("Instance %s is not running (state %s)" %
10962 (instance.name, state))
10964 logging.debug("Connecting to console of %s on %s", instance.name, node)
10966 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10969 def _GetInstanceConsole(cluster, instance):
10970 """Returns console information for an instance.
10972 @type cluster: L{objects.Cluster}
10973 @type instance: L{objects.Instance}
10977 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10978 # beparams and hvparams are passed separately, to avoid editing the
10979 # instance and then saving the defaults in the instance itself.
10980 hvparams = cluster.FillHV(instance)
10981 beparams = cluster.FillBE(instance)
10982 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10984 assert console.instance == instance.name
10985 assert console.Validate()
10987 return console.ToDict()
10990 class LUInstanceReplaceDisks(LogicalUnit):
10991 """Replace the disks of an instance.
10994 HPATH = "mirrors-replace"
10995 HTYPE = constants.HTYPE_INSTANCE
10998 def CheckArguments(self):
10999 """Check arguments.
11002 remote_node = self.op.remote_node
11003 ialloc = self.op.iallocator
11004 if self.op.mode == constants.REPLACE_DISK_CHG:
11005 if remote_node is None and ialloc is None:
11006 raise errors.OpPrereqError("When changing the secondary either an"
11007 " iallocator script must be used or the"
11008 " new node given", errors.ECODE_INVAL)
11010 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11012 elif remote_node is not None or ialloc is not None:
11013 # Not replacing the secondary
11014 raise errors.OpPrereqError("The iallocator and new node options can"
11015 " only be used when changing the"
11016 " secondary node", errors.ECODE_INVAL)
11018 def ExpandNames(self):
11019 self._ExpandAndLockInstance()
11021 assert locking.LEVEL_NODE not in self.needed_locks
11022 assert locking.LEVEL_NODE_RES not in self.needed_locks
11023 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11025 assert self.op.iallocator is None or self.op.remote_node is None, \
11026 "Conflicting options"
11028 if self.op.remote_node is not None:
11029 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11031 # Warning: do not remove the locking of the new secondary here
11032 # unless DRBD8.AddChildren is changed to work in parallel;
11033 # currently it doesn't since parallel invocations of
11034 # FindUnusedMinor will conflict
11035 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11036 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11038 self.needed_locks[locking.LEVEL_NODE] = []
11039 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11041 if self.op.iallocator is not None:
11042 # iallocator will select a new node in the same group
11043 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11044 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11046 self.needed_locks[locking.LEVEL_NODE_RES] = []
11048 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11049 self.op.iallocator, self.op.remote_node,
11050 self.op.disks, self.op.early_release,
11051 self.op.ignore_ipolicy)
11053 self.tasklets = [self.replacer]
11055 def DeclareLocks(self, level):
11056 if level == locking.LEVEL_NODEGROUP:
11057 assert self.op.remote_node is None
11058 assert self.op.iallocator is not None
11059 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11061 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11062 # Lock all groups used by instance optimistically; this requires going
11063 # via the node before it's locked, requiring verification later on
11064 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11065 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11067 elif level == locking.LEVEL_NODE:
11068 if self.op.iallocator is not None:
11069 assert self.op.remote_node is None
11070 assert not self.needed_locks[locking.LEVEL_NODE]
11071 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11073 # Lock member nodes of all locked groups
11074 self.needed_locks[locking.LEVEL_NODE] = \
11076 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11077 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11079 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11081 self._LockInstancesNodes()
11083 elif level == locking.LEVEL_NODE_RES:
11085 self.needed_locks[locking.LEVEL_NODE_RES] = \
11086 self.needed_locks[locking.LEVEL_NODE]
11088 def BuildHooksEnv(self):
11089 """Build hooks env.
11091 This runs on the master, the primary and all the secondaries.
11094 instance = self.replacer.instance
11096 "MODE": self.op.mode,
11097 "NEW_SECONDARY": self.op.remote_node,
11098 "OLD_SECONDARY": instance.secondary_nodes[0],
11100 env.update(_BuildInstanceHookEnvByObject(self, instance))
11103 def BuildHooksNodes(self):
11104 """Build hooks nodes.
11107 instance = self.replacer.instance
11109 self.cfg.GetMasterNode(),
11110 instance.primary_node,
11112 if self.op.remote_node is not None:
11113 nl.append(self.op.remote_node)
11116 def CheckPrereq(self):
11117 """Check prerequisites.
11120 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11121 self.op.iallocator is None)
11123 # Verify if node group locks are still correct
11124 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11126 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11128 return LogicalUnit.CheckPrereq(self)
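# Illustrative sketch (not part of Ganeti): the optimistic locking pattern
# used by DeclareLocks/CheckPrereq above.  The node groups of the instance
# are read from the configuration *before* the group locks are held, so the
# set has to be re-read and compared once the locks are owned; a mismatch
# means the configuration changed in between and the caller must retry.
# The callables below stand in for the real lock manager and config.
def _ExampleOptimisticAcquire(read_groups_fn, acquire_fn):
  """Acquire group locks optimistically, then verify them (example only)."""
  wanted = frozenset(read_groups_fn())  # computed without holding the locks
  acquire_fn(wanted)                    # take the locks we guessed
  owned = frozenset(read_groups_fn())   # re-read while holding the locks
  if owned != wanted:
    raise errors.OpPrereqError("Node groups changed while acquiring locks,"
                               " retry the operation", errors.ECODE_STATE)
  return owned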
11131 class TLReplaceDisks(Tasklet):
11132 """Replaces disks for an instance.
11134 Note: Locking is not within the scope of this class.
11137 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11138 disks, early_release, ignore_ipolicy):
11139 """Initializes this class.
11142 Tasklet.__init__(self, lu)
11145 self.instance_name = instance_name
11147 self.iallocator_name = iallocator_name
11148 self.remote_node = remote_node
11150 self.early_release = early_release
11151 self.ignore_ipolicy = ignore_ipolicy
11154 self.instance = None
11155 self.new_node = None
11156 self.target_node = None
11157 self.other_node = None
11158 self.remote_node_info = None
11159 self.node_secondary_ip = None
11162 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11163 """Compute a new secondary node using an IAllocator.
11166 req = iallocator.IAReqRelocate(name=instance_name,
11167 relocate_from=list(relocate_from))
11168 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11170 ial.Run(iallocator_name)
11172 if not ial.success:
11173 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11174 " %s" % (iallocator_name, ial.info),
11175 errors.ECODE_NORES)
11177 remote_node_name = ial.result[0]
11179 lu.LogInfo("Selected new secondary for instance '%s': %s",
11180 instance_name, remote_node_name)
11182 return remote_node_name
11184 def _FindFaultyDisks(self, node_name):
11185 """Wrapper for L{_FindFaultyInstanceDisks}.
11188 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11191 def _CheckDisksActivated(self, instance):
11192 """Checks if the instance disks are activated.
11194 @param instance: The instance to check disks
11195 @return: True if they are activated, False otherwise
11198 nodes = instance.all_nodes
11200 for idx, dev in enumerate(instance.disks):
11202 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11203 self.cfg.SetDiskID(dev, node)
11205 result = _BlockdevFind(self, node, dev, instance)
11209 elif result.fail_msg or not result.payload:
11214 def CheckPrereq(self):
11215 """Check prerequisites.
11217 This checks that the instance is in the cluster.
11220 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11221 assert instance is not None, \
11222 "Cannot retrieve locked instance %s" % self.instance_name
11224 if instance.disk_template != constants.DT_DRBD8:
11225 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11226 " instances", errors.ECODE_INVAL)
11228 if len(instance.secondary_nodes) != 1:
11229 raise errors.OpPrereqError("The instance has a strange layout,"
11230 " expected one secondary but found %d" %
11231 len(instance.secondary_nodes),
11232 errors.ECODE_FAULT)
11234 instance = self.instance
11235 secondary_node = instance.secondary_nodes[0]
11237 if self.iallocator_name is None:
11238 remote_node = self.remote_node
11240 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11241 instance.name, instance.secondary_nodes)
11243 if remote_node is None:
11244 self.remote_node_info = None
11246 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11247 "Remote node '%s' is not locked" % remote_node
11249 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11250 assert self.remote_node_info is not None, \
11251 "Cannot retrieve locked node %s" % remote_node
11253 if remote_node == self.instance.primary_node:
11254 raise errors.OpPrereqError("The specified node is the primary node of"
11255 " the instance", errors.ECODE_INVAL)
11257 if remote_node == secondary_node:
11258 raise errors.OpPrereqError("The specified node is already the"
11259 " secondary node of the instance",
11260 errors.ECODE_INVAL)
11262 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11263 constants.REPLACE_DISK_CHG):
11264 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11265 errors.ECODE_INVAL)
11267 if self.mode == constants.REPLACE_DISK_AUTO:
11268 if not self._CheckDisksActivated(instance):
11269 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11270 " first" % self.instance_name,
11271 errors.ECODE_STATE)
11272 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11273 faulty_secondary = self._FindFaultyDisks(secondary_node)
11275 if faulty_primary and faulty_secondary:
11276 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11277 " one node and can not be repaired"
11278 " automatically" % self.instance_name,
11279 errors.ECODE_STATE)
11282 self.disks = faulty_primary
11283 self.target_node = instance.primary_node
11284 self.other_node = secondary_node
11285 check_nodes = [self.target_node, self.other_node]
11286 elif faulty_secondary:
11287 self.disks = faulty_secondary
11288 self.target_node = secondary_node
11289 self.other_node = instance.primary_node
11290 check_nodes = [self.target_node, self.other_node]
11296 # Non-automatic modes
11297 if self.mode == constants.REPLACE_DISK_PRI:
11298 self.target_node = instance.primary_node
11299 self.other_node = secondary_node
11300 check_nodes = [self.target_node, self.other_node]
11302 elif self.mode == constants.REPLACE_DISK_SEC:
11303 self.target_node = secondary_node
11304 self.other_node = instance.primary_node
11305 check_nodes = [self.target_node, self.other_node]
11307 elif self.mode == constants.REPLACE_DISK_CHG:
11308 self.new_node = remote_node
11309 self.other_node = instance.primary_node
11310 self.target_node = secondary_node
11311 check_nodes = [self.new_node, self.other_node]
11313 _CheckNodeNotDrained(self.lu, remote_node)
11314 _CheckNodeVmCapable(self.lu, remote_node)
11316 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11317 assert old_node_info is not None
11318 if old_node_info.offline and not self.early_release:
11319 # doesn't make sense to delay the release
11320 self.early_release = True
11321 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11322 " early-release mode", secondary_node)
11325 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11328 # If not specified all disks should be replaced
11330 self.disks = range(len(self.instance.disks))
11332 # TODO: This is ugly, but right now we can't distinguish between an
11333 # internally submitted opcode and an external one. We should fix that.
11334 if self.remote_node_info:
11335 # We change the node, let's verify it still meets the instance policy
11336 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11337 cluster = self.cfg.GetClusterInfo()
11338 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11340 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11341 ignore=self.ignore_ipolicy)
11343 for node in check_nodes:
11344 _CheckNodeOnline(self.lu, node)
11346 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11349 if node_name is not None)
11351 # Release unneeded node and node resource locks
11352 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11353 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11354 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11356 # Release any owned node group
11357 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11359 # Check whether disks are valid
11360 for disk_idx in self.disks:
11361 instance.FindDisk(disk_idx)
11363 # Get secondary node IP addresses
11364 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11365 in self.cfg.GetMultiNodeInfo(touched_nodes))
11367 def Exec(self, feedback_fn):
11368 """Execute disk replacement.
11370 This dispatches the disk replacement to the appropriate handler.
11374 # Verify owned locks before starting operation
11375 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11376 assert set(owned_nodes) == set(self.node_secondary_ip), \
11377 ("Incorrect node locks, owning %s, expected %s" %
11378 (owned_nodes, self.node_secondary_ip.keys()))
11379 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11380 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11381 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11383 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11384 assert list(owned_instances) == [self.instance_name], \
11385 "Instance '%s' not locked" % self.instance_name
11387 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11388 "Should not own any node group lock at this point"
11391 feedback_fn("No disks need replacement for instance '%s'" %
11392 self.instance.name)
11395 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11396 (utils.CommaJoin(self.disks), self.instance.name))
11397 feedback_fn("Current primary node: %s", self.instance.primary_node)
11398 feedback_fn("Current seconary node: %s",
11399 utils.CommaJoin(self.instance.secondary_nodes))
11401 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11403 # Activate the instance disks if we're replacing them on a down instance
11405 _StartInstanceDisks(self.lu, self.instance, True)
11408 # Should we replace the secondary node?
11409 if self.new_node is not None:
11410 fn = self._ExecDrbd8Secondary
11412 fn = self._ExecDrbd8DiskOnly
11414 result = fn(feedback_fn)
11416 # Deactivate the instance disks if we're replacing them on a
11419 _SafeShutdownInstanceDisks(self.lu, self.instance)
11421 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11424 # Verify owned locks
11425 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11426 nodes = frozenset(self.node_secondary_ip)
11427 assert ((self.early_release and not owned_nodes) or
11428 (not self.early_release and not (set(owned_nodes) - nodes))), \
11429 ("Not owning the correct locks, early_release=%s, owned=%r,"
11430 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11434 def _CheckVolumeGroup(self, nodes):
11435 self.lu.LogInfo("Checking volume groups")
11437 vgname = self.cfg.GetVGName()
11439 # Make sure volume group exists on all involved nodes
11440 results = self.rpc.call_vg_list(nodes)
11442 raise errors.OpExecError("Can't list volume groups on the nodes")
11445 res = results[node]
11446 res.Raise("Error checking node %s" % node)
11447 if vgname not in res.payload:
11448 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11451 def _CheckDisksExistence(self, nodes):
11452 # Check disk existence
11453 for idx, dev in enumerate(self.instance.disks):
11454 if idx not in self.disks:
11458 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11459 self.cfg.SetDiskID(dev, node)
11461 result = _BlockdevFind(self, node, dev, self.instance)
11463 msg = result.fail_msg
11464 if msg or not result.payload:
11466 msg = "disk not found"
11467 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11470 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11471 for idx, dev in enumerate(self.instance.disks):
11472 if idx not in self.disks:
11475 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11478 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11479 on_primary, ldisk=ldisk):
11480 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11481 " replace disks for instance %s" %
11482 (node_name, self.instance.name))
11484 def _CreateNewStorage(self, node_name):
11485 """Create new storage on the primary or secondary node.
11487 This is only used for same-node replaces, not for changing the
11488 secondary node, hence we don't want to modify the existing disk.
11493 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11494 for idx, dev in enumerate(disks):
11495 if idx not in self.disks:
11498 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11500 self.cfg.SetDiskID(dev, node_name)
11502 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11503 names = _GenerateUniqueNames(self.lu, lv_names)
11505 (data_disk, meta_disk) = dev.children
11506 vg_data = data_disk.logical_id[0]
11507 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11508 logical_id=(vg_data, names[0]),
11509 params=data_disk.params)
11510 vg_meta = meta_disk.logical_id[0]
11511 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11512 size=constants.DRBD_META_SIZE,
11513 logical_id=(vg_meta, names[1]),
11514 params=meta_disk.params)
11516 new_lvs = [lv_data, lv_meta]
11517 old_lvs = [child.Copy() for child in dev.children]
11518 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11520 # we pass force_create=True to force the LVM creation
11521 for new_lv in new_lvs:
11522 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11523 _GetInstanceInfoText(self.instance), False)
11527 def _CheckDevices(self, node_name, iv_names):
11528 for name, (dev, _, _) in iv_names.iteritems():
11529 self.cfg.SetDiskID(dev, node_name)
11531 result = _BlockdevFind(self, node_name, dev, self.instance)
11533 msg = result.fail_msg
11534 if msg or not result.payload:
11536 msg = "disk not found"
11537 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11540 if result.payload.is_degraded:
11541 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11543 def _RemoveOldStorage(self, node_name, iv_names):
11544 for name, (_, old_lvs, _) in iv_names.iteritems():
11545 self.lu.LogInfo("Remove logical volumes for %s", name)
11548 self.cfg.SetDiskID(lv, node_name)
11550 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11552 self.lu.LogWarning("Can't remove old LV: %s", msg,
11553 hint="remove unused LVs manually")
11555 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11556 """Replace a disk on the primary or secondary for DRBD 8.
11558 The algorithm for replace is quite complicated:
11560 1. for each disk to be replaced:
11562 1. create new LVs on the target node with unique names
11563 1. detach old LVs from the drbd device
11564 1. rename old LVs to name_replaced.<time_t>
11565 1. rename new LVs to old LVs
11566 1. attach the new LVs (with the old names now) to the drbd device
11568 1. wait for sync across all devices
11570 1. for each modified disk:
11572 1. remove old LVs (which have the name name_replaced.<time_t>)
11574 Failures are not very well handled.
11579 # Step: check device activation
11580 self.lu.LogStep(1, steps_total, "Check device existence")
11581 self._CheckDisksExistence([self.other_node, self.target_node])
11582 self._CheckVolumeGroup([self.target_node, self.other_node])
11584 # Step: check other node consistency
11585 self.lu.LogStep(2, steps_total, "Check peer consistency")
11586 self._CheckDisksConsistency(self.other_node,
11587 self.other_node == self.instance.primary_node,
11590 # Step: create new storage
11591 self.lu.LogStep(3, steps_total, "Allocate new storage")
11592 iv_names = self._CreateNewStorage(self.target_node)
11594 # Step: for each lv, detach+rename*2+attach
11595 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11596 for dev, old_lvs, new_lvs in iv_names.itervalues():
11597 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11599 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11601 result.Raise("Can't detach drbd from local storage on node"
11602 " %s for device %s" % (self.target_node, dev.iv_name))
11604 #cfg.Update(instance)
11606 # ok, we created the new LVs, so now we know we have the needed
11607 # storage; as such, we proceed on the target node to rename
11608 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11609 # using the assumption that logical_id == physical_id (which in
11610 # turn is the unique_id on that node)
11612 # FIXME(iustin): use a better name for the replaced LVs
11613 temp_suffix = int(time.time())
11614 ren_fn = lambda d, suff: (d.physical_id[0],
11615 d.physical_id[1] + "_replaced-%s" % suff)
11617 # Build the rename list based on what LVs exist on the node
11618 rename_old_to_new = []
11619 for to_ren in old_lvs:
11620 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11621 if not result.fail_msg and result.payload:
11623 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11625 self.lu.LogInfo("Renaming the old LVs on the target node")
11626 result = self.rpc.call_blockdev_rename(self.target_node,
11628 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11630 # Now we rename the new LVs to the old LVs
11631 self.lu.LogInfo("Renaming the new LVs on the target node")
11632 rename_new_to_old = [(new, old.physical_id)
11633 for old, new in zip(old_lvs, new_lvs)]
11634 result = self.rpc.call_blockdev_rename(self.target_node,
11636 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11638 # Intermediate steps of in-memory modifications
11639 for old, new in zip(old_lvs, new_lvs):
11640 new.logical_id = old.logical_id
11641 self.cfg.SetDiskID(new, self.target_node)
11643 # We need to modify old_lvs so that removal later removes the
11644 # right LVs, not the newly added ones; note that old_lvs is a
11646 for disk in old_lvs:
11647 disk.logical_id = ren_fn(disk, temp_suffix)
11648 self.cfg.SetDiskID(disk, self.target_node)
11650 # Now that the new lvs have the old name, we can add them to the device
11651 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
11652 result = self.rpc.call_blockdev_addchildren(self.target_node,
11653 (dev, self.instance), new_lvs)
11654 msg = result.fail_msg
11656 for new_lv in new_lvs:
11657 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11660 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11661 hint=("cleanup manually the unused logical"
11663 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11665 cstep = itertools.count(5)
11667 if self.early_release:
11668 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11669 self._RemoveOldStorage(self.target_node, iv_names)
11670 # TODO: Check if releasing locks early still makes sense
11671 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11673 # Release all resource locks except those used by the instance
11674 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11675 keep=self.node_secondary_ip.keys())
11677 # Release all node locks while waiting for sync
11678 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11680 # TODO: Can the instance lock be downgraded here? Take the optional disk
11681 # shutdown in the caller into consideration.
11684 # This can fail as the old devices are degraded and _WaitForSync
11685 # does a combined result over all disks, so we don't check its return value
11686 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11687 _WaitForSync(self.lu, self.instance)
11689 # Check all devices manually
11690 self._CheckDevices(self.instance.primary_node, iv_names)
11692 # Step: remove old storage
11693 if not self.early_release:
11694 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11695 self._RemoveOldStorage(self.target_node, iv_names)
11697 def _ExecDrbd8Secondary(self, feedback_fn):
11698 """Replace the secondary node for DRBD 8.
11700 The algorithm for replace is quite complicated:
11701 - for all disks of the instance:
11702 - create new LVs on the new node with same names
11703 - shutdown the drbd device on the old secondary
11704 - disconnect the drbd network on the primary
11705 - create the drbd device on the new secondary
11706 - network attach the drbd on the primary, using an artifice:
11707 the drbd code for Attach() will connect to the network if it
11708 finds a device which is connected to the good local disks but
11709 not network enabled
11710 - wait for sync across all devices
11711 - remove all disks from the old secondary
11713 Failures are not very well handled.
11718 pnode = self.instance.primary_node
11720 # Step: check device activation
11721 self.lu.LogStep(1, steps_total, "Check device existence")
11722 self._CheckDisksExistence([self.instance.primary_node])
11723 self._CheckVolumeGroup([self.instance.primary_node])
11725 # Step: check other node consistency
11726 self.lu.LogStep(2, steps_total, "Check peer consistency")
11727 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11729 # Step: create new storage
11730 self.lu.LogStep(3, steps_total, "Allocate new storage")
11731 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11732 for idx, dev in enumerate(disks):
11733 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11734 (self.new_node, idx))
11735 # we pass force_create=True to force LVM creation
11736 for new_lv in dev.children:
11737 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11738 True, _GetInstanceInfoText(self.instance), False)
11740 # Step 4: drbd minors and drbd setup changes
11741 # after this, we must manually remove the drbd minors on both the
11742 # error and the success paths
11743 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11744 minors = self.cfg.AllocateDRBDMinor([self.new_node
11745 for dev in self.instance.disks],
11746 self.instance.name)
11747 logging.debug("Allocated minors %r", minors)
11750 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11751 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11752 (self.new_node, idx))
11753 # create new devices on new_node; note that we create two IDs:
11754 # one without port, so the drbd will be activated without
11755 # networking information on the new node at this stage, and one
11756 # with network, for the latter activation in step 4
11757 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11758 if self.instance.primary_node == o_node1:
11761 assert self.instance.primary_node == o_node2, "Three-node instance?"
11764 new_alone_id = (self.instance.primary_node, self.new_node, None,
11765 p_minor, new_minor, o_secret)
11766 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11767 p_minor, new_minor, o_secret)
11769 iv_names[idx] = (dev, dev.children, new_net_id)
11770 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11772 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11773 logical_id=new_alone_id,
11774 children=dev.children,
11777 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11780 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11782 _GetInstanceInfoText(self.instance), False)
11783 except errors.GenericError:
11784 self.cfg.ReleaseDRBDMinors(self.instance.name)
11787 # We have new devices, shutdown the drbd on the old secondary
11788 for idx, dev in enumerate(self.instance.disks):
11789 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
11790 self.cfg.SetDiskID(dev, self.target_node)
11791 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11792 (dev, self.instance)).fail_msg
11794 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11795 "node: %s" % (idx, msg),
11796 hint=("Please cleanup this device manually as"
11797 " soon as possible"))
11799 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11800 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11801 self.instance.disks)[pnode]
11803 msg = result.fail_msg
11805 # detaches didn't succeed (unlikely)
11806 self.cfg.ReleaseDRBDMinors(self.instance.name)
11807 raise errors.OpExecError("Can't detach the disks from the network on"
11808 " old node: %s" % (msg,))
11810 # if we managed to detach at least one, we update all the disks of
11811 # the instance to point to the new secondary
11812 self.lu.LogInfo("Updating instance configuration")
11813 for dev, _, new_logical_id in iv_names.itervalues():
11814 dev.logical_id = new_logical_id
11815 self.cfg.SetDiskID(dev, self.instance.primary_node)
11817 self.cfg.Update(self.instance, feedback_fn)
11819 # Release all node locks (the configuration has been updated)
11820 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11822 # and now perform the drbd attach
11823 self.lu.LogInfo("Attaching primary drbds to new secondary"
11824 " (standalone => connected)")
11825 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11827 self.node_secondary_ip,
11828 (self.instance.disks, self.instance),
11829 self.instance.name,
11831 for to_node, to_result in result.items():
11832 msg = to_result.fail_msg
11834 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11836 hint=("please do a gnt-instance info to see the"
11837 " status of disks"))
11839 cstep = itertools.count(5)
11841 if self.early_release:
11842 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11843 self._RemoveOldStorage(self.target_node, iv_names)
11844 # TODO: Check if releasing locks early still makes sense
11845 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11847 # Release all resource locks except those used by the instance
11848 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11849 keep=self.node_secondary_ip.keys())
11851 # TODO: Can the instance lock be downgraded here? Take the optional disk
11852 # shutdown in the caller into consideration.
11855 # This can fail as the old devices are degraded and _WaitForSync
11856 # does a combined result over all disks, so we don't check its return value
11857 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11858 _WaitForSync(self.lu, self.instance)
11860 # Check all devices manually
11861 self._CheckDevices(self.instance.primary_node, iv_names)
11863 # Step: remove old storage
11864 if not self.early_release:
11865 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11866 self._RemoveOldStorage(self.target_node, iv_names)
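# Illustrative sketch (not part of Ganeti): the LV name swap performed by
# _ExecDrbd8DiskOnly above.  New LVs are created under unique temporary
# names, the old LVs are renamed out of the way with a timestamp suffix and
# the new LVs then take over the old names, so the DRBD device re-attaches
# its children under unchanged names.  A plain dictionary stands in for the
# real blockdev rename RPCs.
def _ExampleSwapLvNames(volumes, old_names, new_names, suffix):
  """Swap LV names in a {name: payload} mapping (example only)."""
  assert len(old_names) == len(new_names)
  for old, new in zip(old_names, new_names):
    # step 1: move the old LV aside, keeping it around for later removal
    volumes["%s_replaced-%s" % (old, suffix)] = volumes.pop(old)
    # step 2: give the freshly created LV the name the DRBD device expects
    volumes[old] = volumes.pop(new)
  return volumes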
11869 class LURepairNodeStorage(NoHooksLU):
11870 """Repairs the volume group on a node.
11875 def CheckArguments(self):
11876 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11878 storage_type = self.op.storage_type
11880 if (constants.SO_FIX_CONSISTENCY not in
11881 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11882 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11883 " repaired" % storage_type,
11884 errors.ECODE_INVAL)
11886 def ExpandNames(self):
11887 self.needed_locks = {
11888 locking.LEVEL_NODE: [self.op.node_name],
11891 def _CheckFaultyDisks(self, instance, node_name):
11892 """Ensure faulty disks abort the opcode or at least warn."""
11894 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11896 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11897 " node '%s'" % (instance.name, node_name),
11898 errors.ECODE_STATE)
11899 except errors.OpPrereqError, err:
11900 if self.op.ignore_consistency:
11901 self.LogWarning(str(err.args[0]))
11905 def CheckPrereq(self):
11906 """Check prerequisites.
11909 # Check whether any instance on this node has faulty disks
11910 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11911 if inst.admin_state != constants.ADMINST_UP:
11913 check_nodes = set(inst.all_nodes)
11914 check_nodes.discard(self.op.node_name)
11915 for inst_node_name in check_nodes:
11916 self._CheckFaultyDisks(inst, inst_node_name)
11918 def Exec(self, feedback_fn):
11919 feedback_fn("Repairing storage unit '%s' on %s ..." %
11920 (self.op.name, self.op.node_name))
11922 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11923 result = self.rpc.call_storage_execute(self.op.node_name,
11924 self.op.storage_type, st_args,
11926 constants.SO_FIX_CONSISTENCY)
11927 result.Raise("Failed to repair storage unit '%s' on %s" %
11928 (self.op.name, self.op.node_name))
11931 class LUNodeEvacuate(NoHooksLU):
11932 """Evacuates instances off a list of nodes.
11937 _MODE2IALLOCATOR = {
11938 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11939 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11940 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11942 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11943 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11944 constants.IALLOCATOR_NEVAC_MODES)
11946 def CheckArguments(self):
11947 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11949 def ExpandNames(self):
11950 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11952 if self.op.remote_node is not None:
11953 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11954 assert self.op.remote_node
11956 if self.op.remote_node == self.op.node_name:
11957 raise errors.OpPrereqError("Can not use evacuated node as a new"
11958 " secondary node", errors.ECODE_INVAL)
11960 if self.op.mode != constants.NODE_EVAC_SEC:
11961 raise errors.OpPrereqError("Without the use of an iallocator only"
11962 " secondary instances can be evacuated",
11963 errors.ECODE_INVAL)
11966 self.share_locks = _ShareAll()
11967 self.needed_locks = {
11968 locking.LEVEL_INSTANCE: [],
11969 locking.LEVEL_NODEGROUP: [],
11970 locking.LEVEL_NODE: [],
11973 # Determine nodes (via group) optimistically, needs verification once locks
11974 # have been acquired
11975 self.lock_nodes = self._DetermineNodes()
11977 def _DetermineNodes(self):
11978 """Gets the list of nodes to operate on.
11981 if self.op.remote_node is None:
11982 # Iallocator will choose any node(s) in the same group
11983 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11985 group_nodes = frozenset([self.op.remote_node])
11987 # Determine nodes to be locked
11988 return set([self.op.node_name]) | group_nodes
11990 def _DetermineInstances(self):
11991 """Builds list of instances to operate on.
11994 assert self.op.mode in constants.NODE_EVAC_MODES
11996 if self.op.mode == constants.NODE_EVAC_PRI:
11997 # Primary instances only
11998 inst_fn = _GetNodePrimaryInstances
11999 assert self.op.remote_node is None, \
12000 "Evacuating primary instances requires iallocator"
12001 elif self.op.mode == constants.NODE_EVAC_SEC:
12002 # Secondary instances only
12003 inst_fn = _GetNodeSecondaryInstances
12006 assert self.op.mode == constants.NODE_EVAC_ALL
12007 inst_fn = _GetNodeInstances
12008 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12010 raise errors.OpPrereqError("Due to an issue with the iallocator"
12011 " interface it is not possible to evacuate"
12012 " all instances at once; specify explicitly"
12013 " whether to evacuate primary or secondary"
12015 errors.ECODE_INVAL)
12017 return inst_fn(self.cfg, self.op.node_name)
12019 def DeclareLocks(self, level):
12020 if level == locking.LEVEL_INSTANCE:
12021 # Lock instances optimistically, needs verification once node and group
12022 # locks have been acquired
12023 self.needed_locks[locking.LEVEL_INSTANCE] = \
12024 set(i.name for i in self._DetermineInstances())
12026 elif level == locking.LEVEL_NODEGROUP:
12027 # Lock node groups for all potential target nodes optimistically, needs
12028 # verification once nodes have been acquired
12029 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12030 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12032 elif level == locking.LEVEL_NODE:
12033 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12035 def CheckPrereq(self):
12037 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12038 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12039 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12041 need_nodes = self._DetermineNodes()
12043 if not owned_nodes.issuperset(need_nodes):
12044 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12045 " locks were acquired, current nodes are"
12046 " are '%s', used to be '%s'; retry the"
12048 (self.op.node_name,
12049 utils.CommaJoin(need_nodes),
12050 utils.CommaJoin(owned_nodes)),
12051 errors.ECODE_STATE)
12053 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12054 if owned_groups != wanted_groups:
12055 raise errors.OpExecError("Node groups changed since locks were acquired,"
12056 " current groups are '%s', used to be '%s';"
12057 " retry the operation" %
12058 (utils.CommaJoin(wanted_groups),
12059 utils.CommaJoin(owned_groups)))
12061 # Determine affected instances
12062 self.instances = self._DetermineInstances()
12063 self.instance_names = [i.name for i in self.instances]
12065 if set(self.instance_names) != owned_instances:
12066 raise errors.OpExecError("Instances on node '%s' changed since locks"
12067 " were acquired, current instances are '%s',"
12068 " used to be '%s'; retry the operation" %
12069 (self.op.node_name,
12070 utils.CommaJoin(self.instance_names),
12071 utils.CommaJoin(owned_instances)))
12073 if self.instance_names:
12074 self.LogInfo("Evacuating instances from node '%s': %s",
12076 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12078 self.LogInfo("No instances to evacuate from node '%s'",
12081 if self.op.remote_node is not None:
12082 for i in self.instances:
12083 if i.primary_node == self.op.remote_node:
12084 raise errors.OpPrereqError("Node %s is the primary node of"
12085 " instance %s, cannot use it as"
12087 (self.op.remote_node, i.name),
12088 errors.ECODE_INVAL)
12090 def Exec(self, feedback_fn):
12091 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12093 if not self.instance_names:
12094 # No instances to evacuate
12097 elif self.op.iallocator is not None:
12098 # TODO: Implement relocation to other group
12099 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12100 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12101 instances=list(self.instance_names))
12102 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12104 ial.Run(self.op.iallocator)
12106 if not ial.success:
12107 raise errors.OpPrereqError("Can't compute node evacuation using"
12108 " iallocator '%s': %s" %
12109 (self.op.iallocator, ial.info),
12110 errors.ECODE_NORES)
12112 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12114 elif self.op.remote_node is not None:
12115 assert self.op.mode == constants.NODE_EVAC_SEC
12117 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12118 remote_node=self.op.remote_node,
12120 mode=constants.REPLACE_DISK_CHG,
12121 early_release=self.op.early_release)]
12122 for instance_name in self.instance_names]
12125 raise errors.ProgrammerError("No iallocator or remote node")
12127 return ResultWithJobs(jobs)
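# Illustrative sketch (not part of Ganeti): the shape of the jobs list that
# LUNodeEvacuate.Exec hands to ResultWithJobs when a remote node is given.
# Each inner list is submitted as a separate job, so evacuating N secondary
# instances yields N single-opcode jobs.  The argument values are made up.
def _ExampleBuildEvacJobs(instance_names, remote_node, early_release=False):
  """Build one OpInstanceReplaceDisks job per instance (example only)."""
  return [[opcodes.OpInstanceReplaceDisks(instance_name=name,
                                          remote_node=remote_node,
                                          disks=[],
                                          mode=constants.REPLACE_DISK_CHG,
                                          early_release=early_release)]
          for name in instance_names]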
12130 def _SetOpEarlyRelease(early_release, op):
12131 """Sets C{early_release} flag on opcodes if available.
12135 op.early_release = early_release
12136 except AttributeError:
12137 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12142 def _NodeEvacDest(use_nodes, group, nodes):
12143 """Returns group or nodes depending on caller's choice.
12147 return utils.CommaJoin(nodes)
12152 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12153 """Unpacks the result of change-group and node-evacuate iallocator requests.
12155 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12156 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12158 @type lu: L{LogicalUnit}
12159 @param lu: Logical unit instance
12160 @type alloc_result: tuple/list
12161 @param alloc_result: Result from iallocator
12162 @type early_release: bool
12163 @param early_release: Whether to release locks early if possible
12164 @type use_nodes: bool
12165 @param use_nodes: Whether to display node names instead of groups
12168 (moved, failed, jobs) = alloc_result
12171 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12172 for (name, reason) in failed)
12173 lu.LogWarning("Unable to evacuate instances %s", failreason)
12174 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12177 lu.LogInfo("Instances to be moved: %s",
12178 utils.CommaJoin("%s (to %s)" %
12179 (name, _NodeEvacDest(use_nodes, group, nodes))
12180 for (name, group, nodes) in moved))
12182 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12183 map(opcodes.OpCode.LoadOpCode, ops))
12187 def _DiskSizeInBytesToMebibytes(lu, size):
12188 """Converts a disk size in bytes to mebibytes.
12190 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12193 (mib, remainder) = divmod(size, 1024 * 1024)
12196 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12197 " to not overwrite existing data (%s bytes will not be"
12198 " wiped)", (1024 * 1024) - remainder)
12204 class LUInstanceGrowDisk(LogicalUnit):
12205 """Grow a disk of an instance.
12208 HPATH = "disk-grow"
12209 HTYPE = constants.HTYPE_INSTANCE
12212 def ExpandNames(self):
12213 self._ExpandAndLockInstance()
12214 self.needed_locks[locking.LEVEL_NODE] = []
12215 self.needed_locks[locking.LEVEL_NODE_RES] = []
12216 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12217 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12219 def DeclareLocks(self, level):
12220 if level == locking.LEVEL_NODE:
12221 self._LockInstancesNodes()
12222 elif level == locking.LEVEL_NODE_RES:
12224 self.needed_locks[locking.LEVEL_NODE_RES] = \
12225 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12227 def BuildHooksEnv(self):
12228 """Build hooks env.
12230 This runs on the master, the primary and all the secondaries.
12234 "DISK": self.op.disk,
12235 "AMOUNT": self.op.amount,
12236 "ABSOLUTE": self.op.absolute,
12238 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12241 def BuildHooksNodes(self):
12242 """Build hooks nodes.
12245 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12248 def CheckPrereq(self):
12249 """Check prerequisites.
12251 This checks that the instance is in the cluster.
12254 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12255 assert instance is not None, \
12256 "Cannot retrieve locked instance %s" % self.op.instance_name
12257 nodenames = list(instance.all_nodes)
12258 for node in nodenames:
12259 _CheckNodeOnline(self, node)
12261 self.instance = instance
12263 if instance.disk_template not in constants.DTS_GROWABLE:
12264 raise errors.OpPrereqError("Instance's disk layout does not support"
12265 " growing", errors.ECODE_INVAL)
12267 self.disk = instance.FindDisk(self.op.disk)
12269 if self.op.absolute:
12270 self.target = self.op.amount
12271 self.delta = self.target - self.disk.size
12273 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12274 "current disk size (%s)" %
12275 (utils.FormatUnit(self.target, "h"),
12276 utils.FormatUnit(self.disk.size, "h")),
12277 errors.ECODE_STATE)
12279 self.delta = self.op.amount
12280 self.target = self.disk.size + self.delta
12282 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12283 utils.FormatUnit(self.delta, "h"),
12284 errors.ECODE_INVAL)
12286 if instance.disk_template not in (constants.DT_FILE,
12287 constants.DT_SHARED_FILE,
12289 # TODO: check the free disk space for file, when that feature will be
12291 _CheckNodesFreeDiskPerVG(self, nodenames,
12292 self.disk.ComputeGrowth(self.delta))
12294 def Exec(self, feedback_fn):
12295 """Execute disk grow.
12298 instance = self.instance
12301 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12302 assert (self.owned_locks(locking.LEVEL_NODE) ==
12303 self.owned_locks(locking.LEVEL_NODE_RES))
12305 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12307 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12309 raise errors.OpExecError("Cannot activate block device to grow")
12311 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12312 (self.op.disk, instance.name,
12313 utils.FormatUnit(self.delta, "h"),
12314 utils.FormatUnit(self.target, "h")))
12316 # First run all grow ops in dry-run mode
12317 for node in instance.all_nodes:
12318 self.cfg.SetDiskID(disk, node)
12319 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12321 result.Raise("Dry-run grow request failed to node %s" % node)
12324 # Get disk size from primary node for wiping
12325 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12326 result.Raise("Failed to retrieve disk size from node '%s'" %
12327 instance.primary_node)
12329 (disk_size_in_bytes, ) = result.payload
12331 if disk_size_in_bytes is None:
12332 raise errors.OpExecError("Failed to retrieve disk size from primary"
12333 " node '%s'" % instance.primary_node)
12335 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12337 assert old_disk_size >= disk.size, \
12338 ("Retrieved disk size too small (got %s, should be at least %s)" %
12339 (old_disk_size, disk.size))
12341 old_disk_size = None
12343 # We know that (as far as we can test) operations across different
12344 # nodes will succeed, time to run it for real on the backing storage
12345 for node in instance.all_nodes:
12346 self.cfg.SetDiskID(disk, node)
12347 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12349 result.Raise("Grow request failed to node %s" % node)
12351 # And now execute it for logical storage, on the primary node
12352 node = instance.primary_node
12353 self.cfg.SetDiskID(disk, node)
12354 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12356 result.Raise("Grow request failed to node %s" % node)
12358 disk.RecordGrow(self.delta)
12359 self.cfg.Update(instance, feedback_fn)
12361 # Changes have been recorded, release node lock
12362 _ReleaseLocks(self, locking.LEVEL_NODE)
12364 # Downgrade lock while waiting for sync
12365 self.glm.downgrade(locking.LEVEL_INSTANCE)
12367 assert wipe_disks ^ (old_disk_size is None)
12370 assert instance.disks[self.op.disk] == disk
12372 # Wipe newly added disk space
12373 _WipeDisks(self, instance,
12374 disks=[(self.op.disk, disk, old_disk_size)])
12376 if self.op.wait_for_sync:
12377 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12379 self.LogWarning("Disk syncing has not returned a good status; check"
12381 if instance.admin_state != constants.ADMINST_UP:
12382 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12383 elif instance.admin_state != constants.ADMINST_UP:
12384 self.LogWarning("Not shutting down the disk even if the instance is"
12385 " not supposed to be running because no wait for"
12386 " sync mode was requested")
12388 assert self.owned_locks(locking.LEVEL_NODE_RES)
12389 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
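# Illustrative sketch (not part of Ganeti): the delta/target computation done
# in LUInstanceGrowDisk.CheckPrereq above.  With an absolute request the
# amount is the new total size, otherwise it is the increment; in both cases
# the resulting delta must not be negative.
def _ExampleComputeGrowth(current_size, amount, absolute):
  """Return (delta, target) for a disk grow request (example only)."""
  if absolute:
    target = amount
    delta = target - current_size
  else:
    delta = amount
    target = current_size + delta
  if delta < 0:
    raise errors.OpPrereqError("Requested growth would shrink the disk",
                               errors.ECODE_INVAL)
  return (delta, target)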
12392 class LUInstanceQueryData(NoHooksLU):
12393 """Query runtime instance data.
12398 def ExpandNames(self):
12399 self.needed_locks = {}
12401 # Use locking if requested or when non-static information is wanted
12402 if not (self.op.static or self.op.use_locking):
12403 self.LogWarning("Non-static data requested, locks need to be acquired")
12404 self.op.use_locking = True
12406 if self.op.instances or not self.op.use_locking:
12407 # Expand instance names right here
12408 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12410 # Will use acquired locks
12411 self.wanted_names = None
12413 if self.op.use_locking:
12414 self.share_locks = _ShareAll()
12416 if self.wanted_names is None:
12417 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12419 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12421 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12422 self.needed_locks[locking.LEVEL_NODE] = []
12423 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12425 def DeclareLocks(self, level):
12426 if self.op.use_locking:
12427 if level == locking.LEVEL_NODEGROUP:
12428 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12430 # Lock all groups used by instances optimistically; this requires going
12431 # via the node before it's locked, requiring verification later on
12432 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12433 frozenset(group_uuid
12434 for instance_name in owned_instances
12436 self.cfg.GetInstanceNodeGroups(instance_name))
12438 elif level == locking.LEVEL_NODE:
12439 self._LockInstancesNodes()
12441 def CheckPrereq(self):
12442 """Check prerequisites.
12444 This only checks the optional instance list against the existing names.
12447 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12448 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12449 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12451 if self.wanted_names is None:
12452 assert self.op.use_locking, "Locking was not used"
12453 self.wanted_names = owned_instances
12455 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12457 if self.op.use_locking:
12458 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12461 assert not (owned_instances or owned_groups or owned_nodes)
12463 self.wanted_instances = instances.values()
12465 def _ComputeBlockdevStatus(self, node, instance, dev):
12466 """Returns the status of a block device
12469 if self.op.static or not node:
12472 self.cfg.SetDiskID(dev, node)
12474 result = self.rpc.call_blockdev_find(node, dev)
12478 result.Raise("Can't compute disk status for %s" % instance.name)
12480 status = result.payload
12484 return (status.dev_path, status.major, status.minor,
12485 status.sync_percent, status.estimated_time,
12486 status.is_degraded, status.ldisk_status)
12488 def _ComputeDiskStatus(self, instance, snode, dev):
12489 """Compute block device status.
12492 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12494 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12496 def _ComputeDiskStatusInner(self, instance, snode, dev):
12497 """Compute block device status.
12499 @attention: The device has to be annotated already.
12502 if dev.dev_type in constants.LDS_DRBD:
12503 # we change the snode then (otherwise we use the one passed in)
12504 if dev.logical_id[0] == instance.primary_node:
12505 snode = dev.logical_id[1]
12507 snode = dev.logical_id[0]
12509 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12511 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12514 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12521 "iv_name": dev.iv_name,
12522 "dev_type": dev.dev_type,
12523 "logical_id": dev.logical_id,
12524 "physical_id": dev.physical_id,
12525 "pstatus": dev_pstatus,
12526 "sstatus": dev_sstatus,
12527 "children": dev_children,
12532 def Exec(self, feedback_fn):
12533 """Gather and return data"""
12536 cluster = self.cfg.GetClusterInfo()
12538 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12539 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12541 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12542 for node in nodes.values()))
12544 group2name_fn = lambda uuid: groups[uuid].name
12546 for instance in self.wanted_instances:
12547 pnode = nodes[instance.primary_node]
12549 if self.op.static or pnode.offline:
12550 remote_state = None
12552 self.LogWarning("Primary node %s is marked offline, returning static"
12553 " information only for instance %s" %
12554 (pnode.name, instance.name))
12556 remote_info = self.rpc.call_instance_info(instance.primary_node,
12558 instance.hypervisor)
12559 remote_info.Raise("Error checking node %s" % instance.primary_node)
12560 remote_info = remote_info.payload
12561 if remote_info and "state" in remote_info:
12562 remote_state = "up"
12564 if instance.admin_state == constants.ADMINST_UP:
12565 remote_state = "down"
12567 remote_state = instance.admin_state
12569 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12572 snodes_group_uuids = [nodes[snode_name].group
12573 for snode_name in instance.secondary_nodes]
12575 result[instance.name] = {
12576 "name": instance.name,
12577 "config_state": instance.admin_state,
12578 "run_state": remote_state,
12579 "pnode": instance.primary_node,
12580 "pnode_group_uuid": pnode.group,
12581 "pnode_group_name": group2name_fn(pnode.group),
12582 "snodes": instance.secondary_nodes,
12583 "snodes_group_uuids": snodes_group_uuids,
12584 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12586 # this happens to be the same format used for hooks
12587 "nics": _NICListToTuple(self, instance.nics),
12588 "disk_template": instance.disk_template,
12590 "hypervisor": instance.hypervisor,
12591 "network_port": instance.network_port,
12592 "hv_instance": instance.hvparams,
12593 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12594 "be_instance": instance.beparams,
12595 "be_actual": cluster.FillBE(instance),
12596 "os_instance": instance.osparams,
12597 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12598 "serial_no": instance.serial_no,
12599 "mtime": instance.mtime,
12600 "ctime": instance.ctime,
12601 "uuid": instance.uuid,
12607 def PrepareContainerMods(mods, private_fn):
12608 """Prepares a list of container modifications by adding a private data field.
12610 @type mods: list of tuples; (operation, index, parameters)
12611 @param mods: List of modifications
12612 @type private_fn: callable or None
12613 @param private_fn: Callable for constructing a private data field for a
12618 if private_fn is None:
12623 return [(op, idx, params, fn()) for (op, idx, params) in mods]
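# Illustrative sketch (not part of Ganeti): how PrepareContainerMods is meant
# to be used.  The (op, index, params) tuples normally come from an opcode's
# NIC/disk modification list; the private object gives the later callbacks a
# place to keep per-item state.  The parameter values below are made up.
def _ExamplePrepareNicMods():
  """Show the 4-tuples produced by PrepareContainerMods (example only)."""
  mods = [(constants.DDM_ADD, -1, {constants.INIC_MAC: constants.VALUE_AUTO})]
  # each 3-tuple gains a private data object created by _InstNicModPrivate
  return PrepareContainerMods(mods, _InstNicModPrivate)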
12626 #: Type description for changes as returned by L{ApplyContainerMods}'s
12628 _TApplyContModsCbChanges = \
12629 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12630 ht.TNonEmptyString,
12635 def ApplyContainerMods(kind, container, chgdesc, mods,
12636 create_fn, modify_fn, remove_fn):
12637 """Applies descriptions in C{mods} to C{container}.
12640 @param kind: One-word item description
12641 @type container: list
12642 @param container: Container to modify
12643 @type chgdesc: None or list
12644 @param chgdesc: List of applied changes
12646 @param mods: Modifications as returned by L{PrepareContainerMods}
12647 @type create_fn: callable
12648 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12649 receives absolute item index, parameters and private data object as added
12650 by L{PrepareContainerMods}, returns tuple containing new item and changes
12652 @type modify_fn: callable
12653 @param modify_fn: Callback for modifying an existing item
12654 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12655 and private data object as added by L{PrepareContainerMods}, returns
12657 @type remove_fn: callable
12658 @param remove_fn: Callback on removing item; receives absolute item index,
12659 item and private data object as added by L{PrepareContainerMods}
12662 for (op, idx, params, private) in mods:
12665 absidx = len(container) - 1
12667 raise IndexError("Not accepting negative indices other than -1")
12668 elif idx > len(container):
12669 raise IndexError("Got %s index %s, but there are only %s" %
12670 (kind, idx, len(container)))
12676 if op == constants.DDM_ADD:
12677 # Calculate where item will be added
12679 addidx = len(container)
12683 if create_fn is None:
12686 (item, changes) = create_fn(addidx, params, private)
12689 container.append(item)
12692 assert idx <= len(container)
12693 # list.insert does so before the specified index
12694 container.insert(idx, item)
12696 # Retrieve existing item
12698 item = container[absidx]
12700 raise IndexError("Invalid %s index %s" % (kind, idx))
12702 if op == constants.DDM_REMOVE:
12705 if remove_fn is not None:
12706 remove_fn(absidx, item, private)
12708 changes = [("%s/%s" % (kind, absidx), "remove")]
12710 assert container[absidx] == item
12711 del container[absidx]
12712 elif op == constants.DDM_MODIFY:
12713 if modify_fn is not None:
12714 changes = modify_fn(absidx, item, params, private)
12716 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12718 assert _TApplyContModsCbChanges(changes)
12720 if not (chgdesc is None or changes is None):
12721 chgdesc.extend(changes)
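# Illustrative sketch (not part of Ganeti): driving ApplyContainerMods with
# trivial callbacks on a plain list of dictionaries, using mods as returned
# by PrepareContainerMods.  The callbacks follow the contract documented
# above: create_fn returns (item, changes), modify_fn returns changes,
# remove_fn only has side effects; "changes" is a list of (name, value)
# pairs as checked by _TApplyContModsCbChanges.
def _ExampleApplyListMods(container, mods):
  """Apply prepared modifications to a list of dicts (example only)."""
  chgdesc = []

  def _Create(addidx, params, _):
    return (dict(params), [("example/%s" % addidx, "add")])

  def _Modify(absidx, item, params, _):
    item.update(params)
    return [("example/%s" % absidx, "modify")]

  def _Remove(absidx, item, _):
    logging.debug("Removing example item %s at index %s", item, absidx)

  ApplyContainerMods("example", container, chgdesc, mods,
                     _Create, _Modify, _Remove)
  return chgdesc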
12724 def _UpdateIvNames(base_index, disks):
12725 """Updates the C{iv_name} attribute of disks.
12727 @type disks: list of L{objects.Disk}
12730 for (idx, disk) in enumerate(disks):
12731 disk.iv_name = "disk/%s" % (base_index + idx, )
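# Usage note (example only): with base_index=1 and two disks,
# _UpdateIvNames(1, [disk_a, disk_b]) renames them to "disk/1" and "disk/2";
# this keeps iv_name values consecutive after disks are added or removed.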
12734 class _InstNicModPrivate:
12735 """Data structure for network interface modifications.
12737 Used by L{LUInstanceSetParams}.
12740 def __init__(self):
12745 class LUInstanceSetParams(LogicalUnit):
12746 """Modifies an instances's parameters.
12749 HPATH = "instance-modify"
12750 HTYPE = constants.HTYPE_INSTANCE
12754 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12755 assert ht.TList(mods)
12756 assert not mods or len(mods[0]) in (2, 3)
12758 if mods and len(mods[0]) == 2:
12762 for op, params in mods:
12763 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12764 result.append((op, -1, params))
12768 raise errors.OpPrereqError("Only one %s add or remove operation is"
12769 " supported at a time" % kind,
12770 errors.ECODE_INVAL)
12772 result.append((constants.DDM_MODIFY, op, params))
12774 assert verify_fn(result)
12781 def _CheckMods(kind, mods, key_types, item_fn):
12782 """Ensures requested disk/NIC modifications are valid.
12785 for (op, _, params) in mods:
12786 assert ht.TDict(params)
12788 utils.ForceDictType(params, key_types)
12790 if op == constants.DDM_REMOVE:
12792 raise errors.OpPrereqError("No settings should be passed when"
12793 " removing a %s" % kind,
12794 errors.ECODE_INVAL)
12795 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12796 item_fn(op, params)
12798 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12801 def _VerifyDiskModification(op, params):
12802 """Verifies a disk modification.
12805 if op == constants.DDM_ADD:
12806 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12807 if mode not in constants.DISK_ACCESS_SET:
12808 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12809 errors.ECODE_INVAL)
12811 size = params.get(constants.IDISK_SIZE, None)
12813 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12814 constants.IDISK_SIZE, errors.ECODE_INVAL)
12818 except (TypeError, ValueError), err:
12819 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12820 errors.ECODE_INVAL)
12822 params[constants.IDISK_SIZE] = size
12824 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
12825 raise errors.OpPrereqError("Disk size change not possible, use"
12826 " grow-disk", errors.ECODE_INVAL)
12829 def _VerifyNicModification(op, params):
12830 """Verifies a network interface modification.
12833 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12834 ip = params.get(constants.INIC_IP, None)
12835 req_net = params.get(constants.INIC_NETWORK, None)
12836 link = params.get(constants.NIC_LINK, None)
12837 mode = params.get(constants.NIC_MODE, None)
12838 if req_net is not None:
12839 if req_net.lower() == constants.VALUE_NONE:
12840 params[constants.INIC_NETWORK] = None
12842 elif link is not None or mode is not None:
12843 raise errors.OpPrereqError("If a network is given, mode or link"
12844 " should not be specified",
12845 errors.ECODE_INVAL)
12847 if op == constants.DDM_ADD:
12848 macaddr = params.get(constants.INIC_MAC, None)
12849 if macaddr is None:
12850 params[constants.INIC_MAC] = constants.VALUE_AUTO
12853 if ip.lower() == constants.VALUE_NONE:
12854 params[constants.INIC_IP] = None
12856 if ip.lower() == constants.NIC_IP_POOL:
12857 if op == constants.DDM_ADD and req_net is None:
12858 raise errors.OpPrereqError("If ip=pool, parameter network"
12860 errors.ECODE_INVAL)
12862 if not netutils.IPAddress.IsValid(ip):
12863 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12864 errors.ECODE_INVAL)
12866 if constants.INIC_MAC in params:
12867 macaddr = params[constants.INIC_MAC]
12868 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12869 macaddr = utils.NormalizeAndValidateMac(macaddr)
12871 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12872 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12873 " modifying an existing NIC",
12874 errors.ECODE_INVAL)
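# Example NIC modifications passing the checks above (values hypothetical):
#   add:    {constants.INIC_IP: "pool", constants.INIC_NETWORK: "net1"}
#           -> the MAC defaults to constants.VALUE_AUTO and the IP is taken
#              from the network's address pool later on
#   modify: {constants.INIC_IP: "none"}
#           -> the IP is cleared; passing "auto" as MAC here is rejected.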
12876 def CheckArguments(self):
12877 if not (self.op.nics or self.op.disks or self.op.disk_template or
12878 self.op.hvparams or self.op.beparams or self.op.os_name or
12879 self.op.offline is not None or self.op.runtime_mem):
12880 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12882 if self.op.hvparams:
12883 _CheckGlobalHvParams(self.op.hvparams)
12885 self.op.disks = self._UpgradeDiskNicMods(
12886 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12887 self.op.nics = self._UpgradeDiskNicMods(
12888 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12890 # Check disk modifications
12891 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12892 self._VerifyDiskModification)
12894 if self.op.disks and self.op.disk_template is not None:
12895 raise errors.OpPrereqError("Disk template conversion and other disk"
12896 " changes not supported at the same time",
12897 errors.ECODE_INVAL)
12899 if (self.op.disk_template and
12900 self.op.disk_template in constants.DTS_INT_MIRROR and
12901 self.op.remote_node is None):
12902 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12903 " one requires specifying a secondary node",
12904 errors.ECODE_INVAL)
12906 # Check NIC modifications
12907 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12908 self._VerifyNicModification)
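# A representative opcode handled by this LU (field values hypothetical):
#   opcodes.OpInstanceSetParams(
#     instance_name="inst1.example.com",
#     disks=[(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})],
#     beparams={constants.BE_MAXMEM: 2048})
# CheckArguments above normalizes the disk/NIC lists and rejects unsupported
# combinations (e.g. disk changes together with a disk template conversion).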
12910 def ExpandNames(self):
12911 self._ExpandAndLockInstance()
12912 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12913 # Can't even acquire node locks in shared mode as upcoming changes in
12914 # Ganeti 2.6 will start to modify the node object on disk conversion
12915 self.needed_locks[locking.LEVEL_NODE] = []
12916 self.needed_locks[locking.LEVEL_NODE_RES] = []
12917 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12918 # Lock the node group to look up the ipolicy
12919 self.share_locks[locking.LEVEL_NODEGROUP] = 1
12921 def DeclareLocks(self, level):
12922 if level == locking.LEVEL_NODEGROUP:
12923 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12924 # Acquire locks for the instance's nodegroups optimistically. Needs
12925 # to be verified in CheckPrereq
12926 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12927 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12928 elif level == locking.LEVEL_NODE:
12929 self._LockInstancesNodes()
12930 if self.op.disk_template and self.op.remote_node:
12931 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12932 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12933 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12935 self.needed_locks[locking.LEVEL_NODE_RES] = \
12936 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12938 def BuildHooksEnv(self):
12939 """Build hooks env.
12941 This runs on the master, primary and secondaries.
12945 if constants.BE_MINMEM in self.be_new:
12946 args["minmem"] = self.be_new[constants.BE_MINMEM]
12947 if constants.BE_MAXMEM in self.be_new:
12948 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12949 if constants.BE_VCPUS in self.be_new:
12950 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12951 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12952 # information at all.
12954 if self._new_nics is not None:
12957 for nic in self._new_nics:
12958 n = copy.deepcopy(nic)
12959 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12960 n.nicparams = nicparams
12961 nics.append(_NICToTuple(self, n))
12963 args["nics"] = nics
12965 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12966 if self.op.disk_template:
12967 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12968 if self.op.runtime_mem:
12969 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12973 def BuildHooksNodes(self):
12974 """Build hooks nodes.
12977 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12980 def _PrepareNicModification(self, params, private, old_ip, old_net,
12981 old_params, cluster, pnode):
12983 update_params_dict = dict([(key, params[key])
12984 for key in constants.NICS_PARAMETERS
12987 req_link = update_params_dict.get(constants.NIC_LINK, None)
12988 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12990 new_net = params.get(constants.INIC_NETWORK, old_net)
12991 if new_net is not None:
12992 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12993 if netparams is None:
12994 raise errors.OpPrereqError("No netparams found for the network"
12995 " %s, probably not connected" % new_net,
12996 errors.ECODE_INVAL)
12997 new_params = dict(netparams)
12999 new_params = _GetUpdatedParams(old_params, update_params_dict)
13001 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13003 new_filled_params = cluster.SimpleFillNIC(new_params)
13004 objects.NIC.CheckParameterSyntax(new_filled_params)
13006 new_mode = new_filled_params[constants.NIC_MODE]
13007 if new_mode == constants.NIC_MODE_BRIDGED:
13008 bridge = new_filled_params[constants.NIC_LINK]
13009 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13011 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13013 self.warn.append(msg)
13015 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13017 elif new_mode == constants.NIC_MODE_ROUTED:
13018 ip = params.get(constants.INIC_IP, old_ip)
13020 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13021 " on a routed NIC", errors.ECODE_INVAL)
13023 if constants.INIC_MAC in params:
13024 mac = params[constants.INIC_MAC]
13026 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13027 errors.ECODE_INVAL)
13028 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13029 # otherwise generate the MAC address
13030 params[constants.INIC_MAC] = \
13031 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13033 # or validate/reserve the current one
13035 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13036 except errors.ReservationError:
13037 raise errors.OpPrereqError("MAC address '%s' already in use"
13038 " in cluster" % mac,
13039 errors.ECODE_NOTUNIQUE)
13040 elif new_net != old_net:
13042 def get_net_prefix(net):
13044 uuid = self.cfg.LookupNetwork(net)
13046 nobj = self.cfg.GetNetwork(uuid)
13047 return nobj.mac_prefix
13050 new_prefix = get_net_prefix(new_net)
13051 old_prefix = get_net_prefix(old_net)
13052 if old_prefix != new_prefix:
13053 params[constants.INIC_MAC] = \
13054 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13056 # if there is a change in the NIC's IP/network configuration
13057 new_ip = params.get(constants.INIC_IP, old_ip)
13058 if (new_ip, new_net) != (old_ip, old_net):
13061 if new_ip.lower() == constants.NIC_IP_POOL:
13063 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13064 except errors.ReservationError:
13065 raise errors.OpPrereqError("Unable to get a free IP"
13066 " from the address pool",
13067 errors.ECODE_STATE)
13068 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13069 params[constants.INIC_IP] = new_ip
13070 elif new_ip != old_ip or new_net != old_net:
13072 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13073 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13074 except errors.ReservationError:
13075 raise errors.OpPrereqError("IP %s not available in network %s" %
13077 errors.ECODE_NOTUNIQUE)
13078 elif new_ip.lower() == constants.NIC_IP_POOL:
13079 raise errors.OpPrereqError("ip=pool, but no network found",
13080 errors.ECODE_INVAL)
13083 if self.op.conflicts_check:
13084 _CheckForConflictingIp(self, new_ip, pnode)
13089 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13090 except errors.AddressPoolError:
13091 logging.warning("Release IP %s not contained in network %s",
13094 # there are no changes in (net, ip) tuple
13095 elif (old_net is not None and
13096 (req_link is not None or req_mode is not None)):
13097 raise errors.OpPrereqError("Not allowed to change link or mode of"
13098 " a NIC that is connected to a network",
13099 errors.ECODE_INVAL)
13101 private.params = new_params
13102 private.filled = new_filled_params
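# Outcome sketch for a NIC being moved to another network (hypothetical):
# with params = {constants.INIC_NETWORK: "net2"}, private.params receives
# the netparams of "net2" for the primary node's group, private.filled the
# same values filled with cluster defaults, and a new MAC (and, for ip=pool,
# a new IP) may have been generated and reserved above.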
13104 def CheckPrereq(self):
13105 """Check prerequisites.
13107 This checks the instance and the requested modifications for validity.
13110 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13111 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13113 cluster = self.cluster = self.cfg.GetClusterInfo()
13114 assert self.instance is not None, \
13115 "Cannot retrieve locked instance %s" % self.op.instance_name
13117 pnode = instance.primary_node
13118 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13119 nodelist = list(instance.all_nodes)
13120 pnode_info = self.cfg.GetNodeInfo(pnode)
13121 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13123 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13124 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13125 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13127 # dictionary with instance information after the modification
13130 # Prepare disk/NIC modifications
13131 self.diskmod = PrepareContainerMods(self.op.disks, None)
13132 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13135 if self.op.os_name and not self.op.force:
13136 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13137 self.op.force_variant)
13138 instance_os = self.op.os_name
13140 instance_os = instance.os
13142 assert not (self.op.disk_template and self.op.disks), \
13143 "Can't modify disk template and apply disk changes at the same time"
13145 if self.op.disk_template:
13146 if instance.disk_template == self.op.disk_template:
13147 raise errors.OpPrereqError("Instance already has disk template %s" %
13148 instance.disk_template, errors.ECODE_INVAL)
13150 if (instance.disk_template,
13151 self.op.disk_template) not in self._DISK_CONVERSIONS:
13152 raise errors.OpPrereqError("Unsupported disk template conversion from"
13153 " %s to %s" % (instance.disk_template,
13154 self.op.disk_template),
13155 errors.ECODE_INVAL)
13156 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13157 msg="cannot change disk template")
13158 if self.op.disk_template in constants.DTS_INT_MIRROR:
13159 if self.op.remote_node == pnode:
13160 raise errors.OpPrereqError("Given new secondary node %s is the same"
13161 " as the primary node of the instance" %
13162 self.op.remote_node, errors.ECODE_STATE)
13163 _CheckNodeOnline(self, self.op.remote_node)
13164 _CheckNodeNotDrained(self, self.op.remote_node)
13165 # FIXME: here we assume that the old instance type is DT_PLAIN
13166 assert instance.disk_template == constants.DT_PLAIN
13167 disks = [{constants.IDISK_SIZE: d.size,
13168 constants.IDISK_VG: d.logical_id[0]}
13169 for d in instance.disks]
13170 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13171 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13173 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13174 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13175 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13177 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13178 ignore=self.op.ignore_ipolicy)
13179 if pnode_info.group != snode_info.group:
13180 self.LogWarning("The primary and secondary nodes are in two"
13181 " different node groups; the disk parameters"
13182 " from the first disk's node group will be"
13185 # hvparams processing
13186 if self.op.hvparams:
13187 hv_type = instance.hypervisor
13188 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13189 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13190 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13193 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
13194 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13195 self.hv_proposed = self.hv_new = hv_new # the new actual values
13196 self.hv_inst = i_hvdict # the new dict (without defaults)
13198 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13200 self.hv_new = self.hv_inst = {}
13202 # beparams processing
13203 if self.op.beparams:
13204 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13206 objects.UpgradeBeParams(i_bedict)
13207 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13208 be_new = cluster.SimpleFillBE(i_bedict)
13209 self.be_proposed = self.be_new = be_new # the new actual values
13210 self.be_inst = i_bedict # the new dict (without defaults)
13212 self.be_new = self.be_inst = {}
13213 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13214 be_old = cluster.FillBE(instance)
13216 # CPU param validation -- checking every time a parameter is
13217 # changed to cover all cases where either CPU mask or vcpus have
13219 if (constants.BE_VCPUS in self.be_proposed and
13220 constants.HV_CPU_MASK in self.hv_proposed):
13222 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13223 # Verify mask is consistent with number of vCPUs. Can skip this
13224 # test if only 1 entry in the CPU mask, which means same mask
13225 # is applied to all vCPUs.
13226 if (len(cpu_list) > 1 and
13227 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13228 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13230 (self.be_proposed[constants.BE_VCPUS],
13231 self.hv_proposed[constants.HV_CPU_MASK]),
13232 errors.ECODE_INVAL)
13234 # Only perform this test if a new CPU mask is given
13235 if constants.HV_CPU_MASK in self.hv_new:
13236 # Calculate the largest CPU number requested
13237 max_requested_cpu = max(map(max, cpu_list))
13238 # Check that all of the instance's nodes have enough physical CPUs to
13239 # satisfy the requested CPU mask
13240 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13241 max_requested_cpu + 1, instance.hypervisor)
13243 # osparams processing
13244 if self.op.osparams:
13245 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13246 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13247 self.os_inst = i_osdict # the new dict (without defaults)
13253 #TODO(dynmem): do the appropriate check involving MINMEM
13254 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13255 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13256 mem_check_list = [pnode]
13257 if be_new[constants.BE_AUTO_BALANCE]:
13258 # either we changed auto_balance to yes or it was already set before
13259 mem_check_list.extend(instance.secondary_nodes)
13260 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13261 instance.hypervisor)
13262 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13263 [instance.hypervisor])
13264 pninfo = nodeinfo[pnode]
13265 msg = pninfo.fail_msg
13267 # Assume the primary node is unreachable and go ahead
13268 self.warn.append("Can't get info from primary node %s: %s" %
13271 (_, _, (pnhvinfo, )) = pninfo.payload
13272 if not isinstance(pnhvinfo.get("memory_free", None), int):
13273 self.warn.append("Node data from primary node %s doesn't contain"
13274 " free memory information" % pnode)
13275 elif instance_info.fail_msg:
13276 self.warn.append("Can't get instance runtime information: %s" %
13277 instance_info.fail_msg)
13279 if instance_info.payload:
13280 current_mem = int(instance_info.payload["memory"])
13282 # Assume instance not running
13283 # (there is a slight race condition here, but it's not very
13284 # probable, and we have no other way to check)
13285 # TODO: Describe race condition
13287 #TODO(dynmem): do the appropriate check involving MINMEM
13288 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13289 pnhvinfo["memory_free"])
13291 raise errors.OpPrereqError("This change will prevent the instance"
13292 " from starting, due to %d MB of memory"
13293 " missing on its primary node" %
13294 miss_mem, errors.ECODE_NORES)
13296 if be_new[constants.BE_AUTO_BALANCE]:
13297 for node, nres in nodeinfo.items():
13298 if node not in instance.secondary_nodes:
13300 nres.Raise("Can't get info from secondary node %s" % node,
13301 prereq=True, ecode=errors.ECODE_STATE)
13302 (_, _, (nhvinfo, )) = nres.payload
13303 if not isinstance(nhvinfo.get("memory_free", None), int):
13304 raise errors.OpPrereqError("Secondary node %s didn't return free"
13305 " memory information" % node,
13306 errors.ECODE_STATE)
13307 #TODO(dynmem): do the appropriate check involving MINMEM
13308 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13309 raise errors.OpPrereqError("This change will prevent the instance"
13310 " from failover to its secondary node"
13311 " %s, due to not enough memory" % node,
13312 errors.ECODE_STATE)
13314 if self.op.runtime_mem:
13315 remote_info = self.rpc.call_instance_info(instance.primary_node,
13317 instance.hypervisor)
13318 remote_info.Raise("Error checking node %s" % instance.primary_node)
13319 if not remote_info.payload: # not running already
13320 raise errors.OpPrereqError("Instance %s is not running" %
13321 instance.name, errors.ECODE_STATE)
13323 current_memory = remote_info.payload["memory"]
13324 if (not self.op.force and
13325 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13326 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13327 raise errors.OpPrereqError("Instance %s must have between %d"
13328 " and %d MB of memory unless --force is"
13331 self.be_proposed[constants.BE_MINMEM],
13332 self.be_proposed[constants.BE_MAXMEM]),
13333 errors.ECODE_INVAL)
13335 delta = self.op.runtime_mem - current_memory
13337 _CheckNodeFreeMemory(self, instance.primary_node,
13338 "ballooning memory for instance %s" %
13339 instance.name, delta, instance.hypervisor)
13341 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13342 raise errors.OpPrereqError("Disk operations not supported for"
13343 " diskless instances", errors.ECODE_INVAL)
13345 def _PrepareNicCreate(_, params, private):
13346 self._PrepareNicModification(params, private, None, None,
13347 {}, cluster, pnode)
13348 return (None, None)
13350 def _PrepareNicMod(_, nic, params, private):
13351 self._PrepareNicModification(params, private, nic.ip, nic.network,
13352 nic.nicparams, cluster, pnode)
13355 def _PrepareNicRemove(_, params, __):
13357 net = params.network
13358 if net is not None and ip is not None:
13359 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13361 # Verify NIC changes (operating on copy)
13362 nics = instance.nics[:]
13363 ApplyContainerMods("NIC", nics, None, self.nicmod,
13364 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13365 if len(nics) > constants.MAX_NICS:
13366 raise errors.OpPrereqError("Instance has too many network interfaces"
13367 " (%d), cannot add more" % constants.MAX_NICS,
13368 errors.ECODE_STATE)
13370 # Verify disk changes (operating on a copy)
13371 disks = instance.disks[:]
13372 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13373 if len(disks) > constants.MAX_DISKS:
13374 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13375 " more" % constants.MAX_DISKS,
13376 errors.ECODE_STATE)
13377 disk_sizes = [disk.size for disk in instance.disks]
13378 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13379 self.diskmod if op == constants.DDM_ADD)
13380 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13381 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13383 if self.op.offline is not None:
13384 if self.op.offline:
13385 msg = "can't change to offline"
13387 msg = "can't change to online"
13388 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13390 # Pre-compute NIC changes (necessary to use result in hooks)
13391 self._nic_chgdesc = []
13393 # Operate on copies as this is still in prereq
13394 nics = [nic.Copy() for nic in instance.nics]
13395 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13396 self._CreateNewNic, self._ApplyNicMods, None)
13397 self._new_nics = nics
13398 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13400 self._new_nics = None
13401 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13403 if not self.op.ignore_ipolicy:
13404 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13407 # Fill ispec with backend parameters
13408 ispec[constants.ISPEC_SPINDLE_USE] = \
13409 self.be_new.get(constants.BE_SPINDLE_USE, None)
13410 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13413 # Copy ispec to verify parameters with min/max values separately
13414 ispec_max = ispec.copy()
13415 ispec_max[constants.ISPEC_MEM_SIZE] = \
13416 self.be_new.get(constants.BE_MAXMEM, None)
13417 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13418 ispec_min = ispec.copy()
13419 ispec_min[constants.ISPEC_MEM_SIZE] = \
13420 self.be_new.get(constants.BE_MINMEM, None)
13421 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13423 if (res_max or res_min):
13424 # FIXME: Improve error message by including information about whether
13425 # the upper or lower limit of the parameter fails the ipolicy.
13426 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13427 (group_info, group_info.name,
13428 utils.CommaJoin(set(res_max + res_min))))
13429 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
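# Shape of the instance spec checked against the group policy above
# (numbers hypothetical):
#   ispec = {constants.ISPEC_DISK_COUNT: 1,
#            constants.ISPEC_DISK_SIZE: [10240],
#            constants.ISPEC_NIC_COUNT: 1,
#            constants.ISPEC_CPU_COUNT: 2,
#            constants.ISPEC_SPINDLE_USE: 1}
# ispec_min/ispec_max additionally carry ISPEC_MEM_SIZE taken from the
# proposed BE_MINMEM/BE_MAXMEM, so both ends of the memory range are checked.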
13431 def _ConvertPlainToDrbd(self, feedback_fn):
13432 """Converts an instance from plain to drbd.
13435 feedback_fn("Converting template to drbd")
13436 instance = self.instance
13437 pnode = instance.primary_node
13438 snode = self.op.remote_node
13440 assert instance.disk_template == constants.DT_PLAIN
13442 # create a fake disk info for _GenerateDiskTemplate
13443 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13444 constants.IDISK_VG: d.logical_id[0]}
13445 for d in instance.disks]
13446 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13447 instance.name, pnode, [snode],
13448 disk_info, None, None, 0, feedback_fn,
13450 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13452 info = _GetInstanceInfoText(instance)
13453 feedback_fn("Creating additional volumes...")
13454 # first, create the missing data and meta devices
13455 for disk in anno_disks:
13456 # unfortunately this is... not too nice
13457 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13459 for child in disk.children:
13460 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13461 # at this stage, all new LVs have been created, we can rename the
13463 feedback_fn("Renaming original volumes...")
13464 rename_list = [(o, n.children[0].logical_id)
13465 for (o, n) in zip(instance.disks, new_disks)]
13466 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13467 result.Raise("Failed to rename original LVs")
13469 feedback_fn("Initializing DRBD devices...")
13470 # all child devices are in place, we can now create the DRBD devices
13471 for disk in anno_disks:
13472 for node in [pnode, snode]:
13473 f_create = node == pnode
13474 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13476 # at this point, the instance has been modified
13477 instance.disk_template = constants.DT_DRBD8
13478 instance.disks = new_disks
13479 self.cfg.Update(instance, feedback_fn)
13481 # Release node locks while waiting for sync
13482 _ReleaseLocks(self, locking.LEVEL_NODE)
13484 # disks are created, waiting for sync
13485 disk_abort = not _WaitForSync(self, instance,
13486 oneshot=not self.op.wait_for_sync)
13488 raise errors.OpExecError("There are some degraded disks for"
13489 " this instance, please cleanup manually")
13491 # Node resource locks will be released by caller
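# Summary of the plain -> drbd conversion above (single disk, names
# illustrative): the existing LV becomes the DRBD data device on the primary,
# a meta LV is created next to it, matching data/meta LVs are created on the
# new secondary, and a DRBD8 device is assembled on top on both nodes.
# Typically reached via "gnt-instance modify -t drbd -n <secondary> <name>".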
13493 def _ConvertDrbdToPlain(self, feedback_fn):
13494 """Converts an instance from drbd to plain.
13497 instance = self.instance
13499 assert len(instance.secondary_nodes) == 1
13500 assert instance.disk_template == constants.DT_DRBD8
13502 pnode = instance.primary_node
13503 snode = instance.secondary_nodes[0]
13504 feedback_fn("Converting template to plain")
13506 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13507 new_disks = [d.children[0] for d in instance.disks]
13509 # copy over size and mode
13510 for parent, child in zip(old_disks, new_disks):
13511 child.size = parent.size
13512 child.mode = parent.mode
13514 # this is a DRBD disk, return its port to the pool
13515 # NOTE: this must be done right before the call to cfg.Update!
13516 for disk in old_disks:
13517 tcp_port = disk.logical_id[2]
13518 self.cfg.AddTcpUdpPort(tcp_port)
13520 # update instance structure
13521 instance.disks = new_disks
13522 instance.disk_template = constants.DT_PLAIN
13523 self.cfg.Update(instance, feedback_fn)
13525 # Release locks in case removing disks takes a while
13526 _ReleaseLocks(self, locking.LEVEL_NODE)
13528 feedback_fn("Removing volumes on the secondary node...")
13529 for disk in old_disks:
13530 self.cfg.SetDiskID(disk, snode)
13531 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13533 self.LogWarning("Could not remove block device %s on node %s,"
13534 " continuing anyway: %s", disk.iv_name, snode, msg)
13536 feedback_fn("Removing unneeded volumes on the primary node...")
13537 for idx, disk in enumerate(old_disks):
13538 meta = disk.children[1]
13539 self.cfg.SetDiskID(meta, pnode)
13540 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13542 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13543 " continuing anyway: %s", idx, pnode, msg)
13545 def _CreateNewDisk(self, idx, params, _):
13546 """Creates a new disk.
13549 instance = self.instance
13552 if instance.disk_template in constants.DTS_FILEBASED:
13553 (file_driver, file_path) = instance.disks[0].logical_id
13554 file_path = os.path.dirname(file_path)
13556 file_driver = file_path = None
13559 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13560 instance.primary_node, instance.secondary_nodes,
13561 [params], file_path, file_driver, idx,
13562 self.Log, self.diskparams)[0]
13564 info = _GetInstanceInfoText(instance)
13566 logging.info("Creating volume %s for instance %s",
13567 disk.iv_name, instance.name)
13568 # Note: this needs to be kept in sync with _CreateDisks
13570 for node in instance.all_nodes:
13571 f_create = (node == instance.primary_node)
13573 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13574 except errors.OpExecError, err:
13575 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13576 disk.iv_name, disk, node, err)
13579 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13583 def _ModifyDisk(idx, disk, params, _):
13584 """Modifies a disk.
13587 disk.mode = params[constants.IDISK_MODE]
13590 ("disk.mode/%d" % idx, disk.mode),
13593 def _RemoveDisk(self, idx, root, _):
13597 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13598 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13599 self.cfg.SetDiskID(disk, node)
13600 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13602 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13603 " continuing anyway", idx, node, msg)
13605 # if this is a DRBD disk, return its port to the pool
13606 if root.dev_type in constants.LDS_DRBD:
13607 self.cfg.AddTcpUdpPort(root.logical_id[2])
13610 def _CreateNewNic(idx, params, private):
13611 """Creates data structure for a new network interface.
13614 mac = params[constants.INIC_MAC]
13615 ip = params.get(constants.INIC_IP, None)
13616 net = params.get(constants.INIC_NETWORK, None)
13617 #TODO: not private.filled?? can a nic have no nicparams??
13618 nicparams = private.filled
13620 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
13622 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13623 (mac, ip, private.filled[constants.NIC_MODE],
13624 private.filled[constants.NIC_LINK],
13629 def _ApplyNicMods(idx, nic, params, private):
13630 """Modifies a network interface.
13635 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13637 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13638 setattr(nic, key, params[key])
13641 nic.nicparams = private.filled
13643 for (key, val) in nic.nicparams.items():
13644 changes.append(("nic.%s/%d" % (key, idx), val))
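# The disk/NIC callbacks above report their work as (field, value) pairs
# that end up in the opcode result, e.g. (values hypothetical):
#   ("disk/1", "add:size=1024,mode=rw")
#   ("nic.ip/0", "198.51.100.5")
#   ("nic.mode/0", "bridged")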
13648 def Exec(self, feedback_fn):
13649 """Modifies an instance.
13651 All parameters take effect only at the next restart of the instance.
13654 # Process here the warnings from CheckPrereq, as we don't have a
13655 # feedback_fn there.
13656 # TODO: Replace with self.LogWarning
13657 for warn in self.warn:
13658 feedback_fn("WARNING: %s" % warn)
13660 assert ((self.op.disk_template is None) ^
13661 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13662 "Not owning any node resource locks"
13665 instance = self.instance
13668 if self.op.runtime_mem:
13669 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13671 self.op.runtime_mem)
13672 rpcres.Raise("Cannot modify instance runtime memory")
13673 result.append(("runtime_memory", self.op.runtime_mem))
13675 # Apply disk changes
13676 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13677 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13678 _UpdateIvNames(0, instance.disks)
13680 if self.op.disk_template:
13682 check_nodes = set(instance.all_nodes)
13683 if self.op.remote_node:
13684 check_nodes.add(self.op.remote_node)
13685 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13686 owned = self.owned_locks(level)
13687 assert not (check_nodes - owned), \
13688 ("Not owning the correct locks, owning %r, expected at least %r" %
13689 (owned, check_nodes))
13691 r_shut = _ShutdownInstanceDisks(self, instance)
13693 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13694 " proceed with disk template conversion")
13695 mode = (instance.disk_template, self.op.disk_template)
13697 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13699 self.cfg.ReleaseDRBDMinors(instance.name)
13701 result.append(("disk_template", self.op.disk_template))
13703 assert instance.disk_template == self.op.disk_template, \
13704 ("Expected disk template '%s', found '%s'" %
13705 (self.op.disk_template, instance.disk_template))
13707 # Release node and resource locks if there are any (they might already have
13708 # been released during disk conversion)
13709 _ReleaseLocks(self, locking.LEVEL_NODE)
13710 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13712 # Apply NIC changes
13713 if self._new_nics is not None:
13714 instance.nics = self._new_nics
13715 result.extend(self._nic_chgdesc)
13718 if self.op.hvparams:
13719 instance.hvparams = self.hv_inst
13720 for key, val in self.op.hvparams.iteritems():
13721 result.append(("hv/%s" % key, val))
13724 if self.op.beparams:
13725 instance.beparams = self.be_inst
13726 for key, val in self.op.beparams.iteritems():
13727 result.append(("be/%s" % key, val))
13730 if self.op.os_name:
13731 instance.os = self.op.os_name
13734 if self.op.osparams:
13735 instance.osparams = self.os_inst
13736 for key, val in self.op.osparams.iteritems():
13737 result.append(("os/%s" % key, val))
13739 if self.op.offline is None:
13742 elif self.op.offline:
13743 # Mark instance as offline
13744 self.cfg.MarkInstanceOffline(instance.name)
13745 result.append(("admin_state", constants.ADMINST_OFFLINE))
13747 # Mark instance as online, but stopped
13748 self.cfg.MarkInstanceDown(instance.name)
13749 result.append(("admin_state", constants.ADMINST_DOWN))
13751 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13753 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13754 self.owned_locks(locking.LEVEL_NODE)), \
13755 "All node locks should have been released by now"
13759 _DISK_CONVERSIONS = {
13760 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13761 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13765 class LUInstanceChangeGroup(LogicalUnit):
13766 HPATH = "instance-change-group"
13767 HTYPE = constants.HTYPE_INSTANCE
13770 def ExpandNames(self):
13771 self.share_locks = _ShareAll()
13773 self.needed_locks = {
13774 locking.LEVEL_NODEGROUP: [],
13775 locking.LEVEL_NODE: [],
13776 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
13779 self._ExpandAndLockInstance()
13781 if self.op.target_groups:
13782 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13783 self.op.target_groups)
13785 self.req_target_uuids = None
13787 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13789 def DeclareLocks(self, level):
13790 if level == locking.LEVEL_NODEGROUP:
13791 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13793 if self.req_target_uuids:
13794 lock_groups = set(self.req_target_uuids)
13796 # Lock all groups used by instance optimistically; this requires going
13797 # via the node before it's locked, requiring verification later on
13798 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13799 lock_groups.update(instance_groups)
13801 # No target groups, need to lock all of them
13802 lock_groups = locking.ALL_SET
13804 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13806 elif level == locking.LEVEL_NODE:
13807 if self.req_target_uuids:
13808 # Lock all nodes used by instances
13809 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13810 self._LockInstancesNodes()
13812 # Lock all nodes in all potential target groups
13813 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13814 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13815 member_nodes = [node_name
13816 for group in lock_groups
13817 for node_name in self.cfg.GetNodeGroup(group).members]
13818 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13820 # Lock all nodes as all groups are potential targets
13821 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13823 def CheckPrereq(self):
13824 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13825 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13826 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13828 assert (self.req_target_uuids is None or
13829 owned_groups.issuperset(self.req_target_uuids))
13830 assert owned_instances == set([self.op.instance_name])
13832 # Get instance information
13833 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13835 # Check if node groups for locked instance are still correct
13836 assert owned_nodes.issuperset(self.instance.all_nodes), \
13837 ("Instance %s's nodes changed while we kept the lock" %
13838 self.op.instance_name)
13840 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13843 if self.req_target_uuids:
13844 # User requested specific target groups
13845 self.target_uuids = frozenset(self.req_target_uuids)
13847 # All groups except those used by the instance are potential targets
13848 self.target_uuids = owned_groups - inst_groups
13850 conflicting_groups = self.target_uuids & inst_groups
13851 if conflicting_groups:
13852 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13853 " used by the instance '%s'" %
13854 (utils.CommaJoin(conflicting_groups),
13855 self.op.instance_name),
13856 errors.ECODE_INVAL)
13858 if not self.target_uuids:
13859 raise errors.OpPrereqError("There are no possible target groups",
13860 errors.ECODE_INVAL)
13862 def BuildHooksEnv(self):
13863 """Build hooks env.
13866 assert self.target_uuids
13869 "TARGET_GROUPS": " ".join(self.target_uuids),
13872 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13876 def BuildHooksNodes(self):
13877 """Build hooks nodes.
13880 mn = self.cfg.GetMasterNode()
13881 return ([mn], [mn])
13883 def Exec(self, feedback_fn):
13884 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13886 assert instances == [self.op.instance_name], "Instance not locked"
13888 req = iallocator.IAReqGroupChange(instances=instances,
13889 target_groups=list(self.target_uuids))
13890 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13892 ial.Run(self.op.iallocator)
13894 if not ial.success:
13895 raise errors.OpPrereqError("Can't compute solution for changing group of"
13896 " instance '%s' using iallocator '%s': %s" %
13897 (self.op.instance_name, self.op.iallocator,
13898 ial.info), errors.ECODE_NORES)
13900 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13902 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13903 " instance '%s'", len(jobs), self.op.instance_name)
13905 return ResultWithJobs(jobs)
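# Result sketch (names hypothetical): for target group UUIDs
# ["uuid-a", "uuid-b"] the iallocator produces an evacuation-style solution
# which _LoadNodeEvacResult converts into per-instance job sets, e.g.
#   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)]]
# these are then handed back wrapped in ResultWithJobs above.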
13908 class LUBackupQuery(NoHooksLU):
13909 """Query the exports list
13914 def CheckArguments(self):
13915 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13916 ["node", "export"], self.op.use_locking)
13918 def ExpandNames(self):
13919 self.expq.ExpandNames(self)
13921 def DeclareLocks(self, level):
13922 self.expq.DeclareLocks(self, level)
13924 def Exec(self, feedback_fn):
13927 for (node, expname) in self.expq.OldStyleQuery(self):
13928 if expname is None:
13929 result[node] = False
13931 result.setdefault(node, []).append(expname)
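# Old-style result assembled above, e.g. (hypothetical):
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}
# where False marks a node whose export list could not be retrieved.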
13936 class _ExportQuery(_QueryBase):
13937 FIELDS = query.EXPORT_FIELDS
13939 #: The node name is not a unique key for this query
13940 SORT_FIELD = "node"
13942 def ExpandNames(self, lu):
13943 lu.needed_locks = {}
13945 # The following variables interact with _QueryBase._GetNames
13947 self.wanted = _GetWantedNodes(lu, self.names)
13949 self.wanted = locking.ALL_SET
13951 self.do_locking = self.use_locking
13953 if self.do_locking:
13954 lu.share_locks = _ShareAll()
13955 lu.needed_locks = {
13956 locking.LEVEL_NODE: self.wanted,
13960 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
13962 def DeclareLocks(self, lu, level):
13965 def _GetQueryData(self, lu):
13966 """Computes the list of nodes and their attributes.
13969 # Locking is not used
13971 assert not (compat.any(lu.glm.is_owned(level)
13972 for level in locking.LEVELS
13973 if level != locking.LEVEL_CLUSTER) or
13974 self.do_locking or self.use_locking)
13976 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13980 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13982 result.append((node, None))
13984 result.extend((node, expname) for expname in nres.payload)
13989 class LUBackupPrepare(NoHooksLU):
13990 """Prepares an instance for an export and returns useful information.
13995 def ExpandNames(self):
13996 self._ExpandAndLockInstance()
13998 def CheckPrereq(self):
13999 """Check prerequisites.
14002 instance_name = self.op.instance_name
14004 self.instance = self.cfg.GetInstanceInfo(instance_name)
14005 assert self.instance is not None, \
14006 "Cannot retrieve locked instance %s" % self.op.instance_name
14007 _CheckNodeOnline(self, self.instance.primary_node)
14009 self._cds = _GetClusterDomainSecret()
14011 def Exec(self, feedback_fn):
14012 """Prepares an instance for an export.
14015 instance = self.instance
14017 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14018 salt = utils.GenerateSecret(8)
14020 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14021 result = self.rpc.call_x509_cert_create(instance.primary_node,
14022 constants.RIE_CERT_VALIDITY)
14023 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14025 (name, cert_pem) = result.payload
14027 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14031 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14032 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14034 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14040 class LUBackupExport(LogicalUnit):
14041 """Export an instance to an image in the cluster.
14044 HPATH = "instance-export"
14045 HTYPE = constants.HTYPE_INSTANCE
14048 def CheckArguments(self):
14049 """Check the arguments.
14052 self.x509_key_name = self.op.x509_key_name
14053 self.dest_x509_ca_pem = self.op.destination_x509_ca
14055 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14056 if not self.x509_key_name:
14057 raise errors.OpPrereqError("Missing X509 key name for encryption",
14058 errors.ECODE_INVAL)
14060 if not self.dest_x509_ca_pem:
14061 raise errors.OpPrereqError("Missing destination X509 CA",
14062 errors.ECODE_INVAL)
14064 def ExpandNames(self):
14065 self._ExpandAndLockInstance()
14067 # Lock all nodes for local exports
14068 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14069 # FIXME: lock only instance primary and destination node
14071 # Sad but true, for now we have to lock all nodes, as we don't know where
14072 # the previous export might be, and in this LU we search for it and
14073 # remove it from its current node. In the future we could fix this by:
14074 # - making a tasklet to search (share-lock all), then create the
14075 # new one, then one to remove, after
14076 # - removing the removal operation altogether
14077 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14079 # Allocations should be stopped while this LU runs with node locks, but
14080 # it doesn't have to be exclusive
14081 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14082 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14084 def DeclareLocks(self, level):
14085 """Last minute lock declaration."""
14086 # All nodes are locked anyway, so nothing to do here.
14088 def BuildHooksEnv(self):
14089 """Build hooks env.
14091 This will run on the master, primary node and target node.
14095 "EXPORT_MODE": self.op.mode,
14096 "EXPORT_NODE": self.op.target_node,
14097 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14098 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14099 # TODO: Generic function for boolean env variables
14100 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14103 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14107 def BuildHooksNodes(self):
14108 """Build hooks nodes.
14111 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14113 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14114 nl.append(self.op.target_node)
14118 def CheckPrereq(self):
14119 """Check prerequisites.
14121 This checks that the instance and node names are valid.
14124 instance_name = self.op.instance_name
14126 self.instance = self.cfg.GetInstanceInfo(instance_name)
14127 assert self.instance is not None, \
14128 "Cannot retrieve locked instance %s" % self.op.instance_name
14129 _CheckNodeOnline(self, self.instance.primary_node)
14131 if (self.op.remove_instance and
14132 self.instance.admin_state == constants.ADMINST_UP and
14133 not self.op.shutdown):
14134 raise errors.OpPrereqError("Can not remove instance without shutting it"
14135 " down before", errors.ECODE_STATE)
14137 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14138 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14139 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14140 assert self.dst_node is not None
14142 _CheckNodeOnline(self, self.dst_node.name)
14143 _CheckNodeNotDrained(self, self.dst_node.name)
14146 self.dest_disk_info = None
14147 self.dest_x509_ca = None
14149 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14150 self.dst_node = None
14152 if len(self.op.target_node) != len(self.instance.disks):
14153 raise errors.OpPrereqError(("Received destination information for %s"
14154 " disks, but instance %s has %s disks") %
14155 (len(self.op.target_node), instance_name,
14156 len(self.instance.disks)),
14157 errors.ECODE_INVAL)
14159 cds = _GetClusterDomainSecret()
14161 # Check X509 key name
14163 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14164 except (TypeError, ValueError), err:
14165 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14166 errors.ECODE_INVAL)
14168 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14169 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14170 errors.ECODE_INVAL)
14172 # Load and verify CA
14174 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14175 except OpenSSL.crypto.Error, err:
14176 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14177 (err, ), errors.ECODE_INVAL)
14179 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14180 if errcode is not None:
14181 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14182 (msg, ), errors.ECODE_INVAL)
14184 self.dest_x509_ca = cert
14186 # Verify target information
14188 for idx, disk_data in enumerate(self.op.target_node):
14190 (host, port, magic) = \
14191 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14192 except errors.GenericError, err:
14193 raise errors.OpPrereqError("Target info for disk %s: %s" %
14194 (idx, err), errors.ECODE_INVAL)
14196 disk_info.append((host, port, magic))
14198 assert len(disk_info) == len(self.op.target_node)
14199 self.dest_disk_info = disk_info
14202 raise errors.ProgrammerError("Unhandled export mode %r" %
14205 # instance disk type verification
14206 # TODO: Implement export support for file-based disks
14207 for disk in self.instance.disks:
14208 if disk.dev_type == constants.LD_FILE:
14209 raise errors.OpPrereqError("Export not supported for instances with"
14210 " file-based disks", errors.ECODE_INVAL)
14212 def _CleanupExports(self, feedback_fn):
14213 """Removes exports of current instance from all other nodes.
14215 If an instance in a cluster with nodes A..D was exported to node C, its
14216 exports will be removed from the nodes A, B and D.
14219 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14221 nodelist = self.cfg.GetNodeList()
14222 nodelist.remove(self.dst_node.name)
14224 # on one-node clusters nodelist will be empty after the removal
14225 # if we proceed, the backup would be removed because OpBackupQuery
14226 # substitutes an empty list with the full cluster node list.
14227 iname = self.instance.name
14229 feedback_fn("Removing old exports for instance %s" % iname)
14230 exportlist = self.rpc.call_export_list(nodelist)
14231 for node in exportlist:
14232 if exportlist[node].fail_msg:
14234 if iname in exportlist[node].payload:
14235 msg = self.rpc.call_export_remove(node, iname).fail_msg
14237 self.LogWarning("Could not remove older export for instance %s"
14238 " on node %s: %s", iname, node, msg)
14240 def Exec(self, feedback_fn):
14241 """Export an instance to an image in the cluster.
14244 assert self.op.mode in constants.EXPORT_MODES
14246 instance = self.instance
14247 src_node = instance.primary_node
14249 if self.op.shutdown:
14250 # shutdown the instance, but not the disks
14251 feedback_fn("Shutting down instance %s" % instance.name)
14252 result = self.rpc.call_instance_shutdown(src_node, instance,
14253 self.op.shutdown_timeout)
14254 # TODO: Maybe ignore failures if ignore_remove_failures is set
14255 result.Raise("Could not shutdown instance %s on"
14256 " node %s" % (instance.name, src_node))
14258 # set the disks ID correctly since call_instance_start needs the
14259 # correct drbd minor to create the symlinks
14260 for disk in instance.disks:
14261 self.cfg.SetDiskID(disk, src_node)
14263 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14266 # Activate the instance disks if we're exporting a stopped instance
14267 feedback_fn("Activating disks for %s" % instance.name)
14268 _StartInstanceDisks(self, instance, None)
14271 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14274 helper.CreateSnapshots()
14276 if (self.op.shutdown and
14277 instance.admin_state == constants.ADMINST_UP and
14278 not self.op.remove_instance):
14279 assert not activate_disks
14280 feedback_fn("Starting instance %s" % instance.name)
14281 result = self.rpc.call_instance_start(src_node,
14282 (instance, None, None), False)
14283 msg = result.fail_msg
14285 feedback_fn("Failed to start instance: %s" % msg)
14286 _ShutdownInstanceDisks(self, instance)
14287 raise errors.OpExecError("Could not start instance: %s" % msg)
14289 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14290 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14291 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14292 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14293 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14295 (key_name, _, _) = self.x509_key_name
14298 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14301 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14302 key_name, dest_ca_pem,
14307 # Check for backwards compatibility
14308 assert len(dresults) == len(instance.disks)
14309 assert compat.all(isinstance(i, bool) for i in dresults), \
14310 "Not all results are boolean: %r" % dresults
14314 feedback_fn("Deactivating disks for %s" % instance.name)
14315 _ShutdownInstanceDisks(self, instance)
14317 if not (compat.all(dresults) and fin_resu):
14320 failures.append("export finalization")
14321 if not compat.all(dresults):
14322 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14324 failures.append("disk export: disk(s) %s" % fdsk)
14326 raise errors.OpExecError("Export failed, errors in %s" %
14327 utils.CommaJoin(failures))
14329 # At this point, the export was successful, we can cleanup/finish
14331 # Remove instance if requested
14332 if self.op.remove_instance:
14333 feedback_fn("Removing instance %s" % instance.name)
14334 _RemoveInstance(self, feedback_fn, instance,
14335 self.op.ignore_remove_failures)
14337 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14338 self._CleanupExports(feedback_fn)
14340 return fin_resu, dresults
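# Return value sketch (hypothetical): (True, [True, True]) means the export
# finalized and both disks were copied successfully, while (True,
# [True, False]) would have raised above with "disk export: disk(s) 1".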
14343 class LUBackupRemove(NoHooksLU):
14344 """Remove exports related to the named instance.
14349 def ExpandNames(self):
14350 self.needed_locks = {
14351 # We need all nodes to be locked in order for RemoveExport to work, but
14352 # we don't need to lock the instance itself, as nothing will happen to it
14353 # (and we can remove exports also for a removed instance)
14354 locking.LEVEL_NODE: locking.ALL_SET,
14356 # Removing backups is quick, so blocking allocations is justified
14357 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14360 # Allocations should be stopped while this LU runs with node locks, but it
14361 # doesn't have to be exclusive
14362 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14364 def Exec(self, feedback_fn):
14365 """Remove any export.
14368 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14369 # If the instance was not found we'll try with the name that was passed in.
14370 # This will only work if it was an FQDN, though.
14372 if not instance_name:
14374 instance_name = self.op.instance_name
14376 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14377 exportlist = self.rpc.call_export_list(locked_nodes)
14379 for node in exportlist:
14380 msg = exportlist[node].fail_msg
14382 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14384 if instance_name in exportlist[node].payload:
14386 result = self.rpc.call_export_remove(node, instance_name)
14387 msg = result.fail_msg
14389 logging.error("Could not remove export for instance %s"
14390 " on node %s: %s", instance_name, node, msg)
14392 if fqdn_warn and not found:
14393 feedback_fn("Export not found. If trying to remove an export belonging"
14394 " to a deleted instance please use its Fully Qualified"
14398 class LUGroupAdd(LogicalUnit):
14399 """Logical unit for creating node groups.
14402 HPATH = "group-add"
14403 HTYPE = constants.HTYPE_GROUP
14406 def ExpandNames(self):
14407 # We need the new group's UUID here so that we can create and acquire the
14408 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14409 # that it should not check whether the UUID exists in the configuration.
14410 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14411 self.needed_locks = {}
14412 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14414 def CheckPrereq(self):
14415 """Check prerequisites.
14417 This checks that the given group name is not an existing node group
14422 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14423 except errors.OpPrereqError:
14426 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14427 " node group (UUID: %s)" %
14428 (self.op.group_name, existing_uuid),
14429 errors.ECODE_EXISTS)
14431 if self.op.ndparams:
14432 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14434 if self.op.hv_state:
14435 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14437 self.new_hv_state = None
14439 if self.op.disk_state:
14440 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14442 self.new_disk_state = None
14444 if self.op.diskparams:
14445 for templ in constants.DISK_TEMPLATES:
14446 if templ in self.op.diskparams:
14447 utils.ForceDictType(self.op.diskparams[templ],
14448 constants.DISK_DT_TYPES)
14449 self.new_diskparams = self.op.diskparams
14451 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14452 except errors.OpPrereqError, err:
14453 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14454 errors.ECODE_INVAL)
14456 self.new_diskparams = {}
14458 if self.op.ipolicy:
14459 cluster = self.cfg.GetClusterInfo()
14460 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14462 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14463 except errors.ConfigurationError, err:
14464 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14465 errors.ECODE_INVAL)
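# A minimal opcode accepted by the checks above (field values hypothetical):
#   opcodes.OpGroupAdd(group_name="rack1",
#                      alloc_policy=constants.ALLOC_POLICY_PREFERRED)
# optional ndparams/diskparams/ipolicy/hv_state/disk_state are validated
# here before the group object is created in Exec.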
14467 def BuildHooksEnv(self):
14468 """Build hooks env.
14472 "GROUP_NAME": self.op.group_name,
14475 def BuildHooksNodes(self):
14476 """Build hooks nodes.
14479 mn = self.cfg.GetMasterNode()
14480 return ([mn], [mn])
14482 def Exec(self, feedback_fn):
14483 """Add the node group to the cluster.
14486 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14487 uuid=self.group_uuid,
14488 alloc_policy=self.op.alloc_policy,
14489 ndparams=self.op.ndparams,
14490 diskparams=self.new_diskparams,
14491 ipolicy=self.op.ipolicy,
14492 hv_state_static=self.new_hv_state,
14493 disk_state_static=self.new_disk_state)
14495 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14496 del self.remove_locks[locking.LEVEL_NODEGROUP]
14499 class LUGroupAssignNodes(NoHooksLU):
14500 """Logical unit for assigning nodes to groups.
14505 def ExpandNames(self):
14506 # These raise errors.OpPrereqError on their own:
14507 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14508 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14510 # We want to lock all the affected nodes and groups. We have readily
14511 # available the list of nodes, and the *destination* group. To gather the
14512 # list of "source" groups, we need to fetch node information later on.
14513 self.needed_locks = {
14514 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14515 locking.LEVEL_NODE: self.op.nodes,
14518 def DeclareLocks(self, level):
14519 if level == locking.LEVEL_NODEGROUP:
14520 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14522 # Try to get all affected nodes' groups without having the group or node
14523 # lock yet. Needs verification later in the code flow.
14524 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14526 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14528 def CheckPrereq(self):
14529 """Check prerequisites.
14532 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14533 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14534 frozenset(self.op.nodes))
14536 expected_locks = (set([self.group_uuid]) |
14537 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14538 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14539 if actual_locks != expected_locks:
14540 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14541 " current groups are '%s', used to be '%s'" %
14542 (utils.CommaJoin(expected_locks),
14543 utils.CommaJoin(actual_locks)))
14545 self.node_data = self.cfg.GetAllNodesInfo()
14546 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14547 instance_data = self.cfg.GetAllInstancesInfo()
14549 if self.group is None:
14550 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14551 (self.op.group_name, self.group_uuid))
14553 (new_splits, previous_splits) = \
14554 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14555 for node in self.op.nodes],
14556 self.node_data, instance_data)
14558 if new_splits:
14559 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14561 if not self.op.force:
14562 raise errors.OpExecError("The following instances get split by this"
14563 " change and --force was not given: %s" %
14564 fmt_new_splits)
14565 else:
14566 self.LogWarning("This operation will split the following instances: %s",
14567 fmt_new_splits)
14569 if previous_splits:
14570 self.LogWarning("In addition, these already-split instances continue"
14571 " to be split across groups: %s",
14572 utils.CommaJoin(utils.NiceSort(previous_splits)))
14574 def Exec(self, feedback_fn):
14575 """Assign nodes to a new group.
14578 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14580 self.cfg.AssignGroupNodes(mods)
14582 @staticmethod
14583 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14584 """Check for split instances after a node assignment.
14586 This method considers a series of node assignments as an atomic operation,
14587 and returns information about split instances after applying the set of
14590 In particular, it returns information about newly split instances, and
14591 instances that were already split, and remain so after the change.
14593 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14596 @type changes: list of (node_name, new_group_uuid) pairs.
14597 @param changes: list of node assignments to consider.
14598 @param node_data: a dict with data for all nodes
14599 @param instance_data: a dict with all instances to consider
14600 @rtype: a two-tuple
14601 @return: a list of instances that were previously okay and result split as a
14602 consequence of this change, and a list of instances that were previously
14603 split and this change does not fix.
14606 changed_nodes = dict((node, group) for node, group in changes
14607 if node_data[node].group != group)
14609 all_split_instances = set()
14610 previously_split_instances = set()
14612 def InstanceNodes(instance):
14613 return [instance.primary_node] + list(instance.secondary_nodes)
14615 for inst in instance_data.values():
14616 if inst.disk_template not in constants.DTS_INT_MIRROR:
14619 instance_nodes = InstanceNodes(inst)
14621 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14622 previously_split_instances.add(inst.name)
14624 if len(set(changed_nodes.get(node, node_data[node].group)
14625 for node in instance_nodes)) > 1:
14626 all_split_instances.add(inst.name)
14628 return (list(all_split_instances - previously_split_instances),
14629 list(previously_split_instances & all_split_instances))
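# Worked example of the split classification above (hypothetical data, kept as
# a self-contained sketch; it is not used by LUGroupAssignNodes itself).
def _ExampleSplitClassification():
  """Mirrors CheckAssignmentForSplitInstances on plain dictionaries."""
  node_group = {"A": "g1", "B": "g2", "C": "g1"}     # current node->group map
  instances = {"web": ["A", "B"], "db": ["A", "C"]}  # mirrored instance nodes
  changes = {"C": "g2"}                              # proposed node moves

  def Groups(nodes, overrides):
    return set(overrides.get(node, node_group[node]) for node in nodes)

  previously = set(name for name, nodes in instances.items()
                   if len(Groups(nodes, {})) > 1)
  afterwards = set(name for name, nodes in instances.items()
                   if len(Groups(nodes, changes)) > 1)

  # "db" becomes newly split, "web" was already split and remains so
  return (sorted(afterwards - previously), sorted(previously & afterwards))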
14632 class _GroupQuery(_QueryBase):
14633 FIELDS = query.GROUP_FIELDS
14635 def ExpandNames(self, lu):
14636 lu.needed_locks = {}
14638 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14639 self._cluster = lu.cfg.GetClusterInfo()
14640 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14642 if not self.names:
14643 self.wanted = [name_to_uuid[name]
14644 for name in utils.NiceSort(name_to_uuid.keys())]
14645 else:
14646 # Accept names to be either names or UUIDs.
14647 missing = []
14648 self.wanted = []
14649 all_uuid = frozenset(self._all_groups.keys())
14651 for name in self.names:
14652 if name in all_uuid:
14653 self.wanted.append(name)
14654 elif name in name_to_uuid:
14655 self.wanted.append(name_to_uuid[name])
14656 else:
14657 missing.append(name)
14659 if missing:
14660 raise errors.OpPrereqError("Some groups do not exist: %s" %
14661 utils.CommaJoin(missing),
14662 errors.ECODE_NOENT)
14664 def DeclareLocks(self, lu, level):
14667 def _GetQueryData(self, lu):
14668 """Computes the list of node groups and their attributes.
14671 do_nodes = query.GQ_NODE in self.requested_data
14672 do_instances = query.GQ_INST in self.requested_data
14674 group_to_nodes = None
14675 group_to_instances = None
14677 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14678 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14679 # latter GetAllInstancesInfo() is not enough, for we have to go through
14680 # instance->node. Hence, we will need to process nodes even if we only need
14681 # instance information.
14682 if do_nodes or do_instances:
14683 all_nodes = lu.cfg.GetAllNodesInfo()
14684 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14685 node_to_group = {}
14687 for node in all_nodes.values():
14688 if node.group in group_to_nodes:
14689 group_to_nodes[node.group].append(node.name)
14690 node_to_group[node.name] = node.group
14692 if do_instances:
14693 all_instances = lu.cfg.GetAllInstancesInfo()
14694 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14696 for instance in all_instances.values():
14697 node = instance.primary_node
14698 if node in node_to_group:
14699 group_to_instances[node_to_group[node]].append(instance.name)
14701 if not do_nodes:
14702 # Do not pass on node information if it was not requested.
14703 group_to_nodes = None
14705 return query.GroupQueryData(self._cluster,
14706 [self._all_groups[uuid]
14707 for uuid in self.wanted],
14708 group_to_nodes, group_to_instances,
14709 query.GQ_DISKPARAMS in self.requested_data)
14712 class LUGroupQuery(NoHooksLU):
14713 """Logical unit for querying node groups.
14718 def CheckArguments(self):
14719 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14720 self.op.output_fields, False)
14722 def ExpandNames(self):
14723 self.gq.ExpandNames(self)
14725 def DeclareLocks(self, level):
14726 self.gq.DeclareLocks(self, level)
14728 def Exec(self, feedback_fn):
14729 return self.gq.OldStyleQuery(self)
14732 class LUGroupSetParams(LogicalUnit):
14733 """Modifies the parameters of a node group.
14736 HPATH = "group-modify"
14737 HTYPE = constants.HTYPE_GROUP
14740 def CheckArguments(self):
14741 all_changes = [
14742 self.op.ndparams,
14743 self.op.diskparams,
14744 self.op.alloc_policy,
14745 self.op.hv_state,
14746 self.op.disk_state,
14747 self.op.ipolicy,
14748 ]
14750 if all_changes.count(None) == len(all_changes):
14751 raise errors.OpPrereqError("Please pass at least one modification",
14752 errors.ECODE_INVAL)
14754 def ExpandNames(self):
14755 # This raises errors.OpPrereqError on its own:
14756 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14758 self.needed_locks = {
14759 locking.LEVEL_INSTANCE: [],
14760 locking.LEVEL_NODEGROUP: [self.group_uuid],
14763 self.share_locks[locking.LEVEL_INSTANCE] = 1
14765 def DeclareLocks(self, level):
14766 if level == locking.LEVEL_INSTANCE:
14767 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14769 # Lock instances optimistically, needs verification once group lock has
14771 self.needed_locks[locking.LEVEL_INSTANCE] = \
14772 self.cfg.GetNodeGroupInstances(self.group_uuid)
14775 def _UpdateAndVerifyDiskParams(old, new):
14776 """Updates and verifies disk parameters.
14779 new_params = _GetUpdatedParams(old, new)
14780 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14783 def CheckPrereq(self):
14784 """Check prerequisites.
14787 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14789 # Check if locked instances are still correct
14790 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14792 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14793 cluster = self.cfg.GetClusterInfo()
14795 if self.group is None:
14796 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14797 (self.op.group_name, self.group_uuid))
14799 if self.op.ndparams:
14800 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14801 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14802 self.new_ndparams = new_ndparams
14804 if self.op.diskparams:
14805 diskparams = self.group.diskparams
14806 uavdp = self._UpdateAndVerifyDiskParams
14807 # For each disktemplate subdict update and verify the values
14808 new_diskparams = dict((dt,
14809 uavdp(diskparams.get(dt, {}),
14810 self.op.diskparams[dt]))
14811 for dt in constants.DISK_TEMPLATES
14812 if dt in self.op.diskparams)
14813 # As we've all subdicts of diskparams ready, lets merge the actual
14814 # dict with all updated subdicts
14815 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14816 try:
14817 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14818 except errors.OpPrereqError, err:
14819 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14820 errors.ECODE_INVAL)
14822 if self.op.hv_state:
14823 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14824 self.group.hv_state_static)
14826 if self.op.disk_state:
14827 self.new_disk_state = \
14828 _MergeAndVerifyDiskState(self.op.disk_state,
14829 self.group.disk_state_static)
14831 if self.op.ipolicy:
14832 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14836 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14837 inst_filter = lambda inst: inst.name in owned_instances
14838 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14839 gmi = ganeti.masterd.instance
14841 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
14843 new_ipolicy, instances)
14846 self.LogWarning("After the ipolicy change the following instances"
14847 " violate them: %s",
14848 utils.CommaJoin(violations))
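# Sketch of the per-template diskparams merge done in CheckPrereq above, with
# hypothetical values (the parameter names are only examples): submitted keys
# override the group's current subdict, unchanged keys are preserved, and the
# merged result is then checked against constants.DISK_DT_DEFAULTS.
#
#   current = {constants.DT_DRBD8: {"resync-rate": 1024, "metavg": "xenvg"}}
#   update  = {constants.DT_DRBD8: {"resync-rate": 2048}}
#   merged  = {constants.DT_DRBD8: {"resync-rate": 2048, "metavg": "xenvg"}}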
14850 def BuildHooksEnv(self):
14851 """Build hooks env.
14855 "GROUP_NAME": self.op.group_name,
14856 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14859 def BuildHooksNodes(self):
14860 """Build hooks nodes.
14863 mn = self.cfg.GetMasterNode()
14864 return ([mn], [mn])
14866 def Exec(self, feedback_fn):
14867 """Modifies the node group.
14872 if self.op.ndparams:
14873 self.group.ndparams = self.new_ndparams
14874 result.append(("ndparams", str(self.group.ndparams)))
14876 if self.op.diskparams:
14877 self.group.diskparams = self.new_diskparams
14878 result.append(("diskparams", str(self.group.diskparams)))
14880 if self.op.alloc_policy:
14881 self.group.alloc_policy = self.op.alloc_policy
14883 if self.op.hv_state:
14884 self.group.hv_state_static = self.new_hv_state
14886 if self.op.disk_state:
14887 self.group.disk_state_static = self.new_disk_state
14889 if self.op.ipolicy:
14890 self.group.ipolicy = self.new_ipolicy
14892 self.cfg.Update(self.group, feedback_fn)
14896 class LUGroupRemove(LogicalUnit):
14897 HPATH = "group-remove"
14898 HTYPE = constants.HTYPE_GROUP
14901 def ExpandNames(self):
14902 # This raises errors.OpPrereqError on its own:
14903 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14904 self.needed_locks = {
14905 locking.LEVEL_NODEGROUP: [self.group_uuid],
14908 def CheckPrereq(self):
14909 """Check prerequisites.
14911 This checks that the given group name exists as a node group, that it is
14912 empty (i.e., contains no nodes), and that it is not the last group in the
14913 cluster.
14916 # Verify that the group is empty.
14917 group_nodes = [node.name
14918 for node in self.cfg.GetAllNodesInfo().values()
14919 if node.group == self.group_uuid]
14922 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14924 (self.op.group_name,
14925 utils.CommaJoin(utils.NiceSort(group_nodes))),
14926 errors.ECODE_STATE)
14928 # Verify the cluster would not be left group-less.
14929 if len(self.cfg.GetNodeGroupList()) == 1:
14930 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
14931 " removed" % self.op.group_name,
14932 errors.ECODE_STATE)
14934 def BuildHooksEnv(self):
14935 """Build hooks env.
14939 "GROUP_NAME": self.op.group_name,
14942 def BuildHooksNodes(self):
14943 """Build hooks nodes.
14946 mn = self.cfg.GetMasterNode()
14947 return ([mn], [mn])
14949 def Exec(self, feedback_fn):
14950 """Remove the node group.
14954 self.cfg.RemoveNodeGroup(self.group_uuid)
14955 except errors.ConfigurationError:
14956 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14957 (self.op.group_name, self.group_uuid))
14959 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14962 class LUGroupRename(LogicalUnit):
14963 HPATH = "group-rename"
14964 HTYPE = constants.HTYPE_GROUP
14967 def ExpandNames(self):
14968 # This raises errors.OpPrereqError on its own:
14969 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14971 self.needed_locks = {
14972 locking.LEVEL_NODEGROUP: [self.group_uuid],
14975 def CheckPrereq(self):
14976 """Check prerequisites.
14978 Ensures requested new name is not yet used.
14981 try:
14982 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14983 except errors.OpPrereqError:
14984 pass
14985 else:
14986 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14987 " node group (UUID: %s)" %
14988 (self.op.new_name, new_name_uuid),
14989 errors.ECODE_EXISTS)
14991 def BuildHooksEnv(self):
14992 """Build hooks env.
14996 "OLD_NAME": self.op.group_name,
14997 "NEW_NAME": self.op.new_name,
15000 def BuildHooksNodes(self):
15001 """Build hooks nodes.
15004 mn = self.cfg.GetMasterNode()
15006 all_nodes = self.cfg.GetAllNodesInfo()
15007 all_nodes.pop(mn, None)
15010 run_nodes.extend(node.name for node in all_nodes.values()
15011 if node.group == self.group_uuid)
15013 return (run_nodes, run_nodes)
15015 def Exec(self, feedback_fn):
15016 """Rename the node group.
15019 group = self.cfg.GetNodeGroup(self.group_uuid)
15021 if group is None:
15022 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15023 (self.op.group_name, self.group_uuid))
15025 group.name = self.op.new_name
15026 self.cfg.Update(group, feedback_fn)
15028 return self.op.new_name
15031 class LUGroupEvacuate(LogicalUnit):
15032 HPATH = "group-evacuate"
15033 HTYPE = constants.HTYPE_GROUP
15036 def ExpandNames(self):
15037 # This raises errors.OpPrereqError on its own:
15038 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15040 if self.op.target_groups:
15041 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15042 self.op.target_groups)
15044 self.req_target_uuids = []
15046 if self.group_uuid in self.req_target_uuids:
15047 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15048 " as a target group (targets are %s)" %
15050 utils.CommaJoin(self.req_target_uuids)),
15051 errors.ECODE_INVAL)
15053 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15055 self.share_locks = _ShareAll()
15056 self.needed_locks = {
15057 locking.LEVEL_INSTANCE: [],
15058 locking.LEVEL_NODEGROUP: [],
15059 locking.LEVEL_NODE: [],
15062 def DeclareLocks(self, level):
15063 if level == locking.LEVEL_INSTANCE:
15064 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15066 # Lock instances optimistically, needs verification once node and group
15067 # locks have been acquired
15068 self.needed_locks[locking.LEVEL_INSTANCE] = \
15069 self.cfg.GetNodeGroupInstances(self.group_uuid)
15071 elif level == locking.LEVEL_NODEGROUP:
15072 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15074 if self.req_target_uuids:
15075 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15077 # Lock all groups used by instances optimistically; this requires going
15078 # via the node before it's locked, requiring verification later on
15079 lock_groups.update(group_uuid
15080 for instance_name in
15081 self.owned_locks(locking.LEVEL_INSTANCE)
15083 self.cfg.GetInstanceNodeGroups(instance_name))
15085 # No target groups, need to lock all of them
15086 lock_groups = locking.ALL_SET
15088 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15090 elif level == locking.LEVEL_NODE:
15091 # This will only lock the nodes in the group to be evacuated which
15092 # contain actual instances
15093 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15094 self._LockInstancesNodes()
15096 # Lock all nodes in group to be evacuated and target groups
15097 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15098 assert self.group_uuid in owned_groups
15099 member_nodes = [node_name
15100 for group in owned_groups
15101 for node_name in self.cfg.GetNodeGroup(group).members]
15102 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15104 def CheckPrereq(self):
15105 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15106 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15107 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15109 assert owned_groups.issuperset(self.req_target_uuids)
15110 assert self.group_uuid in owned_groups
15112 # Check if locked instances are still correct
15113 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15115 # Get instance information
15116 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15118 # Check if node groups for locked instances are still correct
15119 _CheckInstancesNodeGroups(self.cfg, self.instances,
15120 owned_groups, owned_nodes, self.group_uuid)
15122 if self.req_target_uuids:
15123 # User requested specific target groups
15124 self.target_uuids = self.req_target_uuids
15126 # All groups except the one to be evacuated are potential targets
15127 self.target_uuids = [group_uuid for group_uuid in owned_groups
15128 if group_uuid != self.group_uuid]
15130 if not self.target_uuids:
15131 raise errors.OpPrereqError("There are no possible target groups",
15132 errors.ECODE_INVAL)
15134 def BuildHooksEnv(self):
15135 """Build hooks env.
15139 "GROUP_NAME": self.op.group_name,
15140 "TARGET_GROUPS": " ".join(self.target_uuids),
15143 def BuildHooksNodes(self):
15144 """Build hooks nodes.
15147 mn = self.cfg.GetMasterNode()
15149 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15151 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15153 return (run_nodes, run_nodes)
15155 def Exec(self, feedback_fn):
15156 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15158 assert self.group_uuid not in self.target_uuids
15160 req = iallocator.IAReqGroupChange(instances=instances,
15161 target_groups=self.target_uuids)
15162 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15164 ial.Run(self.op.iallocator)
15166 if not ial.success:
15167 raise errors.OpPrereqError("Can't compute group evacuation using"
15168 " iallocator '%s': %s" %
15169 (self.op.iallocator, ial.info),
15170 errors.ECODE_NORES)
15172 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15174 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15175 len(jobs), self.op.group_name)
15177 return ResultWithJobs(jobs)
15180 class TagsLU(NoHooksLU): # pylint: disable=W0223
15181 """Generic tags LU.
15183 This is an abstract class which is the parent of all the other tags LUs.
15186 def ExpandNames(self):
15187 self.group_uuid = None
15188 self.needed_locks = {}
15190 if self.op.kind == constants.TAG_NODE:
15191 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15192 lock_level = locking.LEVEL_NODE
15193 lock_name = self.op.name
15194 elif self.op.kind == constants.TAG_INSTANCE:
15195 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15196 lock_level = locking.LEVEL_INSTANCE
15197 lock_name = self.op.name
15198 elif self.op.kind == constants.TAG_NODEGROUP:
15199 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15200 lock_level = locking.LEVEL_NODEGROUP
15201 lock_name = self.group_uuid
15202 elif self.op.kind == constants.TAG_NETWORK:
15203 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15204 lock_level = locking.LEVEL_NETWORK
15205 lock_name = self.network_uuid
15210 if lock_level and getattr(self.op, "use_locking", True):
15211 self.needed_locks[lock_level] = lock_name
15213 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15214 # not possible to acquire the BGL based on opcode parameters)
15216 def CheckPrereq(self):
15217 """Check prerequisites.
15220 if self.op.kind == constants.TAG_CLUSTER:
15221 self.target = self.cfg.GetClusterInfo()
15222 elif self.op.kind == constants.TAG_NODE:
15223 self.target = self.cfg.GetNodeInfo(self.op.name)
15224 elif self.op.kind == constants.TAG_INSTANCE:
15225 self.target = self.cfg.GetInstanceInfo(self.op.name)
15226 elif self.op.kind == constants.TAG_NODEGROUP:
15227 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15228 elif self.op.kind == constants.TAG_NETWORK:
15229 self.target = self.cfg.GetNetwork(self.network_uuid)
15231 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15232 str(self.op.kind), errors.ECODE_INVAL)
15235 class LUTagsGet(TagsLU):
15236 """Returns the tags of a given object.
15241 def ExpandNames(self):
15242 TagsLU.ExpandNames(self)
15244 # Share locks as this is only a read operation
15245 self.share_locks = _ShareAll()
15247 def Exec(self, feedback_fn):
15248 """Returns the tag list.
15251 return list(self.target.GetTags())
15254 class LUTagsSearch(NoHooksLU):
15255 """Searches the tags for a given pattern.
15260 def ExpandNames(self):
15261 self.needed_locks = {}
15263 def CheckPrereq(self):
15264 """Check prerequisites.
15266 This checks the pattern passed for validity by compiling it.
15270 self.re = re.compile(self.op.pattern)
15271 except re.error, err:
15272 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15273 (self.op.pattern, err), errors.ECODE_INVAL)
15275 def Exec(self, feedback_fn):
15276 """Returns the tag list.
15279 cfg = self.cfg
15280 tgts = [("/cluster", cfg.GetClusterInfo())]
15281 ilist = cfg.GetAllInstancesInfo().values()
15282 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15283 nlist = cfg.GetAllNodesInfo().values()
15284 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15285 tgts.extend(("/nodegroup/%s" % n.name, n)
15286 for n in cfg.GetAllNodeGroupsInfo().values())
15287 results = []
15288 for path, target in tgts:
15289 for tag in target.GetTags():
15290 if self.re.search(tag):
15291 results.append((path, tag))
15293 return results
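# Hypothetical result sketch: searching for the pattern "^env:prod" would
# return (path, tag) pairs such as ("/instances/web1", "env:prod") or
# ("/nodegroup/rack1", "env:prod") for every matching tagged object.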
15295 class LUTagsSet(TagsLU):
15296 """Sets a tag on a given object.
15301 def CheckPrereq(self):
15302 """Check prerequisites.
15304 This checks the type and length of the tag name and value.
15307 TagsLU.CheckPrereq(self)
15308 for tag in self.op.tags:
15309 objects.TaggableObject.ValidateTag(tag)
15311 def Exec(self, feedback_fn):
15315 try:
15316 for tag in self.op.tags:
15317 self.target.AddTag(tag)
15318 except errors.TagError, err:
15319 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15320 self.cfg.Update(self.target, feedback_fn)
15323 class LUTagsDel(TagsLU):
15324 """Delete a list of tags from a given object.
15329 def CheckPrereq(self):
15330 """Check prerequisites.
15332 This checks that we have the given tag.
15335 TagsLU.CheckPrereq(self)
15336 for tag in self.op.tags:
15337 objects.TaggableObject.ValidateTag(tag)
15338 del_tags = frozenset(self.op.tags)
15339 cur_tags = self.target.GetTags()
15341 diff_tags = del_tags - cur_tags
15342 if diff_tags:
15343 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15344 raise errors.OpPrereqError("Tag(s) %s not found" %
15345 (utils.CommaJoin(diff_names), ),
15346 errors.ECODE_NOENT)
15348 def Exec(self, feedback_fn):
15349 """Remove the tag from the object.
15352 for tag in self.op.tags:
15353 self.target.RemoveTag(tag)
15354 self.cfg.Update(self.target, feedback_fn)
15357 class LUTestDelay(NoHooksLU):
15358 """Sleep for a specified amount of time.
15360 This LU sleeps on the master and/or nodes for a specified amount of
15366 def ExpandNames(self):
15367 """Expand names and set required locks.
15369 This expands the node list, if any.
15372 self.needed_locks = {}
15373 if self.op.on_nodes:
15374 # _GetWantedNodes can be used here, but is not always appropriate to use
15375 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15376 # more information.
15377 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15378 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15380 def _TestDelay(self):
15381 """Do the actual sleep.
15384 if self.op.on_master:
15385 if not utils.TestDelay(self.op.duration):
15386 raise errors.OpExecError("Error during master delay test")
15387 if self.op.on_nodes:
15388 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15389 for node, node_result in result.items():
15390 node_result.Raise("Failure during rpc call to node %s" % node)
15392 def Exec(self, feedback_fn):
15393 """Execute the test delay opcode, with the wanted repetitions.
15396 if self.op.repeat == 0:
15397 self._TestDelay()
15398 else:
15399 top_value = self.op.repeat - 1
15400 for i in range(self.op.repeat):
15401 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15402 self._TestDelay()
15405 class LURestrictedCommand(NoHooksLU):
15406 """Logical unit for executing restricted commands.
15411 def ExpandNames(self):
15413 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15415 self.needed_locks = {
15416 locking.LEVEL_NODE: self.op.nodes,
15418 self.share_locks = {
15419 locking.LEVEL_NODE: not self.op.use_locking,
15422 def CheckPrereq(self):
15423 """Check prerequisites.
15427 def Exec(self, feedback_fn):
15428 """Execute restricted command and return output.
15431 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15433 # Check if correct locks are held
15434 assert set(self.op.nodes).issubset(owned_nodes)
15436 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15438 result = []
15440 for node_name in self.op.nodes:
15441 nres = rpcres[node_name]
15442 if nres.fail_msg:
15443 msg = ("Command '%s' on node '%s' failed: %s" %
15444 (self.op.command, node_name, nres.fail_msg))
15445 result.append((False, msg))
15446 else:
15447 result.append((True, nres.payload))
15449 return result
15452 class LUTestJqueue(NoHooksLU):
15453 """Utility LU to test some aspects of the job queue.
15458 # Must be lower than default timeout for WaitForJobChange to see whether it
15459 # notices changed jobs
15460 _CLIENT_CONNECT_TIMEOUT = 20.0
15461 _CLIENT_CONFIRM_TIMEOUT = 60.0
15464 def _NotifyUsingSocket(cls, cb, errcls):
15465 """Opens a Unix socket and waits for another program to connect.
15468 @param cb: Callback to send socket name to client
15469 @type errcls: class
15470 @param errcls: Exception class to use for errors
15473 # Using a temporary directory as there's no easy way to create temporary
15474 # sockets without writing a custom loop around tempfile.mktemp and
15476 tmpdir = tempfile.mkdtemp()
15478 tmpsock = utils.PathJoin(tmpdir, "sock")
15480 logging.debug("Creating temporary socket at %s", tmpsock)
15481 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15486 # Send details to client
15489 # Wait for client to connect before continuing
15490 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15491 try:
15492 (conn, _) = sock.accept()
15493 except socket.error, err:
15494 raise errcls("Client didn't connect in time (%s)" % err)
15495 finally:
15496 sock.close()
15498 # Remove as soon as client is connected
15499 shutil.rmtree(tmpdir)
15501 # Wait for client to close
15504 # pylint: disable=E1101
15505 # Instance of '_socketobject' has no ... member
15506 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15508 except socket.error, err:
15509 raise errcls("Client failed to confirm notification (%s)" % err)
15513 def _SendNotification(self, test, arg, sockname):
15514 """Sends a notification to the client.
15517 @param test: Test name
15518 @param arg: Test argument (depends on test)
15519 @type sockname: string
15520 @param sockname: Socket path
15523 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15525 def _Notify(self, prereq, test, arg):
15526 """Notifies the client of a test.
15529 @param prereq: Whether this is a prereq-phase test
15531 @param test: Test name
15532 @param arg: Test argument (depends on test)
15536 errcls = errors.OpPrereqError
15538 errcls = errors.OpExecError
15540 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15544 def CheckArguments(self):
15545 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15546 self.expandnames_calls = 0
15548 def ExpandNames(self):
15549 checkargs_calls = getattr(self, "checkargs_calls", 0)
15550 if checkargs_calls < 1:
15551 raise errors.ProgrammerError("CheckArguments was not called")
15553 self.expandnames_calls += 1
15555 if self.op.notify_waitlock:
15556 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15558 self.LogInfo("Expanding names")
15560 # Get lock on master node (just to get a lock, not for a particular reason)
15561 self.needed_locks = {
15562 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15565 def Exec(self, feedback_fn):
15566 if self.expandnames_calls < 1:
15567 raise errors.ProgrammerError("ExpandNames was not called")
15569 if self.op.notify_exec:
15570 self._Notify(False, constants.JQT_EXEC, None)
15572 self.LogInfo("Executing")
15574 if self.op.log_messages:
15575 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15576 for idx, msg in enumerate(self.op.log_messages):
15577 self.LogInfo("Sending log message %s", idx + 1)
15578 feedback_fn(constants.JQT_MSGPREFIX + msg)
15579 # Report how many test messages have been sent
15580 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15583 raise errors.OpExecError("Opcode failure was requested")
15588 class LUTestAllocator(NoHooksLU):
15589 """Run allocator tests.
15591 This LU runs the allocator tests
15594 def CheckPrereq(self):
15595 """Check prerequisites.
15597 This checks the opcode parameters depending on the requested direction and mode of the test.
15600 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
15601 constants.IALLOCATOR_MODE_MULTI_ALLOC):
15602 for attr in ["memory", "disks", "disk_template",
15603 "os", "tags", "nics", "vcpus"]:
15604 if not hasattr(self.op, attr):
15605 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15606 attr, errors.ECODE_INVAL)
15607 iname = self.cfg.ExpandInstanceName(self.op.name)
15608 if iname is not None:
15609 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15610 iname, errors.ECODE_EXISTS)
15611 if not isinstance(self.op.nics, list):
15612 raise errors.OpPrereqError("Invalid parameter 'nics'",
15613 errors.ECODE_INVAL)
15614 if not isinstance(self.op.disks, list):
15615 raise errors.OpPrereqError("Invalid parameter 'disks'",
15616 errors.ECODE_INVAL)
15617 for row in self.op.disks:
15618 if (not isinstance(row, dict) or
15619 constants.IDISK_SIZE not in row or
15620 not isinstance(row[constants.IDISK_SIZE], int) or
15621 constants.IDISK_MODE not in row or
15622 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15623 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15624 " parameter", errors.ECODE_INVAL)
15625 if self.op.hypervisor is None:
15626 self.op.hypervisor = self.cfg.GetHypervisorType()
15627 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15628 fname = _ExpandInstanceName(self.cfg, self.op.name)
15629 self.op.name = fname
15630 self.relocate_from = \
15631 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15632 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15633 constants.IALLOCATOR_MODE_NODE_EVAC):
15634 if not self.op.instances:
15635 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15636 self.op.instances = _GetWantedInstances(self, self.op.instances)
15638 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15639 self.op.mode, errors.ECODE_INVAL)
15641 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15642 if self.op.iallocator is None:
15643 raise errors.OpPrereqError("Missing allocator name",
15644 errors.ECODE_INVAL)
15645 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15646 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15647 self.op.direction, errors.ECODE_INVAL)
15649 def Exec(self, feedback_fn):
15650 """Run the allocator test.
15653 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15654 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
15655 memory=self.op.memory,
15656 disks=self.op.disks,
15657 disk_template=self.op.disk_template,
15661 vcpus=self.op.vcpus,
15662 spindle_use=self.op.spindle_use,
15663 hypervisor=self.op.hypervisor)
15664 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15665 req = iallocator.IAReqRelocate(name=self.op.name,
15666 relocate_from=list(self.relocate_from))
15667 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15668 req = iallocator.IAReqGroupChange(instances=self.op.instances,
15669 target_groups=self.op.target_groups)
15670 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15671 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
15672 evac_mode=self.op.evac_mode)
15673 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
15674 disk_template = self.op.disk_template
15675 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
15676 memory=self.op.memory,
15677 disks=self.op.disks,
15678 disk_template=disk_template,
15682 vcpus=self.op.vcpus,
15683 spindle_use=self.op.spindle_use,
15684 hypervisor=self.op.hypervisor)
15685 for idx in range(self.op.count)]
15686 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
15687 else:
15688 raise errors.ProgrammerError("Unhandled mode %s in"
15689 " LUTestAllocator.Exec", self.op.mode)
15691 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15692 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15693 result = ial.in_text
15694 else:
15695 ial.Run(self.op.iallocator, validate=False)
15696 result = ial.out_text
15698 return result
15700 class LUNetworkAdd(LogicalUnit):
15701 """Logical unit for creating networks.
15704 HPATH = "network-add"
15705 HTYPE = constants.HTYPE_NETWORK
15708 def BuildHooksNodes(self):
15709 """Build hooks nodes.
15712 mn = self.cfg.GetMasterNode()
15713 return ([mn], [mn])
15715 def CheckArguments(self):
15716 if self.op.mac_prefix:
15717 self.op.mac_prefix = \
15718 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15720 def ExpandNames(self):
15721 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15723 if self.op.conflicts_check:
15724 self.share_locks[locking.LEVEL_NODE] = 1
15725 self.needed_locks = {
15726 locking.LEVEL_NODE: locking.ALL_SET,
15729 self.needed_locks = {}
15731 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15733 def CheckPrereq(self):
15734 if self.op.network is None:
15735 raise errors.OpPrereqError("Network must be given",
15736 errors.ECODE_INVAL)
15738 uuid = self.cfg.LookupNetwork(self.op.network_name)
15741 raise errors.OpPrereqError("Network '%s' already defined" %
15742 self.op.network, errors.ECODE_EXISTS)
15744 # Check tag validity
15745 for tag in self.op.tags:
15746 objects.TaggableObject.ValidateTag(tag)
15748 def BuildHooksEnv(self):
15749 """Build hooks env.
15753 "name": self.op.network_name,
15754 "subnet": self.op.network,
15755 "gateway": self.op.gateway,
15756 "network6": self.op.network6,
15757 "gateway6": self.op.gateway6,
15758 "mac_prefix": self.op.mac_prefix,
15759 "network_type": self.op.network_type,
15760 "tags": self.op.tags,
15762 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15764 def Exec(self, feedback_fn):
15765 """Add the ip pool to the cluster.
15768 nobj = objects.Network(name=self.op.network_name,
15769 network=self.op.network,
15770 gateway=self.op.gateway,
15771 network6=self.op.network6,
15772 gateway6=self.op.gateway6,
15773 mac_prefix=self.op.mac_prefix,
15774 network_type=self.op.network_type,
15775 uuid=self.network_uuid,
15776 family=constants.IP4_VERSION)
15777 # Initialize the associated address pool
15778 try:
15779 pool = network.AddressPool.InitializeNetwork(nobj)
15780 except errors.AddressPoolError, e:
15781 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
15783 # Check if we need to reserve the nodes and the cluster master IP
15784 # These may not be allocated to any instances in routed mode, as
15785 # they wouldn't function anyway.
15786 if self.op.conflicts_check:
15787 for node in self.cfg.GetAllNodesInfo().values():
15788 for ip in [node.primary_ip, node.secondary_ip]:
15789 try:
15790 if pool.Contains(ip):
15791 pool.Reserve(ip)
15792 self.LogInfo("Reserved IP address of node '%s' (%s)",
15793 node.name, ip)
15794 except errors.AddressPoolError:
15795 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
15796 node.name, ip)
15798 master_ip = self.cfg.GetClusterInfo().master_ip
15799 try:
15800 if pool.Contains(master_ip):
15801 pool.Reserve(master_ip)
15802 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
15803 except errors.AddressPoolError:
15804 self.LogWarning("Cannot reserve cluster master IP address (%s)",
15805 master_ip)
15807 if self.op.add_reserved_ips:
15808 for ip in self.op.add_reserved_ips:
15809 try:
15810 pool.Reserve(ip, external=True)
15811 except errors.AddressPoolError, e:
15812 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
15815 for tag in self.op.tags:
15816 nobj.AddTag(tag)
15818 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
15819 del self.remove_locks[locking.LEVEL_NETWORK]
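# Sketch of the address-pool calls relied on above, with a hypothetical
# 192.0.2.0/28 subnet: InitializeNetwork() creates the pool for a new network
# object, Reserve(ip, external=True) excludes an address from instance
# allocation, and Contains(ip) is the membership test used by the conflict
# checks.
#
#   net = objects.Network(name="example", network="192.0.2.0/28",
#                         gateway="192.0.2.1", family=constants.IP4_VERSION)
#   pool = network.AddressPool.InitializeNetwork(net)
#   pool.Reserve("192.0.2.1", external=True)
#   pool.Contains("192.0.2.5")      # -> True for an address inside the subnet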
15822 class LUNetworkRemove(LogicalUnit):
15823 HPATH = "network-remove"
15824 HTYPE = constants.HTYPE_NETWORK
15827 def ExpandNames(self):
15828 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15830 if not self.network_uuid:
15831 raise errors.OpPrereqError(("Network '%s' not found" %
15832 self.op.network_name),
15833 errors.ECODE_INVAL)
15835 self.share_locks[locking.LEVEL_NODEGROUP] = 1
15836 self.needed_locks = {
15837 locking.LEVEL_NETWORK: [self.network_uuid],
15838 locking.LEVEL_NODEGROUP: locking.ALL_SET,
15841 def CheckPrereq(self):
15842 """Check prerequisites.
15844 This checks that the given network exists and that it is not connected
15845 to any node group, since only unconnected networks can be removed.
15849 # Verify that the network is not connected to any node group.
15850 node_groups = [group.name
15851 for group in self.cfg.GetAllNodeGroupsInfo().values()
15852 if self.network_uuid in group.networks]
15855 self.LogWarning("Network '%s' is connected to the following"
15856 " node groups: %s" %
15857 (self.op.network_name,
15858 utils.CommaJoin(utils.NiceSort(node_groups))))
15859 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
15861 def BuildHooksEnv(self):
15862 """Build hooks env.
15866 "NETWORK_NAME": self.op.network_name,
15869 def BuildHooksNodes(self):
15870 """Build hooks nodes.
15873 mn = self.cfg.GetMasterNode()
15874 return ([mn], [mn])
15876 def Exec(self, feedback_fn):
15877 """Remove the network.
15881 self.cfg.RemoveNetwork(self.network_uuid)
15882 except errors.ConfigurationError:
15883 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
15884 (self.op.network_name, self.network_uuid))
15887 class LUNetworkSetParams(LogicalUnit):
15888 """Modifies the parameters of a network.
15891 HPATH = "network-modify"
15892 HTYPE = constants.HTYPE_NETWORK
15895 def CheckArguments(self):
15896 if (self.op.gateway and
15897 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
15898 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
15899 " at once", errors.ECODE_INVAL)
15901 def ExpandNames(self):
15902 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
15903 if self.network_uuid is None:
15904 raise errors.OpPrereqError(("Network '%s' not found" %
15905 self.op.network_name),
15906 errors.ECODE_INVAL)
15908 self.needed_locks = {
15909 locking.LEVEL_NETWORK: [self.network_uuid],
15912 def CheckPrereq(self):
15913 """Check prerequisites.
15916 self.network = self.cfg.GetNetwork(self.network_uuid)
15917 self.gateway = self.network.gateway
15918 self.network_type = self.network.network_type
15919 self.mac_prefix = self.network.mac_prefix
15920 self.network6 = self.network.network6
15921 self.gateway6 = self.network.gateway6
15922 self.tags = self.network.tags
15924 self.pool = network.AddressPool(self.network)
15926 if self.op.gateway:
15927 if self.op.gateway == constants.VALUE_NONE:
15928 self.gateway = None
15930 self.gateway = self.op.gateway
15931 if self.pool.IsReserved(self.gateway):
15932 raise errors.OpPrereqError("%s is already reserved" %
15933 self.gateway, errors.ECODE_INVAL)
15935 if self.op.network_type:
15936 if self.op.network_type == constants.VALUE_NONE:
15937 self.network_type = None
15939 self.network_type = self.op.network_type
15941 if self.op.mac_prefix:
15942 if self.op.mac_prefix == constants.VALUE_NONE:
15943 self.mac_prefix = None
15945 self.mac_prefix = \
15946 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
15948 if self.op.gateway6:
15949 if self.op.gateway6 == constants.VALUE_NONE:
15950 self.gateway6 = None
15952 self.gateway6 = self.op.gateway6
15954 if self.op.network6:
15955 if self.op.network6 == constants.VALUE_NONE:
15956 self.network6 = None
15958 self.network6 = self.op.network6
15960 def BuildHooksEnv(self):
15961 """Build hooks env.
15965 "name": self.op.network_name,
15966 "subnet": self.network.network,
15967 "gateway": self.gateway,
15968 "network6": self.network6,
15969 "gateway6": self.gateway6,
15970 "mac_prefix": self.mac_prefix,
15971 "network_type": self.network_type,
15974 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
15976 def BuildHooksNodes(self):
15977 """Build hooks nodes.
15980 mn = self.cfg.GetMasterNode()
15981 return ([mn], [mn])
15983 def Exec(self, feedback_fn):
15984 """Modifies the network.
15987 #TODO: reserve/release via temporary reservation manager
15988 # extend cfg.ReserveIp/ReleaseIp with the external flag
15989 if self.op.gateway:
15990 if self.gateway == self.network.gateway:
15991 self.LogWarning("Gateway is already %s", self.gateway)
15994 self.pool.Reserve(self.gateway, external=True)
15995 if self.network.gateway:
15996 self.pool.Release(self.network.gateway, external=True)
15997 self.network.gateway = self.gateway
15999 if self.op.add_reserved_ips:
16000 for ip in self.op.add_reserved_ips:
16002 if self.pool.IsReserved(ip):
16003 self.LogWarning("IP address %s is already reserved", ip)
16005 self.pool.Reserve(ip, external=True)
16006 except errors.AddressPoolError, err:
16007 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16009 if self.op.remove_reserved_ips:
16010 for ip in self.op.remove_reserved_ips:
16011 if ip == self.network.gateway:
16012 self.LogWarning("Cannot unreserve Gateway's IP")
16015 if not self.pool.IsReserved(ip):
16016 self.LogWarning("IP address %s is already unreserved", ip)
16018 self.pool.Release(ip, external=True)
16019 except errors.AddressPoolError, err:
16020 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16022 if self.op.mac_prefix:
16023 self.network.mac_prefix = self.mac_prefix
16025 if self.op.network6:
16026 self.network.network6 = self.network6
16028 if self.op.gateway6:
16029 self.network.gateway6 = self.gateway6
16031 if self.op.network_type:
16032 self.network.network_type = self.network_type
16034 self.pool.Validate()
16036 self.cfg.Update(self.network, feedback_fn)
16039 class _NetworkQuery(_QueryBase):
16040 FIELDS = query.NETWORK_FIELDS
16042 def ExpandNames(self, lu):
16043 lu.needed_locks = {}
16045 self._all_networks = lu.cfg.GetAllNetworksInfo()
16046 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
16048 if not self.names:
16049 self.wanted = [name_to_uuid[name]
16050 for name in utils.NiceSort(name_to_uuid.keys())]
16051 else:
16052 # Accept names to be either names or UUIDs.
16053 missing = []
16054 self.wanted = []
16055 all_uuid = frozenset(self._all_networks.keys())
16057 for name in self.names:
16058 if name in all_uuid:
16059 self.wanted.append(name)
16060 elif name in name_to_uuid:
16061 self.wanted.append(name_to_uuid[name])
16062 else:
16063 missing.append(name)
16065 if missing:
16066 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16067 errors.ECODE_NOENT)
16069 def DeclareLocks(self, lu, level):
16072 def _GetQueryData(self, lu):
16073 """Computes the list of networks and their attributes.
16076 do_instances = query.NETQ_INST in self.requested_data
16077 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
16078 do_stats = query.NETQ_STATS in self.requested_data
16080 network_to_groups = None
16081 network_to_instances = None
16084 # For NETQ_GROUP, we need to map network->[groups]
16086 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16087 network_to_groups = dict((uuid, []) for uuid in self.wanted)
16090 all_instances = lu.cfg.GetAllInstancesInfo()
16091 all_nodes = lu.cfg.GetAllNodesInfo()
16092 network_to_instances = dict((uuid, []) for uuid in self.wanted)
16094 for group in all_groups.values():
16096 group_nodes = [node.name for node in all_nodes.values() if
16097 node.group == group.uuid]
16098 group_instances = [instance for instance in all_instances.values()
16099 if instance.primary_node in group_nodes]
16101 for net_uuid in group.networks.keys():
16102 if net_uuid in network_to_groups:
16103 netparams = group.networks[net_uuid]
16104 mode = netparams[constants.NIC_MODE]
16105 link = netparams[constants.NIC_LINK]
16106 info = group.name + "(" + mode + ", " + link + ")"
16107 network_to_groups[net_uuid].append(info)
16110 for instance in group_instances:
16111 for nic in instance.nics:
16112 if nic.network == self._all_networks[net_uuid].name:
16113 network_to_instances[net_uuid].append(instance.name)
16118 for uuid, net in self._all_networks.items():
16119 if uuid in self.wanted:
16120 pool = network.AddressPool(net)
16122 "free_count": pool.GetFreeCount(),
16123 "reserved_count": pool.GetReservedCount(),
16124 "map": pool.GetMap(),
16125 "external_reservations":
16126 utils.CommaJoin(pool.GetExternalReservations()),
16129 return query.NetworkQueryData([self._all_networks[uuid]
16130 for uuid in self.wanted],
16132 network_to_instances,
16136 class LUNetworkQuery(NoHooksLU):
16137 """Logical unit for querying networks.
16142 def CheckArguments(self):
16143 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16144 self.op.output_fields, False)
16146 def ExpandNames(self):
16147 self.nq.ExpandNames(self)
16149 def Exec(self, feedback_fn):
16150 return self.nq.OldStyleQuery(self)
16153 class LUNetworkConnect(LogicalUnit):
16154 """Connect a network to a nodegroup
16157 HPATH = "network-connect"
16158 HTYPE = constants.HTYPE_NETWORK
16161 def ExpandNames(self):
16162 self.network_name = self.op.network_name
16163 self.group_name = self.op.group_name
16164 self.network_mode = self.op.network_mode
16165 self.network_link = self.op.network_link
16167 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16168 if self.network_uuid is None:
16169 raise errors.OpPrereqError("Network %s does not exist" %
16170 self.network_name, errors.ECODE_INVAL)
16172 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16173 if self.group_uuid is None:
16174 raise errors.OpPrereqError("Group %s does not exist" %
16175 self.group_name, errors.ECODE_INVAL)
16177 self.needed_locks = {
16178 locking.LEVEL_INSTANCE: [],
16179 locking.LEVEL_NODEGROUP: [self.group_uuid],
16181 self.share_locks[locking.LEVEL_INSTANCE] = 1
16183 if self.op.conflicts_check:
16184 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16185 self.share_locks[locking.LEVEL_NETWORK] = 1
16187 def DeclareLocks(self, level):
16188 if level == locking.LEVEL_INSTANCE:
16189 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16191 # Lock instances optimistically, needs verification once group lock has
16193 if self.op.conflicts_check:
16194 self.needed_locks[locking.LEVEL_INSTANCE] = \
16195 self.cfg.GetNodeGroupInstances(self.group_uuid)
16197 def BuildHooksEnv(self):
16199 "GROUP_NAME": self.group_name,
16200 "GROUP_NETWORK_MODE": self.network_mode,
16201 "GROUP_NETWORK_LINK": self.network_link,
16205 def BuildHooksNodes(self):
16206 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16207 return (nodes, nodes)
16209 def CheckPrereq(self):
16210 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16212 assert self.group_uuid in owned_groups
16214 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16215 for i in value)
16217 self.netparams = {
16218 constants.NIC_MODE: self.network_mode,
16219 constants.NIC_LINK: self.network_link,
16220 }
16221 objects.NIC.CheckParameterSyntax(self.netparams)
16223 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16224 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16225 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16226 self.connected = False
16227 if self.network_uuid in self.group.networks:
16228 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16229 (self.network_name, self.group.name))
16230 self.connected = True
16233 if self.op.conflicts_check:
16234 # Check if locked instances are still correct
16235 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16236 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16238 nobj = self.cfg.GetNetwork(self.network_uuid)
16239 pool = network.AddressPool(nobj)
16240 conflicting_instances = []
16242 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16243 for idx, nic in enumerate(instance.nics):
16244 if pool.Contains(nic.ip):
16245 conflicting_instances.append((instance.name, idx, nic.ip))
16247 if conflicting_instances:
16248 self.LogWarning("Following occurences use IPs from network %s"
16249 " that is about to connect to nodegroup %s: %s" %
16250 (self.network_name, self.group.name,
16251 l(conflicting_instances)))
16252 raise errors.OpPrereqError("Conflicting IPs found."
16253 " Please remove/modify"
16254 " corresponding NICs",
16255 errors.ECODE_INVAL)
16257 def Exec(self, feedback_fn):
16261 self.group.networks[self.network_uuid] = self.netparams
16262 self.cfg.Update(self.group, feedback_fn)
16265 class LUNetworkDisconnect(LogicalUnit):
16266 """Disconnect a network to a nodegroup
16269 HPATH = "network-disconnect"
16270 HTYPE = constants.HTYPE_NETWORK
16273 def ExpandNames(self):
16274 self.network_name = self.op.network_name
16275 self.group_name = self.op.group_name
16277 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16278 if self.network_uuid is None:
16279 raise errors.OpPrereqError("Network %s does not exist" %
16280 self.network_name, errors.ECODE_INVAL)
16282 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16283 if self.group_uuid is None:
16284 raise errors.OpPrereqError("Group %s does not exist" %
16285 self.group_name, errors.ECODE_INVAL)
16287 self.needed_locks = {
16288 locking.LEVEL_INSTANCE: [],
16289 locking.LEVEL_NODEGROUP: [self.group_uuid],
16291 self.share_locks[locking.LEVEL_INSTANCE] = 1
16293 def DeclareLocks(self, level):
16294 if level == locking.LEVEL_INSTANCE:
16295 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16297 # Lock instances optimistically, needs verification once group lock has
16299 if self.op.conflicts_check:
16300 self.needed_locks[locking.LEVEL_INSTANCE] = \
16301 self.cfg.GetNodeGroupInstances(self.group_uuid)
16303 def BuildHooksEnv(self):
16305 "GROUP_NAME": self.group_name,
16309 def BuildHooksNodes(self):
16310 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16311 return (nodes, nodes)
16313 def CheckPrereq(self):
16314 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16316 assert self.group_uuid in owned_groups
16318 l = lambda value: utils.CommaJoin("%s: %s/%s" % (i[0], i[1], i[2])
16319 for i in value)
16321 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16322 self.connected = True
16323 if self.network_uuid not in self.group.networks:
16324 self.LogWarning("Network '%s' is not mapped to group '%s'",
16325 self.network_name, self.group.name)
16326 self.connected = False
16329 if self.op.conflicts_check:
16330 # Check if locked instances are still correct
16331 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16332 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16334 conflicting_instances = []
16336 for (_, instance) in self.cfg.GetMultiInstanceInfo(owned_instances):
16337 for idx, nic in enumerate(instance.nics):
16338 if nic.network == self.network_name:
16339 conflicting_instances.append((instance.name, idx, nic.ip))
16341 if conflicting_instances:
16342 self.LogWarning("Following occurences use IPs from network %s"
16343 " that is about to disconnected from the nodegroup"
16345 (self.network_name, self.group.name,
16346 l(conflicting_instances)))
16347 raise errors.OpPrereqError("Conflicting IPs."
16348 " Please remove/modify"
16349 " corresponding NICS",
16350 errors.ECODE_INVAL)
16352 def Exec(self, feedback_fn):
16353 if not self.connected:
16356 del self.group.networks[self.network_uuid]
16357 self.cfg.Update(self.group, feedback_fn)
16360 #: Query type implementations
16362 constants.QR_CLUSTER: _ClusterQuery,
16363 constants.QR_INSTANCE: _InstanceQuery,
16364 constants.QR_NODE: _NodeQuery,
16365 constants.QR_GROUP: _GroupQuery,
16366 constants.QR_NETWORK: _NetworkQuery,
16367 constants.QR_OS: _OsQuery,
16368 constants.QR_EXPORT: _ExportQuery,
16371 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16374 def _GetQueryImplementation(name):
16375 """Returns the implemtnation for a query type.
16377 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16381 return _QUERY_IMPL[name]
16383 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16384 errors.ECODE_INVAL)
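# Usage sketch: the generic query opcodes resolve their backend through the
# table above, e.g. _GetQueryImplementation(constants.QR_GROUP) yields
# _GroupQuery, while an unknown resource name raises OpPrereqError with
# ECODE_INVAL.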
16387 def _CheckForConflictingIp(lu, ip, node):
16388 """In case of conflicting ip raise error.
16391 @param ip: ip address
16393 @param node: node name
16396 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16397 if conf_net is not None:
16398 raise errors.OpPrereqError("Conflicting IP found:"
16399 " %s <> %s." % (ip, conf_net),
16400 errors.ECODE_INVAL)
16402 return (None, None)
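# Usage sketch with hypothetical arguments: _CheckForConflictingIp(self,
# "192.0.2.10", "node1.example.com") raises OpPrereqError if the address lies
# in a network already connected to that node's group, and returns
# (None, None) otherwise.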