4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode result.
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
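    Example (an illustrative sketch only; the opcode shown here is just an
    assumption, not a requirement)::

      # from an LU's Exec: submit one follow-up job and return extra data
      return ResultWithJobs([[opcodes.OpTestDelay(duration=1.0)]],
                            warning="something worth reporting")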
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op validity.
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring the
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left purely as a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
185 The function is allowed to change the self.op attribute so that
186 later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
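      # Acquire all node locks, but in shared (read-only) mode -- an
      # illustrative sketch of the self.share_locks mechanism described above
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
      }
      self.share_locks[locking.LEVEL_NODE] = 1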
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same time.
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
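    A minimal illustration (hypothetical LU) where node locks were initialised
    to an empty list in ExpandNames and are computed here::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()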
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are allowed.
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. If there are no nodes to return, an
323 empty list should be used (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the "unused argument" and "could
350 # be a function" warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done before.
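    Typical usage in an LU's ExpandNames (a sketch, not tied to any specific
    LU)::

      def ExpandNames(self):
        self._ExpandAndLockInstance()
        self.needed_locks[locking.LEVEL_NODE] = []
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE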
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @return: The annotated disk copies
601 @see L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " are '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default value
793 @type use_default: boolean
794 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @type use_none: boolean
797 @param use_none: whether to recognise C{None} values as 'to be removed' values
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
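# Illustrative semantics of _GetUpdatedParams (a sketch, not executed here):
# with use_default=True, a value of constants.VALUE_DEFAULT removes the key,
# so for example
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
# would evaluate to {"b": 2, "c": 3}.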
816 def _UpdateMinMaxISpecs(ipolicy, new_minmax, group_policy):
817 use_none = use_default = group_policy
818 minmax = ipolicy.setdefault(constants.ISPECS_MINMAX, {})
819 for (key, value) in new_minmax.items():
820 if key not in constants.ISPECS_MINMAX_KEYS:
821 raise errors.OpPrereqError("Invalid key in new ipolicy/%s: %s" %
822 (constants.ISPECS_MINMAX, key),
824 old_spec = minmax.get(key, {})
825 minmax[key] = _GetUpdatedParams(old_spec, value, use_none=use_none,
826 use_default=use_default)
827 utils.ForceDictType(minmax[key], constants.ISPECS_PARAMETER_TYPES)
830 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
831 """Return the new version of an instance policy.
833 @param group_policy: whether this policy applies to a group and thus
834 we should support removal of policy entries
837 use_none = use_default = group_policy
838 ipolicy = copy.deepcopy(old_ipolicy)
839 for key, value in new_ipolicy.items():
840 if key not in constants.IPOLICY_ALL_KEYS:
841 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
843 if key == constants.ISPECS_MINMAX:
844 _UpdateMinMaxISpecs(ipolicy, value, group_policy)
845 elif key == constants.ISPECS_STD:
846 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
848 use_default=use_default)
849 utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
851 if (not value or value == [constants.VALUE_DEFAULT] or
852 value == constants.VALUE_DEFAULT):
856 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
857 " on the cluster'" % key,
860 if key in constants.IPOLICY_PARAMETERS:
861 # FIXME: we assume all such values are float
863 ipolicy[key] = float(value)
864 except (TypeError, ValueError), err:
865 raise errors.OpPrereqError("Invalid value for attribute"
866 " '%s': '%s', error: %s" %
867 (key, value, err), errors.ECODE_INVAL)
869 # FIXME: we assume all others are lists; this should be redone
871 ipolicy[key] = list(value)
873 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
874 except errors.ConfigurationError, err:
875 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
880 def _UpdateAndVerifySubDict(base, updates, type_check):
881 """Updates and verifies a dict with sub dicts of the same type.
883 @param base: The dict with the old data
884 @param updates: The dict with the new data
885 @param type_check: Dict suitable to ForceDictType to verify correct types
886 @returns: A new dict with updated and verified values
890 new = _GetUpdatedParams(old, value)
891 utils.ForceDictType(new, type_check)
894 ret = copy.deepcopy(base)
895 ret.update(dict((key, fn(base.get(key, {}), value))
896 for key, value in updates.items()))
900 def _MergeAndVerifyHvState(op_input, obj_input):
901 """Combines the hv state from an opcode with the one of the object
903 @param op_input: The input dict from the opcode
904 @param obj_input: The input dict from the objects
905 @return: The verified and updated dict
909 invalid_hvs = set(op_input) - constants.HYPER_TYPES
911 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
912 " %s" % utils.CommaJoin(invalid_hvs),
914 if obj_input is None:
916 type_check = constants.HVSTS_PARAMETER_TYPES
917 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
922 def _MergeAndVerifyDiskState(op_input, obj_input):
923 """Combines the disk state from an opcode with the one of the object
925 @param op_input: The input dict from the opcode
926 @param obj_input: The input dict from the objects
927 @return: The verified and updated dict
930 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
932 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
933 utils.CommaJoin(invalid_dst),
935 type_check = constants.DSS_PARAMETER_TYPES
936 if obj_input is None:
938 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
940 for key, value in op_input.items())
945 def _ReleaseLocks(lu, level, names=None, keep=None):
946 """Releases locks owned by an LU.
948 @type lu: L{LogicalUnit}
949 @param level: Lock level
950 @type names: list or None
951 @param names: Names of locks to release
952 @type keep: list or None
953 @param keep: Names of locks to retain
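  Example (illustrative only): keep just the primary node's lock and release
  every other node lock held by the LU::

    _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])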
956 assert not (keep is not None and names is not None), \
957 "Only one of the 'names' and the 'keep' parameters can be given"
959 if names is not None:
960 should_release = names.__contains__
962 should_release = lambda name: name not in keep
964 should_release = None
966 owned = lu.owned_locks(level)
968 # Not owning any lock at this level, do nothing
975 # Determine which locks to release
977 if should_release(name):
982 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
984 # Release just some locks
985 lu.glm.release(level, names=release)
987 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
990 lu.glm.release(level)
992 assert not lu.glm.is_owned(level), "No locks should be owned"
995 def _MapInstanceDisksToNodes(instances):
996 """Creates a map from (node, volume) to instance name.
998 @type instances: list of L{objects.Instance}
999 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
1002 return dict(((node, vol), inst.name)
1003 for inst in instances
1004 for (node, vols) in inst.MapLVsByNode().items()
1008 def _RunPostHook(lu, node_name):
1009 """Runs the post-hook for an opcode on a single node.
1012 hm = lu.proc.BuildHooksManager(lu)
1014 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
1015 except Exception, err: # pylint: disable=W0703
1016 lu.LogWarning("Errors occurred running hooks on %s: %s",
1020 def _CheckOutputFields(static, dynamic, selected):
1021 """Checks whether all selected fields are valid.
1023 @type static: L{utils.FieldSet}
1024 @param static: static fields set
1025 @type dynamic: L{utils.FieldSet}
1026 @param dynamic: dynamic fields set
1029 f = utils.FieldSet()
1033 delta = f.NonMatching(selected)
1035 raise errors.OpPrereqError("Unknown output fields selected: %s"
1036 % ",".join(delta), errors.ECODE_INVAL)
1039 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1040 """Make sure that none of the given paramters is global.
1042 If a global parameter is found, an L{errors.OpPrereqError} exception is
1043 raised. This is used to avoid setting global parameters for individual nodes.
1045 @type params: dictionary
1046 @param params: Parameters to check
1047 @type glob_pars: dictionary
1048 @param glob_pars: Forbidden parameters
1050 @param kind: Kind of parameters (e.g. "node")
1051 @type bad_levels: string
1052 @param bad_levels: Level(s) at which the parameters are forbidden (e.g. "cluster")
1054 @type good_levels: string
1055 @param good_levels: Level(s) at which the parameters are allowed (e.g. "cluster or group")
1059 used_globals = glob_pars.intersection(params)
1061 msg = ("The following %s parameters are global and cannot"
1062 " be customized at %s level, please modify them at"
1063 " %s level: %s" %
1064 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1065 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1068 def _CheckNodeOnline(lu, node, msg=None):
1069 """Ensure that a given node is online.
1071 @param lu: the LU on behalf of which we make the check
1072 @param node: the node to check
1073 @param msg: if passed, should be a message to replace the default one
1074 @raise errors.OpPrereqError: if the node is offline
1078 msg = "Can't use offline node"
1079 if lu.cfg.GetNodeInfo(node).offline:
1080 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1083 def _CheckNodeNotDrained(lu, node):
1084 """Ensure that a given node is not drained.
1086 @param lu: the LU on behalf of which we make the check
1087 @param node: the node to check
1088 @raise errors.OpPrereqError: if the node is drained
1091 if lu.cfg.GetNodeInfo(node).drained:
1092 raise errors.OpPrereqError("Can't use drained node %s" % node,
1096 def _CheckNodeVmCapable(lu, node):
1097 """Ensure that a given node is vm capable.
1099 @param lu: the LU on behalf of which we make the check
1100 @param node: the node to check
1101 @raise errors.OpPrereqError: if the node is not vm capable
1104 if not lu.cfg.GetNodeInfo(node).vm_capable:
1105 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1109 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1110 """Ensure that a node supports a given OS.
1112 @param lu: the LU on behalf of which we make the check
1113 @param node: the node to check
1114 @param os_name: the OS to query about
1115 @param force_variant: whether to ignore variant errors
1116 @raise errors.OpPrereqError: if the node is not supporting the OS
1119 result = lu.rpc.call_os_get(node, os_name)
1120 result.Raise("OS '%s' not in supported OS list for node %s" %
1122 prereq=True, ecode=errors.ECODE_INVAL)
1123 if not force_variant:
1124 _CheckOSVariant(result.payload, os_name)
1127 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1128 """Ensure that a node has the given secondary ip.
1130 @type lu: L{LogicalUnit}
1131 @param lu: the LU on behalf of which we make the check
1133 @param node: the node to check
1134 @type secondary_ip: string
1135 @param secondary_ip: the ip to check
1136 @type prereq: boolean
1137 @param prereq: whether to throw a prerequisite or an execute error
1138 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1139 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1142 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1143 result.Raise("Failure checking secondary ip on node %s" % node,
1144 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1145 if not result.payload:
1146 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1147 " please fix and re-run this command" % secondary_ip)
1149 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1151 raise errors.OpExecError(msg)
1154 def _CheckNodePVs(nresult, exclusive_storage):
1158 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1159 if pvlist_dict is None:
1160 return (["Can't get PV list from node"], None)
1161 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1163 # check that ':' is not present in PV names, since it's a
1164 # special character for lvcreate (denotes the range of PEs to
1168 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1169 (pv.name, pv.vg_name))
1171 if exclusive_storage:
1172 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1173 errlist.extend(errmsgs)
1174 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1176 for (pvname, lvlist) in shared_pvs:
1177 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1178 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1179 (pvname, utils.CommaJoin(lvlist)))
1180 return (errlist, es_pvinfo)
1183 def _GetClusterDomainSecret():
1184 """Reads the cluster domain secret.
1187 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1191 def _CheckInstanceState(lu, instance, req_states, msg=None):
1192 """Ensure that an instance is in one of the required states.
1194 @param lu: the LU on behalf of which we make the check
1195 @param instance: the instance to check
1196 @param msg: if passed, should be a message to replace the default one
1197 @raise errors.OpPrereqError: if the instance is not in the required state
1201 msg = ("can't use instance from outside %s states" %
1202 utils.CommaJoin(req_states))
1203 if instance.admin_state not in req_states:
1204 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1205 (instance.name, instance.admin_state, msg),
1208 if constants.ADMINST_UP not in req_states:
1209 pnode = instance.primary_node
1210 if not lu.cfg.GetNodeInfo(pnode).offline:
1211 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1212 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1213 prereq=True, ecode=errors.ECODE_ENVIRON)
1214 if instance.name in ins_l.payload:
1215 raise errors.OpPrereqError("Instance %s is running, %s" %
1216 (instance.name, msg), errors.ECODE_STATE)
1218 lu.LogWarning("Primary node offline, ignoring check that instance"
1222 def _ComputeMinMaxSpec(name, qualifier, ispecs, value):
1223 """Computes if value is in the desired range.
1225 @param name: name of the parameter for which we perform the check
1226 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1228 @param ispecs: dictionary containing min and max values
1229 @param value: actual value that we want to use
1230 @return: None or an error string
1233 if value in [None, constants.VALUE_AUTO]:
1235 max_v = ispecs[constants.ISPECS_MAX].get(name, value)
1236 min_v = ispecs[constants.ISPECS_MIN].get(name, value)
1237 if value > max_v or min_v > value:
1239 fqn = "%s/%s" % (name, qualifier)
1242 return ("%s value %s is not in range [%s, %s]" %
1243 (fqn, value, min_v, max_v))
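# Example of the range check above (values are illustrative): with
#   ispecs = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#             constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
# _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ispecs, 8192) returns an
# error string, while a value of 512 (inside the range) returns None.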
1247 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1248 nic_count, disk_sizes, spindle_use,
1250 _compute_fn=_ComputeMinMaxSpec):
1251 """Verifies ipolicy against provided specs.
1254 @param ipolicy: The ipolicy
1256 @param mem_size: The memory size
1257 @type cpu_count: int
1258 @param cpu_count: Used cpu cores
1259 @type disk_count: int
1260 @param disk_count: Number of disks used
1261 @type nic_count: int
1262 @param nic_count: Number of nics used
1263 @type disk_sizes: list of ints
1264 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1265 @type spindle_use: int
1266 @param spindle_use: The number of spindles this instance uses
1267 @type disk_template: string
1268 @param disk_template: The disk template of the instance
1269 @param _compute_fn: The compute function (unittest only)
1270 @return: A list of violations, or an empty list if no violations are found
1273 assert disk_count == len(disk_sizes)
1276 (constants.ISPEC_MEM_SIZE, "", mem_size),
1277 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1278 (constants.ISPEC_NIC_COUNT, "", nic_count),
1279 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1280 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1281 for idx, d in enumerate(disk_sizes)]
1282 if disk_template != constants.DT_DISKLESS:
1283 # This check doesn't make sense for diskless instances
1284 test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
1286 allowed_dts = ipolicy[constants.IPOLICY_DTS]
1287 if disk_template not in allowed_dts:
1288 ret.append("Disk template %s is not allowed (allowed templates: %s)" %
1289 (disk_template, utils.CommaJoin(allowed_dts)))
1291 minmax = ipolicy[constants.ISPECS_MINMAX]
1292 return ret + filter(None,
1293 (_compute_fn(name, qualifier, minmax, value)
1294 for (name, qualifier, value) in test_settings))
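# A sketch of calling the verification above (all values are illustrative):
#   _ComputeIPolicySpecViolation(ipolicy, 512, 2, 1, 1, [20480], 1,
#                                constants.DT_PLAIN)
# returns a (possibly empty) list of human-readable violation messages.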
1297 def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
1298 _compute_fn=_ComputeIPolicySpecViolation):
1299 """Compute if instance meets the specs of ipolicy.
1302 @param ipolicy: The ipolicy to verify against
1303 @type instance: L{objects.Instance}
1304 @param instance: The instance to verify
1305 @type cfg: L{config.ConfigWriter}
1306 @param cfg: Cluster configuration
1307 @param _compute_fn: The function to verify ipolicy (unittest only)
1308 @see: L{_ComputeIPolicySpecViolation}
1311 be_full = cfg.GetClusterInfo().FillBE(instance)
1312 mem_size = be_full[constants.BE_MAXMEM]
1313 cpu_count = be_full[constants.BE_VCPUS]
1314 spindle_use = be_full[constants.BE_SPINDLE_USE]
1315 disk_count = len(instance.disks)
1316 disk_sizes = [disk.size for disk in instance.disks]
1317 nic_count = len(instance.nics)
1318 disk_template = instance.disk_template
1320 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1321 disk_sizes, spindle_use, disk_template)
1324 def _ComputeIPolicyInstanceSpecViolation(
1325 ipolicy, instance_spec, disk_template,
1326 _compute_fn=_ComputeIPolicySpecViolation):
1327 """Compute if instance specs meets the specs of ipolicy.
1330 @param ipolicy: The ipolicy to verify against
1331 @param instance_spec: dict
1332 @param instance_spec: The instance spec to verify
1333 @type disk_template: string
1334 @param disk_template: the disk template of the instance
1335 @param _compute_fn: The function to verify ipolicy (unittest only)
1336 @see: L{_ComputeIPolicySpecViolation}
1339 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1340 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1341 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1342 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1343 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1344 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1346 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1347 disk_sizes, spindle_use, disk_template)
1350 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1352 _compute_fn=_ComputeIPolicyInstanceViolation):
1353 """Compute if instance meets the specs of the new target group.
1355 @param ipolicy: The ipolicy to verify
1356 @param instance: The instance object to verify
1357 @param current_group: The current group of the instance
1358 @param target_group: The new group of the instance
1359 @type cfg: L{config.ConfigWriter}
1360 @param cfg: Cluster configuration
1361 @param _compute_fn: The function to verify ipolicy (unittest only)
1362 @see: L{_ComputeIPolicySpecViolation}
1365 if current_group == target_group:
1368 return _compute_fn(ipolicy, instance, cfg)
1371 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
1372 _compute_fn=_ComputeIPolicyNodeViolation):
1373 """Checks that the target node is correct in terms of instance policy.
1375 @param ipolicy: The ipolicy to verify
1376 @param instance: The instance object to verify
1377 @param node: The new node to relocate
1378 @type cfg: L{config.ConfigWriter}
1379 @param cfg: Cluster configuration
1380 @param ignore: Ignore violations of the ipolicy
1381 @param _compute_fn: The function to verify ipolicy (unittest only)
1382 @see: L{_ComputeIPolicySpecViolation}
1385 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1386 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
1389 msg = ("Instance does not meet target node group's (%s) instance"
1390 " policy: %s") % (node.group, utils.CommaJoin(res))
1394 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1397 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
1398 """Computes a set of any instances that would violate the new ipolicy.
1400 @param old_ipolicy: The current (still in-place) ipolicy
1401 @param new_ipolicy: The new (to become) ipolicy
1402 @param instances: List of instances to verify
1403 @type cfg: L{config.ConfigWriter}
1404 @param cfg: Cluster configuration
1405 @return: A list of instances which violate the new ipolicy but did not before
1409 return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
1410 _ComputeViolatingInstances(old_ipolicy, instances, cfg))
1413 def _ExpandItemName(fn, name, kind):
1414 """Expand an item name.
1416 @param fn: the function to use for expansion
1417 @param name: requested item name
1418 @param kind: text description ('Node' or 'Instance')
1419 @return: the resolved (full) name
1420 @raise errors.OpPrereqError: if the item is not found
1423 full_name = fn(name)
1424 if full_name is None:
1425 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1430 def _ExpandNodeName(cfg, name):
1431 """Wrapper over L{_ExpandItemName} for nodes."""
1432 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1435 def _ExpandInstanceName(cfg, name):
1436 """Wrapper over L{_ExpandItemName} for instance."""
1437 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1440 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1442 """Builds network related env variables for hooks
1444 This builds the hook environment from individual variables.
1447 @param name: the name of the network
1448 @type subnet: string
1449 @param subnet: the ipv4 subnet
1450 @type gateway: string
1451 @param gateway: the ipv4 gateway
1452 @type network6: string
1453 @param network6: the ipv6 subnet
1454 @type gateway6: string
1455 @param gateway6: the ipv6 gateway
1456 @type mac_prefix: string
1457 @param mac_prefix: the mac_prefix
1459 @param tags: the tags of the network
1464 env["NETWORK_NAME"] = name
1466 env["NETWORK_SUBNET"] = subnet
1468 env["NETWORK_GATEWAY"] = gateway
1470 env["NETWORK_SUBNET6"] = network6
1472 env["NETWORK_GATEWAY6"] = gateway6
1474 env["NETWORK_MAC_PREFIX"] = mac_prefix
1476 env["NETWORK_TAGS"] = " ".join(tags)
1481 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1482 minmem, maxmem, vcpus, nics, disk_template, disks,
1483 bep, hvp, hypervisor_name, tags):
1484 """Builds instance related env variables for hooks
1486 This builds the hook environment from individual variables.
1489 @param name: the name of the instance
1490 @type primary_node: string
1491 @param primary_node: the name of the instance's primary node
1492 @type secondary_nodes: list
1493 @param secondary_nodes: list of secondary nodes as strings
1494 @type os_type: string
1495 @param os_type: the name of the instance's OS
1496 @type status: string
1497 @param status: the desired status of the instance
1498 @type minmem: string
1499 @param minmem: the minimum memory size of the instance
1500 @type maxmem: string
1501 @param maxmem: the maximum memory size of the instance
1503 @param vcpus: the count of VCPUs the instance has
1505 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1506 the NICs the instance has
1507 @type disk_template: string
1508 @param disk_template: the disk template of the instance
1510 @param disks: the list of (size, mode) pairs
1512 @param bep: the backend parameters for the instance
1514 @param hvp: the hypervisor parameters for the instance
1515 @type hypervisor_name: string
1516 @param hypervisor_name: the hypervisor for the instance
1518 @param tags: list of instance tags as strings
1520 @return: the hook environment for this instance
1525 "INSTANCE_NAME": name,
1526 "INSTANCE_PRIMARY": primary_node,
1527 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1528 "INSTANCE_OS_TYPE": os_type,
1529 "INSTANCE_STATUS": status,
1530 "INSTANCE_MINMEM": minmem,
1531 "INSTANCE_MAXMEM": maxmem,
1532 # TODO(2.7) remove deprecated "memory" value
1533 "INSTANCE_MEMORY": maxmem,
1534 "INSTANCE_VCPUS": vcpus,
1535 "INSTANCE_DISK_TEMPLATE": disk_template,
1536 "INSTANCE_HYPERVISOR": hypervisor_name,
1539 nic_count = len(nics)
1540 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1543 env["INSTANCE_NIC%d_IP" % idx] = ip
1544 env["INSTANCE_NIC%d_MAC" % idx] = mac
1545 env["INSTANCE_NIC%d_MODE" % idx] = mode
1546 env["INSTANCE_NIC%d_LINK" % idx] = link
1548 nobj = objects.Network.FromDict(netinfo)
1549 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1551 # FIXME: broken network reference: the instance NIC specifies a
1552 # network, but the relevant network entry was not in the config. This
1553 # should be made impossible.
1554 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1555 if mode == constants.NIC_MODE_BRIDGED:
1556 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1560 env["INSTANCE_NIC_COUNT"] = nic_count
1563 disk_count = len(disks)
1564 for idx, (size, mode) in enumerate(disks):
1565 env["INSTANCE_DISK%d_SIZE" % idx] = size
1566 env["INSTANCE_DISK%d_MODE" % idx] = mode
1570 env["INSTANCE_DISK_COUNT"] = disk_count
1575 env["INSTANCE_TAGS"] = " ".join(tags)
1577 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1578 for key, value in source.items():
1579 env["INSTANCE_%s_%s" % (kind, key)] = value
1584 def _NICToTuple(lu, nic):
1585 """Build a tupple of nic information.
1587 @type lu: L{LogicalUnit}
1588 @param lu: the logical unit on whose behalf we execute
1589 @type nic: L{objects.NIC}
1590 @param nic: nic to convert to hooks tuple
1593 cluster = lu.cfg.GetClusterInfo()
1594 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1595 mode = filled_params[constants.NIC_MODE]
1596 link = filled_params[constants.NIC_LINK]
1599 nobj = lu.cfg.GetNetwork(nic.network)
1600 netinfo = objects.Network.ToDict(nobj)
1601 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1604 def _NICListToTuple(lu, nics):
1605 """Build a list of nic information tuples.
1607 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1608 value in LUInstanceQueryData.
1610 @type lu: L{LogicalUnit}
1611 @param lu: the logical unit on whose behalf we execute
1612 @type nics: list of L{objects.NIC}
1613 @param nics: list of nics to convert to hooks tuples
1618 hooks_nics.append(_NICToTuple(lu, nic))
1622 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1623 """Builds instance related env variables for hooks from an object.
1625 @type lu: L{LogicalUnit}
1626 @param lu: the logical unit on whose behalf we execute
1627 @type instance: L{objects.Instance}
1628 @param instance: the instance for which we should build the
1630 @type override: dict
1631 @param override: dictionary with key/values that will override
1634 @return: the hook environment dictionary
1637 cluster = lu.cfg.GetClusterInfo()
1638 bep = cluster.FillBE(instance)
1639 hvp = cluster.FillHV(instance)
1641 "name": instance.name,
1642 "primary_node": instance.primary_node,
1643 "secondary_nodes": instance.secondary_nodes,
1644 "os_type": instance.os,
1645 "status": instance.admin_state,
1646 "maxmem": bep[constants.BE_MAXMEM],
1647 "minmem": bep[constants.BE_MINMEM],
1648 "vcpus": bep[constants.BE_VCPUS],
1649 "nics": _NICListToTuple(lu, instance.nics),
1650 "disk_template": instance.disk_template,
1651 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1654 "hypervisor_name": instance.hypervisor,
1655 "tags": instance.tags,
1658 args.update(override)
1659 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1662 def _AdjustCandidatePool(lu, exceptions):
1663 """Adjust the candidate pool after node operations.
1666 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1668 lu.LogInfo("Promoted nodes to master candidate role: %s",
1669 utils.CommaJoin(node.name for node in mod_list))
1670 for name in mod_list:
1671 lu.context.ReaddNode(name)
1672 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1674 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1678 def _DecideSelfPromotion(lu, exceptions=None):
1679 """Decide whether I should promote myself as a master candidate.
1682 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1683 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1684 # the new node will increase mc_max with one, so:
1685 mc_should = min(mc_should + 1, cp_size)
1686 return mc_now < mc_should
1689 def _ComputeViolatingInstances(ipolicy, instances, cfg):
1690 """Computes a set of instances who violates given ipolicy.
1692 @param ipolicy: The ipolicy to verify
1693 @type instances: L{objects.Instance}
1694 @param instances: List of instances to verify
1695 @type cfg: L{config.ConfigWriter}
1696 @param cfg: Cluster configuration
1697 @return: A frozenset of instance names violating the ipolicy
1700 return frozenset([inst.name for inst in instances
1701 if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
1704 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1705 """Check that the brigdes needed by a list of nics exist.
1708 cluster = lu.cfg.GetClusterInfo()
1709 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1710 brlist = [params[constants.NIC_LINK] for params in paramslist
1711 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1713 result = lu.rpc.call_bridges_exist(target_node, brlist)
1714 result.Raise("Error checking bridges on destination node '%s'" %
1715 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1718 def _CheckInstanceBridgesExist(lu, instance, node=None):
1719 """Check that the brigdes needed by an instance exist.
1723 node = instance.primary_node
1724 _CheckNicsBridgesExist(lu, instance.nics, node)
1727 def _CheckOSVariant(os_obj, name):
1728 """Check whether an OS name conforms to the os variants specification.
1730 @type os_obj: L{objects.OS}
1731 @param os_obj: OS object to check
1733 @param name: OS name passed by the user, to check for validity
1736 variant = objects.OS.GetVariant(name)
1737 if not os_obj.supported_variants:
1739 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1740 " passed)" % (os_obj.name, variant),
1744 raise errors.OpPrereqError("OS name must include a variant",
1747 if variant not in os_obj.supported_variants:
1748 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
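# For illustration (a sketch): for a user-supplied name like
# "debootstrap+default" the variant is "default" and must appear in
# os_obj.supported_variants; a bare "debootstrap" is rejected when the OS
# declares variants at all.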
1751 def _GetNodeInstancesInner(cfg, fn):
1752 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1755 def _GetNodeInstances(cfg, node_name):
1756 """Returns a list of all primary and secondary instances on a node.
1760 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1763 def _GetNodePrimaryInstances(cfg, node_name):
1764 """Returns primary instances on a node.
1767 return _GetNodeInstancesInner(cfg,
1768 lambda inst: node_name == inst.primary_node)
1771 def _GetNodeSecondaryInstances(cfg, node_name):
1772 """Returns secondary instances on a node.
1775 return _GetNodeInstancesInner(cfg,
1776 lambda inst: node_name in inst.secondary_nodes)
1779 def _GetStorageTypeArgs(cfg, storage_type):
1780 """Returns the arguments for a storage type.
1783 # Special case for file storage
1784 if storage_type == constants.ST_FILE:
1785 # storage.FileStorage wants a list of storage directories
1786 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1791 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1794 for dev in instance.disks:
1795 cfg.SetDiskID(dev, node_name)
1797 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1799 result.Raise("Failed to get disk status from node %s" % node_name,
1800 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1802 for idx, bdev_status in enumerate(result.payload):
1803 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1809 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1810 """Check the sanity of iallocator and node arguments and use the
1811 cluster-wide iallocator if appropriate.
1813 Check that at most one of (iallocator, node) is specified. If none is
1814 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1815 then the LU's opcode's iallocator slot is filled with the cluster-wide default iallocator.
1818 @type iallocator_slot: string
1819 @param iallocator_slot: the name of the opcode iallocator slot
1820 @type node_slot: string
1821 @param node_slot: the name of the opcode target node slot
1824 node = getattr(lu.op, node_slot, None)
1825 ialloc = getattr(lu.op, iallocator_slot, None)
1829 if node is not None and ialloc is not None:
1830 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1832 elif ((node is None and ialloc is None) or
1833 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1834 default_iallocator = lu.cfg.GetDefaultIAllocator()
1835 if default_iallocator:
1836 setattr(lu.op, iallocator_slot, default_iallocator)
1838 raise errors.OpPrereqError("No iallocator or node given and no"
1839 " cluster-wide default iallocator found;"
1840 " please specify either an iallocator or a"
1841 " node, or set a cluster-wide default"
1842 " iallocator", errors.ECODE_INVAL)
1845 def _GetDefaultIAllocator(cfg, ialloc):
1846 """Decides on which iallocator to use.
1848 @type cfg: L{config.ConfigWriter}
1849 @param cfg: Cluster configuration object
1850 @type ialloc: string or None
1851 @param ialloc: Iallocator specified in opcode
1853 @return: Iallocator name
1857 # Use default iallocator
1858 ialloc = cfg.GetDefaultIAllocator()
1861 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1862 " opcode nor as a cluster-wide default",
1868 def _CheckHostnameSane(lu, name):
1869 """Ensures that a given hostname resolves to a 'sane' name.
1871 The given name is required to be a prefix of the resolved hostname,
1872 to prevent accidental mismatches.
1874 @param lu: the logical unit on behalf of which we're checking
1875 @param name: the name we should resolve and check
1876 @return: the resolved hostname object
1879 hostname = netutils.GetHostname(name=name)
1880 if hostname.name != name:
1881 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1882 if not utils.MatchNameComponent(name, [hostname.name]):
1883 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1884 " same as given hostname '%s'") %
1885 (hostname.name, name), errors.ECODE_INVAL)
1889 class LUClusterPostInit(LogicalUnit):
1890 """Logical unit for running hooks after cluster initialization.
1893 HPATH = "cluster-init"
1894 HTYPE = constants.HTYPE_CLUSTER
1896 def BuildHooksEnv(self):
1901 "OP_TARGET": self.cfg.GetClusterName(),
1904 def BuildHooksNodes(self):
1905 """Build hooks nodes.
1908 return ([], [self.cfg.GetMasterNode()])
1910 def Exec(self, feedback_fn):
1917 class LUClusterDestroy(LogicalUnit):
1918 """Logical unit for destroying the cluster.
1921 HPATH = "cluster-destroy"
1922 HTYPE = constants.HTYPE_CLUSTER
1924 def BuildHooksEnv(self):
1929 "OP_TARGET": self.cfg.GetClusterName(),
1932 def BuildHooksNodes(self):
1933 """Build hooks nodes.
1938 def CheckPrereq(self):
1939 """Check prerequisites.
1941 This checks whether the cluster is empty.
1943 Any errors are signaled by raising errors.OpPrereqError.
1946 master = self.cfg.GetMasterNode()
1948 nodelist = self.cfg.GetNodeList()
1949 if len(nodelist) != 1 or nodelist[0] != master:
1950 raise errors.OpPrereqError("There are still %d node(s) in"
1951 " this cluster." % (len(nodelist) - 1),
1953 instancelist = self.cfg.GetInstanceList()
1955 raise errors.OpPrereqError("There are still %d instance(s) in"
1956 " this cluster." % len(instancelist),
1959 def Exec(self, feedback_fn):
1960 """Destroys the cluster.
1963 master_params = self.cfg.GetMasterNetworkParameters()
1965 # Run post hooks on master node before it's removed
1966 _RunPostHook(self, master_params.name)
1968 ems = self.cfg.GetUseExternalMipScript()
1969 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1972 self.LogWarning("Error disabling the master IP address: %s",
1975 return master_params.name
1978 def _VerifyCertificate(filename):
1979 """Verifies a certificate for L{LUClusterVerifyConfig}.
1981 @type filename: string
1982 @param filename: Path to PEM file
1986 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1987 utils.ReadFile(filename))
1988 except Exception, err: # pylint: disable=W0703
1989 return (LUClusterVerifyConfig.ETYPE_ERROR,
1990 "Failed to load X509 certificate %s: %s" % (filename, err))
1993 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1994 constants.SSL_CERT_EXPIRATION_ERROR)
1997 fnamemsg = "While verifying %s: %s" % (filename, msg)
2002 return (None, fnamemsg)
2003 elif errcode == utils.CERT_WARNING:
2004 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
2005 elif errcode == utils.CERT_ERROR:
2006 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
2008 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
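
# Illustrative sketch (assumption, not in the original source): collecting
# the (errcode, message) pairs returned by _VerifyCertificate for all known
# certificate files and reporting only the problematic ones.
def _ExampleCheckAllCertificates(feedback_fn):
  """Report certificate problems via the given feedback function."""
  for filename in pathutils.ALL_CERT_FILES:
    (errcode, msg) = _VerifyCertificate(filename)
    if errcode is not None:
      feedback_fn("%s: %s" % (errcode, msg))
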
2011 def _GetAllHypervisorParameters(cluster, instances):
2012 """Compute the set of all hypervisor parameters.
2014 @type cluster: L{objects.Cluster}
2015 @param cluster: the cluster object
2016 @type instances: list of L{objects.Instance}
2017 @param instances: additional instances from which to obtain parameters
2018 @rtype: list of (origin, hypervisor, parameters)
2019 @return: a list with all parameters found, indicating the hypervisor they
2020 apply to, and the origin (can be "cluster", "os X", or "instance Y")
2025 for hv_name in cluster.enabled_hypervisors:
2026 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2028 for os_name, os_hvp in cluster.os_hvp.items():
2029 for hv_name, hv_params in os_hvp.items():
2031 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2032 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2034 # TODO: collapse identical parameter values in a single one
2035 for instance in instances:
2036 if instance.hvparams:
2037 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2038 cluster.FillHV(instance)))
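
# Illustrative sketch (assumption, not in the original source): the result of
# _GetAllHypervisorParameters is a flat list of (origin, hypervisor,
# parameters) tuples, e.g. ("cluster", "xen-pvm", {...}) or
# ("instance web1", "kvm", {...}); counting entries per hypervisor:
def _ExampleCountHvParamSources(cluster, instances):
  """Count how many parameter dicts were collected for each hypervisor."""
  counts = {}
  for (_, hv_name, _) in _GetAllHypervisorParameters(cluster, instances):
    counts[hv_name] = counts.get(hv_name, 0) + 1
  return counts
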
2043 class _VerifyErrors(object):
2044 """Mix-in for cluster/group verify LUs.
2046 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2047 self.op and self._feedback_fn to be available.)
2051 ETYPE_FIELD = "code"
2052 ETYPE_ERROR = "ERROR"
2053 ETYPE_WARNING = "WARNING"
2055 def _Error(self, ecode, item, msg, *args, **kwargs):
2056 """Format an error message.
2058 Based on the opcode's error_codes parameter, either format a
2059 parseable error code, or a simpler error string.
2061 This must be called only from Exec and functions called from Exec.
2064 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2065 itype, etxt, _ = ecode
2066 # If the error code is in the list of ignored errors, demote the error to a warning
2068 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2069 ltype = self.ETYPE_WARNING
2070 # first complete the msg
2073 # then format the whole message
2074 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2075 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2081 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2082 # and finally report it via the feedback_fn
2083 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2084 # do not mark the operation as failed for WARN cases only
2085 if ltype == self.ETYPE_ERROR:
2088 def _ErrorIf(self, cond, *args, **kwargs):
2089 """Log an error message if the passed condition is True.
2093 or self.op.debug_simulate_errors): # pylint: disable=E1101
2094 self._Error(*args, **kwargs)
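
# Illustrative sketch (assumption, not in the original source): the parseable
# format produced by _VerifyErrors._Error when op.error_codes is set; error
# code constants such as constants.CV_ENODERPC unpack to (itype, etxt, _doc).
def _ExampleFormatParseableError(ecode, item, msg):
  """Format an error the way _Error does with error_codes enabled."""
  (itype, etxt, _) = ecode
  return "%s:%s:%s:%s:%s" % (_VerifyErrors.ETYPE_ERROR, etxt, itype, item, msg)
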
2097 class LUClusterVerify(NoHooksLU):
2098 """Submits all jobs necessary to verify the cluster.
2103 def ExpandNames(self):
2104 self.needed_locks = {}
2106 def Exec(self, feedback_fn):
2109 if self.op.group_name:
2110 groups = [self.op.group_name]
2111 depends_fn = lambda: None
2113 groups = self.cfg.GetNodeGroupList()
2115 # Verify global configuration
2117 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2120 # Always depend on global verification
2121 depends_fn = lambda: [(-len(jobs), [])]
2124 [opcodes.OpClusterVerifyGroup(group_name=group,
2125 ignore_errors=self.op.ignore_errors,
2126 depends=depends_fn())]
2127 for group in groups)
2129 # Fix up all parameters
2130 for op in itertools.chain(*jobs): # pylint: disable=W0142
2131 op.debug_simulate_errors = self.op.debug_simulate_errors
2132 op.verbose = self.op.verbose
2133 op.error_codes = self.op.error_codes
2135 op.skip_checks = self.op.skip_checks
2136 except AttributeError:
2137 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2139 return ResultWithJobs(jobs)
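
# Illustrative sketch (assumption, not in the original source): the job list
# built by LUClusterVerify.Exec submits one single-opcode job per entry and
# makes every group verification depend on the configuration check.
def _ExampleVerifyJobList(group_names, ignore_errors):
  """Build a job list with the same shape as LUClusterVerify.Exec."""
  jobs = [[opcodes.OpClusterVerifyConfig(ignore_errors=ignore_errors)]]
  # A negative relative dependency refers to a job submitted earlier in the
  # same submission; -len(jobs) points at the config-verification job above.
  depends = [(-len(jobs), [])]
  jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=ignore_errors,
                                            depends=depends)]
              for group in group_names)
  return jobs
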
2142 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2143 """Verifies the cluster config.
2148 def _VerifyHVP(self, hvp_data):
2149 """Verifies locally the syntax of the hypervisor parameters.
2152 for item, hv_name, hv_params in hvp_data:
2153 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2156 hv_class = hypervisor.GetHypervisorClass(hv_name)
2157 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2158 hv_class.CheckParameterSyntax(hv_params)
2159 except errors.GenericError, err:
2160 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2162 def ExpandNames(self):
2163 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2164 self.share_locks = _ShareAll()
2166 def CheckPrereq(self):
2167 """Check prerequisites.
2170 # Retrieve all information
2171 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2172 self.all_node_info = self.cfg.GetAllNodesInfo()
2173 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2175 def Exec(self, feedback_fn):
2176 """Verify integrity of cluster, performing various test on nodes.
2180 self._feedback_fn = feedback_fn
2182 feedback_fn("* Verifying cluster config")
2184 for msg in self.cfg.VerifyConfig():
2185 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2187 feedback_fn("* Verifying cluster certificate files")
2189 for cert_filename in pathutils.ALL_CERT_FILES:
2190 (errcode, msg) = _VerifyCertificate(cert_filename)
2191 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2193 feedback_fn("* Verifying hypervisor parameters")
2195 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2196 self.all_inst_info.values()))
2198 feedback_fn("* Verifying all nodes belong to an existing group")
2200 # We do this verification here because, should this bogus circumstance
2201 # occur, it would never be caught by VerifyGroup, which only acts on
2202 # nodes/instances reachable from existing node groups.
2204 dangling_nodes = set(node.name for node in self.all_node_info.values()
2205 if node.group not in self.all_group_info)
2207 dangling_instances = {}
2208 no_node_instances = []
2210 for inst in self.all_inst_info.values():
2211 if inst.primary_node in dangling_nodes:
2212 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2213 elif inst.primary_node not in self.all_node_info:
2214 no_node_instances.append(inst.name)
2219 utils.CommaJoin(dangling_instances.get(node.name,
2221 for node in dangling_nodes]
2223 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2225 "the following nodes (and their instances) belong to a non"
2226 " existing group: %s", utils.CommaJoin(pretty_dangling))
2228 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2230 "the following instances have a non-existing primary-node:"
2231 " %s", utils.CommaJoin(no_node_instances))
2236 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2237 """Verifies the status of a node group.
2240 HPATH = "cluster-verify"
2241 HTYPE = constants.HTYPE_CLUSTER
2244 _HOOKS_INDENT_RE = re.compile("^", re.M)
2246 class NodeImage(object):
2247 """A class representing the logical and physical status of a node.
2250 @ivar name: the node name to which this object refers
2251 @ivar volumes: a structure as returned from
2252 L{ganeti.backend.GetVolumeList} (runtime)
2253 @ivar instances: a list of running instances (runtime)
2254 @ivar pinst: list of configured primary instances (config)
2255 @ivar sinst: list of configured secondary instances (config)
2256 @ivar sbp: dictionary of {primary-node: list of instances} for all
2257 instances for which this node is secondary (config)
2258 @ivar mfree: free memory, as reported by hypervisor (runtime)
2259 @ivar dfree: free disk, as reported by the node (runtime)
2260 @ivar offline: the offline status (config)
2261 @type rpc_fail: boolean
2262 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2263 not whether the individual keys were correct) (runtime)
2264 @type lvm_fail: boolean
2265 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2266 @type hyp_fail: boolean
2267 @ivar hyp_fail: whether the RPC call didn't return the instance list
2268 @type ghost: boolean
2269 @ivar ghost: whether this is a known node or not (config)
2270 @type os_fail: boolean
2271 @ivar os_fail: whether the RPC call didn't return valid OS data
2273 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2274 @type vm_capable: boolean
2275 @ivar vm_capable: whether the node can host instances
2277 @ivar pv_min: size in MiB of the smallest PVs
2279 @ivar pv_max: size in MiB of the biggest PVs
2282 def __init__(self, offline=False, name=None, vm_capable=True):
2291 self.offline = offline
2292 self.vm_capable = vm_capable
2293 self.rpc_fail = False
2294 self.lvm_fail = False
2295 self.hyp_fail = False
2297 self.os_fail = False
2302 def ExpandNames(self):
2303 # This raises errors.OpPrereqError on its own:
2304 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2306 # Get instances in node group; this is unsafe and needs verification later
2308 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2310 self.needed_locks = {
2311 locking.LEVEL_INSTANCE: inst_names,
2312 locking.LEVEL_NODEGROUP: [self.group_uuid],
2313 locking.LEVEL_NODE: [],
2315 # This opcode is run by watcher every five minutes and acquires all nodes
2316 # for a group. It doesn't run for a long time, so it's better to acquire
2317 # the node allocation lock as well.
2318 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2321 self.share_locks = _ShareAll()
2323 def DeclareLocks(self, level):
2324 if level == locking.LEVEL_NODE:
2325 # Get members of node group; this is unsafe and needs verification later
2326 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2328 all_inst_info = self.cfg.GetAllInstancesInfo()
2330 # In Exec(), we warn about mirrored instances that have primary and
2331 # secondary living in separate node groups. To fully verify that
2332 # volumes for these instances are healthy, we will need to do an
2333 # extra call to their secondaries. We ensure here those nodes will
2335 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2336 # Important: access only the instances whose lock is owned
2337 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2338 nodes.update(all_inst_info[inst].secondary_nodes)
2340 self.needed_locks[locking.LEVEL_NODE] = nodes
2342 def CheckPrereq(self):
2343 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2344 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2346 group_nodes = set(self.group_info.members)
2348 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2351 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2353 unlocked_instances = \
2354 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2357 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2358 utils.CommaJoin(unlocked_nodes),
2361 if unlocked_instances:
2362 raise errors.OpPrereqError("Missing lock for instances: %s" %
2363 utils.CommaJoin(unlocked_instances),
2366 self.all_node_info = self.cfg.GetAllNodesInfo()
2367 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2369 self.my_node_names = utils.NiceSort(group_nodes)
2370 self.my_inst_names = utils.NiceSort(group_instances)
2372 self.my_node_info = dict((name, self.all_node_info[name])
2373 for name in self.my_node_names)
2375 self.my_inst_info = dict((name, self.all_inst_info[name])
2376 for name in self.my_inst_names)
2378 # We detect here the nodes that will need the extra RPC calls for verifying
2379 # split LV volumes; they should be locked.
2380 extra_lv_nodes = set()
2382 for inst in self.my_inst_info.values():
2383 if inst.disk_template in constants.DTS_INT_MIRROR:
2384 for nname in inst.all_nodes:
2385 if self.all_node_info[nname].group != self.group_uuid:
2386 extra_lv_nodes.add(nname)
2388 unlocked_lv_nodes = \
2389 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2391 if unlocked_lv_nodes:
2392 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2393 utils.CommaJoin(unlocked_lv_nodes),
2395 self.extra_lv_nodes = list(extra_lv_nodes)
2397 def _VerifyNode(self, ninfo, nresult):
2398 """Perform some basic validation on data returned from a node.
2400 - check that the result data structure is well formed and has all the mandatory fields
2402 - check ganeti version
2404 @type ninfo: L{objects.Node}
2405 @param ninfo: the node to check
2406 @param nresult: the results from the node
2408 @return: whether overall this call was successful (and we can expect
2409 reasonable values in the response)
2413 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2415 # main result, nresult should be a non-empty dict
2416 test = not nresult or not isinstance(nresult, dict)
2417 _ErrorIf(test, constants.CV_ENODERPC, node,
2418 "unable to verify node: no data returned")
2422 # compares ganeti version
2423 local_version = constants.PROTOCOL_VERSION
2424 remote_version = nresult.get("version", None)
2425 test = not (remote_version and
2426 isinstance(remote_version, (list, tuple)) and
2427 len(remote_version) == 2)
2428 _ErrorIf(test, constants.CV_ENODERPC, node,
2429 "connection to node returned invalid data")
2433 test = local_version != remote_version[0]
2434 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2435 "incompatible protocol versions: master %s,"
2436 " node %s", local_version, remote_version[0])
2440 # node seems compatible, we can actually try to look into its results
2442 # full package version
2443 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2444 constants.CV_ENODEVERSION, node,
2445 "software version mismatch: master %s, node %s",
2446 constants.RELEASE_VERSION, remote_version[1],
2447 code=self.ETYPE_WARNING)
2449 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2450 if ninfo.vm_capable and isinstance(hyp_result, dict):
2451 for hv_name, hv_result in hyp_result.iteritems():
2452 test = hv_result is not None
2453 _ErrorIf(test, constants.CV_ENODEHV, node,
2454 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2456 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2457 if ninfo.vm_capable and isinstance(hvp_result, list):
2458 for item, hv_name, hv_result in hvp_result:
2459 _ErrorIf(True, constants.CV_ENODEHV, node,
2460 "hypervisor %s parameter verify failure (source %s): %s",
2461 hv_name, item, hv_result)
2463 test = nresult.get(constants.NV_NODESETUP,
2464 ["Missing NODESETUP results"])
2465 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2470 def _VerifyNodeTime(self, ninfo, nresult,
2471 nvinfo_starttime, nvinfo_endtime):
2472 """Check the node time.
2474 @type ninfo: L{objects.Node}
2475 @param ninfo: the node to check
2476 @param nresult: the remote results for the node
2477 @param nvinfo_starttime: the start time of the RPC call
2478 @param nvinfo_endtime: the end time of the RPC call
2482 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2484 ntime = nresult.get(constants.NV_TIME, None)
2486 ntime_merged = utils.MergeTime(ntime)
2487 except (ValueError, TypeError):
2488 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2491 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2492 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2493 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2494 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2498 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2499 "Node time diverges by at least %s from master node time",
2502 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2503 """Check the node LVM results and update info for cross-node checks.
2505 @type ninfo: L{objects.Node}
2506 @param ninfo: the node to check
2507 @param nresult: the remote results for the node
2508 @param vg_name: the configured VG name
2509 @type nimg: L{NodeImage}
2510 @param nimg: node image
2517 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2519 # checks vg existence and size > 20G
2520 vglist = nresult.get(constants.NV_VGLIST, None)
2522 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2524 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2525 constants.MIN_VG_SIZE)
2526 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2529 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2531 self._Error(constants.CV_ENODELVM, node, em)
2532 if pvminmax is not None:
2533 (nimg.pv_min, nimg.pv_max) = pvminmax
2535 def _VerifyGroupLVM(self, node_image, vg_name):
2536 """Check cross-node consistency in LVM.
2538 @type node_image: dict
2539 @param node_image: info about nodes, mapping from node to names to
2540 L{NodeImage} objects
2541 @param vg_name: the configured VG name
2547 # Only exclusive storage needs this kind of check
2548 if not self._exclusive_storage:
2551 # exclusive_storage wants all PVs to have the same size (approximately),
2552 # if the smallest and the biggest ones are okay, everything is fine.
2553 # pv_min is None iff pv_max is None
2554 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2557 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2558 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2559 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2560 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2561 "PV sizes differ too much in the group; smallest (%s MB) is"
2562 " on %s, biggest (%s MB) is on %s",
2563 pvmin, minnode, pvmax, maxnode)
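
  # Illustrative sketch (assumption, not in the original source): with
  # exclusive storage the group check above compares only the globally
  # smallest and biggest PV sizes (in MiB).
  @staticmethod
  def _ExamplePvSizeCheck():
    """Return whether two hypothetical PV sizes differ too much."""
    return utils.LvmExclusiveTestBadPvSizes(5120, 10240)
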
2565 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2566 """Check the node bridges.
2568 @type ninfo: L{objects.Node}
2569 @param ninfo: the node to check
2570 @param nresult: the remote results for the node
2571 @param bridges: the expected list of bridges
2578 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2580 missing = nresult.get(constants.NV_BRIDGES, None)
2581 test = not isinstance(missing, list)
2582 _ErrorIf(test, constants.CV_ENODENET, node,
2583 "did not return valid bridge information")
2585 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2586 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2588 def _VerifyNodeUserScripts(self, ninfo, nresult):
2589 """Check the results of user scripts presence and executability on the node
2591 @type ninfo: L{objects.Node}
2592 @param ninfo: the node to check
2593 @param nresult: the remote results for the node
2598 test = not constants.NV_USERSCRIPTS in nresult
2599 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2600 "did not return user scripts information")
2602 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2604 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2605 "user scripts not present or not executable: %s" %
2606 utils.CommaJoin(sorted(broken_scripts)))
2608 def _VerifyNodeNetwork(self, ninfo, nresult):
2609 """Check the node network connectivity results.
2611 @type ninfo: L{objects.Node}
2612 @param ninfo: the node to check
2613 @param nresult: the remote results for the node
2617 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619 test = constants.NV_NODELIST not in nresult
2620 _ErrorIf(test, constants.CV_ENODESSH, node,
2621 "node hasn't returned node ssh connectivity data")
2623 if nresult[constants.NV_NODELIST]:
2624 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2625 _ErrorIf(True, constants.CV_ENODESSH, node,
2626 "ssh communication with node '%s': %s", a_node, a_msg)
2628 test = constants.NV_NODENETTEST not in nresult
2629 _ErrorIf(test, constants.CV_ENODENET, node,
2630 "node hasn't returned node tcp connectivity data")
2632 if nresult[constants.NV_NODENETTEST]:
2633 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2635 _ErrorIf(True, constants.CV_ENODENET, node,
2636 "tcp communication with node '%s': %s",
2637 anode, nresult[constants.NV_NODENETTEST][anode])
2639 test = constants.NV_MASTERIP not in nresult
2640 _ErrorIf(test, constants.CV_ENODENET, node,
2641 "node hasn't returned node master IP reachability data")
2643 if not nresult[constants.NV_MASTERIP]:
2644 if node == self.master_node:
2645 msg = "the master node cannot reach the master IP (not configured?)"
2647 msg = "cannot reach the master IP"
2648 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2650 def _VerifyInstance(self, instance, inst_config, node_image,
2652 """Verify an instance.
2654 This function checks to see if the required block devices are
2655 available on the instance's node, and that the nodes are in the correct
2659 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2660 pnode = inst_config.primary_node
2661 pnode_img = node_image[pnode]
2662 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2664 node_vol_should = {}
2665 inst_config.MapLVsByNode(node_vol_should)
2667 cluster = self.cfg.GetClusterInfo()
2668 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2670 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
2671 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2672 code=self.ETYPE_WARNING)
2674 for node in node_vol_should:
2675 n_img = node_image[node]
2676 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2677 # ignore missing volumes on offline or broken nodes
2679 for volume in node_vol_should[node]:
2680 test = volume not in n_img.volumes
2681 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2682 "volume %s missing on node %s", volume, node)
2684 if inst_config.admin_state == constants.ADMINST_UP:
2685 test = instance not in pnode_img.instances and not pnode_img.offline
2686 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2687 "instance not running on its primary node %s",
2689 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2690 "instance is marked as running and lives on offline node %s",
2693 diskdata = [(nname, success, status, idx)
2694 for (nname, disks) in diskstatus.items()
2695 for idx, (success, status) in enumerate(disks)]
2697 for nname, success, bdev_status, idx in diskdata:
2698 # the 'ghost node' construction in Exec() ensures that we have a
2700 snode = node_image[nname]
2701 bad_snode = snode.ghost or snode.offline
2702 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2703 not success and not bad_snode,
2704 constants.CV_EINSTANCEFAULTYDISK, instance,
2705 "couldn't retrieve status for disk/%s on %s: %s",
2706 idx, nname, bdev_status)
2707 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2708 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2709 constants.CV_EINSTANCEFAULTYDISK, instance,
2710 "disk/%s on %s is faulty", idx, nname)
2712 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2713 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2714 " primary node failed", instance)
2716 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2717 constants.CV_EINSTANCELAYOUT,
2718 instance, "instance has multiple secondary nodes: %s",
2719 utils.CommaJoin(inst_config.secondary_nodes),
2720 code=self.ETYPE_WARNING)
2722 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2723 # Disk template not compatible with exclusive_storage: no instance
2724 # node should have the flag set
2725 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2726 inst_config.all_nodes)
2727 es_nodes = [n for (n, es) in es_flags.items()
2729 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2730 "instance has template %s, which is not supported on nodes"
2731 " that have exclusive storage set: %s",
2732 inst_config.disk_template, utils.CommaJoin(es_nodes))
2734 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2735 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2736 instance_groups = {}
2738 for node in instance_nodes:
2739 instance_groups.setdefault(self.all_node_info[node].group,
2743 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2744 # Sort so that we always list the primary node first.
2745 for group, nodes in sorted(instance_groups.items(),
2746 key=lambda (_, nodes): pnode in nodes,
2749 self._ErrorIf(len(instance_groups) > 1,
2750 constants.CV_EINSTANCESPLITGROUPS,
2751 instance, "instance has primary and secondary nodes in"
2752 " different groups: %s", utils.CommaJoin(pretty_list),
2753 code=self.ETYPE_WARNING)
2755 inst_nodes_offline = []
2756 for snode in inst_config.secondary_nodes:
2757 s_img = node_image[snode]
2758 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2759 snode, "instance %s, connection to secondary node failed",
2763 inst_nodes_offline.append(snode)
2765 # warn that the instance lives on offline nodes
2766 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2767 "instance has offline secondary node(s) %s",
2768 utils.CommaJoin(inst_nodes_offline))
2769 # ... or ghost/non-vm_capable nodes
2770 for node in inst_config.all_nodes:
2771 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2772 instance, "instance lives on ghost node %s", node)
2773 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2774 instance, "instance lives on non-vm_capable node %s", node)
2776 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2777 """Verify if there are any unknown volumes in the cluster.
2779 The .os, .swap and backup volumes are ignored. All other volumes are
2780 reported as unknown.
2782 @type reserved: L{ganeti.utils.FieldSet}
2783 @param reserved: a FieldSet of reserved volume names
2786 for node, n_img in node_image.items():
2787 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2788 self.all_node_info[node].group != self.group_uuid):
2789 # skip non-healthy nodes
2791 for volume in n_img.volumes:
2792 test = ((node not in node_vol_should or
2793 volume not in node_vol_should[node]) and
2794 not reserved.Matches(volume))
2795 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2796 "volume %s is unknown", volume)
2798 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2799 """Verify N+1 Memory Resilience.
2801 Check that if one single node dies we can still start all the
2802 instances it was primary for.
2805 cluster_info = self.cfg.GetClusterInfo()
2806 for node, n_img in node_image.items():
2807 # This code checks that every node which is now listed as
2808 # secondary has enough memory to host all instances it is
2809 # supposed to should a single other node in the cluster fail.
2810 # FIXME: not ready for failover to an arbitrary node
2811 # FIXME: does not support file-backed instances
2812 # WARNING: we currently take into account down instances as well
2813 # as up ones, considering that even if they're down someone
2814 # might want to start them even in the event of a node failure.
2815 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2816 # we're skipping nodes marked offline and nodes in other groups from
2817 # the N+1 warning, since most likely we don't have good memory
2818 # information from them; we already list instances living on such
2819 # nodes, and that's enough warning
2821 #TODO(dynmem): also consider ballooning out other instances
2822 for prinode, instances in n_img.sbp.items():
2824 for instance in instances:
2825 bep = cluster_info.FillBE(instance_cfg[instance])
2826 if bep[constants.BE_AUTO_BALANCE]:
2827 needed_mem += bep[constants.BE_MINMEM]
2828 test = n_img.mfree < needed_mem
2829 self._ErrorIf(test, constants.CV_ENODEN1, node,
2830 "not enough memory to accomodate instance failovers"
2831 " should node %s fail (%dMiB needed, %dMiB available)",
2832 prinode, needed_mem, n_img.mfree)
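
  # Illustrative sketch (assumption, not in the original source): the memory
  # needed on a secondary node is the sum of BE_MINMEM over the auto-balanced
  # instances it would have to host if their primary node failed.
  @staticmethod
  def _ExampleNeededFailoverMemory(cluster_info, instance_cfg, instances):
    """Return the memory in MiB needed to fail over the given instances."""
    needed_mem = 0
    for instance in instances:
      bep = cluster_info.FillBE(instance_cfg[instance])
      if bep[constants.BE_AUTO_BALANCE]:
        needed_mem += bep[constants.BE_MINMEM]
    return needed_mem
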
2835 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2836 (files_all, files_opt, files_mc, files_vm)):
2837 """Verifies file checksums collected from all nodes.
2839 @param errorif: Callback for reporting errors
2840 @param nodeinfo: List of L{objects.Node} objects
2841 @param master_node: Name of master node
2842 @param all_nvinfo: RPC results
2845 # Define functions determining which nodes to consider for a file
2848 (files_mc, lambda node: (node.master_candidate or
2849 node.name == master_node)),
2850 (files_vm, lambda node: node.vm_capable),
2853 # Build mapping from filename to list of nodes which should have the file
2855 for (files, fn) in files2nodefn:
2857 filenodes = nodeinfo
2859 filenodes = filter(fn, nodeinfo)
2860 nodefiles.update((filename,
2861 frozenset(map(operator.attrgetter("name"), filenodes)))
2862 for filename in files)
2864 assert set(nodefiles) == (files_all | files_mc | files_vm)
2866 fileinfo = dict((filename, {}) for filename in nodefiles)
2867 ignore_nodes = set()
2869 for node in nodeinfo:
2871 ignore_nodes.add(node.name)
2874 nresult = all_nvinfo[node.name]
2876 if nresult.fail_msg or not nresult.payload:
2879 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2880 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2881 for (key, value) in fingerprints.items())
2884 test = not (node_files and isinstance(node_files, dict))
2885 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2886 "Node did not return file checksum data")
2888 ignore_nodes.add(node.name)
2891 # Build per-checksum mapping from filename to nodes having it
2892 for (filename, checksum) in node_files.items():
2893 assert filename in nodefiles
2894 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2896 for (filename, checksums) in fileinfo.items():
2897 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2899 # Nodes having the file
2900 with_file = frozenset(node_name
2901 for nodes in fileinfo[filename].values()
2902 for node_name in nodes) - ignore_nodes
2904 expected_nodes = nodefiles[filename] - ignore_nodes
2906 # Nodes missing file
2907 missing_file = expected_nodes - with_file
2909 if filename in files_opt:
2911 errorif(missing_file and missing_file != expected_nodes,
2912 constants.CV_ECLUSTERFILECHECK, None,
2913 "File %s is optional, but it must exist on all or no"
2914 " nodes (not found on %s)",
2915 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2917 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2918 "File %s is missing from node(s) %s", filename,
2919 utils.CommaJoin(utils.NiceSort(missing_file)))
2921 # Warn if a node has a file it shouldn't
2922 unexpected = with_file - expected_nodes
2924 constants.CV_ECLUSTERFILECHECK, None,
2925 "File %s should not exist on node(s) %s",
2926 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2928 # See if there are multiple versions of the file
2929 test = len(checksums) > 1
2931 variants = ["variant %s on %s" %
2932 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2933 for (idx, (checksum, nodes)) in
2934 enumerate(sorted(checksums.items()))]
2938 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2939 "File %s found with %s different checksums (%s)",
2940 filename, len(checksums), "; ".join(variants))
2942 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2944 """Verifies and the node DRBD status.
2946 @type ninfo: L{objects.Node}
2947 @param ninfo: the node to check
2948 @param nresult: the remote results for the node
2949 @param instanceinfo: the dict of instances
2950 @param drbd_helper: the configured DRBD usermode helper
2951 @param drbd_map: the DRBD map as returned by
2952 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2956 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2959 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2960 test = (helper_result is None)
2961 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2962 "no drbd usermode helper returned")
2964 status, payload = helper_result
2966 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2967 "drbd usermode helper check unsuccessful: %s", payload)
2968 test = status and (payload != drbd_helper)
2969 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2970 "wrong drbd usermode helper: %s", payload)
2972 # compute the DRBD minors
2974 for minor, instance in drbd_map[node].items():
2975 test = instance not in instanceinfo
2976 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2977 "ghost instance '%s' in temporary DRBD map", instance)
2978 # ghost instance should not be running, but otherwise we
2979 # don't give double warnings (both ghost instance and
2980 # unallocated minor in use)
2982 node_drbd[minor] = (instance, False)
2984 instance = instanceinfo[instance]
2985 node_drbd[minor] = (instance.name,
2986 instance.admin_state == constants.ADMINST_UP)
2988 # and now check them
2989 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2990 test = not isinstance(used_minors, (tuple, list))
2991 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2992 "cannot parse drbd status file: %s", str(used_minors))
2994 # we cannot check drbd status
2997 for minor, (iname, must_exist) in node_drbd.items():
2998 test = minor not in used_minors and must_exist
2999 _ErrorIf(test, constants.CV_ENODEDRBD, node,
3000 "drbd minor %d of instance %s is not active", minor, iname)
3001 for minor in used_minors:
3002 test = minor not in node_drbd
3003 _ErrorIf(test, constants.CV_ENODEDRBD, node,
3004 "unallocated drbd minor %d is in use", minor)
3006 def _UpdateNodeOS(self, ninfo, nresult, nimg):
3007 """Builds the node OS structures.
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nresult: the remote results for the node
3012 @param nimg: the node image object
3016 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3018 remote_os = nresult.get(constants.NV_OSLIST, None)
3019 test = (not isinstance(remote_os, list) or
3020 not compat.all(isinstance(v, list) and len(v) == 7
3021 for v in remote_os))
3023 _ErrorIf(test, constants.CV_ENODEOS, node,
3024 "node hasn't returned valid OS data")
3033 for (name, os_path, status, diagnose,
3034 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3036 if name not in os_dict:
3039 # parameters is a list of lists instead of list of tuples due to
3040 # JSON lacking a real tuple type, fix it:
3041 parameters = [tuple(v) for v in parameters]
3042 os_dict[name].append((os_path, status, diagnose,
3043 set(variants), set(parameters), set(api_ver)))
3045 nimg.oslist = os_dict
3047 def _VerifyNodeOS(self, ninfo, nimg, base):
3048 """Verifies the node OS list.
3050 @type ninfo: L{objects.Node}
3051 @param ninfo: the node to check
3052 @param nimg: the node image object
3053 @param base: the 'template' node we match against (e.g. from the master)
3057 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3059 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3061 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3062 for os_name, os_data in nimg.oslist.items():
3063 assert os_data, "Empty OS status for OS %s?!" % os_name
3064 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3065 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3066 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3067 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3068 "OS '%s' has multiple entries (first one shadows the rest): %s",
3069 os_name, utils.CommaJoin([v[0] for v in os_data]))
3070 # comparisons with the 'base' image
3071 test = os_name not in base.oslist
3072 _ErrorIf(test, constants.CV_ENODEOS, node,
3073 "Extra OS %s not present on reference node (%s)",
3077 assert base.oslist[os_name], "Base node has empty OS status?"
3078 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3080 # base OS is invalid, skipping
3082 for kind, a, b in [("API version", f_api, b_api),
3083 ("variants list", f_var, b_var),
3084 ("parameters", beautify_params(f_param),
3085 beautify_params(b_param))]:
3086 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3087 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3088 kind, os_name, base.name,
3089 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3091 # check any missing OSes
3092 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3093 _ErrorIf(missing, constants.CV_ENODEOS, node,
3094 "OSes present on reference node %s but missing on this node: %s",
3095 base.name, utils.CommaJoin(missing))
3097 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3098 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3100 @type ninfo: L{objects.Node}
3101 @param ninfo: the node to check
3102 @param nresult: the remote results for the node
3103 @type is_master: bool
3104 @param is_master: Whether node is the master node
3110 (constants.ENABLE_FILE_STORAGE or
3111 constants.ENABLE_SHARED_FILE_STORAGE)):
3113 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3115 # This should never happen
3116 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3117 "Node did not return forbidden file storage paths")
3119 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3120 "Found forbidden file storage paths: %s",
3121 utils.CommaJoin(fspaths))
3123 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3124 constants.CV_ENODEFILESTORAGEPATHS, node,
3125 "Node should not have returned forbidden file storage"
3128 def _VerifyOob(self, ninfo, nresult):
3129 """Verifies out of band functionality of a node.
3131 @type ninfo: L{objects.Node}
3132 @param ninfo: the node to check
3133 @param nresult: the remote results for the node
3137 # We just have to verify the paths on master and/or master candidates
3138 # as the oob helper is invoked on the master
3139 if ((ninfo.master_candidate or ninfo.master_capable) and
3140 constants.NV_OOB_PATHS in nresult):
3141 for path_result in nresult[constants.NV_OOB_PATHS]:
3142 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3144 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3145 """Verifies and updates the node volume data.
3147 This function will update a L{NodeImage}'s internal structures
3148 with data from the remote call.
3150 @type ninfo: L{objects.Node}
3151 @param ninfo: the node to check
3152 @param nresult: the remote results for the node
3153 @param nimg: the node image object
3154 @param vg_name: the configured VG name
3158 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3160 nimg.lvm_fail = True
3161 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3164 elif isinstance(lvdata, basestring):
3165 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3166 utils.SafeEncode(lvdata))
3167 elif not isinstance(lvdata, dict):
3168 _ErrorIf(True, constants.CV_ENODELVM, node,
3169 "rpc call to node failed (lvlist)")
3171 nimg.volumes = lvdata
3172 nimg.lvm_fail = False
3174 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3175 """Verifies and updates the node instance list.
3177 If the listing was successful, then updates this node's instance
3178 list. Otherwise, it marks the RPC call as failed for the instance
3181 @type ninfo: L{objects.Node}
3182 @param ninfo: the node to check
3183 @param nresult: the remote results for the node
3184 @param nimg: the node image object
3187 idata = nresult.get(constants.NV_INSTANCELIST, None)
3188 test = not isinstance(idata, list)
3189 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3190 "rpc call to node failed (instancelist): %s",
3191 utils.SafeEncode(str(idata)))
3193 nimg.hyp_fail = True
3195 nimg.instances = idata
3197 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3198 """Verifies and computes a node information map
3200 @type ninfo: L{objects.Node}
3201 @param ninfo: the node to check
3202 @param nresult: the remote results for the node
3203 @param nimg: the node image object
3204 @param vg_name: the configured VG name
3208 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3210 # try to read free memory (from the hypervisor)
3211 hv_info = nresult.get(constants.NV_HVINFO, None)
3212 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3213 _ErrorIf(test, constants.CV_ENODEHV, node,
3214 "rpc call to node failed (hvinfo)")
3217 nimg.mfree = int(hv_info["memory_free"])
3218 except (ValueError, TypeError):
3219 _ErrorIf(True, constants.CV_ENODERPC, node,
3220 "node returned invalid nodeinfo, check hypervisor")
3222 # FIXME: devise a free space model for file based instances as well
3223 if vg_name is not None:
3224 test = (constants.NV_VGLIST not in nresult or
3225 vg_name not in nresult[constants.NV_VGLIST])
3226 _ErrorIf(test, constants.CV_ENODELVM, node,
3227 "node didn't return data for the volume group '%s'"
3228 " - it is either missing or broken", vg_name)
3231 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3232 except (ValueError, TypeError):
3233 _ErrorIf(True, constants.CV_ENODERPC, node,
3234 "node returned invalid LVM info, check LVM status")
3236 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3237 """Gets per-disk status information for all instances.
3239 @type nodelist: list of strings
3240 @param nodelist: Node names
3241 @type node_image: dict of (name, L{objects.Node})
3242 @param node_image: Node objects
3243 @type instanceinfo: dict of (name, L{objects.Instance})
3244 @param instanceinfo: Instance objects
3245 @rtype: {instance: {node: [(success, payload)]}}
3246 @return: a dictionary of per-instance dictionaries with nodes as
3247 keys and disk information as values; the disk information is a
3248 list of tuples (success, payload)
3251 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3254 node_disks_devonly = {}
3255 diskless_instances = set()
3256 diskless = constants.DT_DISKLESS
3258 for nname in nodelist:
3259 node_instances = list(itertools.chain(node_image[nname].pinst,
3260 node_image[nname].sinst))
3261 diskless_instances.update(inst for inst in node_instances
3262 if instanceinfo[inst].disk_template == diskless)
3263 disks = [(inst, disk)
3264 for inst in node_instances
3265 for disk in instanceinfo[inst].disks]
3268 # No need to collect data
3271 node_disks[nname] = disks
3273 # _AnnotateDiskParams makes already copies of the disks
3275 for (inst, dev) in disks:
3276 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3277 self.cfg.SetDiskID(anno_disk, nname)
3278 devonly.append(anno_disk)
3280 node_disks_devonly[nname] = devonly
3282 assert len(node_disks) == len(node_disks_devonly)
3284 # Collect data from all nodes with disks
3285 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3288 assert len(result) == len(node_disks)
3292 for (nname, nres) in result.items():
3293 disks = node_disks[nname]
3296 # No data from this node
3297 data = len(disks) * [(False, "node offline")]
3300 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3301 "while getting disk information: %s", msg)
3303 # No data from this node
3304 data = len(disks) * [(False, msg)]
3307 for idx, i in enumerate(nres.payload):
3308 if isinstance(i, (tuple, list)) and len(i) == 2:
3311 logging.warning("Invalid result from node %s, entry %d: %s",
3313 data.append((False, "Invalid result from the remote node"))
3315 for ((inst, _), status) in zip(disks, data):
3316 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3318 # Add empty entries for diskless instances.
3319 for inst in diskless_instances:
3320 assert inst not in instdisk
3323 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3324 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3325 compat.all(isinstance(s, (tuple, list)) and
3326 len(s) == 2 for s in statuses)
3327 for inst, nnames in instdisk.items()
3328 for nname, statuses in nnames.items())
3330 instdisk_keys = set(instdisk)
3331 instanceinfo_keys = set(instanceinfo)
3332 assert instdisk_keys == instanceinfo_keys, \
3333 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3334 (instdisk_keys, instanceinfo_keys))
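
  # Illustrative sketch (assumption, not in the original source): the instdisk
  # mapping built above has the shape
  # {instance: {node: [(success, payload), ...]}}; extracting failed entries:
  @staticmethod
  def _ExampleFailedDiskEntries(instdisk):
    """Yield (instance, node, disk_index) tuples for failed disk statuses."""
    for (inst, per_node) in instdisk.items():
      for (nname, statuses) in per_node.items():
        for (idx, (success, _)) in enumerate(statuses):
          if not success:
            yield (inst, nname, idx)
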
3339 def _SshNodeSelector(group_uuid, all_nodes):
3340 """Create endless iterators for all potential SSH check hosts.
3343 nodes = [node for node in all_nodes
3344 if (node.group != group_uuid and
3346 keyfunc = operator.attrgetter("group")
3348 return map(itertools.cycle,
3349 [sorted(map(operator.attrgetter("name"), names))
3350 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3354 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3355 """Choose which nodes should talk to which other nodes.
3357 We will make nodes contact all nodes in their group, and one node from every other group.
3360 @warning: This algorithm has a known issue if one node group is much
3361 smaller than others (e.g. just one node). In such a case all other
3362 nodes will talk to the single node.
3365 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3366 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3368 return (online_nodes,
3369 dict((name, sorted([i.next() for i in sel]))
3370 for name in online_nodes))
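
  # Illustrative sketch (assumption, not in the original source): for a group
  # with online nodes "nodeA" and "nodeB" and a single other group containing
  # "nodeC", _SelectSshCheckNodes returns roughly the shape built below.
  @staticmethod
  def _ExampleSshCheckShape():
    """Return the (online_nodes, per-node targets) shape of the SSH check."""
    return (["nodeA", "nodeB"], {"nodeA": ["nodeC"], "nodeB": ["nodeC"]})
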
3372 def BuildHooksEnv(self):
3375 Cluster-Verify hooks are run only in the post phase; their failure is
3376 logged in the verify output and makes the verification fail.
3380 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3383 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3384 for node in self.my_node_info.values())
3388 def BuildHooksNodes(self):
3389 """Build hooks nodes.
3392 return ([], self.my_node_names)
3394 def Exec(self, feedback_fn):
3395 """Verify integrity of the node group, performing various test on nodes.
3398 # This method has too many local variables. pylint: disable=R0914
3399 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3401 if not self.my_node_names:
3403 feedback_fn("* Empty node group, skipping verification")
3407 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3408 verbose = self.op.verbose
3409 self._feedback_fn = feedback_fn
3411 vg_name = self.cfg.GetVGName()
3412 drbd_helper = self.cfg.GetDRBDHelper()
3413 cluster = self.cfg.GetClusterInfo()
3414 hypervisors = cluster.enabled_hypervisors
3415 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3417 i_non_redundant = [] # Non redundant instances
3418 i_non_a_balanced = [] # Non auto-balanced instances
3419 i_offline = 0 # Count of offline instances
3420 n_offline = 0 # Count of offline nodes
3421 n_drained = 0 # Count of nodes being drained
3422 node_vol_should = {}
3424 # FIXME: verify OS list
3427 filemap = _ComputeAncillaryFiles(cluster, False)
3429 # do local checksums
3430 master_node = self.master_node = self.cfg.GetMasterNode()
3431 master_ip = self.cfg.GetMasterIP()
3433 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3436 if self.cfg.GetUseExternalMipScript():
3437 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3439 node_verify_param = {
3440 constants.NV_FILELIST:
3441 map(vcluster.MakeVirtualPath,
3442 utils.UniqueSequence(filename
3443 for files in filemap
3444 for filename in files)),
3445 constants.NV_NODELIST:
3446 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3447 self.all_node_info.values()),
3448 constants.NV_HYPERVISOR: hypervisors,
3449 constants.NV_HVPARAMS:
3450 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3451 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3452 for node in node_data_list
3453 if not node.offline],
3454 constants.NV_INSTANCELIST: hypervisors,
3455 constants.NV_VERSION: None,
3456 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3457 constants.NV_NODESETUP: None,
3458 constants.NV_TIME: None,
3459 constants.NV_MASTERIP: (master_node, master_ip),
3460 constants.NV_OSLIST: None,
3461 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3462 constants.NV_USERSCRIPTS: user_scripts,
3465 if vg_name is not None:
3466 node_verify_param[constants.NV_VGLIST] = None
3467 node_verify_param[constants.NV_LVLIST] = vg_name
3468 node_verify_param[constants.NV_PVLIST] = [vg_name]
3471 node_verify_param[constants.NV_DRBDLIST] = None
3472 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3474 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3475 # Load file storage paths only from master node
3476 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3479 # FIXME: this needs to be changed per node-group, not cluster-wide
3481 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3482 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3483 bridges.add(default_nicpp[constants.NIC_LINK])
3484 for instance in self.my_inst_info.values():
3485 for nic in instance.nics:
3486 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3487 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3488 bridges.add(full_nic[constants.NIC_LINK])
3491 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3493 # Build our expected cluster state
3494 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3496 vm_capable=node.vm_capable))
3497 for node in node_data_list)
3501 for node in self.all_node_info.values():
3502 path = _SupportsOob(self.cfg, node)
3503 if path and path not in oob_paths:
3504 oob_paths.append(path)
3507 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3509 for instance in self.my_inst_names:
3510 inst_config = self.my_inst_info[instance]
3511 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3514 for nname in inst_config.all_nodes:
3515 if nname not in node_image:
3516 gnode = self.NodeImage(name=nname)
3517 gnode.ghost = (nname not in self.all_node_info)
3518 node_image[nname] = gnode
3520 inst_config.MapLVsByNode(node_vol_should)
3522 pnode = inst_config.primary_node
3523 node_image[pnode].pinst.append(instance)
3525 for snode in inst_config.secondary_nodes:
3526 nimg = node_image[snode]
3527 nimg.sinst.append(instance)
3528 if pnode not in nimg.sbp:
3529 nimg.sbp[pnode] = []
3530 nimg.sbp[pnode].append(instance)
3532 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3533 # The value of exclusive_storage should be the same across the group, so if
3534 # it's True for at least a node, we act as if it were set for all the nodes
3535 self._exclusive_storage = compat.any(es_flags.values())
3536 if self._exclusive_storage:
3537 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3539 # At this point, we have the in-memory data structures complete,
3540 # except for the runtime information, which we'll gather next
3542 # Due to the way our RPC system works, exact response times cannot be
3543 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3544 # time before and after executing the request, we can at least have a time
3546 nvinfo_starttime = time.time()
3547 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3549 self.cfg.GetClusterName())
3550 nvinfo_endtime = time.time()
3552 if self.extra_lv_nodes and vg_name is not None:
3554 self.rpc.call_node_verify(self.extra_lv_nodes,
3555 {constants.NV_LVLIST: vg_name},
3556 self.cfg.GetClusterName())
3558 extra_lv_nvinfo = {}
3560 all_drbd_map = self.cfg.ComputeDRBDMap()
3562 feedback_fn("* Gathering disk information (%s nodes)" %
3563 len(self.my_node_names))
3564 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3567 feedback_fn("* Verifying configuration file consistency")
3569 # If not all nodes are being checked, we need to make sure the master node
3570 # and a non-checked vm_capable node are in the list.
3571 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3573 vf_nvinfo = all_nvinfo.copy()
3574 vf_node_info = list(self.my_node_info.values())
3575 additional_nodes = []
3576 if master_node not in self.my_node_info:
3577 additional_nodes.append(master_node)
3578 vf_node_info.append(self.all_node_info[master_node])
3579 # Add the first vm_capable node we find which is not included,
3580 # excluding the master node (which we already have)
3581 for node in absent_nodes:
3582 nodeinfo = self.all_node_info[node]
3583 if (nodeinfo.vm_capable and not nodeinfo.offline and
3584 node != master_node):
3585 additional_nodes.append(node)
3586 vf_node_info.append(self.all_node_info[node])
3588 key = constants.NV_FILELIST
3589 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3590 {key: node_verify_param[key]},
3591 self.cfg.GetClusterName()))
3593 vf_nvinfo = all_nvinfo
3594 vf_node_info = self.my_node_info.values()
3596 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3598 feedback_fn("* Verifying node status")
3602 for node_i in node_data_list:
3604 nimg = node_image[node]
3608 feedback_fn("* Skipping offline node %s" % (node,))
3612 if node == master_node:
3614 elif node_i.master_candidate:
3615 ntype = "master candidate"
3616 elif node_i.drained:
3622 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3624 msg = all_nvinfo[node].fail_msg
3625 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3628 nimg.rpc_fail = True
3631 nresult = all_nvinfo[node].payload
3633 nimg.call_ok = self._VerifyNode(node_i, nresult)
3634 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3635 self._VerifyNodeNetwork(node_i, nresult)
3636 self._VerifyNodeUserScripts(node_i, nresult)
3637 self._VerifyOob(node_i, nresult)
3638 self._VerifyFileStoragePaths(node_i, nresult,
3639 node == master_node)
3642 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3643 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3646 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3647 self._UpdateNodeInstances(node_i, nresult, nimg)
3648 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3649 self._UpdateNodeOS(node_i, nresult, nimg)
3651 if not nimg.os_fail:
3652 if refos_img is None:
3654 self._VerifyNodeOS(node_i, nimg, refos_img)
3655 self._VerifyNodeBridges(node_i, nresult, bridges)
3657 # Check whether all running instances are primary for the node. (This
3658 # can no longer be done from _VerifyInstance below, since some of the
3659 # wrong instances could be from other node groups.)
3660 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3662 for inst in non_primary_inst:
3663 test = inst in self.all_inst_info
3664 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3665 "instance should not run on node %s", node_i.name)
3666 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3667 "node is running unknown instance %s", inst)
3669 self._VerifyGroupLVM(node_image, vg_name)
3671 for node, result in extra_lv_nvinfo.items():
3672 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3673 node_image[node], vg_name)
3675 feedback_fn("* Verifying instance status")
3676 for instance in self.my_inst_names:
3678 feedback_fn("* Verifying instance %s" % instance)
3679 inst_config = self.my_inst_info[instance]
3680 self._VerifyInstance(instance, inst_config, node_image,
3683 # If the instance is non-redundant we cannot survive losing its primary
3684 # node, so we are not N+1 compliant.
3685 if inst_config.disk_template not in constants.DTS_MIRRORED:
3686 i_non_redundant.append(instance)
3688 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3689 i_non_a_balanced.append(instance)
3691 feedback_fn("* Verifying orphan volumes")
3692 reserved = utils.FieldSet(*cluster.reserved_lvs)
3694 # We will get spurious "unknown volume" warnings if any node of this group
3695 # is secondary for an instance whose primary is in another group. To avoid
3696 # them, we find these instances and add their volumes to node_vol_should.
3697 for inst in self.all_inst_info.values():
3698 for secondary in inst.secondary_nodes:
3699 if (secondary in self.my_node_info
3700 and inst.name not in self.my_inst_info):
3701 inst.MapLVsByNode(node_vol_should)
3704 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3706 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3707 feedback_fn("* Verifying N+1 Memory redundancy")
3708 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3710 feedback_fn("* Other Notes")
3712 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3713 % len(i_non_redundant))
3715 if i_non_a_balanced:
3716 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3717 % len(i_non_a_balanced))
3720 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3723 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3726 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3730 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3731 """Analyze the post-hooks' result
3733 This method analyses the hook result, handles it, and sends some
3734 nicely-formatted feedback back to the user.
3736 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3737 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3738 @param hooks_results: the results of the multi-node hooks rpc call
3739 @param feedback_fn: function used to send feedback back to the caller
3740 @param lu_result: previous Exec result
3741 @return: the new Exec result, based on the previous result
3745 # We only really run POST phase hooks on non-empty groups, and we are
3746 # only interested in their results
3747 if not self.my_node_names:
3750 elif phase == constants.HOOKS_PHASE_POST:
3751 # Used to change hooks' output to proper indentation
3752 feedback_fn("* Hooks Results")
3753 assert hooks_results, "invalid result from hooks"
3755 for node_name in hooks_results:
3756 res = hooks_results[node_name]
3758 test = msg and not res.offline
3759 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3760 "Communication failure in hooks execution: %s", msg)
3761 if res.offline or msg:
3762 # No need to investigate payload if node is offline or gave
3765 for script, hkr, output in res.payload:
3766 test = hkr == constants.HKR_FAIL
3767 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3768 "Script %s failed, output:", script)
3770 output = self._HOOKS_INDENT_RE.sub(" ", output)
3771 feedback_fn("%s" % output)
3777 class LUClusterVerifyDisks(NoHooksLU):
3778 """Verifies the cluster disks status.
3783 def ExpandNames(self):
3784 self.share_locks = _ShareAll()
3785 self.needed_locks = {
3786 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3789 def Exec(self, feedback_fn):
3790 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3792 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3793 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3794 for group in group_names])
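# Illustrative sketch only (not executed; the group names are hypothetical):
# with two locked groups "default" and "rack2" the value returned above would
# be equivalent to
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="rack2")]])
# i.e. one single-opcode job per node group, each submitted as its own job.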
3797 class LUGroupVerifyDisks(NoHooksLU):
3798 """Verifies the status of all disks in a node group.
3803 def ExpandNames(self):
3804 # Raises errors.OpPrereqError on its own if group can't be found
3805 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3807 self.share_locks = _ShareAll()
3808 self.needed_locks = {
3809 locking.LEVEL_INSTANCE: [],
3810 locking.LEVEL_NODEGROUP: [],
3811 locking.LEVEL_NODE: [],
3813 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3814 # starts one instance of this opcode for every group, which means all
3815 # nodes will be locked for a short amount of time, so it's better to
3816 # acquire the node allocation lock as well.
3817 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3820 def DeclareLocks(self, level):
3821 if level == locking.LEVEL_INSTANCE:
3822 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3824 # Lock instances optimistically; this needs verification once node and group
3825 # locks have been acquired
3826 self.needed_locks[locking.LEVEL_INSTANCE] = \
3827 self.cfg.GetNodeGroupInstances(self.group_uuid)
3829 elif level == locking.LEVEL_NODEGROUP:
3830 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3832 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3833 set([self.group_uuid] +
3834 # Lock all groups used by instances optimistically; this requires
3835 # going via the node before it's locked, requiring verification
3838 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3839 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3841 elif level == locking.LEVEL_NODE:
3842 # This will only lock the nodes in the group to be verified which contain actual instances
3844 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3845 self._LockInstancesNodes()
3847 # Lock all nodes in group to be verified
3848 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3849 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3850 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3852 def CheckPrereq(self):
3853 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3854 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3855 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3857 assert self.group_uuid in owned_groups
3859 # Check if locked instances are still correct
3860 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3862 # Get instance information
3863 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3865 # Check if node groups for locked instances are still correct
3866 _CheckInstancesNodeGroups(self.cfg, self.instances,
3867 owned_groups, owned_nodes, self.group_uuid)
3869 def Exec(self, feedback_fn):
3870 """Verify integrity of cluster disks.
3872 @rtype: tuple of three items
3873 @return: a tuple of (dict of node-to-node_error, list of instances
3874 which need activate-disks, dict of instance: (node, volume) for missing volumes
3879 res_instances = set()
3882 nv_dict = _MapInstanceDisksToNodes(
3883 [inst for inst in self.instances.values()
3884 if inst.admin_state == constants.ADMINST_UP])
3887 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3888 set(self.cfg.GetVmCapableNodeList()))
3890 node_lvs = self.rpc.call_lv_list(nodes, [])
3892 for (node, node_res) in node_lvs.items():
3893 if node_res.offline:
3896 msg = node_res.fail_msg
3898 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3899 res_nodes[node] = msg
3902 for lv_name, (_, _, lv_online) in node_res.payload.items():
3903 inst = nv_dict.pop((node, lv_name), None)
3904 if not (lv_online or inst is None):
3905 res_instances.add(inst)
3907 # any leftover items in nv_dict are missing LVs, let's arrange the data
3909 for key, inst in nv_dict.iteritems():
3910 res_missing.setdefault(inst, []).append(list(key))
3912 return (res_nodes, list(res_instances), res_missing)
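# Illustrative shape of the value returned above (all node, instance and LV
# names are hypothetical):
#   ({"node3.example.com": "Error while enumerating LVs"},
#    ["instance1.example.com"],
#    {"instance2.example.com": [["node4.example.com", "xenvg/disk0_data"]]})
# i.e. per-node errors, instances needing activate-disks, and missing LVs.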
3915 class LUClusterRepairDiskSizes(NoHooksLU):
3916 """Verifies the cluster disks sizes.
3921 def ExpandNames(self):
3922 if self.op.instances:
3923 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3924 # Not getting the node allocation lock as only a specific set of
3925 # instances (and their nodes) is going to be acquired
3926 self.needed_locks = {
3927 locking.LEVEL_NODE_RES: [],
3928 locking.LEVEL_INSTANCE: self.wanted_names,
3930 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3932 self.wanted_names = None
3933 self.needed_locks = {
3934 locking.LEVEL_NODE_RES: locking.ALL_SET,
3935 locking.LEVEL_INSTANCE: locking.ALL_SET,
3937 # This opcode acquires the node locks for all instances
3938 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3941 self.share_locks = {
3942 locking.LEVEL_NODE_RES: 1,
3943 locking.LEVEL_INSTANCE: 0,
3944 locking.LEVEL_NODE_ALLOC: 1,
3947 def DeclareLocks(self, level):
3948 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3949 self._LockInstancesNodes(primary_only=True, level=level)
3951 def CheckPrereq(self):
3952 """Check prerequisites.
3954 This only checks the optional instance list against the existing names.
3957 if self.wanted_names is None:
3958 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3960 self.wanted_instances = \
3961 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3963 def _EnsureChildSizes(self, disk):
3964 """Ensure children of the disk have the needed disk size.
3966 This is valid mainly for DRBD8 and fixes an issue where the
3967 children have a smaller disk size than the parent.
3969 @param disk: an L{ganeti.objects.Disk} object
3972 if disk.dev_type == constants.LD_DRBD8:
3973 assert disk.children, "Empty children for DRBD8?"
3974 fchild = disk.children[0]
3975 mismatch = fchild.size < disk.size
3977 self.LogInfo("Child disk has size %d, parent %d, fixing",
3978 fchild.size, disk.size)
3979 fchild.size = disk.size
3981 # and we recurse on this child only, not on the metadev
3982 return self._EnsureChildSizes(fchild) or mismatch
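# Example of the fix above (hypothetical sizes): for a DRBD8 disk of size
# 1024 whose data child reports size 1000, the child is bumped to 1024 and
# True is returned, so the caller knows the configuration must be updated.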
3986 def Exec(self, feedback_fn):
3987 """Verify the size of cluster disks.
3990 # TODO: check child disks too
3991 # TODO: check differences in size between primary/secondary nodes
3993 for instance in self.wanted_instances:
3994 pnode = instance.primary_node
3995 if pnode not in per_node_disks:
3996 per_node_disks[pnode] = []
3997 for idx, disk in enumerate(instance.disks):
3998 per_node_disks[pnode].append((instance, idx, disk))
4000 assert not (frozenset(per_node_disks.keys()) -
4001 self.owned_locks(locking.LEVEL_NODE_RES)), \
4002 "Not owning correct locks"
4003 assert not self.owned_locks(locking.LEVEL_NODE)
4006 for node, dskl in per_node_disks.items():
4007 newl = [v[2].Copy() for v in dskl]
4009 self.cfg.SetDiskID(dsk, node)
4010 result = self.rpc.call_blockdev_getsize(node, newl)
4012 self.LogWarning("Failure in blockdev_getsize call to node"
4013 " %s, ignoring", node)
4015 if len(result.payload) != len(dskl):
4016 logging.warning("Invalid result from node %s: len(dskl)=%d,"
4017 " result.payload=%s", node, len(dskl), result.payload)
4018 self.LogWarning("Invalid result from node %s, ignoring node results",
4021 for ((instance, idx, disk), size) in zip(dskl, result.payload):
4023 self.LogWarning("Disk %d of instance %s did not return size"
4024 " information, ignoring", idx, instance.name)
4026 if not isinstance(size, (int, long)):
4027 self.LogWarning("Disk %d of instance %s did not return valid"
4028 " size information, ignoring", idx, instance.name)
4031 if size != disk.size:
4032 self.LogInfo("Disk %d of instance %s has mismatched size,"
4033 " correcting: recorded %d, actual %d", idx,
4034 instance.name, disk.size, size)
4036 self.cfg.Update(instance, feedback_fn)
4037 changed.append((instance.name, idx, size))
4038 if self._EnsureChildSizes(disk):
4039 self.cfg.Update(instance, feedback_fn)
4040 changed.append((instance.name, idx, disk.size))
4044 class LUClusterRename(LogicalUnit):
4045 """Rename the cluster.
4048 HPATH = "cluster-rename"
4049 HTYPE = constants.HTYPE_CLUSTER
4051 def BuildHooksEnv(self):
4056 "OP_TARGET": self.cfg.GetClusterName(),
4057 "NEW_NAME": self.op.name,
4060 def BuildHooksNodes(self):
4061 """Build hooks nodes.
4064 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4066 def CheckPrereq(self):
4067 """Verify that the passed name is a valid one.
4070 hostname = netutils.GetHostname(name=self.op.name,
4071 family=self.cfg.GetPrimaryIPFamily())
4073 new_name = hostname.name
4074 self.ip = new_ip = hostname.ip
4075 old_name = self.cfg.GetClusterName()
4076 old_ip = self.cfg.GetMasterIP()
4077 if new_name == old_name and new_ip == old_ip:
4078 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4079 " cluster has changed",
4081 if new_ip != old_ip:
4082 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4083 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4084 " reachable on the network" %
4085 new_ip, errors.ECODE_NOTUNIQUE)
4087 self.op.name = new_name
4089 def Exec(self, feedback_fn):
4090 """Rename the cluster.
4093 clustername = self.op.name
4096 # shut down the master IP
4097 master_params = self.cfg.GetMasterNetworkParameters()
4098 ems = self.cfg.GetUseExternalMipScript()
4099 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4101 result.Raise("Could not disable the master role")
4104 cluster = self.cfg.GetClusterInfo()
4105 cluster.cluster_name = clustername
4106 cluster.master_ip = new_ip
4107 self.cfg.Update(cluster, feedback_fn)
4109 # update the known hosts file
4110 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4111 node_list = self.cfg.GetOnlineNodeList()
4113 node_list.remove(master_params.name)
4116 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4118 master_params.ip = new_ip
4119 result = self.rpc.call_node_activate_master_ip(master_params.name,
4121 msg = result.fail_msg
4123 self.LogWarning("Could not re-enable the master role on"
4124 " the master, please restart manually: %s", msg)
4129 def _ValidateNetmask(cfg, netmask):
4130 """Checks if a netmask is valid.
4132 @type cfg: L{config.ConfigWriter}
4133 @param cfg: The cluster configuration
4135 @param netmask: the netmask to be verified
4136 @raise errors.OpPrereqError: if the validation fails
4139 ip_family = cfg.GetPrimaryIPFamily()
4141 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4142 except errors.ProgrammerError:
4143 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4144 ip_family, errors.ECODE_INVAL)
4145 if not ipcls.ValidateNetmask(netmask):
4146 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4147 (netmask), errors.ECODE_INVAL)
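# Illustrative usage sketch (assuming the netmask is given as a CIDR prefix
# length, as the error message above suggests): on an IPv4 cluster
#   _ValidateNetmask(cfg, 24)   # accepted
#   _ValidateNetmask(cfg, 64)   # raises OpPrereqError (prefix too long)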
4150 class LUClusterSetParams(LogicalUnit):
4151 """Change the parameters of the cluster.
4154 HPATH = "cluster-modify"
4155 HTYPE = constants.HTYPE_CLUSTER
4158 def CheckArguments(self):
4162 if self.op.uid_pool:
4163 uidpool.CheckUidPool(self.op.uid_pool)
4165 if self.op.add_uids:
4166 uidpool.CheckUidPool(self.op.add_uids)
4168 if self.op.remove_uids:
4169 uidpool.CheckUidPool(self.op.remove_uids)
4171 if self.op.master_netmask is not None:
4172 _ValidateNetmask(self.cfg, self.op.master_netmask)
4174 if self.op.diskparams:
4175 for dt_params in self.op.diskparams.values():
4176 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4178 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4179 except errors.OpPrereqError, err:
4180 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4183 def ExpandNames(self):
4184 # FIXME: in the future maybe other cluster params won't require checking on
4185 # all nodes to be modified.
4186 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4187 # resource locks the right thing, shouldn't it be the BGL instead?
4188 self.needed_locks = {
4189 locking.LEVEL_NODE: locking.ALL_SET,
4190 locking.LEVEL_INSTANCE: locking.ALL_SET,
4191 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4192 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4194 self.share_locks = _ShareAll()
4196 def BuildHooksEnv(self):
4201 "OP_TARGET": self.cfg.GetClusterName(),
4202 "NEW_VG_NAME": self.op.vg_name,
4205 def BuildHooksNodes(self):
4206 """Build hooks nodes.
4209 mn = self.cfg.GetMasterNode()
4212 def CheckPrereq(self):
4213 """Check prerequisites.
4215 This checks that the given parameters don't conflict and that
4216 the given volume group is valid.
4219 if self.op.vg_name is not None and not self.op.vg_name:
4220 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4221 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4222 " instances exist", errors.ECODE_INVAL)
4224 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4225 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4226 raise errors.OpPrereqError("Cannot disable drbd helper while"
4227 " drbd-based instances exist",
4230 node_list = self.owned_locks(locking.LEVEL_NODE)
4232 # if vg_name is not None, check the given volume group on all nodes
4234 vglist = self.rpc.call_vg_list(node_list)
4235 for node in node_list:
4236 msg = vglist[node].fail_msg
4238 # ignoring down node
4239 self.LogWarning("Error while gathering data on node %s"
4240 " (ignoring node): %s", node, msg)
4242 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4244 constants.MIN_VG_SIZE)
4246 raise errors.OpPrereqError("Error on node '%s': %s" %
4247 (node, vgstatus), errors.ECODE_ENVIRON)
4249 if self.op.drbd_helper:
4250 # check the given drbd helper on all nodes
4251 helpers = self.rpc.call_drbd_helper(node_list)
4252 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4254 self.LogInfo("Not checking drbd helper on offline node %s", node)
4256 msg = helpers[node].fail_msg
4258 raise errors.OpPrereqError("Error checking drbd helper on node"
4259 " '%s': %s" % (node, msg),
4260 errors.ECODE_ENVIRON)
4261 node_helper = helpers[node].payload
4262 if node_helper != self.op.drbd_helper:
4263 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4264 (node, node_helper), errors.ECODE_ENVIRON)
4266 self.cluster = cluster = self.cfg.GetClusterInfo()
4267 # validate params changes
4268 if self.op.beparams:
4269 objects.UpgradeBeParams(self.op.beparams)
4270 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4271 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4273 if self.op.ndparams:
4274 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4275 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4277 # TODO: we need a more general way to handle resetting
4278 # cluster-level parameters to default values
4279 if self.new_ndparams["oob_program"] == "":
4280 self.new_ndparams["oob_program"] = \
4281 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4283 if self.op.hv_state:
4284 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4285 self.cluster.hv_state_static)
4286 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4287 for hv, values in new_hv_state.items())
4289 if self.op.disk_state:
4290 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4291 self.cluster.disk_state_static)
4292 self.new_disk_state = \
4293 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4294 for name, values in svalues.items()))
4295 for storage, svalues in new_disk_state.items())
4298 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4301 all_instances = self.cfg.GetAllInstancesInfo().values()
4303 for group in self.cfg.GetAllNodeGroupsInfo().values():
4304 instances = frozenset([inst for inst in all_instances
4305 if compat.any(node in group.members
4306 for node in inst.all_nodes)])
4307 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4308 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4309 new = _ComputeNewInstanceViolations(ipol,
4310 new_ipolicy, instances, self.cfg)
4312 violations.update(new)
4315 self.LogWarning("After the ipolicy change the following instances"
4316 " violate them: %s",
4317 utils.CommaJoin(utils.NiceSort(violations)))
4319 if self.op.nicparams:
4320 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4321 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4322 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4325 # check all instances for consistency
4326 for instance in self.cfg.GetAllInstancesInfo().values():
4327 for nic_idx, nic in enumerate(instance.nics):
4328 params_copy = copy.deepcopy(nic.nicparams)
4329 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4331 # check parameter syntax
4333 objects.NIC.CheckParameterSyntax(params_filled)
4334 except errors.ConfigurationError, err:
4335 nic_errors.append("Instance %s, nic/%d: %s" %
4336 (instance.name, nic_idx, err))
4338 # if we're moving instances to routed, check that they have an ip
4339 target_mode = params_filled[constants.NIC_MODE]
4340 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4341 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4342 " address" % (instance.name, nic_idx))
4344 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4345 "\n".join(nic_errors), errors.ECODE_INVAL)
4347 # hypervisor list/parameters
4348 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4349 if self.op.hvparams:
4350 for hv_name, hv_dict in self.op.hvparams.items():
4351 if hv_name not in self.new_hvparams:
4352 self.new_hvparams[hv_name] = hv_dict
4354 self.new_hvparams[hv_name].update(hv_dict)
4356 # disk template parameters
4357 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4358 if self.op.diskparams:
4359 for dt_name, dt_params in self.op.diskparams.items():
4360 if dt_name not in self.new_diskparams:
4361 self.new_diskparams[dt_name] = dt_params
4363 self.new_diskparams[dt_name].update(dt_params)
4365 # os hypervisor parameters
4366 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4368 for os_name, hvs in self.op.os_hvp.items():
4369 if os_name not in self.new_os_hvp:
4370 self.new_os_hvp[os_name] = hvs
4372 for hv_name, hv_dict in hvs.items():
4374 # Delete if it exists
4375 self.new_os_hvp[os_name].pop(hv_name, None)
4376 elif hv_name not in self.new_os_hvp[os_name]:
4377 self.new_os_hvp[os_name][hv_name] = hv_dict
4379 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4382 self.new_osp = objects.FillDict(cluster.osparams, {})
4383 if self.op.osparams:
4384 for os_name, osp in self.op.osparams.items():
4385 if os_name not in self.new_osp:
4386 self.new_osp[os_name] = {}
4388 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4391 if not self.new_osp[os_name]:
4392 # we removed all parameters
4393 del self.new_osp[os_name]
4395 # check the parameter validity (remote check)
4396 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4397 os_name, self.new_osp[os_name])
4399 # changes to the hypervisor list
4400 if self.op.enabled_hypervisors is not None:
4401 self.hv_list = self.op.enabled_hypervisors
4402 for hv in self.hv_list:
4403 # if the hypervisor doesn't already exist in the cluster
4404 # hvparams, we initialize it to empty, and then (in both
4405 # cases) we make sure to fill the defaults, as we might not
4406 # have a complete defaults list if the hypervisor wasn't enabled before
4408 if hv not in new_hvp:
4410 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4411 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4413 self.hv_list = cluster.enabled_hypervisors
4415 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4416 # either the enabled list has changed, or the parameters have, validate
4417 for hv_name, hv_params in self.new_hvparams.items():
4418 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4419 (self.op.enabled_hypervisors and
4420 hv_name in self.op.enabled_hypervisors)):
4421 # either this is a new hypervisor, or its parameters have changed
4422 hv_class = hypervisor.GetHypervisorClass(hv_name)
4423 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4424 hv_class.CheckParameterSyntax(hv_params)
4425 _CheckHVParams(self, node_list, hv_name, hv_params)
4427 self._CheckDiskTypeConsistency()
4430 # no need to check any newly-enabled hypervisors, since the
4431 # defaults have already been checked in the above code-block
4432 for os_name, os_hvp in self.new_os_hvp.items():
4433 for hv_name, hv_params in os_hvp.items():
4434 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4435 # we need to fill in the new os_hvp on top of the actual hv_p
4436 cluster_defaults = self.new_hvparams.get(hv_name, {})
4437 new_osp = objects.FillDict(cluster_defaults, hv_params)
4438 hv_class = hypervisor.GetHypervisorClass(hv_name)
4439 hv_class.CheckParameterSyntax(new_osp)
4440 _CheckHVParams(self, node_list, hv_name, new_osp)
4442 if self.op.default_iallocator:
4443 alloc_script = utils.FindFile(self.op.default_iallocator,
4444 constants.IALLOCATOR_SEARCH_PATH,
4446 if alloc_script is None:
4447 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4448 " specified" % self.op.default_iallocator,
4451 def _CheckDiskTypeConsistency(self):
4452 """Check whether the storage types that are going to be disabled
4453 are still in use by some instances.
4456 if self.op.enabled_storage_types:
4457 cluster = self.cfg.GetClusterInfo()
4458 instances = self.cfg.GetAllInstancesInfo()
4460 storage_types_to_remove = set(cluster.enabled_storage_types) \
4461 - set(self.op.enabled_storage_types)
4462 for instance in instances.itervalues():
4463 storage_type = constants.DISK_TEMPLATES_STORAGE_TYPE[
4464 instance.disk_template]
4465 if storage_type in storage_types_to_remove:
4466 raise errors.OpPrereqError("Cannot disable storage type '%s',"
4467 " because instance '%s' is using disk"
4469 (storage_type, instance.name,
4470 instance.disk_template))
4472 def Exec(self, feedback_fn):
4473 """Change the parameters of the cluster.
4476 if self.op.vg_name is not None:
4477 new_volume = self.op.vg_name
4480 if new_volume != self.cfg.GetVGName():
4481 self.cfg.SetVGName(new_volume)
4483 feedback_fn("Cluster LVM configuration already in desired"
4484 " state, not changing")
4485 if self.op.drbd_helper is not None:
4486 new_helper = self.op.drbd_helper
4489 if new_helper != self.cfg.GetDRBDHelper():
4490 self.cfg.SetDRBDHelper(new_helper)
4492 feedback_fn("Cluster DRBD helper already in desired state,"
4494 if self.op.hvparams:
4495 self.cluster.hvparams = self.new_hvparams
4497 self.cluster.os_hvp = self.new_os_hvp
4498 if self.op.enabled_hypervisors is not None:
4499 self.cluster.hvparams = self.new_hvparams
4500 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4501 # FIXME: remove once 'enabled_disk_templates' is fully implemented.
4502 if self.op.enabled_storage_types is not None:
4503 self.cluster.enabled_storage_types = \
4504 list(set(self.op.enabled_storage_types))
4505 if self.op.enabled_disk_templates:
4506 self.cluster.enabled_disk_templates = \
4507 list(set(self.op.enabled_disk_templates))
4508 if self.op.beparams:
4509 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4510 if self.op.nicparams:
4511 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4513 self.cluster.ipolicy = self.new_ipolicy
4514 if self.op.osparams:
4515 self.cluster.osparams = self.new_osp
4516 if self.op.ndparams:
4517 self.cluster.ndparams = self.new_ndparams
4518 if self.op.diskparams:
4519 self.cluster.diskparams = self.new_diskparams
4520 if self.op.hv_state:
4521 self.cluster.hv_state_static = self.new_hv_state
4522 if self.op.disk_state:
4523 self.cluster.disk_state_static = self.new_disk_state
4525 if self.op.candidate_pool_size is not None:
4526 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4527 # we need to update the pool size here, otherwise the save will fail
4528 _AdjustCandidatePool(self, [])
4530 if self.op.maintain_node_health is not None:
4531 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4532 feedback_fn("Note: CONFD was disabled at build time, node health"
4533 " maintenance is not useful (still enabling it)")
4534 self.cluster.maintain_node_health = self.op.maintain_node_health
4536 if self.op.prealloc_wipe_disks is not None:
4537 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4539 if self.op.add_uids is not None:
4540 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4542 if self.op.remove_uids is not None:
4543 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4545 if self.op.uid_pool is not None:
4546 self.cluster.uid_pool = self.op.uid_pool
4548 if self.op.default_iallocator is not None:
4549 self.cluster.default_iallocator = self.op.default_iallocator
4551 if self.op.reserved_lvs is not None:
4552 self.cluster.reserved_lvs = self.op.reserved_lvs
4554 if self.op.use_external_mip_script is not None:
4555 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4557 def helper_os(aname, mods, desc):
4559 lst = getattr(self.cluster, aname)
4560 for key, val in mods:
4561 if key == constants.DDM_ADD:
4563 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4566 elif key == constants.DDM_REMOVE:
4570 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4572 raise errors.ProgrammerError("Invalid modification '%s'" % key)
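# Illustrative format of the modification lists handled by helper_os (the OS
# names are hypothetical):
#   self.op.hidden_os = [(constants.DDM_ADD, "debian-image"),
#                        (constants.DDM_REMOVE, "lenny-image")]
# i.e. a list of (DDM_ADD|DDM_REMOVE, os_name) pairs, applied in order.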
4574 if self.op.hidden_os:
4575 helper_os("hidden_os", self.op.hidden_os, "hidden")
4577 if self.op.blacklisted_os:
4578 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4580 if self.op.master_netdev:
4581 master_params = self.cfg.GetMasterNetworkParameters()
4582 ems = self.cfg.GetUseExternalMipScript()
4583 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4584 self.cluster.master_netdev)
4585 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4587 result.Raise("Could not disable the master ip")
4588 feedback_fn("Changing master_netdev from %s to %s" %
4589 (master_params.netdev, self.op.master_netdev))
4590 self.cluster.master_netdev = self.op.master_netdev
4592 if self.op.master_netmask:
4593 master_params = self.cfg.GetMasterNetworkParameters()
4594 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4595 result = self.rpc.call_node_change_master_netmask(master_params.name,
4596 master_params.netmask,
4597 self.op.master_netmask,
4599 master_params.netdev)
4601 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4604 self.cluster.master_netmask = self.op.master_netmask
4606 self.cfg.Update(self.cluster, feedback_fn)
4608 if self.op.master_netdev:
4609 master_params = self.cfg.GetMasterNetworkParameters()
4610 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4611 self.op.master_netdev)
4612 ems = self.cfg.GetUseExternalMipScript()
4613 result = self.rpc.call_node_activate_master_ip(master_params.name,
4616 self.LogWarning("Could not re-enable the master ip on"
4617 " the master, please restart manually: %s",
4621 def _UploadHelper(lu, nodes, fname):
4622 """Helper for uploading a file and showing warnings.
4625 if os.path.exists(fname):
4626 result = lu.rpc.call_upload_file(nodes, fname)
4627 for to_node, to_result in result.items():
4628 msg = to_result.fail_msg
4630 msg = ("Copy of file %s to node %s failed: %s" %
4631 (fname, to_node, msg))
4635 def _ComputeAncillaryFiles(cluster, redist):
4636 """Compute files external to Ganeti which need to be consistent.
4638 @type redist: boolean
4639 @param redist: Whether to include files which need to be redistributed
4642 # Compute files for all nodes
4644 pathutils.SSH_KNOWN_HOSTS_FILE,
4645 pathutils.CONFD_HMAC_KEY,
4646 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4647 pathutils.SPICE_CERT_FILE,
4648 pathutils.SPICE_CACERT_FILE,
4649 pathutils.RAPI_USERS_FILE,
4653 # we need to ship at least the RAPI certificate
4654 files_all.add(pathutils.RAPI_CERT_FILE)
4656 files_all.update(pathutils.ALL_CERT_FILES)
4657 files_all.update(ssconf.SimpleStore().GetFileList())
4659 if cluster.modify_etc_hosts:
4660 files_all.add(pathutils.ETC_HOSTS)
4662 if cluster.use_external_mip_script:
4663 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4665 # Files which are optional; these must:
4666 # - be present in one other category as well
4667 # - either exist or not exist on all nodes of that category (mc, vm all)
4669 pathutils.RAPI_USERS_FILE,
4672 # Files which should only be on master candidates
4676 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4680 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4681 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4682 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4684 # Files which should only be on VM-capable nodes
4687 for hv_name in cluster.enabled_hypervisors
4689 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4693 for hv_name in cluster.enabled_hypervisors
4695 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4697 # Filenames in each category must be unique
4698 all_files_set = files_all | files_mc | files_vm
4699 assert (len(all_files_set) ==
4700 sum(map(len, [files_all, files_mc, files_vm]))), \
4701 "Found file listed in more than one file list"
4703 # Optional files must be present in one other category
4704 assert all_files_set.issuperset(files_opt), \
4705 "Optional file not in a different required list"
4707 # This one file should never ever be re-distributed via RPC
4708 assert not (redist and
4709 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4711 return (files_all, files_opt, files_mc, files_vm)
4714 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4715 """Distribute additional files which are part of the cluster configuration.
4717 ConfigWriter takes care of distributing the config and ssconf files, but
4718 there are more files which should be distributed to all nodes. This function
4719 makes sure those are copied.
4721 @param lu: calling logical unit
4722 @param additional_nodes: list of nodes not in the config to distribute to
4723 @type additional_vm: boolean
4724 @param additional_vm: whether the additional nodes are vm-capable or not
4727 # Gather target nodes
4728 cluster = lu.cfg.GetClusterInfo()
4729 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4731 online_nodes = lu.cfg.GetOnlineNodeList()
4732 online_set = frozenset(online_nodes)
4733 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4735 if additional_nodes is not None:
4736 online_nodes.extend(additional_nodes)
4738 vm_nodes.extend(additional_nodes)
4740 # Never distribute to master node
4741 for nodelist in [online_nodes, vm_nodes]:
4742 if master_info.name in nodelist:
4743 nodelist.remove(master_info.name)
4746 (files_all, _, files_mc, files_vm) = \
4747 _ComputeAncillaryFiles(cluster, True)
4749 # Never re-distribute configuration file from here
4750 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4751 pathutils.CLUSTER_CONF_FILE in files_vm)
4752 assert not files_mc, "Master candidates not handled in this function"
4755 (online_nodes, files_all),
4756 (vm_nodes, files_vm),
4760 for (node_list, files) in filemap:
4762 _UploadHelper(lu, node_list, fname)
4765 class LUClusterRedistConf(NoHooksLU):
4766 """Force the redistribution of cluster configuration.
4768 This is a very simple LU.
4773 def ExpandNames(self):
4774 self.needed_locks = {
4775 locking.LEVEL_NODE: locking.ALL_SET,
4776 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4778 self.share_locks = _ShareAll()
4780 def Exec(self, feedback_fn):
4781 """Redistribute the configuration.
4784 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4785 _RedistributeAncillaryFiles(self)
4788 class LUClusterActivateMasterIp(NoHooksLU):
4789 """Activate the master IP on the master node.
4792 def Exec(self, feedback_fn):
4793 """Activate the master IP.
4796 master_params = self.cfg.GetMasterNetworkParameters()
4797 ems = self.cfg.GetUseExternalMipScript()
4798 result = self.rpc.call_node_activate_master_ip(master_params.name,
4800 result.Raise("Could not activate the master IP")
4803 class LUClusterDeactivateMasterIp(NoHooksLU):
4804 """Deactivate the master IP on the master node.
4807 def Exec(self, feedback_fn):
4808 """Deactivate the master IP.
4811 master_params = self.cfg.GetMasterNetworkParameters()
4812 ems = self.cfg.GetUseExternalMipScript()
4813 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4815 result.Raise("Could not deactivate the master IP")
4818 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4819 """Sleep and poll for an instance's disk to sync.
4822 if not instance.disks or disks is not None and not disks:
4825 disks = _ExpandCheckDisks(instance, disks)
4828 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4830 node = instance.primary_node
4833 lu.cfg.SetDiskID(dev, node)
4835 # TODO: Convert to utils.Retry
4838 degr_retries = 10 # in seconds, as we sleep 1 second each time
4842 cumul_degraded = False
4843 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4844 msg = rstats.fail_msg
4846 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4849 raise errors.RemoteError("Can't contact node %s for mirror data,"
4850 " aborting." % node)
4853 rstats = rstats.payload
4855 for i, mstat in enumerate(rstats):
4857 lu.LogWarning("Can't compute data for node %s/%s",
4858 node, disks[i].iv_name)
4861 cumul_degraded = (cumul_degraded or
4862 (mstat.is_degraded and mstat.sync_percent is None))
4863 if mstat.sync_percent is not None:
4865 if mstat.estimated_time is not None:
4866 rem_time = ("%s remaining (estimated)" %
4867 utils.FormatSeconds(mstat.estimated_time))
4868 max_time = mstat.estimated_time
4870 rem_time = "no time estimate"
4871 lu.LogInfo("- device %s: %5.2f%% done, %s",
4872 disks[i].iv_name, mstat.sync_percent, rem_time)
4874 # if we're done but degraded, let's do a few small retries, to
4875 # make sure we see a stable and not transient situation; therefore
4876 # we force a restart of the loop
4877 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4878 logging.info("Degraded disks found, %d retries left", degr_retries)
4886 time.sleep(min(60, max_time))
4889 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4891 return not cumul_degraded
4894 def _BlockdevFind(lu, node, dev, instance):
4895 """Wrapper around call_blockdev_find to annotate diskparams.
4897 @param lu: A reference to the lu object
4898 @param node: The node to call out
4899 @param dev: The device to find
4900 @param instance: The instance object the device belongs to
4901 @return: The result of the rpc call
4904 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4905 return lu.rpc.call_blockdev_find(node, disk)
4908 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4909 """Wrapper around L{_CheckDiskConsistencyInner}.
4912 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4913 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4917 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4919 """Check that mirrors are not degraded.
4921 @attention: The device has to be annotated already.
4923 The ldisk parameter, if True, will change the test from the
4924 is_degraded attribute (which represents overall non-ok status for
4925 the device(s)) to the ldisk (representing the local storage status).
4928 lu.cfg.SetDiskID(dev, node)
4932 if on_primary or dev.AssembleOnSecondary():
4933 rstats = lu.rpc.call_blockdev_find(node, dev)
4934 msg = rstats.fail_msg
4936 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4938 elif not rstats.payload:
4939 lu.LogWarning("Can't find disk on node %s", node)
4943 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4945 result = result and not rstats.payload.is_degraded
4948 for child in dev.children:
4949 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4955 class LUOobCommand(NoHooksLU):
4956 """Logical unit for OOB handling.
4960 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4962 def ExpandNames(self):
4963 """Gather locks we need.
4966 if self.op.node_names:
4967 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4968 lock_names = self.op.node_names
4970 lock_names = locking.ALL_SET
4972 self.needed_locks = {
4973 locking.LEVEL_NODE: lock_names,
4976 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4978 if not self.op.node_names:
4979 # Acquire node allocation lock only if all nodes are affected
4980 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4982 def CheckPrereq(self):
4983 """Check prerequisites.
4986 - the node exists in the configuration
4989 Any errors are signaled by raising errors.OpPrereqError.
4993 self.master_node = self.cfg.GetMasterNode()
4995 assert self.op.power_delay >= 0.0
4997 if self.op.node_names:
4998 if (self.op.command in self._SKIP_MASTER and
4999 self.master_node in self.op.node_names):
5000 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
5001 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
5003 if master_oob_handler:
5004 additional_text = ("run '%s %s %s' if you want to operate on the"
5005 " master regardless") % (master_oob_handler,
5009 additional_text = "it does not support out-of-band operations"
5011 raise errors.OpPrereqError(("Operating on the master node %s is not"
5012 " allowed for %s; %s") %
5013 (self.master_node, self.op.command,
5014 additional_text), errors.ECODE_INVAL)
5016 self.op.node_names = self.cfg.GetNodeList()
5017 if self.op.command in self._SKIP_MASTER:
5018 self.op.node_names.remove(self.master_node)
5020 if self.op.command in self._SKIP_MASTER:
5021 assert self.master_node not in self.op.node_names
5023 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
5025 raise errors.OpPrereqError("Node %s not found" % node_name,
5028 self.nodes.append(node)
5030 if (not self.op.ignore_status and
5031 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
5032 raise errors.OpPrereqError(("Cannot power off node %s because it is"
5033 " not marked offline") % node_name,
5036 def Exec(self, feedback_fn):
5037 """Execute OOB and return result if we expect any.
5040 master_node = self.master_node
5043 for idx, node in enumerate(utils.NiceSort(self.nodes,
5044 key=lambda node: node.name)):
5045 node_entry = [(constants.RS_NORMAL, node.name)]
5046 ret.append(node_entry)
5048 oob_program = _SupportsOob(self.cfg, node)
5051 node_entry.append((constants.RS_UNAVAIL, None))
5054 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5055 self.op.command, oob_program, node.name)
5056 result = self.rpc.call_run_oob(master_node, oob_program,
5057 self.op.command, node.name,
5061 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5062 node.name, result.fail_msg)
5063 node_entry.append((constants.RS_NODATA, None))
5066 self._CheckPayload(result)
5067 except errors.OpExecError, err:
5068 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5070 node_entry.append((constants.RS_NODATA, None))
5072 if self.op.command == constants.OOB_HEALTH:
5073 # For health we should log important events
5074 for item, status in result.payload:
5075 if status in [constants.OOB_STATUS_WARNING,
5076 constants.OOB_STATUS_CRITICAL]:
5077 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5078 item, node.name, status)
5080 if self.op.command == constants.OOB_POWER_ON:
5082 elif self.op.command == constants.OOB_POWER_OFF:
5083 node.powered = False
5084 elif self.op.command == constants.OOB_POWER_STATUS:
5085 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5086 if powered != node.powered:
5087 logging.warning(("Recorded power state (%s) of node '%s' does not"
5088 " match actual power state (%s)"), node.powered,
5091 # For configuration changing commands we should update the node
5092 if self.op.command in (constants.OOB_POWER_ON,
5093 constants.OOB_POWER_OFF):
5094 self.cfg.Update(node, feedback_fn)
5096 node_entry.append((constants.RS_NORMAL, result.payload))
5098 if (self.op.command == constants.OOB_POWER_ON and
5099 idx < len(self.nodes) - 1):
5100 time.sleep(self.op.power_delay)
5104 def _CheckPayload(self, result):
5105 """Checks if the payload is valid.
5107 @param result: RPC result
5108 @raises errors.OpExecError: If payload is not valid
5112 if self.op.command == constants.OOB_HEALTH:
5113 if not isinstance(result.payload, list):
5114 errs.append("command 'health' is expected to return a list but got %s" %
5115 type(result.payload))
5117 for item, status in result.payload:
5118 if status not in constants.OOB_STATUSES:
5119 errs.append("health item '%s' has invalid status '%s'" %
5122 if self.op.command == constants.OOB_POWER_STATUS:
5123 if not isinstance(result.payload, dict):
5124 errs.append("power-status is expected to return a dict but got %s" %
5125 type(result.payload))
5127 if self.op.command in [
5128 constants.OOB_POWER_ON,
5129 constants.OOB_POWER_OFF,
5130 constants.OOB_POWER_CYCLE,
5132 if result.payload is not None:
5133 errs.append("%s is expected to not return payload but got '%s'" %
5134 (self.op.command, result.payload))
5137 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5138 utils.CommaJoin(errs))
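# Illustrative payload shapes accepted by the checks above (item names and
# values are hypothetical):
#   OOB_HEALTH:       [("disk0", constants.OOB_STATUS_OK),
#                      ("fan1", constants.OOB_STATUS_WARNING)]
#   OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON / OOB_POWER_OFF / OOB_POWER_CYCLE: None (no payload)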
5141 class _OsQuery(_QueryBase):
5142 FIELDS = query.OS_FIELDS
5144 def ExpandNames(self, lu):
5145 # Lock all nodes in shared mode
5146 # Temporary removal of locks, should be reverted later
5147 # TODO: reintroduce locks when they are lighter-weight
5148 lu.needed_locks = {}
5149 #self.share_locks[locking.LEVEL_NODE] = 1
5150 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5152 # The following variables interact with _QueryBase._GetNames
5154 self.wanted = self.names
5156 self.wanted = locking.ALL_SET
5158 self.do_locking = self.use_locking
5160 def DeclareLocks(self, lu, level):
5164 def _DiagnoseByOS(rlist):
5165 """Remaps a per-node return list into an a per-os per-node dictionary
5167 @param rlist: a map with node names as keys and OS objects as values
5170 @return: a dictionary with osnames as keys and as value another
5171 map, with nodes as keys and tuples of (path, status, diagnose,
5172 variants, parameters, api_versions) as values, eg::
5174 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5175 (/srv/..., False, "invalid api")],
5176 "node2": [(/srv/..., True, "", [], [])]}
5181 # we build here the list of nodes that didn't fail the RPC (at RPC
5182 # level), so that nodes with a non-responding node daemon don't
5183 # make all OSes invalid
5184 good_nodes = [node_name for node_name in rlist
5185 if not rlist[node_name].fail_msg]
5186 for node_name, nr in rlist.items():
5187 if nr.fail_msg or not nr.payload:
5189 for (name, path, status, diagnose, variants,
5190 params, api_versions) in nr.payload:
5191 if name not in all_os:
5192 # build a list of nodes for this os containing empty lists
5193 # for each node in node_list
5195 for nname in good_nodes:
5196 all_os[name][nname] = []
5197 # convert params from [name, help] to (name, help)
5198 params = [tuple(v) for v in params]
5199 all_os[name][node_name].append((path, status, diagnose,
5200 variants, params, api_versions))
5203 def _GetQueryData(self, lu):
5204 """Computes the list of nodes and their attributes.
5207 # Locking is not used
5208 assert not (compat.any(lu.glm.is_owned(level)
5209 for level in locking.LEVELS
5210 if level != locking.LEVEL_CLUSTER) or
5211 self.do_locking or self.use_locking)
5213 valid_nodes = [node.name
5214 for node in lu.cfg.GetAllNodesInfo().values()
5215 if not node.offline and node.vm_capable]
5216 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5217 cluster = lu.cfg.GetClusterInfo()
5221 for (os_name, os_data) in pol.items():
5222 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5223 hidden=(os_name in cluster.hidden_os),
5224 blacklisted=(os_name in cluster.blacklisted_os))
5228 api_versions = set()
5230 for idx, osl in enumerate(os_data.values()):
5231 info.valid = bool(info.valid and osl and osl[0][1])
5235 (node_variants, node_params, node_api) = osl[0][3:6]
5238 variants.update(node_variants)
5239 parameters.update(node_params)
5240 api_versions.update(node_api)
5242 # Filter out inconsistent values
5243 variants.intersection_update(node_variants)
5244 parameters.intersection_update(node_params)
5245 api_versions.intersection_update(node_api)
5247 info.variants = list(variants)
5248 info.parameters = list(parameters)
5249 info.api_versions = list(api_versions)
5251 data[os_name] = info
5253 # Prepare data in requested order
5254 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5258 class LUOsDiagnose(NoHooksLU):
5259 """Logical unit for OS diagnose/query.
5265 def _BuildFilter(fields, names):
5266 """Builds a filter for querying OSes.
5269 name_filter = qlang.MakeSimpleFilter("name", names)
5271 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5272 # respective field is not requested
5273 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5274 for fname in ["hidden", "blacklisted"]
5275 if fname not in fields]
5276 if "valid" not in fields:
5277 status_filter.append([qlang.OP_TRUE, "valid"])
5280 status_filter.insert(0, qlang.OP_AND)
5282 status_filter = None
5284 if name_filter and status_filter:
5285 return [qlang.OP_AND, name_filter, status_filter]
5289 return status_filter
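# Illustrative sketch (assumes qlang.MakeSimpleFilter returns None when no
# names are given): for fields=["name", "variants"] and no names, the filter
# built above is
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# i.e. hidden, blacklisted and invalid OSes are hidden by default.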
5291 def CheckArguments(self):
5292 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5293 self.op.output_fields, False)
5295 def ExpandNames(self):
5296 self.oq.ExpandNames(self)
5298 def Exec(self, feedback_fn):
5299 return self.oq.OldStyleQuery(self)
5302 class _ExtStorageQuery(_QueryBase):
5303 FIELDS = query.EXTSTORAGE_FIELDS
5305 def ExpandNames(self, lu):
5306 # Lock all nodes in shared mode
5307 # Temporary removal of locks, should be reverted later
5308 # TODO: reintroduce locks when they are lighter-weight
5309 lu.needed_locks = {}
5310 #self.share_locks[locking.LEVEL_NODE] = 1
5311 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5313 # The following variables interact with _QueryBase._GetNames
5315 self.wanted = self.names
5317 self.wanted = locking.ALL_SET
5319 self.do_locking = self.use_locking
5321 def DeclareLocks(self, lu, level):
5325 def _DiagnoseByProvider(rlist):
5326 """Remaps a per-node return list into an a per-provider per-node dictionary
5328 @param rlist: a map with node names as keys and ExtStorage objects as values
5331 @return: a dictionary with extstorage providers as keys and as
5332 value another map, with nodes as keys and tuples of
5333 (path, status, diagnose, parameters) as values, eg::
5335 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5336 "node2": [(/srv/..., False, "missing file")]
5337 "node3": [(/srv/..., True, "", [])]
5342 # we build here the list of nodes that didn't fail the RPC (at RPC
5343 # level), so that nodes with a non-responding node daemon don't
5344 # make all providers invalid
5345 good_nodes = [node_name for node_name in rlist
5346 if not rlist[node_name].fail_msg]
5347 for node_name, nr in rlist.items():
5348 if nr.fail_msg or not nr.payload:
5350 for (name, path, status, diagnose, params) in nr.payload:
5351 if name not in all_es:
5352 # build a list of nodes for this provider containing empty lists
5353 # for each node in node_list
5355 for nname in good_nodes:
5356 all_es[name][nname] = []
5357 # convert params from [name, help] to (name, help)
5358 params = [tuple(v) for v in params]
5359 all_es[name][node_name].append((path, status, diagnose, params))
5362 def _GetQueryData(self, lu):
5363 """Computes the list of nodes and their attributes.
5366 # Locking is not used
5367 assert not (compat.any(lu.glm.is_owned(level)
5368 for level in locking.LEVELS
5369 if level != locking.LEVEL_CLUSTER) or
5370 self.do_locking or self.use_locking)
5372 valid_nodes = [node.name
5373 for node in lu.cfg.GetAllNodesInfo().values()
5374 if not node.offline and node.vm_capable]
5375 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5379 nodegroup_list = lu.cfg.GetNodeGroupList()
5381 for (es_name, es_data) in pol.items():
5382 # For every provider compute the nodegroup validity.
5383 # To do this we need to check the validity of each node in es_data
5384 # and then construct the corresponding nodegroup dict:
5385 # { nodegroup1: status
5386 # nodegroup2: status
5389 for nodegroup in nodegroup_list:
5390 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5392 nodegroup_nodes = ndgrp.members
5393 nodegroup_name = ndgrp.name
5396 for node in nodegroup_nodes:
5397 if node in valid_nodes:
5398 if es_data[node] != []:
5399 node_status = es_data[node][0][1]
5400 node_statuses.append(node_status)
5402 node_statuses.append(False)
5404 if False in node_statuses:
5405 ndgrp_data[nodegroup_name] = False
5407 ndgrp_data[nodegroup_name] = True
5409 # Compute the provider's parameters
5411 for idx, esl in enumerate(es_data.values()):
5412 valid = bool(esl and esl[0][1])
5416 node_params = esl[0][3]
5419 parameters.update(node_params)
5421 # Filter out inconsistent values
5422 parameters.intersection_update(node_params)
5424 params = list(parameters)
5426 # Now fill all the info for this provider
5427 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5428 nodegroup_status=ndgrp_data,
5431 data[es_name] = info
5433 # Prepare data in requested order
5434 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5438 class LUExtStorageDiagnose(NoHooksLU):
5439 """Logical unit for ExtStorage diagnose/query.
5444 def CheckArguments(self):
5445 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5446 self.op.output_fields, False)
5448 def ExpandNames(self):
5449 self.eq.ExpandNames(self)
5451 def Exec(self, feedback_fn):
5452 return self.eq.OldStyleQuery(self)
5455 class LUNodeRemove(LogicalUnit):
5456 """Logical unit for removing a node.
5459 HPATH = "node-remove"
5460 HTYPE = constants.HTYPE_NODE
5462 def BuildHooksEnv(self):
5467 "OP_TARGET": self.op.node_name,
5468 "NODE_NAME": self.op.node_name,
5471 def BuildHooksNodes(self):
5472 """Build hooks nodes.
5474 This doesn't run on the target node in the pre phase as a failed
5475 node would then be impossible to remove.
5478 all_nodes = self.cfg.GetNodeList()
5480 all_nodes.remove(self.op.node_name)
5483 return (all_nodes, all_nodes)
5485 def CheckPrereq(self):
5486 """Check prerequisites.
5489 - the node exists in the configuration
5490 - it does not have primary or secondary instances
5491 - it's not the master
5493 Any errors are signaled by raising errors.OpPrereqError.
5496 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5497 node = self.cfg.GetNodeInfo(self.op.node_name)
5498 assert node is not None
5500 masternode = self.cfg.GetMasterNode()
5501 if node.name == masternode:
5502 raise errors.OpPrereqError("Node is the master node, failover to another"
5503 " node is required", errors.ECODE_INVAL)
5505 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5506 if node.name in instance.all_nodes:
5507 raise errors.OpPrereqError("Instance %s is still running on the node,"
5508 " please remove first" % instance_name,
5510 self.op.node_name = node.name
5513 def Exec(self, feedback_fn):
5514 """Removes the node from the cluster.
5518 logging.info("Stopping the node daemon and removing configs from node %s",
5521 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5523 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5526 # Promote nodes to master candidate as needed
5527 _AdjustCandidatePool(self, exceptions=[node.name])
5528 self.context.RemoveNode(node.name)
5530 # Run post hooks on the node before it's removed
5531 _RunPostHook(self, node.name)
5533 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5534 msg = result.fail_msg
5536 self.LogWarning("Errors encountered on the remote node while leaving"
5537 " the cluster: %s", msg)
5539 # Remove node from our /etc/hosts
5540 if self.cfg.GetClusterInfo().modify_etc_hosts:
5541 master_node = self.cfg.GetMasterNode()
5542 result = self.rpc.call_etc_hosts_modify(master_node,
5543 constants.ETC_HOSTS_REMOVE,
5545 result.Raise("Can't update hosts file with new host data")
5546 _RedistributeAncillaryFiles(self)
5549 class _NodeQuery(_QueryBase):
5550 FIELDS = query.NODE_FIELDS
5552 def ExpandNames(self, lu):
5553 lu.needed_locks = {}
5554 lu.share_locks = _ShareAll()
5557 self.wanted = _GetWantedNodes(lu, self.names)
5559 self.wanted = locking.ALL_SET
5561 self.do_locking = (self.use_locking and
5562 query.NQ_LIVE in self.requested_data)
5565 # If any non-static field is requested we need to lock the nodes
5566 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5567 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5569 def DeclareLocks(self, lu, level):
5572 def _GetQueryData(self, lu):
5573 """Computes the list of nodes and their attributes.
5576 all_info = lu.cfg.GetAllNodesInfo()
5578 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5580 # Gather data as requested
5581 if query.NQ_LIVE in self.requested_data:
5582 # filter out non-vm_capable nodes
5583 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5585 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5586 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5587 [lu.cfg.GetHypervisorType()], es_flags)
5588 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5589 for (name, nresult) in node_data.items()
5590 if not nresult.fail_msg and nresult.payload)
5594 if query.NQ_INST in self.requested_data:
5595 node_to_primary = dict([(name, set()) for name in nodenames])
5596 node_to_secondary = dict([(name, set()) for name in nodenames])
5598 inst_data = lu.cfg.GetAllInstancesInfo()
5600 for inst in inst_data.values():
5601 if inst.primary_node in node_to_primary:
5602 node_to_primary[inst.primary_node].add(inst.name)
5603 for secnode in inst.secondary_nodes:
5604 if secnode in node_to_secondary:
5605 node_to_secondary[secnode].add(inst.name)
5607 node_to_primary = None
5608 node_to_secondary = None
5610 if query.NQ_OOB in self.requested_data:
5611 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5612 for name, node in all_info.iteritems())
5616 if query.NQ_GROUP in self.requested_data:
5617 groups = lu.cfg.GetAllNodeGroupsInfo()
5621 return query.NodeQueryData([all_info[name] for name in nodenames],
5622 live_data, lu.cfg.GetMasterNode(),
5623 node_to_primary, node_to_secondary, groups,
5624 oob_support, lu.cfg.GetClusterInfo())
5627 class LUNodeQuery(NoHooksLU):
5628 """Logical unit for querying nodes.
5631 # pylint: disable=W0142
5634 def CheckArguments(self):
5635 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5636 self.op.output_fields, self.op.use_locking)
5638 def ExpandNames(self):
5639 self.nq.ExpandNames(self)
5641 def DeclareLocks(self, level):
5642 self.nq.DeclareLocks(self, level)
5644 def Exec(self, feedback_fn):
5645 return self.nq.OldStyleQuery(self)
5648 class LUNodeQueryvols(NoHooksLU):
5649 """Logical unit for getting volumes on node(s).
5653 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5654 _FIELDS_STATIC = utils.FieldSet("node")
5656 def CheckArguments(self):
5657 _CheckOutputFields(static=self._FIELDS_STATIC,
5658 dynamic=self._FIELDS_DYNAMIC,
5659 selected=self.op.output_fields)
5661 def ExpandNames(self):
5662 self.share_locks = _ShareAll()
5665 self.needed_locks = {
5666 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5669 self.needed_locks = {
5670 locking.LEVEL_NODE: locking.ALL_SET,
5671 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5674 def Exec(self, feedback_fn):
5675 """Computes the list of nodes and their attributes.
5678 nodenames = self.owned_locks(locking.LEVEL_NODE)
5679 volumes = self.rpc.call_node_volumes(nodenames)
5681 ilist = self.cfg.GetAllInstancesInfo()
5682 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5685 for node in nodenames:
5686 nresult = volumes[node]
5689 msg = nresult.fail_msg
5691 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5694 node_vols = sorted(nresult.payload,
5695 key=operator.itemgetter("dev"))
5697 for vol in node_vols:
5699 for field in self.op.output_fields:
5702 elif field == "phys":
5706 elif field == "name":
5708 elif field == "size":
5709 val = int(float(vol["size"]))
5710 elif field == "instance":
5711 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5713 raise errors.ParameterError(field)
5714 node_output.append(str(val))
5716 output.append(node_output)
5721 class LUNodeQueryStorage(NoHooksLU):
5722 """Logical unit for getting information on storage units on node(s).
5725 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5728 def CheckArguments(self):
5729 _CheckOutputFields(static=self._FIELDS_STATIC,
5730 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5731 selected=self.op.output_fields)
5733 def ExpandNames(self):
5734 self.share_locks = _ShareAll()
5737 self.needed_locks = {
5738 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5741 self.needed_locks = {
5742 locking.LEVEL_NODE: locking.ALL_SET,
5743 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5746 def Exec(self, feedback_fn):
5747 """Computes the list of nodes and their attributes.
5750 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5752 # Always get name to sort by
5753 if constants.SF_NAME in self.op.output_fields:
5754 fields = self.op.output_fields[:]
5756 fields = [constants.SF_NAME] + self.op.output_fields
5758 # Never ask for node or type as they are only known to the LU
5759 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5760 while extra in fields:
5761 fields.remove(extra)
5763 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5764 name_idx = field_idx[constants.SF_NAME]
5766 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5767 data = self.rpc.call_storage_list(self.nodes,
5768 self.op.storage_type, st_args,
5769 self.op.name, fields)
5773 for node in utils.NiceSort(self.nodes):
5774 nresult = data[node]
5778 msg = nresult.fail_msg
5780 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5783 rows = dict([(row[name_idx], row) for row in nresult.payload])
5785 for name in utils.NiceSort(rows.keys()):
5790 for field in self.op.output_fields:
5791 if field == constants.SF_NODE:
5793 elif field == constants.SF_TYPE:
5794 val = self.op.storage_type
5795 elif field in field_idx:
5796 val = row[field_idx[field]]
5798 raise errors.ParameterError(field)
5807 class _InstanceQuery(_QueryBase):
5808 FIELDS = query.INSTANCE_FIELDS
5810 def ExpandNames(self, lu):
5811 lu.needed_locks = {}
5812 lu.share_locks = _ShareAll()
5815 self.wanted = _GetWantedInstances(lu, self.names)
5817 self.wanted = locking.ALL_SET
5819 self.do_locking = (self.use_locking and
5820 query.IQ_LIVE in self.requested_data)
5822 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5823 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5824 lu.needed_locks[locking.LEVEL_NODE] = []
5825 lu.needed_locks[locking.LEVEL_NETWORK] = []
5826 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5828 self.do_grouplocks = (self.do_locking and
5829 query.IQ_NODES in self.requested_data)
5831 def DeclareLocks(self, lu, level):
5833 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5834 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5836 # Lock all groups used by instances optimistically; this requires going
5837 # via the node before it's locked, so the groups have to be verified later on
5838 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5840 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5841 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5842 elif level == locking.LEVEL_NODE:
5843 lu._LockInstancesNodes() # pylint: disable=W0212
5845 elif level == locking.LEVEL_NETWORK:
5846 lu.needed_locks[locking.LEVEL_NETWORK] = \
5848 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5849 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5852 def _CheckGroupLocks(lu):
5853 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5854 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5856 # Check if node groups for locked instances are still correct
5857 for instance_name in owned_instances:
5858 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5860 def _GetQueryData(self, lu):
5861 """Computes the list of instances and their attributes.
5864 if self.do_grouplocks:
5865 self._CheckGroupLocks(lu)
5867 cluster = lu.cfg.GetClusterInfo()
5868 all_info = lu.cfg.GetAllInstancesInfo()
5870 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5872 instance_list = [all_info[name] for name in instance_names]
5873 nodes = frozenset(itertools.chain(*(inst.all_nodes
5874 for inst in instance_list)))
5875 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5878 wrongnode_inst = set()
5880 # Gather data as requested
5881 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5883 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5885 result = node_data[name]
5887 # offline nodes will be in both lists
5888 assert result.fail_msg
5889 offline_nodes.append(name)
5891 bad_nodes.append(name)
5892 elif result.payload:
5893 for inst in result.payload:
5894 if inst in all_info:
5895 if all_info[inst].primary_node == name:
5896 live_data.update(result.payload)
5898 wrongnode_inst.add(inst)
5900 # orphan instance; we don't list it here as we don't
5901 # handle this case yet in the output of instance listing
5902 logging.warning("Orphan instance '%s' found on node %s",
5904 # else no instance is alive
5908 if query.IQ_DISKUSAGE in self.requested_data:
5909 gmi = ganeti.masterd.instance
5910 disk_usage = dict((inst.name,
5911 gmi.ComputeDiskSize(inst.disk_template,
5912 [{constants.IDISK_SIZE: disk.size}
5913 for disk in inst.disks]))
5914 for inst in instance_list)
5918 if query.IQ_CONSOLE in self.requested_data:
5920 for inst in instance_list:
5921 if inst.name in live_data:
5922 # Instance is running
5923 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5925 consinfo[inst.name] = None
5926 assert set(consinfo.keys()) == set(instance_names)
5930 if query.IQ_NODES in self.requested_data:
5931 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5933 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5934 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5935 for uuid in set(map(operator.attrgetter("group"),
5941 if query.IQ_NETWORKS in self.requested_data:
5942 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5943 for i in instance_list))
5944 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5948 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5949 disk_usage, offline_nodes, bad_nodes,
5950 live_data, wrongnode_inst, consinfo,
5951 nodes, groups, networks)
5954 class LUQuery(NoHooksLU):
5955 """Query for resources/items of a certain kind.
5958 # pylint: disable=W0142
5961 def CheckArguments(self):
5962 qcls = _GetQueryImplementation(self.op.what)
5964 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
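# The resource kind in self.op.what selects the matching _QueryBase
# subclass (for example constants.QR_NODE or constants.QR_INSTANCE;
# the exact mapping lives in _GetQueryImplementation).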
5966 def ExpandNames(self):
5967 self.impl.ExpandNames(self)
5969 def DeclareLocks(self, level):
5970 self.impl.DeclareLocks(self, level)
5972 def Exec(self, feedback_fn):
5973 return self.impl.NewStyleQuery(self)
5976 class LUQueryFields(NoHooksLU):
5977 """Query for resources/items of a certain kind.
5980 # pylint: disable=W0142
5983 def CheckArguments(self):
5984 self.qcls = _GetQueryImplementation(self.op.what)
5986 def ExpandNames(self):
5987 self.needed_locks = {}
5989 def Exec(self, feedback_fn):
5990 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5993 class LUNodeModifyStorage(NoHooksLU):
5994 """Logical unit for modifying a storage volume on a node.
5999 def CheckArguments(self):
6000 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6002 storage_type = self.op.storage_type
6005 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
6007 raise errors.OpPrereqError("Storage units of type '%s' can not be"
6008 " modified" % storage_type,
6011 diff = set(self.op.changes.keys()) - modifiable
6013 raise errors.OpPrereqError("The following fields can not be modified for"
6014 " storage units of type '%s': %r" %
6015 (storage_type, list(diff)),
6018 def ExpandNames(self):
6019 self.needed_locks = {
6020 locking.LEVEL_NODE: self.op.node_name,
6023 def Exec(self, feedback_fn):
6024 """Computes the list of nodes and their attributes.
6027 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
6028 result = self.rpc.call_storage_modify(self.op.node_name,
6029 self.op.storage_type, st_args,
6030 self.op.name, self.op.changes)
6031 result.Raise("Failed to modify storage unit '%s' on %s" %
6032 (self.op.name, self.op.node_name))
6035 class LUNodeAdd(LogicalUnit):
6036 """Logical unit for adding node to the cluster.
6040 HTYPE = constants.HTYPE_NODE
6041 _NFLAGS = ["master_capable", "vm_capable"]
6043 def CheckArguments(self):
6044 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
6045 # validate/normalize the node name
6046 self.hostname = netutils.GetHostname(name=self.op.node_name,
6047 family=self.primary_ip_family)
6048 self.op.node_name = self.hostname.name
6050 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
6051 raise errors.OpPrereqError("Cannot readd the master node",
6054 if self.op.readd and self.op.group:
6055 raise errors.OpPrereqError("Cannot pass a node group when a node is"
6056 " being readded", errors.ECODE_INVAL)
6058 def BuildHooksEnv(self):
6061 This will run on all nodes before, and on all nodes + the new node after.
6065 "OP_TARGET": self.op.node_name,
6066 "NODE_NAME": self.op.node_name,
6067 "NODE_PIP": self.op.primary_ip,
6068 "NODE_SIP": self.op.secondary_ip,
6069 "MASTER_CAPABLE": str(self.op.master_capable),
6070 "VM_CAPABLE": str(self.op.vm_capable),
6073 def BuildHooksNodes(self):
6074 """Build hooks nodes.
6077 # Exclude added node
6078 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6079 post_nodes = pre_nodes + [self.op.node_name, ]
6081 return (pre_nodes, post_nodes)
6083 def CheckPrereq(self):
6084 """Check prerequisites.
6087 - the new node is not already in the config
6089 - its parameters (single/dual homed) match the cluster
6091 Any errors are signaled by raising errors.OpPrereqError.
6095 hostname = self.hostname
6096 node = hostname.name
6097 primary_ip = self.op.primary_ip = hostname.ip
6098 if self.op.secondary_ip is None:
6099 if self.primary_ip_family == netutils.IP6Address.family:
6100 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6101 " IPv4 address must be given as secondary",
6103 self.op.secondary_ip = primary_ip
6105 secondary_ip = self.op.secondary_ip
6106 if not netutils.IP4Address.IsValid(secondary_ip):
6107 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6108 " address" % secondary_ip, errors.ECODE_INVAL)
6110 node_list = cfg.GetNodeList()
6111 if not self.op.readd and node in node_list:
6112 raise errors.OpPrereqError("Node %s is already in the configuration" %
6113 node, errors.ECODE_EXISTS)
6114 elif self.op.readd and node not in node_list:
6115 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6118 self.changed_primary_ip = False
6120 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6121 if self.op.readd and node == existing_node_name:
6122 if existing_node.secondary_ip != secondary_ip:
6123 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6124 " address configuration as before",
6126 if existing_node.primary_ip != primary_ip:
6127 self.changed_primary_ip = True
6131 if (existing_node.primary_ip == primary_ip or
6132 existing_node.secondary_ip == primary_ip or
6133 existing_node.primary_ip == secondary_ip or
6134 existing_node.secondary_ip == secondary_ip):
6135 raise errors.OpPrereqError("New node ip address(es) conflict with"
6136 " existing node %s" % existing_node.name,
6137 errors.ECODE_NOTUNIQUE)
6139 # After this 'if' block, None is no longer a valid value for the
6140 # _capable op attributes
6142 old_node = self.cfg.GetNodeInfo(node)
6143 assert old_node is not None, "Can't retrieve locked node %s" % node
6144 for attr in self._NFLAGS:
6145 if getattr(self.op, attr) is None:
6146 setattr(self.op, attr, getattr(old_node, attr))
6148 for attr in self._NFLAGS:
6149 if getattr(self.op, attr) is None:
6150 setattr(self.op, attr, True)
6152 if self.op.readd and not self.op.vm_capable:
6153 pri, sec = cfg.GetNodeInstances(node)
6155 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6156 " flag set to false, but it already holds"
6157 " instances" % node,
6160 # check that the type of the node (single versus dual homed) is the
6161 # same as for the master
6162 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6163 master_singlehomed = myself.secondary_ip == myself.primary_ip
6164 newbie_singlehomed = secondary_ip == primary_ip
6165 if master_singlehomed != newbie_singlehomed:
6166 if master_singlehomed:
6167 raise errors.OpPrereqError("The master has no secondary ip but the"
6168 " new node has one",
6171 raise errors.OpPrereqError("The master has a secondary ip but the"
6172 " new node doesn't have one",
6175 # checks reachability
6176 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6177 raise errors.OpPrereqError("Node not reachable by ping",
6178 errors.ECODE_ENVIRON)
6180 if not newbie_singlehomed:
6181 # check reachability from my secondary ip to newbie's secondary ip
6182 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6183 source=myself.secondary_ip):
6184 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6185 " based ping to node daemon port",
6186 errors.ECODE_ENVIRON)
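# Note: the primary IP is pinged from the master's default address above,
# while a distinct secondary IP must additionally answer on the node
# daemon port when contacted from the master's own secondary IP.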
6193 if self.op.master_capable:
6194 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6196 self.master_candidate = False
6199 self.new_node = old_node
6201 node_group = cfg.LookupNodeGroup(self.op.group)
6202 self.new_node = objects.Node(name=node,
6203 primary_ip=primary_ip,
6204 secondary_ip=secondary_ip,
6205 master_candidate=self.master_candidate,
6206 offline=False, drained=False,
6207 group=node_group, ndparams={})
6209 if self.op.ndparams:
6210 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6211 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6212 "node", "cluster or group")
6214 if self.op.hv_state:
6215 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6217 if self.op.disk_state:
6218 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6220 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6221 # it a property on the base class.
6222 rpcrunner = rpc.DnsOnlyRunner()
6223 result = rpcrunner.call_version([node])[node]
6224 result.Raise("Can't get version information from node %s" % node)
6225 if constants.PROTOCOL_VERSION == result.payload:
6226 logging.info("Communication to node %s fine, sw version %s match",
6227 node, result.payload)
6229 raise errors.OpPrereqError("Version mismatch master version %s,"
6230 " node version %s" %
6231 (constants.PROTOCOL_VERSION, result.payload),
6232 errors.ECODE_ENVIRON)
6234 vg_name = cfg.GetVGName()
6235 if vg_name is not None:
6236 vparams = {constants.NV_PVLIST: [vg_name]}
6237 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6238 cname = self.cfg.GetClusterName()
6239 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6240 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6242 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6243 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6245 def Exec(self, feedback_fn):
6246 """Adds the new node to the cluster.
6249 new_node = self.new_node
6250 node = new_node.name
6252 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6255 # We are adding a new node, so we assume it is powered
6256 new_node.powered = True
6258 # for re-adds, reset the offline/drained/master-candidate flags;
6259 # we need to reset here, otherwise offline would prevent RPC calls
6260 # later in the procedure; this also means that if the re-add
6261 # fails, we are left with a non-offlined, broken node
6263 new_node.drained = new_node.offline = False # pylint: disable=W0201
6264 self.LogInfo("Readding a node, the offline/drained flags were reset")
6265 # if we demote the node, we do cleanup later in the procedure
6266 new_node.master_candidate = self.master_candidate
6267 if self.changed_primary_ip:
6268 new_node.primary_ip = self.op.primary_ip
6270 # copy the master/vm_capable flags
6271 for attr in self._NFLAGS:
6272 setattr(new_node, attr, getattr(self.op, attr))
6274 # notify the user about any possible mc promotion
6275 if new_node.master_candidate:
6276 self.LogInfo("Node will be a master candidate")
6278 if self.op.ndparams:
6279 new_node.ndparams = self.op.ndparams
6281 new_node.ndparams = {}
6283 if self.op.hv_state:
6284 new_node.hv_state_static = self.new_hv_state
6286 if self.op.disk_state:
6287 new_node.disk_state_static = self.new_disk_state
6289 # Add node to our /etc/hosts, and add key to known_hosts
6290 if self.cfg.GetClusterInfo().modify_etc_hosts:
6291 master_node = self.cfg.GetMasterNode()
6292 result = self.rpc.call_etc_hosts_modify(master_node,
6293 constants.ETC_HOSTS_ADD,
6296 result.Raise("Can't update hosts file with new host data")
6298 if new_node.secondary_ip != new_node.primary_ip:
6299 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6302 node_verify_list = [self.cfg.GetMasterNode()]
6303 node_verify_param = {
6304 constants.NV_NODELIST: ([node], {}),
6305 # TODO: do a node-net-test as well?
6308 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6309 self.cfg.GetClusterName())
6310 for verifier in node_verify_list:
6311 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6312 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6314 for failed in nl_payload:
6315 feedback_fn("ssh/hostname verification failed"
6316 " (checking from %s): %s" %
6317 (verifier, nl_payload[failed]))
6318 raise errors.OpExecError("ssh/hostname verification failed")
6321 _RedistributeAncillaryFiles(self)
6322 self.context.ReaddNode(new_node)
6323 # make sure we redistribute the config
6324 self.cfg.Update(new_node, feedback_fn)
6325 # and make sure the new node will not have old files around
6326 if not new_node.master_candidate:
6327 result = self.rpc.call_node_demote_from_mc(new_node.name)
6328 msg = result.fail_msg
6330 self.LogWarning("Node failed to demote itself from master"
6331 " candidate status: %s" % msg)
6333 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6334 additional_vm=self.op.vm_capable)
6335 self.context.AddNode(new_node, self.proc.GetECId())
6338 class LUNodeSetParams(LogicalUnit):
6339 """Modifies the parameters of a node.
6341 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6342 to the node role (as _ROLE_*)
6343 @cvar _R2F: a dictionary from node role to tuples of flags
6344 @cvar _FLAGS: a list of attribute names corresponding to the flags
6347 HPATH = "node-modify"
6348 HTYPE = constants.HTYPE_NODE
6350 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6352 (True, False, False): _ROLE_CANDIDATE,
6353 (False, True, False): _ROLE_DRAINED,
6354 (False, False, True): _ROLE_OFFLINE,
6355 (False, False, False): _ROLE_REGULAR,
6357 _R2F = dict((v, k) for k, v in _F2R.items())
6358 _FLAGS = ["master_candidate", "drained", "offline"]
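# For example, a node whose (master_candidate, drained, offline) flags are
# (True, False, False) maps to _ROLE_CANDIDATE via _F2R, and conversely
# _R2F[_ROLE_OFFLINE] yields (False, False, True).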
6360 def CheckArguments(self):
6361 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6362 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6363 self.op.master_capable, self.op.vm_capable,
6364 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6366 if all_mods.count(None) == len(all_mods):
6367 raise errors.OpPrereqError("Please pass at least one modification",
6369 if all_mods.count(True) > 1:
6370 raise errors.OpPrereqError("Can't set the node into more than one"
6371 " state at the same time",
6374 # Boolean value that tells us whether we might be demoting from MC
6375 self.might_demote = (self.op.master_candidate is False or
6376 self.op.offline is True or
6377 self.op.drained is True or
6378 self.op.master_capable is False)
6380 if self.op.secondary_ip:
6381 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6382 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6383 " address" % self.op.secondary_ip,
6386 self.lock_all = self.op.auto_promote and self.might_demote
6387 self.lock_instances = self.op.secondary_ip is not None
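# A secondary IP change affects instances with internally mirrored disk
# templates on this node (see _InstanceFilter below), so those instances
# are locked as well.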
6389 def _InstanceFilter(self, instance):
6390 """Filter for getting affected instances.
6393 return (instance.disk_template in constants.DTS_INT_MIRROR and
6394 self.op.node_name in instance.all_nodes)
6396 def ExpandNames(self):
6398 self.needed_locks = {
6399 locking.LEVEL_NODE: locking.ALL_SET,
6401 # Block allocations when all nodes are locked
6402 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6405 self.needed_locks = {
6406 locking.LEVEL_NODE: self.op.node_name,
6409 # Since modifying a node can have severe effects on currently running
6410 # operations, the resource lock is at least acquired in shared mode
6411 self.needed_locks[locking.LEVEL_NODE_RES] = \
6412 self.needed_locks[locking.LEVEL_NODE]
6414 # Acquire all locks in shared mode except the node locks below; the shared
6415 # ones are not used for anything but read-only access
6416 self.share_locks = _ShareAll()
6417 self.share_locks[locking.LEVEL_NODE] = 0
6418 self.share_locks[locking.LEVEL_NODE_RES] = 0
6419 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6421 if self.lock_instances:
6422 self.needed_locks[locking.LEVEL_INSTANCE] = \
6423 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6425 def BuildHooksEnv(self):
6428 This runs on the master node.
6432 "OP_TARGET": self.op.node_name,
6433 "MASTER_CANDIDATE": str(self.op.master_candidate),
6434 "OFFLINE": str(self.op.offline),
6435 "DRAINED": str(self.op.drained),
6436 "MASTER_CAPABLE": str(self.op.master_capable),
6437 "VM_CAPABLE": str(self.op.vm_capable),
6440 def BuildHooksNodes(self):
6441 """Build hooks nodes.
6444 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6447 def CheckPrereq(self):
6448 """Check prerequisites.
6450 This only checks the instance list against the existing names.
6453 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6455 if self.lock_instances:
6456 affected_instances = \
6457 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6459 # Verify instance locks
6460 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6461 wanted_instances = frozenset(affected_instances.keys())
6462 if wanted_instances - owned_instances:
6463 raise errors.OpPrereqError("Instances affected by changing node %s's"
6464 " secondary IP address have changed since"
6465 " locks were acquired, wanted '%s', have"
6466 " '%s'; retry the operation" %
6468 utils.CommaJoin(wanted_instances),
6469 utils.CommaJoin(owned_instances)),
6472 affected_instances = None
6474 if (self.op.master_candidate is not None or
6475 self.op.drained is not None or
6476 self.op.offline is not None):
6477 # we can't change the master's node flags
6478 if self.op.node_name == self.cfg.GetMasterNode():
6479 raise errors.OpPrereqError("The master role can be changed"
6480 " only via master-failover",
6483 if self.op.master_candidate and not node.master_capable:
6484 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6485 " it a master candidate" % node.name,
6488 if self.op.vm_capable is False:
6489 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6491 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6492 " the vm_capable flag" % node.name,
6495 if node.master_candidate and self.might_demote and not self.lock_all:
6496 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6497 # check if after removing the current node, we're missing master candidates
6499 (mc_remaining, mc_should, _) = \
6500 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6501 if mc_remaining < mc_should:
6502 raise errors.OpPrereqError("Not enough master candidates, please"
6503 " pass auto promote option to allow"
6504 " promotion (--auto-promote or RAPI"
6505 " auto_promote=True)", errors.ECODE_STATE)
6507 self.old_flags = old_flags = (node.master_candidate,
6508 node.drained, node.offline)
6509 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6510 self.old_role = old_role = self._F2R[old_flags]
6512 # Check for ineffective changes
6513 for attr in self._FLAGS:
6514 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6515 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6516 setattr(self.op, attr, None)
6518 # Past this point, any flag change to False means a transition
6519 # away from the respective state, as only real changes are kept
6521 # TODO: We might query the real power state if it supports OOB
6522 if _SupportsOob(self.cfg, node):
6523 if self.op.offline is False and not (node.powered or
6524 self.op.powered is True):
6525 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6526 " offline status can be reset") %
6527 self.op.node_name, errors.ECODE_STATE)
6528 elif self.op.powered is not None:
6529 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6530 " as it does not support out-of-band"
6531 " handling") % self.op.node_name,
6534 # If we're being de-offlined or un-drained, promote ourselves to MC if needed
6535 if (self.op.drained is False or self.op.offline is False or
6536 (self.op.master_capable and not node.master_capable)):
6537 if _DecideSelfPromotion(self):
6538 self.op.master_candidate = True
6539 self.LogInfo("Auto-promoting node to master candidate")
6541 # If we're no longer master capable, we'll demote ourselves from MC
6542 if self.op.master_capable is False and node.master_candidate:
6543 self.LogInfo("Demoting from master candidate")
6544 self.op.master_candidate = False
6547 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6548 if self.op.master_candidate:
6549 new_role = self._ROLE_CANDIDATE
6550 elif self.op.drained:
6551 new_role = self._ROLE_DRAINED
6552 elif self.op.offline:
6553 new_role = self._ROLE_OFFLINE
6554 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6555 # False is still in new flags, which means we're un-setting (the
6557 new_role = self._ROLE_REGULAR
6558 else: # no new flags, nothing, keep old role
6561 self.new_role = new_role
6563 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6564 # Trying to transition out of offline status
6565 result = self.rpc.call_version([node.name])[node.name]
6567 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6568 " to report its version: %s" %
6569 (node.name, result.fail_msg),
6572 self.LogWarning("Transitioning node from offline to online state"
6573 " without using re-add. Please make sure the node"
6576 # When changing the secondary ip, verify if this is a single-homed to
6577 # multi-homed transition or vice versa, and apply the relevant
6579 if self.op.secondary_ip:
6580 # Ok even without locking, because this can't be changed by any LU
6581 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6582 master_singlehomed = master.secondary_ip == master.primary_ip
6583 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6584 if self.op.force and node.name == master.name:
6585 self.LogWarning("Transitioning from single-homed to multi-homed"
6586 " cluster; all nodes will require a secondary IP"
6589 raise errors.OpPrereqError("Changing the secondary ip on a"
6590 " single-homed cluster requires the"
6591 " --force option to be passed, and the"
6592 " target node to be the master",
6594 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6595 if self.op.force and node.name == master.name:
6596 self.LogWarning("Transitioning from multi-homed to single-homed"
6597 " cluster; secondary IP addresses will have to be"
6600 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6601 " same as the primary IP on a multi-homed"
6602 " cluster, unless the --force option is"
6603 " passed, and the target node is the"
6604 " master", errors.ECODE_INVAL)
6606 assert not (frozenset(affected_instances) -
6607 self.owned_locks(locking.LEVEL_INSTANCE))
6610 if affected_instances:
6611 msg = ("Cannot change secondary IP address: offline node has"
6612 " instances (%s) configured to use it" %
6613 utils.CommaJoin(affected_instances.keys()))
6614 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6616 # On online nodes, check that no instances are running, and that
6617 # the node has the new ip and we can reach it.
6618 for instance in affected_instances.values():
6619 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6620 msg="cannot change secondary ip")
6622 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6623 if master.name != node.name:
6624 # check reachability from master secondary ip to new secondary ip
6625 if not netutils.TcpPing(self.op.secondary_ip,
6626 constants.DEFAULT_NODED_PORT,
6627 source=master.secondary_ip):
6628 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6629 " based ping to node daemon port",
6630 errors.ECODE_ENVIRON)
6632 if self.op.ndparams:
6633 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6634 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6635 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6636 "node", "cluster or group")
6637 self.new_ndparams = new_ndparams
6639 if self.op.hv_state:
6640 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6641 self.node.hv_state_static)
6643 if self.op.disk_state:
6644 self.new_disk_state = \
6645 _MergeAndVerifyDiskState(self.op.disk_state,
6646 self.node.disk_state_static)
6648 def Exec(self, feedback_fn):
6653 old_role = self.old_role
6654 new_role = self.new_role
6658 if self.op.ndparams:
6659 node.ndparams = self.new_ndparams
6661 if self.op.powered is not None:
6662 node.powered = self.op.powered
6664 if self.op.hv_state:
6665 node.hv_state_static = self.new_hv_state
6667 if self.op.disk_state:
6668 node.disk_state_static = self.new_disk_state
6670 for attr in ["master_capable", "vm_capable"]:
6671 val = getattr(self.op, attr)
6673 setattr(node, attr, val)
6674 result.append((attr, str(val)))
6676 if new_role != old_role:
6677 # Tell the node to demote itself, if no longer MC and not offline
6678 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6679 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6681 self.LogWarning("Node failed to demote itself: %s", msg)
6683 new_flags = self._R2F[new_role]
6684 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6686 result.append((desc, str(nf)))
6687 (node.master_candidate, node.drained, node.offline) = new_flags
6689 # we locked all nodes, so we adjust the candidate pool before updating this node
6691 _AdjustCandidatePool(self, [node.name])
6693 if self.op.secondary_ip:
6694 node.secondary_ip = self.op.secondary_ip
6695 result.append(("secondary_ip", self.op.secondary_ip))
6697 # this will trigger configuration file update, if needed
6698 self.cfg.Update(node, feedback_fn)
6700 # this will trigger job queue propagation or cleanup if the mc flag changed
6702 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6703 self.context.ReaddNode(node)
6708 class LUNodePowercycle(NoHooksLU):
6709 """Powercycles a node.
6714 def CheckArguments(self):
6715 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6716 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6717 raise errors.OpPrereqError("The node is the master and the force"
6718 " parameter was not set",
6721 def ExpandNames(self):
6722 """Locking for PowercycleNode.
6724 This is a last-resort option and shouldn't block on other
6725 jobs. Therefore, we grab no locks.
6728 self.needed_locks = {}
6730 def Exec(self, feedback_fn):
6734 result = self.rpc.call_node_powercycle(self.op.node_name,
6735 self.cfg.GetHypervisorType())
6736 result.Raise("Failed to schedule the reboot")
6737 return result.payload
6740 class LUClusterQuery(NoHooksLU):
6741 """Query cluster configuration.
6746 def ExpandNames(self):
6747 self.needed_locks = {}
6749 def Exec(self, feedback_fn):
6750 """Return cluster config.
6753 cluster = self.cfg.GetClusterInfo()
6756 # Filter just for enabled hypervisors
6757 for os_name, hv_dict in cluster.os_hvp.items():
6758 os_hvp[os_name] = {}
6759 for hv_name, hv_params in hv_dict.items():
6760 if hv_name in cluster.enabled_hypervisors:
6761 os_hvp[os_name][hv_name] = hv_params
6763 # Convert ip_family to ip_version
6764 primary_ip_version = constants.IP4_VERSION
6765 if cluster.primary_ip_family == netutils.IP6Address.family:
6766 primary_ip_version = constants.IP6_VERSION
6769 "software_version": constants.RELEASE_VERSION,
6770 "protocol_version": constants.PROTOCOL_VERSION,
6771 "config_version": constants.CONFIG_VERSION,
6772 "os_api_version": max(constants.OS_API_VERSIONS),
6773 "export_version": constants.EXPORT_VERSION,
6774 "architecture": runtime.GetArchInfo(),
6775 "name": cluster.cluster_name,
6776 "master": cluster.master_node,
6777 "default_hypervisor": cluster.primary_hypervisor,
6778 "enabled_hypervisors": cluster.enabled_hypervisors,
6779 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6780 for hypervisor_name in cluster.enabled_hypervisors]),
6782 "beparams": cluster.beparams,
6783 "osparams": cluster.osparams,
6784 "ipolicy": cluster.ipolicy,
6785 "nicparams": cluster.nicparams,
6786 "ndparams": cluster.ndparams,
6787 "diskparams": cluster.diskparams,
6788 "candidate_pool_size": cluster.candidate_pool_size,
6789 "master_netdev": cluster.master_netdev,
6790 "master_netmask": cluster.master_netmask,
6791 "use_external_mip_script": cluster.use_external_mip_script,
6792 "volume_group_name": cluster.volume_group_name,
6793 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6794 "file_storage_dir": cluster.file_storage_dir,
6795 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6796 "maintain_node_health": cluster.maintain_node_health,
6797 "ctime": cluster.ctime,
6798 "mtime": cluster.mtime,
6799 "uuid": cluster.uuid,
6800 "tags": list(cluster.GetTags()),
6801 "uid_pool": cluster.uid_pool,
6802 "default_iallocator": cluster.default_iallocator,
6803 "reserved_lvs": cluster.reserved_lvs,
6804 "primary_ip_version": primary_ip_version,
6805 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6806 "hidden_os": cluster.hidden_os,
6807 "blacklisted_os": cluster.blacklisted_os,
6813 class LUClusterConfigQuery(NoHooksLU):
6814 """Return configuration values.
6819 def CheckArguments(self):
6820 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6822 def ExpandNames(self):
6823 self.cq.ExpandNames(self)
6825 def DeclareLocks(self, level):
6826 self.cq.DeclareLocks(self, level)
6828 def Exec(self, feedback_fn):
6829 result = self.cq.OldStyleQuery(self)
6831 assert len(result) == 1
6836 class _ClusterQuery(_QueryBase):
6837 FIELDS = query.CLUSTER_FIELDS
6839 #: Do not sort (there is only one item)
6842 def ExpandNames(self, lu):
6843 lu.needed_locks = {}
6845 # The following variables interact with _QueryBase._GetNames
6846 self.wanted = locking.ALL_SET
6847 self.do_locking = self.use_locking
6850 raise errors.OpPrereqError("Can not use locking for cluster queries",
6853 def DeclareLocks(self, lu, level):
6856 def _GetQueryData(self, lu):
6857 """Computes the list of nodes and their attributes.
6860 # Locking is not used
6861 assert not (compat.any(lu.glm.is_owned(level)
6862 for level in locking.LEVELS
6863 if level != locking.LEVEL_CLUSTER) or
6864 self.do_locking or self.use_locking)
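# Fields whose data was not requested are filled in with NotImplemented
# below, so the query layer can distinguish "not gathered" from an empty
# value.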
6866 if query.CQ_CONFIG in self.requested_data:
6867 cluster = lu.cfg.GetClusterInfo()
6869 cluster = NotImplemented
6871 if query.CQ_QUEUE_DRAINED in self.requested_data:
6872 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6874 drain_flag = NotImplemented
6876 if query.CQ_WATCHER_PAUSE in self.requested_data:
6877 master_name = lu.cfg.GetMasterNode()
6879 result = lu.rpc.call_get_watcher_pause(master_name)
6880 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6883 watcher_pause = result.payload
6885 watcher_pause = NotImplemented
6887 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6890 class LUInstanceActivateDisks(NoHooksLU):
6891 """Bring up an instance's disks.
6896 def ExpandNames(self):
6897 self._ExpandAndLockInstance()
6898 self.needed_locks[locking.LEVEL_NODE] = []
6899 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6901 def DeclareLocks(self, level):
6902 if level == locking.LEVEL_NODE:
6903 self._LockInstancesNodes()
6905 def CheckPrereq(self):
6906 """Check prerequisites.
6908 This checks that the instance is in the cluster.
6911 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6912 assert self.instance is not None, \
6913 "Cannot retrieve locked instance %s" % self.op.instance_name
6914 _CheckNodeOnline(self, self.instance.primary_node)
6916 def Exec(self, feedback_fn):
6917 """Activate the disks.
6920 disks_ok, disks_info = \
6921 _AssembleInstanceDisks(self, self.instance,
6922 ignore_size=self.op.ignore_size)
6924 raise errors.OpExecError("Cannot activate block devices")
6926 if self.op.wait_for_sync:
6927 if not _WaitForSync(self, self.instance):
6928 raise errors.OpExecError("Some disks of the instance are degraded!")
6933 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6935 """Prepare the block devices for an instance.
6937 This sets up the block devices on all nodes.
6939 @type lu: L{LogicalUnit}
6940 @param lu: the logical unit on whose behalf we execute
6941 @type instance: L{objects.Instance}
6942 @param instance: the instance for whose disks we assemble
6943 @type disks: list of L{objects.Disk} or None
6944 @param disks: which disks to assemble (or all, if None)
6945 @type ignore_secondaries: boolean
6946 @param ignore_secondaries: if true, errors on secondary nodes
6947 won't result in an error return from the function
6948 @type ignore_size: boolean
6949 @param ignore_size: if true, the current known size of the disk
6950 will not be used during the disk activation, useful for cases
6951 when the size is wrong
6952 @return: a tuple of (disks_ok, device_info); device_info is a list of
6953 (host, instance_visible_name, node_visible_name) tuples
6954 with the mapping from node devices to instance devices
6959 iname = instance.name
6960 disks = _ExpandCheckDisks(instance, disks)
6962 # With the two-pass mechanism we try to reduce the window of
6963 # opportunity for the race condition of switching DRBD to primary
6964 # before the handshake has occurred, but we do not eliminate it
6966 # The proper fix would be to wait (with some limits) until the
6967 # connection has been made and drbd transitions from WFConnection
6968 # into any other network-connected state (Connected, SyncTarget,
6971 # 1st pass, assemble on all nodes in secondary mode
6972 for idx, inst_disk in enumerate(disks):
6973 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6975 node_disk = node_disk.Copy()
6976 node_disk.UnsetSize()
6977 lu.cfg.SetDiskID(node_disk, node)
6978 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6980 msg = result.fail_msg
6982 is_offline_secondary = (node in instance.secondary_nodes and
6984 lu.LogWarning("Could not prepare block device %s on node %s"
6985 " (is_primary=False, pass=1): %s",
6986 inst_disk.iv_name, node, msg)
6987 if not (ignore_secondaries or is_offline_secondary):
6990 # FIXME: race condition on drbd migration to primary
6992 # 2nd pass, do only the primary node
6993 for idx, inst_disk in enumerate(disks):
6996 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6997 if node != instance.primary_node:
7000 node_disk = node_disk.Copy()
7001 node_disk.UnsetSize()
7002 lu.cfg.SetDiskID(node_disk, node)
7003 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
7005 msg = result.fail_msg
7007 lu.LogWarning("Could not prepare block device %s on node %s"
7008 " (is_primary=True, pass=2): %s",
7009 inst_disk.iv_name, node, msg)
7012 dev_path = result.payload
7014 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
7016 # leave the disks configured for the primary node
7017 # this is a workaround that would better be fixed by
7018 # improving the logical/physical id handling
7020 lu.cfg.SetDiskID(disk, instance.primary_node)
7022 return disks_ok, device_info
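# Callers typically unpack the result as
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
# and abort if disks_ok is False (see LUInstanceActivateDisks.Exec above).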
7025 def _StartInstanceDisks(lu, instance, force):
7026 """Start the disks of an instance.
7029 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
7030 ignore_secondaries=force)
7032 _ShutdownInstanceDisks(lu, instance)
7033 if force is not None and not force:
7035 hint=("If the message above refers to a secondary node,"
7036 " you can retry the operation using '--force'"))
7037 raise errors.OpExecError("Disk consistency error")
7040 class LUInstanceDeactivateDisks(NoHooksLU):
7041 """Shutdown an instance's disks.
7046 def ExpandNames(self):
7047 self._ExpandAndLockInstance()
7048 self.needed_locks[locking.LEVEL_NODE] = []
7049 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7051 def DeclareLocks(self, level):
7052 if level == locking.LEVEL_NODE:
7053 self._LockInstancesNodes()
7055 def CheckPrereq(self):
7056 """Check prerequisites.
7058 This checks that the instance is in the cluster.
7061 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7062 assert self.instance is not None, \
7063 "Cannot retrieve locked instance %s" % self.op.instance_name
7065 def Exec(self, feedback_fn):
7066 """Deactivate the disks
7069 instance = self.instance
7071 _ShutdownInstanceDisks(self, instance)
7073 _SafeShutdownInstanceDisks(self, instance)
7076 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7077 """Shutdown block devices of an instance.
7079 This function checks that the instance is not running before calling
7080 _ShutdownInstanceDisks.
7083 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7084 _ShutdownInstanceDisks(lu, instance, disks=disks)
7087 def _ExpandCheckDisks(instance, disks):
7088 """Return the instance disks selected by the disks list
7090 @type disks: list of L{objects.Disk} or None
7091 @param disks: selected disks
7092 @rtype: list of L{objects.Disk}
7093 @return: selected instance disks to act on
7097 return instance.disks
7099 if not set(disks).issubset(instance.disks):
7100 raise errors.ProgrammerError("Can only act on disks belonging to the"
7105 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7106 """Shutdown block devices of an instance.
7108 This does the shutdown on all nodes of the instance.
7110 If ignore_primary is false, errors on the primary node are
7115 disks = _ExpandCheckDisks(instance, disks)
7118 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7119 lu.cfg.SetDiskID(top_disk, node)
7120 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7121 msg = result.fail_msg
7123 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7124 disk.iv_name, node, msg)
7125 if ((node == instance.primary_node and not ignore_primary) or
7126 (node != instance.primary_node and not result.offline)):
7131 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7132 """Checks if a node has enough free memory.
7134 This function checks if a given node has the needed amount of free
7135 memory. In case the node has less memory or we cannot get the
7136 information from the node, this function raises an OpPrereqError
7139 @type lu: C{LogicalUnit}
7140 @param lu: a logical unit from which we get configuration data
7142 @param node: the node to check
7143 @type reason: C{str}
7144 @param reason: string to use in the error message
7145 @type requested: C{int}
7146 @param requested: the amount of memory in MiB to check for
7147 @type hypervisor_name: C{str}
7148 @param hypervisor_name: the hypervisor to ask for memory stats
7150 @return: node current free memory
7151 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7152 we cannot check the node
7155 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7156 nodeinfo[node].Raise("Can't get data from node %s" % node,
7157 prereq=True, ecode=errors.ECODE_ENVIRON)
7158 (_, _, (hv_info, )) = nodeinfo[node].payload
7160 free_mem = hv_info.get("memory_free", None)
7161 if not isinstance(free_mem, int):
7162 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7163 " was '%s'" % (node, free_mem),
7164 errors.ECODE_ENVIRON)
7165 if requested > free_mem:
7166 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7167 " needed %s MiB, available %s MiB" %
7168 (node, reason, requested, free_mem),
7173 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7174 """Checks if nodes have enough free disk space in all the VGs.
7176 This function checks if all given nodes have the needed amount of
7177 free disk. In case any node has less disk or we cannot get the
7178 information from the node, this function raises an OpPrereqError
7181 @type lu: C{LogicalUnit}
7182 @param lu: a logical unit from which we get configuration data
7183 @type nodenames: C{list}
7184 @param nodenames: the list of node names to check
7185 @type req_sizes: C{dict}
7186 @param req_sizes: the hash of vg and corresponding amount of disk in
7188 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7189 or we cannot check the node
7192 for vg, req_size in req_sizes.items():
7193 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
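# Illustrative req_sizes value (hypothetical VG name): {"xenvg": 2048}
# requests a check for 2048 MiB of free space in volume group "xenvg" on
# every node in nodenames.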
7196 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7197 """Checks if nodes have enough free disk space in the specified VG.
7199 This function checks if all given nodes have the needed amount of
7200 free disk. In case any node has less disk or we cannot get the
7201 information from the node, this function raises an OpPrereqError
7204 @type lu: C{LogicalUnit}
7205 @param lu: a logical unit from which we get configuration data
7206 @type nodenames: C{list}
7207 @param nodenames: the list of node names to check
7209 @param vg: the volume group to check
7210 @type requested: C{int}
7211 @param requested: the amount of disk in MiB to check for
7212 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7213 or we cannot check the node
7216 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7217 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7218 for node in nodenames:
7219 info = nodeinfo[node]
7220 info.Raise("Cannot get current information from node %s" % node,
7221 prereq=True, ecode=errors.ECODE_ENVIRON)
7222 (_, (vg_info, ), _) = info.payload
7223 vg_free = vg_info.get("vg_free", None)
7224 if not isinstance(vg_free, int):
7225 raise errors.OpPrereqError("Can't compute free disk space on node"
7226 " %s for vg %s, result was '%s'" %
7227 (node, vg, vg_free), errors.ECODE_ENVIRON)
7228 if requested > vg_free:
7229 raise errors.OpPrereqError("Not enough disk space on target node %s"
7230 " vg %s: required %d MiB, available %d MiB" %
7231 (node, vg, requested, vg_free),
7235 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7236 """Checks if nodes have enough physical CPUs
7238 This function checks if all given nodes have the needed number of
7239 physical CPUs. In case any node has less CPUs or we cannot get the
7240 information from the node, this function raises an OpPrereqError
7243 @type lu: C{LogicalUnit}
7244 @param lu: a logical unit from which we get configuration data
7245 @type nodenames: C{list}
7246 @param nodenames: the list of node names to check
7247 @type requested: C{int}
7248 @param requested: the minimum acceptable number of physical CPUs
7249 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7250 or we cannot check the node
7253 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7254 for node in nodenames:
7255 info = nodeinfo[node]
7256 info.Raise("Cannot get current information from node %s" % node,
7257 prereq=True, ecode=errors.ECODE_ENVIRON)
7258 (_, _, (hv_info, )) = info.payload
7259 num_cpus = hv_info.get("cpu_total", None)
7260 if not isinstance(num_cpus, int):
7261 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7262 " on node %s, result was '%s'" %
7263 (node, num_cpus), errors.ECODE_ENVIRON)
7264 if requested > num_cpus:
7265 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7266 "required" % (node, num_cpus, requested),
7270 class LUInstanceStartup(LogicalUnit):
7271 """Starts an instance.
7274 HPATH = "instance-start"
7275 HTYPE = constants.HTYPE_INSTANCE
7278 def CheckArguments(self):
7280 if self.op.beparams:
7281 # fill the beparams dict
7282 objects.UpgradeBeParams(self.op.beparams)
7283 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7285 def ExpandNames(self):
7286 self._ExpandAndLockInstance()
7287 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7289 def DeclareLocks(self, level):
7290 if level == locking.LEVEL_NODE_RES:
7291 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7293 def BuildHooksEnv(self):
7296 This runs on master, primary and secondary nodes of the instance.
7300 "FORCE": self.op.force,
7303 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7307 def BuildHooksNodes(self):
7308 """Build hooks nodes.
7311 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7314 def CheckPrereq(self):
7315 """Check prerequisites.
7317 This checks that the instance is in the cluster.
7320 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7321 assert self.instance is not None, \
7322 "Cannot retrieve locked instance %s" % self.op.instance_name
7325 if self.op.hvparams:
7326 # check hypervisor parameter syntax (locally)
7327 cluster = self.cfg.GetClusterInfo()
7328 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7329 filled_hvp = cluster.FillHV(instance)
7330 filled_hvp.update(self.op.hvparams)
7331 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7332 hv_type.CheckParameterSyntax(filled_hvp)
7333 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7335 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7337 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7339 if self.primary_offline and self.op.ignore_offline_nodes:
7340 self.LogWarning("Ignoring offline primary node")
7342 if self.op.hvparams or self.op.beparams:
7343 self.LogWarning("Overridden parameters are ignored")
7345 _CheckNodeOnline(self, instance.primary_node)
7347 bep = self.cfg.GetClusterInfo().FillBE(instance)
7348 bep.update(self.op.beparams)
7350 # check bridges existence
7351 _CheckInstanceBridgesExist(self, instance)
7353 remote_info = self.rpc.call_instance_info(instance.primary_node,
7355 instance.hypervisor)
7356 remote_info.Raise("Error checking node %s" % instance.primary_node,
7357 prereq=True, ecode=errors.ECODE_ENVIRON)
7358 if not remote_info.payload: # not running already
7359 _CheckNodeFreeMemory(self, instance.primary_node,
7360 "starting instance %s" % instance.name,
7361 bep[constants.BE_MINMEM], instance.hypervisor)
7363 def Exec(self, feedback_fn):
7364 """Start the instance.
7367 instance = self.instance
7368 force = self.op.force
7370 if not self.op.no_remember:
7371 self.cfg.MarkInstanceUp(instance.name)
7373 if self.primary_offline:
7374 assert self.op.ignore_offline_nodes
7375 self.LogInfo("Primary node offline, marked instance as started")
7377 node_current = instance.primary_node
7379 _StartInstanceDisks(self, instance, force)
7382 self.rpc.call_instance_start(node_current,
7383 (instance, self.op.hvparams,
7385 self.op.startup_paused)
7386 msg = result.fail_msg
7388 _ShutdownInstanceDisks(self, instance)
7389 raise errors.OpExecError("Could not start instance: %s" % msg)
7392 class LUInstanceReboot(LogicalUnit):
7393 """Reboot an instance.
7396 HPATH = "instance-reboot"
7397 HTYPE = constants.HTYPE_INSTANCE
7400 def ExpandNames(self):
7401 self._ExpandAndLockInstance()
7403 def BuildHooksEnv(self):
7406 This runs on master, primary and secondary nodes of the instance.
7410 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7411 "REBOOT_TYPE": self.op.reboot_type,
7412 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7415 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7419 def BuildHooksNodes(self):
7420 """Build hooks nodes.
7423 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7426 def CheckPrereq(self):
7427 """Check prerequisites.
7429 This checks that the instance is in the cluster.
7432 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7433 assert self.instance is not None, \
7434 "Cannot retrieve locked instance %s" % self.op.instance_name
7435 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7436 _CheckNodeOnline(self, instance.primary_node)
7438 # check bridges existence
7439 _CheckInstanceBridgesExist(self, instance)
7441 def Exec(self, feedback_fn):
7442 """Reboot the instance.
7445 instance = self.instance
7446 ignore_secondaries = self.op.ignore_secondaries
7447 reboot_type = self.op.reboot_type
7448 reason = self.op.reason
7450 remote_info = self.rpc.call_instance_info(instance.primary_node,
7451 instance.name,
7452 instance.hypervisor)
7453 remote_info.Raise("Error checking node %s" % instance.primary_node)
7454 instance_running = bool(remote_info.payload)
7456 node_current = instance.primary_node
7458 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7459 constants.INSTANCE_REBOOT_HARD]:
7460 for disk in instance.disks:
7461 self.cfg.SetDiskID(disk, node_current)
7462 result = self.rpc.call_instance_reboot(node_current, instance,
7464 self.op.shutdown_timeout,
7466 result.Raise("Could not reboot instance")
7468 if instance_running:
7469 result = self.rpc.call_instance_shutdown(node_current, instance,
7470 self.op.shutdown_timeout)
7471 result.Raise("Could not shutdown instance for full reboot")
7472 _ShutdownInstanceDisks(self, instance)
7473 else:
7474 self.LogInfo("Instance %s was already stopped, starting now",
7475 instance.name)
7476 _StartInstanceDisks(self, instance, ignore_secondaries)
7477 result = self.rpc.call_instance_start(node_current,
7478 (instance, None, None), False)
7479 msg = result.fail_msg
7480 if msg:
7481 _ShutdownInstanceDisks(self, instance)
7482 raise errors.OpExecError("Could not start instance for"
7483 " full reboot: %s" % msg)
7485 self.cfg.MarkInstanceUp(instance.name)
7488 class LUInstanceShutdown(LogicalUnit):
7489 """Shutdown an instance.
7492 HPATH = "instance-stop"
7493 HTYPE = constants.HTYPE_INSTANCE
7496 def ExpandNames(self):
7497 self._ExpandAndLockInstance()
7499 def BuildHooksEnv(self):
7502 This runs on master, primary and secondary nodes of the instance.
7505 env = _BuildInstanceHookEnvByObject(self, self.instance)
7506 env["TIMEOUT"] = self.op.timeout
7509 def BuildHooksNodes(self):
7510 """Build hooks nodes.
7513 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7516 def CheckPrereq(self):
7517 """Check prerequisites.
7519 This checks that the instance is in the cluster.
7522 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7523 assert self.instance is not None, \
7524 "Cannot retrieve locked instance %s" % self.op.instance_name
7526 if not self.op.force:
7527 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7528 else:
7529 self.LogWarning("Ignoring offline instance check")
7531 self.primary_offline = \
7532 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7534 if self.primary_offline and self.op.ignore_offline_nodes:
7535 self.LogWarning("Ignoring offline primary node")
7537 _CheckNodeOnline(self, self.instance.primary_node)
7539 def Exec(self, feedback_fn):
7540 """Shutdown the instance.
7543 instance = self.instance
7544 node_current = instance.primary_node
7545 timeout = self.op.timeout
7547 # If the instance is offline we shouldn't mark it as down, as that
7548 # resets the offline flag.
7549 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7550 self.cfg.MarkInstanceDown(instance.name)
7552 if self.primary_offline:
7553 assert self.op.ignore_offline_nodes
7554 self.LogInfo("Primary node offline, marked instance as stopped")
7555 else:
7556 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7557 msg = result.fail_msg
7558 if msg:
7559 self.LogWarning("Could not shutdown instance: %s", msg)
7561 _ShutdownInstanceDisks(self, instance)
7564 class LUInstanceReinstall(LogicalUnit):
7565 """Reinstall an instance.
7568 HPATH = "instance-reinstall"
7569 HTYPE = constants.HTYPE_INSTANCE
7572 def ExpandNames(self):
7573 self._ExpandAndLockInstance()
7575 def BuildHooksEnv(self):
7578 This runs on master, primary and secondary nodes of the instance.
7581 return _BuildInstanceHookEnvByObject(self, self.instance)
7583 def BuildHooksNodes(self):
7584 """Build hooks nodes.
7587 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7590 def CheckPrereq(self):
7591 """Check prerequisites.
7593 This checks that the instance is in the cluster and is not running.
7596 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7597 assert instance is not None, \
7598 "Cannot retrieve locked instance %s" % self.op.instance_name
7599 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7600 " offline, cannot reinstall")
7602 if instance.disk_template == constants.DT_DISKLESS:
7603 raise errors.OpPrereqError("Instance '%s' has no disks" %
7604 self.op.instance_name,
7606 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7608 if self.op.os_type is not None:
7610 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7611 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7612 instance_os = self.op.os_type
7613 else:
7614 instance_os = instance.os
7616 nodelist = list(instance.all_nodes)
7618 if self.op.osparams:
7619 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7620 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7621 self.os_inst = i_osdict # the new dict (without defaults)
7622 else:
7623 self.os_inst = {}
7625 self.instance = instance
7627 def Exec(self, feedback_fn):
7628 """Reinstall the instance.
7631 inst = self.instance
7633 if self.op.os_type is not None:
7634 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7635 inst.os = self.op.os_type
7636 # Write to configuration
7637 self.cfg.Update(inst, feedback_fn)
7639 _StartInstanceDisks(self, inst, None)
7641 feedback_fn("Running the instance OS create scripts...")
7642 # FIXME: pass debug option from opcode to backend
7643 result = self.rpc.call_instance_os_add(inst.primary_node,
7644 (inst, self.os_inst), True,
7645 self.op.debug_level)
7646 result.Raise("Could not install OS for instance %s on node %s" %
7647 (inst.name, inst.primary_node))
7649 _ShutdownInstanceDisks(self, inst)
7652 class LUInstanceRecreateDisks(LogicalUnit):
7653 """Recreate an instance's missing disks.
7656 HPATH = "instance-recreate-disks"
7657 HTYPE = constants.HTYPE_INSTANCE
7660 _MODIFYABLE = compat.UniqueFrozenset([
7661 constants.IDISK_SIZE,
7662 constants.IDISK_MODE,
7665 # New or changed disk parameters may have different semantics
7666 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7667 constants.IDISK_ADOPT,
7669 # TODO: Implement support changing VG while recreating
7671 constants.IDISK_METAVG,
7672 constants.IDISK_PROVIDER,
7675 def _RunAllocator(self):
7676 """Run the allocator based on input opcode.
7679 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7682 # The allocator should actually run in "relocate" mode, but current
7683 # allocators don't support relocating all the nodes of an instance at
7684 # the same time. As a workaround we use "allocate" mode, but this is
7685 # suboptimal for two reasons:
7686 # - The instance name passed to the allocator is present in the list of
7687 # existing instances, so there could be a conflict within the
7688 # internal structures of the allocator. This doesn't happen with the
7689 # current allocators, but it's a liability.
7690 # - The allocator counts the resources used by the instance twice: once
7691 # because the instance exists already, and once because it tries to
7692 # allocate a new instance.
7693 # The allocator could choose some of the nodes on which the instance is
7694 # running, but that's not a problem. If the instance nodes are broken,
7695 # they should already be marked as drained or offline, and hence
7696 # skipped by the allocator. If instance disks have been lost for other
7697 # reasons, then recreating the disks on the same nodes should be fine.
7698 disk_template = self.instance.disk_template
7699 spindle_use = be_full[constants.BE_SPINDLE_USE]
7700 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7701 disk_template=disk_template,
7702 tags=list(self.instance.GetTags()),
7703 os=self.instance.os,
7705 vcpus=be_full[constants.BE_VCPUS],
7706 memory=be_full[constants.BE_MAXMEM],
7707 spindle_use=spindle_use,
7708 disks=[{constants.IDISK_SIZE: d.size,
7709 constants.IDISK_MODE: d.mode}
7710 for d in self.instance.disks],
7711 hypervisor=self.instance.hypervisor,
7712 node_whitelist=None)
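# The allocation request mirrors the instance's current resources (disk sizes
# and modes, memory, vcpus, spindle use and tags), so the allocator should
# pick nodes with at least equivalent capacity; see the caveats about
# "allocate" vs. "relocate" mode in the comment above.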
7713 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7715 ial.Run(self.op.iallocator)
7717 assert req.RequiredNodes() == len(self.instance.all_nodes)
7719 if not ial.success:
7720 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7721 " %s" % (self.op.iallocator, ial.info),
7722 errors.ECODE_NORES)
7724 self.op.nodes = ial.result
7725 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7726 self.op.instance_name, self.op.iallocator,
7727 utils.CommaJoin(ial.result))
7729 def CheckArguments(self):
7730 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7731 # Normalize and convert deprecated list of disk indices
7732 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
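# Example of the normalization above: a legacy request with disks=[2, 0]
# becomes [(0, {}), (2, {})], i.e. sorted (index, params) pairs with empty
# parameter overrides.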
7734 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7735 if duplicates:
7736 raise errors.OpPrereqError("Some disks have been specified more than"
7737 " once: %s" % utils.CommaJoin(duplicates),
7738 errors.ECODE_INVAL)
7740 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7741 # when neither iallocator nor nodes are specified
7742 if self.op.iallocator or self.op.nodes:
7743 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7745 for (idx, params) in self.op.disks:
7746 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7747 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7748 if unsupported:
7749 raise errors.OpPrereqError("Parameters for disk %s try to change"
7750 " unmodifiable parameter(s): %s" %
7751 (idx, utils.CommaJoin(unsupported)),
7752 errors.ECODE_INVAL)
7754 def ExpandNames(self):
7755 self._ExpandAndLockInstance()
7756 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7758 if self.op.nodes:
7759 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7760 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7761 else:
7762 self.needed_locks[locking.LEVEL_NODE] = []
7763 if self.op.iallocator:
7764 # iallocator will select a new node in the same group
7765 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7766 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7768 self.needed_locks[locking.LEVEL_NODE_RES] = []
7770 def DeclareLocks(self, level):
7771 if level == locking.LEVEL_NODEGROUP:
7772 assert self.op.iallocator is not None
7773 assert not self.op.nodes
7774 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7775 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7776 # Lock the primary group used by the instance optimistically; this
7777 # requires going via the node before it's locked, requiring
7778 # verification later on
7779 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7780 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7782 elif level == locking.LEVEL_NODE:
7783 # If an allocator is used, then we lock all the nodes in the current
7784 # instance group, as we don't know yet which ones will be selected;
7785 # if we replace the nodes without using an allocator, locks are
7786 # already declared in ExpandNames; otherwise, we need to lock all the
7787 # instance nodes for disk re-creation
7788 if self.op.iallocator:
7789 assert not self.op.nodes
7790 assert not self.needed_locks[locking.LEVEL_NODE]
7791 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7793 # Lock member nodes of the group of the primary node
7794 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7795 self.needed_locks[locking.LEVEL_NODE].extend(
7796 self.cfg.GetNodeGroup(group_uuid).members)
7798 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7799 elif not self.op.nodes:
7800 self._LockInstancesNodes(primary_only=False)
7801 elif level == locking.LEVEL_NODE_RES:
7803 self.needed_locks[locking.LEVEL_NODE_RES] = \
7804 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7806 def BuildHooksEnv(self):
7809 This runs on master, primary and secondary nodes of the instance.
7812 return _BuildInstanceHookEnvByObject(self, self.instance)
7814 def BuildHooksNodes(self):
7815 """Build hooks nodes.
7818 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7821 def CheckPrereq(self):
7822 """Check prerequisites.
7824 This checks that the instance is in the cluster and is not running.
7827 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7828 assert instance is not None, \
7829 "Cannot retrieve locked instance %s" % self.op.instance_name
7831 if len(self.op.nodes) != len(instance.all_nodes):
7832 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7833 " %d replacement nodes were specified" %
7834 (instance.name, len(instance.all_nodes),
7835 len(self.op.nodes)),
7837 assert instance.disk_template != constants.DT_DRBD8 or \
7838 len(self.op.nodes) == 2
7839 assert instance.disk_template != constants.DT_PLAIN or \
7840 len(self.op.nodes) == 1
7841 primary_node = self.op.nodes[0]
7842 else:
7843 primary_node = instance.primary_node
7844 if not self.op.iallocator:
7845 _CheckNodeOnline(self, primary_node)
7847 if instance.disk_template == constants.DT_DISKLESS:
7848 raise errors.OpPrereqError("Instance '%s' has no disks" %
7849 self.op.instance_name, errors.ECODE_INVAL)
7851 # Verify if node group locks are still correct
7852 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7853 if owned_groups:
7854 # Node group locks are acquired only for the primary node (and only
7855 # when the allocator is used)
7856 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7857 primary_only=True)
7859 # if we replace nodes *and* the old primary is offline, we don't
7860 # check the instance state
7861 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7862 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7863 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7864 msg="cannot recreate disks")
7866 if self.op.disks:
7867 self.disks = dict(self.op.disks)
7868 else:
7869 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7871 maxidx = max(self.disks.keys())
7872 if maxidx >= len(instance.disks):
7873 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7876 if ((self.op.nodes or self.op.iallocator) and
7877 sorted(self.disks.keys()) != range(len(instance.disks))):
7878 raise errors.OpPrereqError("Can't recreate disks partially and"
7879 " change the nodes at the same time",
7882 self.instance = instance
7884 if self.op.iallocator:
7885 self._RunAllocator()
7886 # Release unneeded node and node resource locks
7887 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7888 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7889 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7891 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7893 def Exec(self, feedback_fn):
7894 """Recreate the disks.
7897 instance = self.instance
7899 assert (self.owned_locks(locking.LEVEL_NODE) ==
7900 self.owned_locks(locking.LEVEL_NODE_RES))
7903 mods = [] # keeps track of needed changes
7905 for idx, disk in enumerate(instance.disks):
7907 changes = self.disks[idx]
7909 # Disk should not be recreated
7913 # update secondaries for disks, if needed
7914 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7915 # need to update the nodes and minors
7916 assert len(self.op.nodes) == 2
7917 assert len(disk.logical_id) == 6 # otherwise disk internals
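# DRBD8 logical_id layout assumed by the unpacking below:
# (nodeA, nodeB, port, minorA, minorB, secret); only the node names and the
# minors are replaced, the port and the shared secret are carried over.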
7919 (_, _, old_port, _, _, old_secret) = disk.logical_id
7920 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7921 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7922 new_minors[0], new_minors[1], old_secret)
7923 assert len(disk.logical_id) == len(new_id)
7924 else:
7925 new_id = None
7927 mods.append((idx, new_id, changes))
7929 # now that we have passed all asserts above, we can apply the mods
7930 # in a single run (to avoid partial changes)
7931 for idx, new_id, changes in mods:
7932 disk = instance.disks[idx]
7933 if new_id is not None:
7934 assert disk.dev_type == constants.LD_DRBD8
7935 disk.logical_id = new_id
7936 if changes:
7937 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7938 mode=changes.get(constants.IDISK_MODE, None))
7940 # change primary node, if needed
7941 if self.op.nodes:
7942 instance.primary_node = self.op.nodes[0]
7943 self.LogWarning("Changing the instance's nodes, you will have to"
7944 " remove any disks left on the older nodes manually")
7947 self.cfg.Update(instance, feedback_fn)
7949 # All touched nodes must be locked
7950 mylocks = self.owned_locks(locking.LEVEL_NODE)
7951 assert mylocks.issuperset(frozenset(instance.all_nodes))
7952 _CreateDisks(self, instance, to_skip=to_skip)
7955 class LUInstanceRename(LogicalUnit):
7956 """Rename an instance.
7959 HPATH = "instance-rename"
7960 HTYPE = constants.HTYPE_INSTANCE
7962 def CheckArguments(self):
7966 if self.op.ip_check and not self.op.name_check:
7967 # TODO: make the ip check more flexible and not depend on the name check
7968 raise errors.OpPrereqError("IP address check requires a name check",
7971 def BuildHooksEnv(self):
7974 This runs on master, primary and secondary nodes of the instance.
7977 env = _BuildInstanceHookEnvByObject(self, self.instance)
7978 env["INSTANCE_NEW_NAME"] = self.op.new_name
7981 def BuildHooksNodes(self):
7982 """Build hooks nodes.
7985 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7988 def CheckPrereq(self):
7989 """Check prerequisites.
7991 This checks that the instance is in the cluster and is not running.
7994 self.op.instance_name = _ExpandInstanceName(self.cfg,
7995 self.op.instance_name)
7996 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7997 assert instance is not None
7998 _CheckNodeOnline(self, instance.primary_node)
7999 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
8000 msg="cannot rename")
8001 self.instance = instance
8003 new_name = self.op.new_name
8004 if self.op.name_check:
8005 hostname = _CheckHostnameSane(self, new_name)
8006 new_name = self.op.new_name = hostname.name
8007 if (self.op.ip_check and
8008 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
8009 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8010 (hostname.ip, new_name),
8011 errors.ECODE_NOTUNIQUE)
8013 instance_list = self.cfg.GetInstanceList()
8014 if new_name in instance_list and new_name != instance.name:
8015 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8016 new_name, errors.ECODE_EXISTS)
8018 def Exec(self, feedback_fn):
8019 """Rename the instance.
8022 inst = self.instance
8023 old_name = inst.name
8025 rename_file_storage = False
8026 if (inst.disk_template in constants.DTS_FILEBASED and
8027 self.op.new_name != inst.name):
8028 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
8029 rename_file_storage = True
8031 self.cfg.RenameInstance(inst.name, self.op.new_name)
8032 # Change the instance lock. This is definitely safe while we hold the BGL.
8033 # Otherwise the new lock would have to be added in acquired mode.
8035 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
8036 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
8037 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
8039 # re-read the instance from the configuration after rename
8040 inst = self.cfg.GetInstanceInfo(self.op.new_name)
8042 if rename_file_storage:
8043 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
8044 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
8045 old_file_storage_dir,
8046 new_file_storage_dir)
8047 result.Raise("Could not rename on node %s directory '%s' to '%s'"
8048 " (but the instance has been renamed in Ganeti)" %
8049 (inst.primary_node, old_file_storage_dir,
8050 new_file_storage_dir))
8052 _StartInstanceDisks(self, inst, None)
8053 # update info on disks
8054 info = _GetInstanceInfoText(inst)
8055 for (idx, disk) in enumerate(inst.disks):
8056 for node in inst.all_nodes:
8057 self.cfg.SetDiskID(disk, node)
8058 result = self.rpc.call_blockdev_setinfo(node, disk, info)
8059 if result.fail_msg:
8060 self.LogWarning("Error setting info on node %s for disk %s: %s",
8061 node, idx, result.fail_msg)
8063 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
8064 old_name, self.op.debug_level)
8065 msg = result.fail_msg
8066 if msg:
8067 msg = ("Could not run OS rename script for instance %s on node %s"
8068 " (but the instance has been renamed in Ganeti): %s" %
8069 (inst.name, inst.primary_node, msg))
8070 self.LogWarning(msg)
8072 _ShutdownInstanceDisks(self, inst)
8077 class LUInstanceRemove(LogicalUnit):
8078 """Remove an instance.
8081 HPATH = "instance-remove"
8082 HTYPE = constants.HTYPE_INSTANCE
8085 def ExpandNames(self):
8086 self._ExpandAndLockInstance()
8087 self.needed_locks[locking.LEVEL_NODE] = []
8088 self.needed_locks[locking.LEVEL_NODE_RES] = []
8089 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8091 def DeclareLocks(self, level):
8092 if level == locking.LEVEL_NODE:
8093 self._LockInstancesNodes()
8094 elif level == locking.LEVEL_NODE_RES:
8096 self.needed_locks[locking.LEVEL_NODE_RES] = \
8097 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8099 def BuildHooksEnv(self):
8102 This runs on master, primary and secondary nodes of the instance.
8105 env = _BuildInstanceHookEnvByObject(self, self.instance)
8106 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8109 def BuildHooksNodes(self):
8110 """Build hooks nodes.
8113 nl = [self.cfg.GetMasterNode()]
8114 nl_post = list(self.instance.all_nodes) + nl
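# Pre-hooks for the removal run on the master only; post-hooks additionally
# run on the (former) instance nodes, presumably so node-local cleanup hooks
# still fire after the instance is gone.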
8115 return (nl, nl_post)
8117 def CheckPrereq(self):
8118 """Check prerequisites.
8120 This checks that the instance is in the cluster.
8123 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8124 assert self.instance is not None, \
8125 "Cannot retrieve locked instance %s" % self.op.instance_name
8127 def Exec(self, feedback_fn):
8128 """Remove the instance.
8131 instance = self.instance
8132 logging.info("Shutting down instance %s on node %s",
8133 instance.name, instance.primary_node)
8135 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8136 self.op.shutdown_timeout)
8137 msg = result.fail_msg
8138 if msg:
8139 if self.op.ignore_failures:
8140 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8141 else:
8142 raise errors.OpExecError("Could not shutdown instance %s on"
8143 " node %s: %s" %
8144 (instance.name, instance.primary_node, msg))
8146 assert (self.owned_locks(locking.LEVEL_NODE) ==
8147 self.owned_locks(locking.LEVEL_NODE_RES))
8148 assert not (set(instance.all_nodes) -
8149 self.owned_locks(locking.LEVEL_NODE)), \
8150 "Not owning correct locks"
8152 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8155 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8156 """Utility function to remove an instance.
8159 logging.info("Removing block devices for instance %s", instance.name)
8161 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8162 if not ignore_failures:
8163 raise errors.OpExecError("Can't remove instance's disks")
8164 feedback_fn("Warning: can't remove instance's disks")
8166 logging.info("Removing instance %s out of cluster config", instance.name)
8168 lu.cfg.RemoveInstance(instance.name)
8170 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8171 "Instance lock removal conflict"
8173 # Remove lock for the instance
8174 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8177 class LUInstanceQuery(NoHooksLU):
8178 """Logical unit for querying instances.
8181 # pylint: disable=W0142
8184 def CheckArguments(self):
8185 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8186 self.op.output_fields, self.op.use_locking)
8188 def ExpandNames(self):
8189 self.iq.ExpandNames(self)
8191 def DeclareLocks(self, level):
8192 self.iq.DeclareLocks(self, level)
8194 def Exec(self, feedback_fn):
8195 return self.iq.OldStyleQuery(self)
8198 def _ExpandNamesForMigration(lu):
8199 """Expands names for use with L{TLMigrateInstance}.
8201 @type lu: L{LogicalUnit}
8204 if lu.op.target_node is not None:
8205 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8207 lu.needed_locks[locking.LEVEL_NODE] = []
8208 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8210 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8211 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8213 # The node allocation lock is actually only needed for externally replicated
8214 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
8215 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8218 def _DeclareLocksForMigration(lu, level):
8219 """Declares locks for L{TLMigrateInstance}.
8221 @type lu: L{LogicalUnit}
8222 @param level: Lock level
8225 if level == locking.LEVEL_NODE_ALLOC:
8226 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8228 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8230 # Node locks are already declared here rather than at LEVEL_NODE as we need
8231 # the instance object anyway to declare the node allocation lock.
8232 if instance.disk_template in constants.DTS_EXT_MIRROR:
8233 if lu.op.target_node is None:
8234 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8235 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8236 else:
8237 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8238 lu.op.target_node]
8239 del lu.recalculate_locks[locking.LEVEL_NODE]
8240 else:
8241 lu._LockInstancesNodes() # pylint: disable=W0212
8243 elif level == locking.LEVEL_NODE:
8244 # Node locks are declared together with the node allocation lock
8245 assert (lu.needed_locks[locking.LEVEL_NODE] or
8246 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8248 elif level == locking.LEVEL_NODE_RES:
8250 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8251 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8254 class LUInstanceFailover(LogicalUnit):
8255 """Failover an instance.
8258 HPATH = "instance-failover"
8259 HTYPE = constants.HTYPE_INSTANCE
8262 def CheckArguments(self):
8263 """Check the arguments.
8266 self.iallocator = getattr(self.op, "iallocator", None)
8267 self.target_node = getattr(self.op, "target_node", None)
8269 def ExpandNames(self):
8270 self._ExpandAndLockInstance()
8271 _ExpandNamesForMigration(self)
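# The positional arguments below follow TLMigrateInstance.__init__
# (lu, instance_name, cleanup, failover, fallback, ignore_consistency,
# allow_runtime_changes, shutdown_timeout, ignore_ipolicy); a failover is thus
# requested with cleanup=False, failover=True, fallback=False and
# allow_runtime_changes=True.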
8274 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8275 self.op.ignore_consistency, True,
8276 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8278 self.tasklets = [self._migrater]
8280 def DeclareLocks(self, level):
8281 _DeclareLocksForMigration(self, level)
8283 def BuildHooksEnv(self):
8286 This runs on master, primary and secondary nodes of the instance.
8289 instance = self._migrater.instance
8290 source_node = instance.primary_node
8291 target_node = self.op.target_node
8293 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8294 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8295 "OLD_PRIMARY": source_node,
8296 "NEW_PRIMARY": target_node,
8299 if instance.disk_template in constants.DTS_INT_MIRROR:
8300 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8301 env["NEW_SECONDARY"] = source_node
8303 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8305 env.update(_BuildInstanceHookEnvByObject(self, instance))
8309 def BuildHooksNodes(self):
8310 """Build hooks nodes.
8313 instance = self._migrater.instance
8314 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8315 return (nl, nl + [instance.primary_node])
8318 class LUInstanceMigrate(LogicalUnit):
8319 """Migrate an instance.
8321 This is migration without shutting down, compared to the failover,
8322 which is done with shutdown.
8325 HPATH = "instance-migrate"
8326 HTYPE = constants.HTYPE_INSTANCE
8329 def ExpandNames(self):
8330 self._ExpandAndLockInstance()
8331 _ExpandNamesForMigration(self)
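# Here the TLMigrateInstance arguments mean cleanup=self.op.cleanup,
# failover=False, fallback=self.op.allow_failover and ignore_consistency=False,
# with the cluster default shutdown timeout, since a plain migration does not
# shut the instance down (see the class docstring).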
8334 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8335 False, self.op.allow_failover, False,
8336 self.op.allow_runtime_changes,
8337 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8338 self.op.ignore_ipolicy)
8340 self.tasklets = [self._migrater]
8342 def DeclareLocks(self, level):
8343 _DeclareLocksForMigration(self, level)
8345 def BuildHooksEnv(self):
8348 This runs on master, primary and secondary nodes of the instance.
8351 instance = self._migrater.instance
8352 source_node = instance.primary_node
8353 target_node = self.op.target_node
8354 env = _BuildInstanceHookEnvByObject(self, instance)
8356 "MIGRATE_LIVE": self._migrater.live,
8357 "MIGRATE_CLEANUP": self.op.cleanup,
8358 "OLD_PRIMARY": source_node,
8359 "NEW_PRIMARY": target_node,
8360 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8363 if instance.disk_template in constants.DTS_INT_MIRROR:
8364 env["OLD_SECONDARY"] = target_node
8365 env["NEW_SECONDARY"] = source_node
8367 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8371 def BuildHooksNodes(self):
8372 """Build hooks nodes.
8375 instance = self._migrater.instance
8376 snodes = list(instance.secondary_nodes)
8377 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8381 class LUInstanceMove(LogicalUnit):
8382 """Move an instance by data-copying.
8385 HPATH = "instance-move"
8386 HTYPE = constants.HTYPE_INSTANCE
8389 def ExpandNames(self):
8390 self._ExpandAndLockInstance()
8391 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8392 self.op.target_node = target_node
8393 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8394 self.needed_locks[locking.LEVEL_NODE_RES] = []
8395 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8397 def DeclareLocks(self, level):
8398 if level == locking.LEVEL_NODE:
8399 self._LockInstancesNodes(primary_only=True)
8400 elif level == locking.LEVEL_NODE_RES:
8402 self.needed_locks[locking.LEVEL_NODE_RES] = \
8403 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8405 def BuildHooksEnv(self):
8408 This runs on master, primary and secondary nodes of the instance.
8412 "TARGET_NODE": self.op.target_node,
8413 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8415 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8418 def BuildHooksNodes(self):
8419 """Build hooks nodes.
8423 self.cfg.GetMasterNode(),
8424 self.instance.primary_node,
8425 self.op.target_node,
8429 def CheckPrereq(self):
8430 """Check prerequisites.
8432 This checks that the instance is in the cluster.
8435 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8436 assert self.instance is not None, \
8437 "Cannot retrieve locked instance %s" % self.op.instance_name
8439 node = self.cfg.GetNodeInfo(self.op.target_node)
8440 assert node is not None, \
8441 "Cannot retrieve locked node %s" % self.op.target_node
8443 self.target_node = target_node = node.name
8445 if target_node == instance.primary_node:
8446 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8447 (instance.name, target_node),
8450 bep = self.cfg.GetClusterInfo().FillBE(instance)
8452 for idx, dsk in enumerate(instance.disks):
8453 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8454 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8455 " cannot copy" % idx, errors.ECODE_STATE)
8457 _CheckNodeOnline(self, target_node)
8458 _CheckNodeNotDrained(self, target_node)
8459 _CheckNodeVmCapable(self, target_node)
8460 cluster = self.cfg.GetClusterInfo()
8461 group_info = self.cfg.GetNodeGroup(node.group)
8462 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8463 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
8464 ignore=self.op.ignore_ipolicy)
8466 if instance.admin_state == constants.ADMINST_UP:
8467 # check memory requirements on the secondary node
8468 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8469 instance.name, bep[constants.BE_MAXMEM],
8470 instance.hypervisor)
8471 else:
8472 self.LogInfo("Not checking memory on the secondary node as"
8473 " instance will not be started")
8475 # check bridge existence
8476 _CheckInstanceBridgesExist(self, instance, node=target_node)
8478 def Exec(self, feedback_fn):
8479 """Move an instance.
8481 The move is done by shutting it down on its present node, copying
8482 the data over (slow) and starting it on the new node.
8485 instance = self.instance
8487 source_node = instance.primary_node
8488 target_node = self.target_node
8490 self.LogInfo("Shutting down instance %s on source node %s",
8491 instance.name, source_node)
8493 assert (self.owned_locks(locking.LEVEL_NODE) ==
8494 self.owned_locks(locking.LEVEL_NODE_RES))
8496 result = self.rpc.call_instance_shutdown(source_node, instance,
8497 self.op.shutdown_timeout)
8498 msg = result.fail_msg
8499 if msg:
8500 if self.op.ignore_consistency:
8501 self.LogWarning("Could not shutdown instance %s on node %s."
8502 " Proceeding anyway. Please make sure node"
8503 " %s is down. Error details: %s",
8504 instance.name, source_node, source_node, msg)
8505 else:
8506 raise errors.OpExecError("Could not shutdown instance %s on"
8507 " node %s: %s" %
8508 (instance.name, source_node, msg))
8510 # create the target disks
8511 try:
8512 _CreateDisks(self, instance, target_node=target_node)
8513 except errors.OpExecError:
8514 self.LogWarning("Device creation failed, reverting...")
8515 try:
8516 _RemoveDisks(self, instance, target_node=target_node)
8517 finally:
8518 self.cfg.ReleaseDRBDMinors(instance.name)
8519 raise
8521 cluster_name = self.cfg.GetClusterInfo().cluster_name
8523 errs = []
8524 # activate, get path, copy the data over
8525 for idx, disk in enumerate(instance.disks):
8526 self.LogInfo("Copying data for disk %d", idx)
8527 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8528 instance.name, True, idx)
8529 if result.fail_msg:
8530 self.LogWarning("Can't assemble newly created disk %d: %s",
8531 idx, result.fail_msg)
8532 errs.append(result.fail_msg)
8533 break
8534 dev_path = result.payload
8535 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8536 target_node, dev_path,
8537 cluster_name)
8538 if result.fail_msg:
8539 self.LogWarning("Can't copy data over for disk %d: %s",
8540 idx, result.fail_msg)
8541 errs.append(result.fail_msg)
8542 break
8544 if errs:
8545 self.LogWarning("Some disks failed to copy, aborting")
8546 try:
8547 _RemoveDisks(self, instance, target_node=target_node)
8548 finally:
8549 self.cfg.ReleaseDRBDMinors(instance.name)
8550 raise errors.OpExecError("Errors during disk copy: %s" %
8551 ",".join(errs))
8553 instance.primary_node = target_node
8554 self.cfg.Update(instance, feedback_fn)
8556 self.LogInfo("Removing the disks on the original node")
8557 _RemoveDisks(self, instance, target_node=source_node)
8559 # Only start the instance if it's marked as up
8560 if instance.admin_state == constants.ADMINST_UP:
8561 self.LogInfo("Starting instance %s on node %s",
8562 instance.name, target_node)
8564 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8565 ignore_secondaries=True)
8566 if not disks_ok:
8567 _ShutdownInstanceDisks(self, instance)
8568 raise errors.OpExecError("Can't activate the instance's disks")
8570 result = self.rpc.call_instance_start(target_node,
8571 (instance, None, None), False)
8572 msg = result.fail_msg
8573 if msg:
8574 _ShutdownInstanceDisks(self, instance)
8575 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8576 (instance.name, target_node, msg))
8579 class LUNodeMigrate(LogicalUnit):
8580 """Migrate all instances from a node.
8583 HPATH = "node-migrate"
8584 HTYPE = constants.HTYPE_NODE
8587 def CheckArguments(self):
8590 def ExpandNames(self):
8591 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8593 self.share_locks = _ShareAll()
8594 self.needed_locks = {
8595 locking.LEVEL_NODE: [self.op.node_name],
8598 def BuildHooksEnv(self):
8601 This runs on the master, the primary and all the secondaries.
8605 "NODE_NAME": self.op.node_name,
8606 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8609 def BuildHooksNodes(self):
8610 """Build hooks nodes.
8613 nl = [self.cfg.GetMasterNode()]
8616 def CheckPrereq(self):
8619 def Exec(self, feedback_fn):
8620 # Prepare jobs for migration instances
8621 allow_runtime_changes = self.op.allow_runtime_changes
8623 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8626 iallocator=self.op.iallocator,
8627 target_node=self.op.target_node,
8628 allow_runtime_changes=allow_runtime_changes,
8629 ignore_ipolicy=self.op.ignore_ipolicy)]
8630 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
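# One single-opcode job is built per primary instance on the evacuated node;
# each inner list is submitted as a separate job (see ResultWithJobs), so the
# individual migrations are scheduled independently.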
8632 # TODO: Run iallocator in this opcode and pass correct placement options to
8633 # OpInstanceMigrate. Since other jobs can modify the cluster between
8634 # running the iallocator and the actual migration, a good consistency model
8635 # will have to be found.
8637 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8638 frozenset([self.op.node_name]))
8640 return ResultWithJobs(jobs)
8643 class TLMigrateInstance(Tasklet):
8644 """Tasklet class for instance migration.
8647 @ivar live: whether the migration will be done live or non-live;
8648 this variable is initialized only after CheckPrereq has run
8649 @type cleanup: boolean
8650 @ivar cleanup: Whether we clean up after a failed migration
8651 @type iallocator: string
8652 @ivar iallocator: The iallocator used to determine target_node
8653 @type target_node: string
8654 @ivar target_node: If given, the target_node to reallocate the instance to
8655 @type failover: boolean
8656 @ivar failover: Whether operation results in failover or migration
8657 @type fallback: boolean
8658 @ivar fallback: Whether fallback to failover is allowed if migration not
8660 @type ignore_consistency: boolean
8661 @ivar ignore_consistency: Whether we should ignore consistency between source
8663 @type shutdown_timeout: int
8664 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8665 @type ignore_ipolicy: bool
8666 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8671 _MIGRATION_POLL_INTERVAL = 1 # seconds
8672 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8674 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8675 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8676 ignore_ipolicy):
8677 """Initializes this class.
8680 Tasklet.__init__(self, lu)
8683 self.instance_name = instance_name
8684 self.cleanup = cleanup
8685 self.live = False # will be overridden later
8686 self.failover = failover
8687 self.fallback = fallback
8688 self.ignore_consistency = ignore_consistency
8689 self.shutdown_timeout = shutdown_timeout
8690 self.ignore_ipolicy = ignore_ipolicy
8691 self.allow_runtime_changes = allow_runtime_changes
8693 def CheckPrereq(self):
8694 """Check prerequisites.
8696 This checks that the instance is in the cluster.
8699 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8700 instance = self.cfg.GetInstanceInfo(instance_name)
8701 assert instance is not None
8702 self.instance = instance
8703 cluster = self.cfg.GetClusterInfo()
8705 if (not self.cleanup and
8706 not instance.admin_state == constants.ADMINST_UP and
8707 not self.failover and self.fallback):
8708 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8709 " switching to failover")
8710 self.failover = True
8712 if instance.disk_template not in constants.DTS_MIRRORED:
8717 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8718 " %s" % (instance.disk_template, text),
8721 if instance.disk_template in constants.DTS_EXT_MIRROR:
8722 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8724 if self.lu.op.iallocator:
8725 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8726 self._RunAllocator()
8727 else:
8728 # We set self.target_node as it is required by
8729 # BuildHooksEnv
8730 self.target_node = self.lu.op.target_node
8732 # Check that the target node is correct in terms of instance policy
8733 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8734 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8735 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8737 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8738 ignore=self.ignore_ipolicy)
8740 # self.target_node is already populated, either directly or by the
8742 target_node = self.target_node
8743 if self.target_node == instance.primary_node:
8744 raise errors.OpPrereqError("Cannot migrate instance %s"
8745 " to its primary (%s)" %
8746 (instance.name, instance.primary_node),
8749 if len(self.lu.tasklets) == 1:
8750 # It is safe to release locks only when we're the only tasklet
8752 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8753 keep=[instance.primary_node, self.target_node])
8754 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8757 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8759 secondary_nodes = instance.secondary_nodes
8760 if not secondary_nodes:
8761 raise errors.ConfigurationError("No secondary node but using"
8762 " %s disk template" %
8763 instance.disk_template)
8764 target_node = secondary_nodes[0]
8765 if self.lu.op.iallocator or (self.lu.op.target_node and
8766 self.lu.op.target_node != target_node):
8767 if self.failover:
8768 text = "failed over"
8769 else:
8770 text = "migrated"
8771 raise errors.OpPrereqError("Instances with disk template %s cannot"
8772 " be %s to arbitrary nodes"
8773 " (neither an iallocator nor a target"
8774 " node can be passed)" %
8775 (instance.disk_template, text),
8777 nodeinfo = self.cfg.GetNodeInfo(target_node)
8778 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8779 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8781 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8782 ignore=self.ignore_ipolicy)
8784 i_be = cluster.FillBE(instance)
8786 # check memory requirements on the secondary node
8787 if (not self.cleanup and
8788 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8789 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8790 "migrating instance %s" %
8791 instance.name,
8792 i_be[constants.BE_MINMEM],
8793 instance.hypervisor)
8794 else:
8795 self.lu.LogInfo("Not checking memory on the secondary node as"
8796 " instance will not be started")
8798 # check if failover must be forced instead of migration
8799 if (not self.cleanup and not self.failover and
8800 i_be[constants.BE_ALWAYS_FAILOVER]):
8801 self.lu.LogInfo("Instance configured to always failover; fallback"
8803 self.failover = True
8805 # check bridge existence
8806 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8808 if not self.cleanup:
8809 _CheckNodeNotDrained(self.lu, target_node)
8810 if not self.failover:
8811 result = self.rpc.call_instance_migratable(instance.primary_node,
8813 if result.fail_msg and self.fallback:
8814 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8815 " failover")
8816 self.failover = True
8817 else:
8818 result.Raise("Can't migrate, please use failover",
8819 prereq=True, ecode=errors.ECODE_STATE)
8821 assert not (self.failover and self.cleanup)
8823 if not self.failover:
8824 if self.lu.op.live is not None and self.lu.op.mode is not None:
8825 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8826 " parameters are accepted",
8828 if self.lu.op.live is not None:
8829 if self.lu.op.live:
8830 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8831 else:
8832 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8833 # reset the 'live' parameter to None so that repeated
8834 # invocations of CheckPrereq do not raise an exception
8835 self.lu.op.live = None
8836 elif self.lu.op.mode is None:
8837 # read the default value from the hypervisor
8838 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8839 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8841 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8842 else:
8843 # Failover is never live
8844 self.live = False
8846 if not (self.failover or self.cleanup):
8847 remote_info = self.rpc.call_instance_info(instance.primary_node,
8849 instance.hypervisor)
8850 remote_info.Raise("Error checking instance on node %s" %
8851 instance.primary_node)
8852 instance_running = bool(remote_info.payload)
8853 if instance_running:
8854 self.current_mem = int(remote_info.payload["memory"])
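# current_mem is later compared against tgt_free_mem in _ExecMigration: if
# the instance does not fit on the target node and runtime changes are
# allowed, its memory is ballooned down before the transfer starts.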
8856 def _RunAllocator(self):
8857 """Run the allocator based on input opcode.
8860 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8862 # FIXME: add a self.ignore_ipolicy option
8863 req = iallocator.IAReqRelocate(name=self.instance_name,
8864 relocate_from=[self.instance.primary_node])
8865 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8867 ial.Run(self.lu.op.iallocator)
8869 if not ial.success:
8870 raise errors.OpPrereqError("Can't compute nodes using"
8871 " iallocator '%s': %s" %
8872 (self.lu.op.iallocator, ial.info),
8874 self.target_node = ial.result[0]
8875 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8876 self.instance_name, self.lu.op.iallocator,
8877 utils.CommaJoin(ial.result))
8879 def _WaitUntilSync(self):
8880 """Poll with custom rpc for disk sync.
8882 This uses our own step-based rpc call.
8885 self.feedback_fn("* wait until resync is done")
8886 all_done = False
8887 while not all_done:
8888 all_done = True
8889 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8890 self.nodes_ip,
8891 (self.instance.disks,
8892 self.instance))
8893 min_percent = 100
8894 for node, nres in result.items():
8895 nres.Raise("Cannot resync disks on node %s" % node)
8896 node_done, node_percent = nres.payload
8897 all_done = all_done and node_done
8898 if node_percent is not None:
8899 min_percent = min(min_percent, node_percent)
8900 if not all_done:
8901 if min_percent < 100:
8902 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8905 def _EnsureSecondary(self, node):
8906 """Demote a node to secondary.
8909 self.feedback_fn("* switching node %s to secondary mode" % node)
8911 for dev in self.instance.disks:
8912 self.cfg.SetDiskID(dev, node)
8914 result = self.rpc.call_blockdev_close(node, self.instance.name,
8915 self.instance.disks)
8916 result.Raise("Cannot change disk to secondary on node %s" % node)
8918 def _GoStandalone(self):
8919 """Disconnect from the network.
8922 self.feedback_fn("* changing into standalone mode")
8923 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8924 self.instance.disks)
8925 for node, nres in result.items():
8926 nres.Raise("Cannot disconnect disks node %s" % node)
8928 def _GoReconnect(self, multimaster):
8929 """Reconnect to the network.
8932 if multimaster:
8933 msg = "multi-master"
8934 else:
8935 msg = "single-master"
8936 self.feedback_fn("* changing disks into %s mode" % msg)
8937 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8938 (self.instance.disks, self.instance),
8939 self.instance.name, multimaster)
8940 for node, nres in result.items():
8941 nres.Raise("Cannot change disks config on node %s" % node)
8943 def _ExecCleanup(self):
8944 """Try to cleanup after a failed migration.
8946 The cleanup is done by:
8947 - check that the instance is running only on one node
8948 (and update the config if needed)
8949 - change disks on its secondary node to secondary
8950 - wait until disks are fully synchronized
8951 - disconnect from the network
8952 - change disks into single-master mode
8953 - wait again until disks are fully synchronized
8956 instance = self.instance
8957 target_node = self.target_node
8958 source_node = self.source_node
8960 # check running on only one node
8961 self.feedback_fn("* checking where the instance actually runs"
8962 " (if this hangs, the hypervisor might be in"
8964 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8965 for node, result in ins_l.items():
8966 result.Raise("Can't contact node %s" % node)
8968 runningon_source = instance.name in ins_l[source_node].payload
8969 runningon_target = instance.name in ins_l[target_node].payload
8971 if runningon_source and runningon_target:
8972 raise errors.OpExecError("Instance seems to be running on two nodes,"
8973 " or the hypervisor is confused; you will have"
8974 " to ensure manually that it runs only on one"
8975 " and restart this operation")
8977 if not (runningon_source or runningon_target):
8978 raise errors.OpExecError("Instance does not seem to be running at all;"
8979 " in this case it's safer to repair by"
8980 " running 'gnt-instance stop' to ensure disk"
8981 " shutdown, and then restarting it")
8983 if runningon_target:
8984 # the migration has actually succeeded, we need to update the config
8985 self.feedback_fn("* instance running on secondary node (%s),"
8986 " updating config" % target_node)
8987 instance.primary_node = target_node
8988 self.cfg.Update(instance, self.feedback_fn)
8989 demoted_node = source_node
8991 self.feedback_fn("* instance confirmed to be running on its"
8992 " primary node (%s)" % source_node)
8993 demoted_node = target_node
8995 if instance.disk_template in constants.DTS_INT_MIRROR:
8996 self._EnsureSecondary(demoted_node)
8997 try:
8998 self._WaitUntilSync()
8999 except errors.OpExecError:
9000 # we ignore here errors, since if the device is standalone, it
9001 # won't be able to sync
9002 pass
9003 self._GoStandalone()
9004 self._GoReconnect(False)
9005 self._WaitUntilSync()
9007 self.feedback_fn("* done")
9009 def _RevertDiskStatus(self):
9010 """Try to revert the disk status after a failed migration.
9013 target_node = self.target_node
9014 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
9015 return
9017 try:
9018 self._EnsureSecondary(target_node)
9019 self._GoStandalone()
9020 self._GoReconnect(False)
9021 self._WaitUntilSync()
9022 except errors.OpExecError, err:
9023 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
9024 " please try to recover the instance manually;"
9025 " error '%s'" % str(err))
9027 def _AbortMigration(self):
9028 """Call the hypervisor code to abort a started migration.
9031 instance = self.instance
9032 target_node = self.target_node
9033 source_node = self.source_node
9034 migration_info = self.migration_info
9036 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
9040 abort_msg = abort_result.fail_msg
9041 if abort_msg:
9042 logging.error("Aborting migration failed on target node %s: %s",
9043 target_node, abort_msg)
9044 # Don't raise an exception here, as we still have to try to revert the
9045 # disk status, even if this step failed.
9047 abort_result = self.rpc.call_instance_finalize_migration_src(
9048 source_node, instance, False, self.live)
9049 abort_msg = abort_result.fail_msg
9050 if abort_msg:
9051 logging.error("Aborting migration failed on source node %s: %s",
9052 source_node, abort_msg)
9054 def _ExecMigration(self):
9055 """Migrate an instance.
9057 The migrate is done by:
9058 - change the disks into dual-master mode
9059 - wait until disks are fully synchronized again
9060 - migrate the instance
9061 - change disks on the new secondary node (the old primary) to secondary
9062 - wait until disks are fully synchronized
9063 - change disks into single-master mode
9066 instance = self.instance
9067 target_node = self.target_node
9068 source_node = self.source_node
9070 # Check for hypervisor version mismatch and warn the user.
9071 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9072 None, [self.instance.hypervisor], False)
9073 for ninfo in nodeinfo.values():
9074 ninfo.Raise("Unable to retrieve node information from node '%s'" %
9076 (_, _, (src_info, )) = nodeinfo[source_node].payload
9077 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9079 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9080 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9081 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9082 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9083 if src_version != dst_version:
9084 self.feedback_fn("* warning: hypervisor version mismatch between"
9085 " source (%s) and target (%s) node" %
9086 (src_version, dst_version))
9088 self.feedback_fn("* checking disk consistency between source and target")
9089 for (idx, dev) in enumerate(instance.disks):
9090 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9091 raise errors.OpExecError("Disk %s is degraded or not fully"
9092 " synchronized on target node,"
9093 " aborting migration" % idx)
9095 if self.current_mem > self.tgt_free_mem:
9096 if not self.allow_runtime_changes:
9097 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9098 " free memory to fit instance %s on target"
9099 " node %s (have %dMB, need %dMB)" %
9100 (instance.name, target_node,
9101 self.tgt_free_mem, self.current_mem))
9102 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9103 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9106 rpcres.Raise("Cannot modify instance runtime memory")
9108 # First get the migration information from the remote node
9109 result = self.rpc.call_migration_info(source_node, instance)
9110 msg = result.fail_msg
9112 log_err = ("Failed fetching source migration information from %s: %s" %
9114 logging.error(log_err)
9115 raise errors.OpExecError(log_err)
9117 self.migration_info = migration_info = result.payload
9119 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9120 # Then switch the disks to master/master mode
9121 self._EnsureSecondary(target_node)
9122 self._GoStandalone()
9123 self._GoReconnect(True)
9124 self._WaitUntilSync()
9126 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9127 result = self.rpc.call_accept_instance(target_node,
9130 self.nodes_ip[target_node])
9132 msg = result.fail_msg
9133 if msg:
9134 logging.error("Instance pre-migration failed, trying to revert"
9135 " disk status: %s", msg)
9136 self.feedback_fn("Pre-migration failed, aborting")
9137 self._AbortMigration()
9138 self._RevertDiskStatus()
9139 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9140 (instance.name, msg))
9142 self.feedback_fn("* migrating instance to %s" % target_node)
9143 result = self.rpc.call_instance_migrate(source_node, instance,
9144 self.nodes_ip[target_node],
9146 msg = result.fail_msg
9148 logging.error("Instance migration failed, trying to revert"
9149 " disk status: %s", msg)
9150 self.feedback_fn("Migration failed, aborting")
9151 self._AbortMigration()
9152 self._RevertDiskStatus()
9153 raise errors.OpExecError("Could not migrate instance %s: %s" %
9154 (instance.name, msg))
9156 self.feedback_fn("* starting memory transfer")
9157 last_feedback = time.time()
9158 while True:
9159 result = self.rpc.call_instance_get_migration_status(source_node,
9160 instance)
9161 msg = result.fail_msg
9162 ms = result.payload # MigrationStatus instance
9163 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9164 logging.error("Instance migration failed, trying to revert"
9165 " disk status: %s", msg)
9166 self.feedback_fn("Migration failed, aborting")
9167 self._AbortMigration()
9168 self._RevertDiskStatus()
9169 if not msg:
9170 msg = "hypervisor returned failure"
9171 raise errors.OpExecError("Could not migrate instance %s: %s" %
9172 (instance.name, msg))
9174 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9175 self.feedback_fn("* memory transfer complete")
9176 break
9178 if (utils.TimeoutExpired(last_feedback,
9179 self._MIGRATION_FEEDBACK_INTERVAL) and
9180 ms.transferred_ram is not None):
9181 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9182 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9183 last_feedback = time.time()
9185 time.sleep(self._MIGRATION_POLL_INTERVAL)
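# The polling loop above queries the migration status every
# _MIGRATION_POLL_INTERVAL seconds and prints a progress line at most every
# _MIGRATION_FEEDBACK_INTERVAL seconds, based on transferred_ram/total_ram.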
9187 result = self.rpc.call_instance_finalize_migration_src(source_node,
9191 msg = result.fail_msg
9192 if msg:
9193 logging.error("Instance migration succeeded, but finalization failed"
9194 " on the source node: %s", msg)
9195 raise errors.OpExecError("Could not finalize instance migration: %s" %
9198 instance.primary_node = target_node
9200 # distribute new instance config to the other nodes
9201 self.cfg.Update(instance, self.feedback_fn)
9203 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9207 msg = result.fail_msg
9208 if msg:
9209 logging.error("Instance migration succeeded, but finalization failed"
9210 " on the target node: %s", msg)
9211 raise errors.OpExecError("Could not finalize instance migration: %s" %
9214 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9215 self._EnsureSecondary(source_node)
9216 self._WaitUntilSync()
9217 self._GoStandalone()
9218 self._GoReconnect(False)
9219 self._WaitUntilSync()
9221 # If the instance's disk template is `rbd' or `ext' and there was a
9222 # successful migration, unmap the device from the source node.
9223 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9224 disks = _ExpandCheckDisks(instance, instance.disks)
9225 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9227 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9228 msg = result.fail_msg
9230 logging.error("Migration was successful, but couldn't unmap the"
9231 " block device %s on source node %s: %s",
9232 disk.iv_name, source_node, msg)
9233 logging.error("You need to unmap the device %s manually on %s",
9234 disk.iv_name, source_node)
9236 self.feedback_fn("* done")
9238 def _ExecFailover(self):
9239 """Failover an instance.
9241 The failover is done by shutting it down on its present node and
9242 starting it on the secondary.
9245 instance = self.instance
9246 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9248 source_node = instance.primary_node
9249 target_node = self.target_node
9251 if instance.admin_state == constants.ADMINST_UP:
9252 self.feedback_fn("* checking disk consistency between source and target")
9253 for (idx, dev) in enumerate(instance.disks):
9254 # for drbd, these are drbd over lvm
9255 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9257 if primary_node.offline:
9258 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9260 (primary_node.name, idx, target_node))
9261 elif not self.ignore_consistency:
9262 raise errors.OpExecError("Disk %s is degraded on target node,"
9263 " aborting failover" % idx)
9265 self.feedback_fn("* not checking disk consistency as instance is not"
9268 self.feedback_fn("* shutting down instance on source node")
9269 logging.info("Shutting down instance %s on node %s",
9270 instance.name, source_node)
9272 result = self.rpc.call_instance_shutdown(source_node, instance,
9273 self.shutdown_timeout)
9274 msg = result.fail_msg
9276 if self.ignore_consistency or primary_node.offline:
9277 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9278 " proceeding anyway; please make sure node"
9279 " %s is down; error details: %s",
9280 instance.name, source_node, source_node, msg)
9282 raise errors.OpExecError("Could not shutdown instance %s on"
9284 (instance.name, source_node, msg))
9286 self.feedback_fn("* deactivating the instance's disks on source node")
9287 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9288 raise errors.OpExecError("Can't shut down the instance's disks")
9290 instance.primary_node = target_node
9291 # distribute new instance config to the other nodes
9292 self.cfg.Update(instance, self.feedback_fn)
9294 # Only start the instance if it's marked as up
9295 if instance.admin_state == constants.ADMINST_UP:
9296 self.feedback_fn("* activating the instance's disks on target node %s" %
9298 logging.info("Starting instance %s on node %s",
9299 instance.name, target_node)
9301 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9302 ignore_secondaries=True)
9304 _ShutdownInstanceDisks(self.lu, instance)
9305 raise errors.OpExecError("Can't activate the instance's disks")
9307 self.feedback_fn("* starting the instance on the target node %s" %
9309 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9311 msg = result.fail_msg
9313 _ShutdownInstanceDisks(self.lu, instance)
9314 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9315 (instance.name, target_node, msg))
9317 def Exec(self, feedback_fn):
9318 """Perform the migration.
9321 self.feedback_fn = feedback_fn
9322 self.source_node = self.instance.primary_node
9324 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9325 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9326 self.target_node = self.instance.secondary_nodes[0]
9327 # Otherwise self.target_node has been populated either
9328 # directly, or through an iallocator.
9330 self.all_nodes = [self.source_node, self.target_node]
9331 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9332 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9335 feedback_fn("Failover instance %s" % self.instance.name)
9336 self._ExecFailover()
9338 feedback_fn("Migrating instance %s" % self.instance.name)
9341 return self._ExecCleanup()
9343 return self._ExecMigration()
9346 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9348 """Wrapper around L{_CreateBlockDevInner}.
9350 This method annotates the root device first.
9353 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9354 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9355 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9356 force_open, excl_stor)
9359 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9360 info, force_open, excl_stor):
9361 """Create a tree of block devices on a given node.
9363 If this device type has to be created on secondaries, create it and all its children.
9366 If not, just recurse to children keeping the same 'force' value.
9368 @attention: The device has to be annotated already.
9370 @param lu: the lu on whose behalf we execute
9371 @param node: the node on which to create the device
9372 @type instance: L{objects.Instance}
9373 @param instance: the instance which owns the device
9374 @type device: L{objects.Disk}
9375 @param device: the device to create
9376 @type force_create: boolean
9377 @param force_create: whether to force creation of this device; this
9378 will be changed to True whenever we find a device which has the
9379 CreateOnSecondary() attribute
9380 @param info: the extra 'metadata' we should attach to the device
9381 (this will be represented as a LVM tag)
9382 @type force_open: boolean
9383 @param force_open: this parameter will be passed to the
9384 L{backend.BlockdevCreate} function where it specifies
9385 whether we run on primary or not, and it affects both
9386 the child assembly and the device's own Open() execution
9387 @type excl_stor: boolean
9388 @param excl_stor: Whether exclusive_storage is active for the node
9391 if device.CreateOnSecondary():
9395 for child in device.children:
9396 _CreateBlockDevInner(lu, node, instance, child, force_create,
9397 info, force_open, excl_stor)
9399 if not force_create:
9402 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9406 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9408 """Create a single block device on a given node.
9410 This will not recurse over children of the device, so they must be created in advance.
9413 @param lu: the lu on whose behalf we execute
9414 @param node: the node on which to create the device
9415 @type instance: L{objects.Instance}
9416 @param instance: the instance which owns the device
9417 @type device: L{objects.Disk}
9418 @param device: the device to create
9419 @param info: the extra 'metadata' we should attach to the device
9420 (this will be represented as a LVM tag)
9421 @type force_open: boolean
9422 @param force_open: this parameter will be passed to the
9423 L{backend.BlockdevCreate} function where it specifies
9424 whether we run on primary or not, and it affects both
9425 the child assembly and the device's own Open() execution
9426 @type excl_stor: boolean
9427 @param excl_stor: Whether exclusive_storage is active for the node
9430 lu.cfg.SetDiskID(device, node)
9431 result = lu.rpc.call_blockdev_create(node, device, device.size,
9432 instance.name, force_open, info,
9434 result.Raise("Can't create block device %s on"
9435 " node %s for instance %s" % (device, node, instance.name))
9436 if device.physical_id is None:
9437 device.physical_id = result.payload
9440 def _GenerateUniqueNames(lu, exts):
9441 """Generate a suitable LV name.
9443 This will generate a logical volume name for the given instance.
9448 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9449 results.append("%s%s" % (new_id, val))
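# Usage sketch (illustrative IDs only): callers such as _GenerateDiskTemplate
# pass per-disk suffixes, e.g.
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# and get back one name per suffix, each built from a freshly generated unique
# ID, along the lines of
#   ["d2b8f3a0-....disk0", "7c1e42b9-....disk1"]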
9453 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9454 iv_name, p_minor, s_minor):
9455 """Generate a drbd8 device complete with its children.
9458 assert len(vgnames) == len(names) == 2
9459 port = lu.cfg.AllocatePort()
9460 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9462 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9463 logical_id=(vgnames[0], names[0]),
9465 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9466 size=constants.DRBD_META_SIZE,
9467 logical_id=(vgnames[1], names[1]),
9469 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9470 logical_id=(primary, secondary, port,
9473 children=[dev_data, dev_meta],
9474 iv_name=iv_name, params={})
9478 _DISK_TEMPLATE_NAME_PREFIX = {
9479 constants.DT_PLAIN: "",
9480 constants.DT_RBD: ".rbd",
9481 constants.DT_EXT: ".ext",
9485 _DISK_TEMPLATE_DEVICE_TYPE = {
9486 constants.DT_PLAIN: constants.LD_LV,
9487 constants.DT_FILE: constants.LD_FILE,
9488 constants.DT_SHARED_FILE: constants.LD_FILE,
9489 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9490 constants.DT_RBD: constants.LD_RBD,
9491 constants.DT_EXT: constants.LD_EXT,
9495 def _GenerateDiskTemplate(
9496 lu, template_name, instance_name, primary_node, secondary_nodes,
9497 disk_info, file_storage_dir, file_driver, base_index,
9498 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9499 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9500 """Generate the entire disk layout for a given template type.
9503 vgname = lu.cfg.GetVGName()
9504 disk_count = len(disk_info)
9507 if template_name == constants.DT_DISKLESS:
9509 elif template_name == constants.DT_DRBD8:
9510 if len(secondary_nodes) != 1:
9511 raise errors.ProgrammerError("Wrong template configuration")
9512 remote_node = secondary_nodes[0]
9513 minors = lu.cfg.AllocateDRBDMinor(
9514 [primary_node, remote_node] * len(disk_info), instance_name)
9516 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9518 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9521 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9522 for i in range(disk_count)]):
9523 names.append(lv_prefix + "_data")
9524 names.append(lv_prefix + "_meta")
9525 for idx, disk in enumerate(disk_info):
9526 disk_index = idx + base_index
9527 data_vg = disk.get(constants.IDISK_VG, vgname)
9528 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9529 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9530 disk[constants.IDISK_SIZE],
9532 names[idx * 2:idx * 2 + 2],
9533 "disk/%d" % disk_index,
9534 minors[idx * 2], minors[idx * 2 + 1])
9535 disk_dev.mode = disk[constants.IDISK_MODE]
9536 disks.append(disk_dev)
9539 raise errors.ProgrammerError("Wrong template configuration")
9541 if template_name == constants.DT_FILE:
9543 elif template_name == constants.DT_SHARED_FILE:
9544 _req_shr_file_storage()
9546 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9547 if name_prefix is None:
9550 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9551 (name_prefix, base_index + i)
9552 for i in range(disk_count)])
9554 if template_name == constants.DT_PLAIN:
9556 def logical_id_fn(idx, _, disk):
9557 vg = disk.get(constants.IDISK_VG, vgname)
9558 return (vg, names[idx])
9560 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9562 lambda _, disk_index, disk: (file_driver,
9563 "%s/disk%d" % (file_storage_dir,
9565 elif template_name == constants.DT_BLOCK:
9567 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9568 disk[constants.IDISK_ADOPT])
9569 elif template_name == constants.DT_RBD:
9570 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9571 elif template_name == constants.DT_EXT:
9572 def logical_id_fn(idx, _, disk):
9573 provider = disk.get(constants.IDISK_PROVIDER, None)
9574 if provider is None:
9575 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9576 " not found", constants.DT_EXT,
9577 constants.IDISK_PROVIDER)
9578 return (provider, names[idx])
9580 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9582 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9584 for idx, disk in enumerate(disk_info):
9586 # Only for the Ext template add disk_info to params
9587 if template_name == constants.DT_EXT:
9588 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9590 if key not in constants.IDISK_PARAMS:
9591 params[key] = disk[key]
9592 disk_index = idx + base_index
9593 size = disk[constants.IDISK_SIZE]
9594 feedback_fn("* disk %s, size %s" %
9595 (disk_index, utils.FormatUnit(size, "h")))
9596 disks.append(objects.Disk(dev_type=dev_type, size=size,
9597 logical_id=logical_id_fn(idx, disk_index, disk),
9598 iv_name="disk/%d" % disk_index,
9599 mode=disk[constants.IDISK_MODE],
9605 def _GetInstanceInfoText(instance):
9606 """Compute the text that should be added to the disk's metadata.
9609 return "originstname+%s" % instance.name
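# Small illustration (hypothetical instance name): for an instance called
# "web1" this returns "originstname+web1", which is then attached to the
# instance's block devices as metadata (e.g. an LVM tag).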
9612 def _CalcEta(time_taken, written, total_size):
9613 """Calculates the ETA based on size written and total size.
9615 @param time_taken: The time taken so far
9616 @param written: amount written so far
9617 @param total_size: The total size of data to be written
9618 @return: The remaining time in seconds
9621 avg_time = time_taken / float(written)
9622 return (total_size - written) * avg_time
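# Worked example (illustrative numbers): if 30 seconds were needed to write
# 100 MiB out of 500 MiB, the average time per unit is 30 / 100.0 = 0.3, so
#   _CalcEta(30, 100, 500) == (500 - 100) * 0.3 == 120.0
# i.e. roughly two more minutes are estimated to remain.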
9625 def _WipeDisks(lu, instance, disks=None):
9626 """Wipes instance disks.
9628 @type lu: L{LogicalUnit}
9629 @param lu: the logical unit on whose behalf we execute
9630 @type instance: L{objects.Instance}
9631 @param instance: the instance whose disks we should create
9632 @type disks: None or list of tuple of (number, L{objects.Disk}, number)
9633 @param disks: Disk details; tuple contains disk index, disk object and the start offset
9637 node = instance.primary_node
9640 disks = [(idx, disk, 0)
9641 for (idx, disk) in enumerate(instance.disks)]
9643 for (_, device, _) in disks:
9644 lu.cfg.SetDiskID(device, node)
9646 logging.info("Pausing synchronization of disks of instance '%s'",
9648 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9649 (map(compat.snd, disks),
9652 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9654 for idx, success in enumerate(result.payload):
9656 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9657 " failed", idx, instance.name)
9660 for (idx, device, offset) in disks:
9661 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9662 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9664 int(min(constants.MAX_WIPE_CHUNK,
9665 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
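# Numeric illustration (assuming, for the sake of the example, MAX_WIPE_CHUNK
# of 1024 MiB and MIN_WIPE_CHUNK_PERCENT of 10): a 4096 MiB disk is wiped in
# chunks of int(min(1024, 4096 * 10 / 100.0)) == 409 MiB, while very large
# disks are capped at 1024 MiB per chunk.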
9669 start_time = time.time()
9674 info_text = (" (from %s to %s)" %
9675 (utils.FormatUnit(offset, "h"),
9676 utils.FormatUnit(size, "h")))
9678 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9680 logging.info("Wiping disk %d for instance %s on node %s using"
9681 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9683 while offset < size:
9684 wipe_size = min(wipe_chunk_size, size - offset)
9686 logging.debug("Wiping disk %d, offset %s, chunk %s",
9687 idx, offset, wipe_size)
9689 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9691 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9692 (idx, offset, wipe_size))
9696 if now - last_output >= 60:
9697 eta = _CalcEta(now - start_time, offset, size)
9698 lu.LogInfo(" - done: %.1f%% ETA: %s",
9699 offset / float(size) * 100, utils.FormatSeconds(eta))
9702 logging.info("Resuming synchronization of disks for instance '%s'",
9705 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9706 (map(compat.snd, disks),
9711 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9712 node, result.fail_msg)
9714 for idx, success in enumerate(result.payload):
9716 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9717 " failed", idx, instance.name)
9720 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9721 """Create all disks for an instance.
9723 This abstracts away some work from AddInstance.
9725 @type lu: L{LogicalUnit}
9726 @param lu: the logical unit on whose behalf we execute
9727 @type instance: L{objects.Instance}
9728 @param instance: the instance whose disks we should create
9730 @param to_skip: list of indices to skip
9731 @type target_node: string
9732 @param target_node: if passed, overrides the target node for creation
9734 @return: the success of the creation
9737 info = _GetInstanceInfoText(instance)
9738 if target_node is None:
9739 pnode = instance.primary_node
9740 all_nodes = instance.all_nodes
9745 if instance.disk_template in constants.DTS_FILEBASED:
9746 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9747 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9749 result.Raise("Failed to create directory '%s' on"
9750 " node %s" % (file_storage_dir, pnode))
9752 # Note: this needs to be kept in sync with adding of disks in
9753 # LUInstanceSetParams
9754 for idx, device in enumerate(instance.disks):
9755 if to_skip and idx in to_skip:
9757 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9759 for node in all_nodes:
9760 f_create = node == pnode
9761 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9764 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9765 """Remove all disks for an instance.
9767 This abstracts away some work from `AddInstance()` and
9768 `RemoveInstance()`. Note that in case some of the devices couldn't
9769 be removed, the removal will continue with the other ones (compare
9770 with `_CreateDisks()`).
9772 @type lu: L{LogicalUnit}
9773 @param lu: the logical unit on whose behalf we execute
9774 @type instance: L{objects.Instance}
9775 @param instance: the instance whose disks we should remove
9776 @type target_node: string
9777 @param target_node: used to override the node on which to remove the disks
9779 @return: the success of the removal
9782 logging.info("Removing block devices for instance %s", instance.name)
9785 ports_to_release = set()
9786 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9787 for (idx, device) in enumerate(anno_disks):
9789 edata = [(target_node, device)]
9791 edata = device.ComputeNodeTree(instance.primary_node)
9792 for node, disk in edata:
9793 lu.cfg.SetDiskID(disk, node)
9794 result = lu.rpc.call_blockdev_remove(node, disk)
9796 lu.LogWarning("Could not remove disk %s on node %s,"
9797 " continuing anyway: %s", idx, node, result.fail_msg)
9798 if not (result.offline and node != instance.primary_node):
9801 # if this is a DRBD disk, return its port to the pool
9802 if device.dev_type in constants.LDS_DRBD:
9803 ports_to_release.add(device.logical_id[2])
9805 if all_result or ignore_failures:
9806 for port in ports_to_release:
9807 lu.cfg.AddTcpUdpPort(port)
9809 if instance.disk_template in constants.DTS_FILEBASED:
9810 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9814 tgt = instance.primary_node
9815 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9817 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9818 file_storage_dir, instance.primary_node, result.fail_msg)
9824 def _ComputeDiskSizePerVG(disk_template, disks):
9825 """Compute disk size requirements in the volume group
9828 def _compute(disks, payload):
9829 """Universal algorithm.
9834 vgs[disk[constants.IDISK_VG]] = \
9835 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9839 # Required free disk space as a function of disk and swap space
9841 constants.DT_DISKLESS: {},
9842 constants.DT_PLAIN: _compute(disks, 0),
9843 # 128 MB are added for drbd metadata for each disk
9844 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9845 constants.DT_FILE: {},
9846 constants.DT_SHARED_FILE: {},
9849 if disk_template not in req_size_dict:
9850 raise errors.ProgrammerError("Disk template '%s' size requirement"
9851 " is unknown" % disk_template)
9853 return req_size_dict[disk_template]
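# Illustrative result (hypothetical volume group name "xenvg"; DRBD metadata
# overhead assumed to be 128 MiB per disk, as per the comment above): for two
# 10240 MiB disks in "xenvg" and disk_template == constants.DT_DRBD8 the
# function returns {"xenvg": 2 * (10240 + 128)}, i.e. disk sizes plus per-disk
# metadata summed per volume group; for DT_DISKLESS, DT_FILE and
# DT_SHARED_FILE it returns an empty dict.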
9856 def _FilterVmNodes(lu, nodenames):
9857 """Filters out non-vm_capable nodes from a list.
9859 @type lu: L{LogicalUnit}
9860 @param lu: the logical unit for which we check
9861 @type nodenames: list
9862 @param nodenames: the list of nodes on which we should check
9864 @return: the list of vm-capable nodes
9867 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9868 return [name for name in nodenames if name not in vm_nodes]
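# Usage sketch (hypothetical node names): if "node3" is the only node in the
# cluster that is not vm_capable, then
#   _FilterVmNodes(lu, ["node1", "node2", "node3"]) == ["node1", "node2"]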
9871 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9872 """Hypervisor parameter validation.
9874 This function abstracts the hypervisor parameter validation to be
9875 used in both instance create and instance modify.
9877 @type lu: L{LogicalUnit}
9878 @param lu: the logical unit for which we check
9879 @type nodenames: list
9880 @param nodenames: the list of nodes on which we should check
9881 @type hvname: string
9882 @param hvname: the name of the hypervisor we should use
9883 @type hvparams: dict
9884 @param hvparams: the parameters which we need to check
9885 @raise errors.OpPrereqError: if the parameters are not valid
9888 nodenames = _FilterVmNodes(lu, nodenames)
9890 cluster = lu.cfg.GetClusterInfo()
9891 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9893 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9894 for node in nodenames:
9898 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9901 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9902 """OS parameters validation.
9904 @type lu: L{LogicalUnit}
9905 @param lu: the logical unit for which we check
9906 @type required: boolean
9907 @param required: whether the validation should fail if the OS is not found
9909 @type nodenames: list
9910 @param nodenames: the list of nodes on which we should check
9911 @type osname: string
9912 @param osname: the name of the OS we should use
9913 @type osparams: dict
9914 @param osparams: the parameters which we need to check
9915 @raise errors.OpPrereqError: if the parameters are not valid
9918 nodenames = _FilterVmNodes(lu, nodenames)
9919 result = lu.rpc.call_os_validate(nodenames, required, osname,
9920 [constants.OS_VALIDATE_PARAMETERS],
9922 for node, nres in result.items():
9923 # we don't check for offline cases since this should be run only
9924 # against the master node and/or an instance's nodes
9925 nres.Raise("OS Parameters validation failed on node %s" % node)
9926 if not nres.payload:
9927 lu.LogInfo("OS %s not found on node %s, validation skipped",
9931 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9932 """Wrapper around IAReqInstanceAlloc.
9934 @param op: The instance opcode
9935 @param disks: The computed disks
9936 @param nics: The computed nics
9937 @param beparams: The fully filled beparams
9938 @param node_whitelist: List of nodes which should appear as online to the
9939 allocator (unless the node is already marked offline)
9941 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9944 spindle_use = beparams[constants.BE_SPINDLE_USE]
9945 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9946 disk_template=op.disk_template,
9949 vcpus=beparams[constants.BE_VCPUS],
9950 memory=beparams[constants.BE_MAXMEM],
9951 spindle_use=spindle_use,
9953 nics=[n.ToDict() for n in nics],
9954 hypervisor=op.hypervisor,
9955 node_whitelist=node_whitelist)
9958 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9959 """Computes the nics.
9961 @param op: The instance opcode
9962 @param cluster: Cluster configuration object
9963 @param default_ip: The default ip to assign
9964 @param cfg: An instance of the configuration object
9965 @param ec_id: Execution context ID
9967 @returns: The built-up nics
9972 nic_mode_req = nic.get(constants.INIC_MODE, None)
9973 nic_mode = nic_mode_req
9974 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9975 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9977 net = nic.get(constants.INIC_NETWORK, None)
9978 link = nic.get(constants.NIC_LINK, None)
9979 ip = nic.get(constants.INIC_IP, None)
9981 if net is None or net.lower() == constants.VALUE_NONE:
9984 if nic_mode_req is not None or link is not None:
9985 raise errors.OpPrereqError("If network is given, no mode or link"
9986 " is allowed to be passed",
9989 # ip validity checks
9990 if ip is None or ip.lower() == constants.VALUE_NONE:
9992 elif ip.lower() == constants.VALUE_AUTO:
9993 if not op.name_check:
9994 raise errors.OpPrereqError("IP address set to auto but name checks"
9995 " have been skipped",
9999 # We defer pool operations until later, so that the iallocator has
10000 # filled in the instance's node(s)
10001 if ip.lower() == constants.NIC_IP_POOL:
10003 raise errors.OpPrereqError("if ip=pool, parameter network"
10004 " must be passed too",
10005 errors.ECODE_INVAL)
10007 elif not netutils.IPAddress.IsValid(ip):
10008 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
10009 errors.ECODE_INVAL)
10013 # TODO: check the ip address for uniqueness
10014 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
10015 raise errors.OpPrereqError("Routed nic mode requires an ip address",
10016 errors.ECODE_INVAL)
10018 # MAC address verification
10019 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
10020 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10021 mac = utils.NormalizeAndValidateMac(mac)
10024 # TODO: We need to factor this out
10025 cfg.ReserveMAC(mac, ec_id)
10026 except errors.ReservationError:
10027 raise errors.OpPrereqError("MAC address %s already in use"
10028 " in cluster" % mac,
10029 errors.ECODE_NOTUNIQUE)
10031 # Build nic parameters
10034 nicparams[constants.NIC_MODE] = nic_mode
10036 nicparams[constants.NIC_LINK] = link
10038 check_params = cluster.SimpleFillNIC(nicparams)
10039 objects.NIC.CheckParameterSyntax(check_params)
10040 net_uuid = cfg.LookupNetwork(net)
10041 nics.append(objects.NIC(mac=mac, ip=nic_ip,
10042 network=net_uuid, nicparams=nicparams))
10047 def _ComputeDisks(op, default_vg):
10048 """Computes the instance disks.
10050 @param op: The instance opcode
10051 @param default_vg: The default_vg to assume
10053 @return: The computed disks
10057 for disk in op.disks:
10058 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
10059 if mode not in constants.DISK_ACCESS_SET:
10060 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
10061 mode, errors.ECODE_INVAL)
10062 size = disk.get(constants.IDISK_SIZE, None)
10064 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
10067 except (TypeError, ValueError):
10068 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
10069 errors.ECODE_INVAL)
10071 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10072 if ext_provider and op.disk_template != constants.DT_EXT:
10073 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10074 " disk template, not %s" %
10075 (constants.IDISK_PROVIDER, constants.DT_EXT,
10076 op.disk_template), errors.ECODE_INVAL)
10078 data_vg = disk.get(constants.IDISK_VG, default_vg)
10080 constants.IDISK_SIZE: size,
10081 constants.IDISK_MODE: mode,
10082 constants.IDISK_VG: data_vg,
10085 if constants.IDISK_METAVG in disk:
10086 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10087 if constants.IDISK_ADOPT in disk:
10088 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10090 # For extstorage, demand the `provider' option and add any
10091 # additional parameters (ext-params) to the dict
10092 if op.disk_template == constants.DT_EXT:
10094 new_disk[constants.IDISK_PROVIDER] = ext_provider
10096 if key not in constants.IDISK_PARAMS:
10097 new_disk[key] = disk[key]
10099 raise errors.OpPrereqError("Missing provider for template '%s'" %
10100 constants.DT_EXT, errors.ECODE_INVAL)
10102 disks.append(new_disk)
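# Example of the normalization performed here (hypothetical values; "xenvg"
# stands in for the cluster's default volume group): an input specification
#   {constants.IDISK_SIZE: 10240}
# becomes
#   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# with metavg/adopt keys (and, for DT_EXT, the provider and extra ext-params)
# carried over when present.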
10107 def _ComputeFullBeParams(op, cluster):
10108 """Computes the full beparams.
10110 @param op: The instance opcode
10111 @param cluster: The cluster config object
10113 @return: The fully filled beparams
10116 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10117 for param, value in op.beparams.iteritems():
10118 if value == constants.VALUE_AUTO:
10119 op.beparams[param] = default_beparams[param]
10120 objects.UpgradeBeParams(op.beparams)
10121 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10122 return cluster.SimpleFillBE(op.beparams)
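# Behaviour sketch (hypothetical parameter values): if the opcode carries
# beparams == {constants.BE_VCPUS: constants.VALUE_AUTO} and the cluster-wide
# default for BE_VCPUS is 1, the "auto" entry is first replaced by 1, the dict
# is then upgraded and type-checked, and the result is the full cluster
# defaults overlaid with these (now concrete) opcode values.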
10125 def _CheckOpportunisticLocking(op):
10126 """Generate error if opportunistic locking is not possible.
10129 if op.opportunistic_locking and not op.iallocator:
10130 raise errors.OpPrereqError("Opportunistic locking is only available in"
10131 " combination with an instance allocator",
10132 errors.ECODE_INVAL)
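# Minimal illustration: an opcode with opportunistic_locking=True but no
# iallocator set is rejected here with OpPrereqError (errors.ECODE_INVAL);
# with an iallocator the check passes silently.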
10135 class LUInstanceCreate(LogicalUnit):
10136 """Create an instance.
10139 HPATH = "instance-add"
10140 HTYPE = constants.HTYPE_INSTANCE
10143 def CheckArguments(self):
10144 """Check arguments.
10147 # do not require name_check to ease forward/backward compatibility
10149 if self.op.no_install and self.op.start:
10150 self.LogInfo("No-installation mode selected, disabling startup")
10151 self.op.start = False
10152 # validate/normalize the instance name
10153 self.op.instance_name = \
10154 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10156 if self.op.ip_check and not self.op.name_check:
10157 # TODO: make the ip check more flexible and not depend on the name check
10158 raise errors.OpPrereqError("Cannot do IP address check without a name"
10159 " check", errors.ECODE_INVAL)
10161 # check nics' parameter names
10162 for nic in self.op.nics:
10163 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10165 # check disks. parameter names and consistent adopt/no-adopt strategy
10166 has_adopt = has_no_adopt = False
10167 for disk in self.op.disks:
10168 if self.op.disk_template != constants.DT_EXT:
10169 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10170 if constants.IDISK_ADOPT in disk:
10173 has_no_adopt = True
10174 if has_adopt and has_no_adopt:
10175 raise errors.OpPrereqError("Either all disks are adopted or none is",
10176 errors.ECODE_INVAL)
10178 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10179 raise errors.OpPrereqError("Disk adoption is not supported for the"
10180 " '%s' disk template" %
10181 self.op.disk_template,
10182 errors.ECODE_INVAL)
10183 if self.op.iallocator is not None:
10184 raise errors.OpPrereqError("Disk adoption not allowed with an"
10185 " iallocator script", errors.ECODE_INVAL)
10186 if self.op.mode == constants.INSTANCE_IMPORT:
10187 raise errors.OpPrereqError("Disk adoption not allowed for"
10188 " instance import", errors.ECODE_INVAL)
10190 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10191 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10192 " but no 'adopt' parameter given" %
10193 self.op.disk_template,
10194 errors.ECODE_INVAL)
10196 self.adopt_disks = has_adopt
10198 # instance name verification
10199 if self.op.name_check:
10200 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10201 self.op.instance_name = self.hostname1.name
10202 # used in CheckPrereq for ip ping check
10203 self.check_ip = self.hostname1.ip
10205 self.check_ip = None
10207 # file storage checks
10208 if (self.op.file_driver and
10209 not self.op.file_driver in constants.FILE_DRIVER):
10210 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10211 self.op.file_driver, errors.ECODE_INVAL)
10213 if self.op.disk_template == constants.DT_FILE:
10214 opcodes.RequireFileStorage()
10215 elif self.op.disk_template == constants.DT_SHARED_FILE:
10216 opcodes.RequireSharedFileStorage()
10218 ### Node/iallocator related checks
10219 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10221 if self.op.pnode is not None:
10222 if self.op.disk_template in constants.DTS_INT_MIRROR:
10223 if self.op.snode is None:
10224 raise errors.OpPrereqError("The networked disk templates need"
10225 " a mirror node", errors.ECODE_INVAL)
10226 elif self.op.snode:
10227 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10229 self.op.snode = None
10231 _CheckOpportunisticLocking(self.op)
10233 self._cds = _GetClusterDomainSecret()
10235 if self.op.mode == constants.INSTANCE_IMPORT:
10236 # On import force_variant must be True, because if we forced it at
10237 # initial install, our only chance when importing it back is that it
10239 self.op.force_variant = True
10241 if self.op.no_install:
10242 self.LogInfo("No-installation mode has no effect during import")
10244 elif self.op.mode == constants.INSTANCE_CREATE:
10245 if self.op.os_type is None:
10246 raise errors.OpPrereqError("No guest OS specified",
10247 errors.ECODE_INVAL)
10248 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10249 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10250 " installation" % self.op.os_type,
10251 errors.ECODE_STATE)
10252 if self.op.disk_template is None:
10253 raise errors.OpPrereqError("No disk template specified",
10254 errors.ECODE_INVAL)
10256 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10257 # Check handshake to ensure both clusters have the same domain secret
10258 src_handshake = self.op.source_handshake
10259 if not src_handshake:
10260 raise errors.OpPrereqError("Missing source handshake",
10261 errors.ECODE_INVAL)
10263 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10266 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10267 errors.ECODE_INVAL)
10269 # Load and check source CA
10270 self.source_x509_ca_pem = self.op.source_x509_ca
10271 if not self.source_x509_ca_pem:
10272 raise errors.OpPrereqError("Missing source X509 CA",
10273 errors.ECODE_INVAL)
10276 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10278 except OpenSSL.crypto.Error, err:
10279 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10280 (err, ), errors.ECODE_INVAL)
10282 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10283 if errcode is not None:
10284 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10285 errors.ECODE_INVAL)
10287 self.source_x509_ca = cert
10289 src_instance_name = self.op.source_instance_name
10290 if not src_instance_name:
10291 raise errors.OpPrereqError("Missing source instance name",
10292 errors.ECODE_INVAL)
10294 self.source_instance_name = \
10295 netutils.GetHostname(name=src_instance_name).name
10298 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10299 self.op.mode, errors.ECODE_INVAL)
10301 def ExpandNames(self):
10302 """ExpandNames for CreateInstance.
10304 Figure out the right locks for instance creation.
10307 self.needed_locks = {}
10309 instance_name = self.op.instance_name
10310 # this is just a preventive check, but someone might still add this
10311 # instance in the meantime, and creation will fail at lock-add time
10312 if instance_name in self.cfg.GetInstanceList():
10313 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10314 instance_name, errors.ECODE_EXISTS)
10316 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10318 if self.op.iallocator:
10319 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10320 # specifying a group on instance creation and then selecting nodes from that group
10322 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10323 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10325 if self.op.opportunistic_locking:
10326 self.opportunistic_locks[locking.LEVEL_NODE] = True
10327 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10329 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10330 nodelist = [self.op.pnode]
10331 if self.op.snode is not None:
10332 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10333 nodelist.append(self.op.snode)
10334 self.needed_locks[locking.LEVEL_NODE] = nodelist
10336 # in case of import lock the source node too
10337 if self.op.mode == constants.INSTANCE_IMPORT:
10338 src_node = self.op.src_node
10339 src_path = self.op.src_path
10341 if src_path is None:
10342 self.op.src_path = src_path = self.op.instance_name
10344 if src_node is None:
10345 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10346 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10347 self.op.src_node = None
10348 if os.path.isabs(src_path):
10349 raise errors.OpPrereqError("Importing an instance from a path"
10350 " requires a source node option",
10351 errors.ECODE_INVAL)
10353 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10354 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10355 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10356 if not os.path.isabs(src_path):
10357 self.op.src_path = src_path = \
10358 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10360 self.needed_locks[locking.LEVEL_NODE_RES] = \
10361 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10363 def _RunAllocator(self):
10364 """Run the allocator based on input opcode.
10367 if self.op.opportunistic_locking:
10368 # Only consider nodes for which a lock is held
10369 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10371 node_whitelist = None
10373 #TODO Export network to iallocator so that it chooses a pnode
10374 # in a nodegroup that has the desired network connected to
10375 req = _CreateInstanceAllocRequest(self.op, self.disks,
10376 self.nics, self.be_full,
10378 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10380 ial.Run(self.op.iallocator)
10382 if not ial.success:
10383 # When opportunistic locks are used only a temporary failure is generated
10384 if self.op.opportunistic_locking:
10385 ecode = errors.ECODE_TEMP_NORES
10387 ecode = errors.ECODE_NORES
10389 raise errors.OpPrereqError("Can't compute nodes using"
10390 " iallocator '%s': %s" %
10391 (self.op.iallocator, ial.info),
10394 self.op.pnode = ial.result[0]
10395 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10396 self.op.instance_name, self.op.iallocator,
10397 utils.CommaJoin(ial.result))
10399 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10401 if req.RequiredNodes() == 2:
10402 self.op.snode = ial.result[1]
10404 def BuildHooksEnv(self):
10405 """Build hooks env.
10407 This runs on master, primary and secondary nodes of the instance.
10411 "ADD_MODE": self.op.mode,
10413 if self.op.mode == constants.INSTANCE_IMPORT:
10414 env["SRC_NODE"] = self.op.src_node
10415 env["SRC_PATH"] = self.op.src_path
10416 env["SRC_IMAGES"] = self.src_images
10418 env.update(_BuildInstanceHookEnv(
10419 name=self.op.instance_name,
10420 primary_node=self.op.pnode,
10421 secondary_nodes=self.secondaries,
10422 status=self.op.start,
10423 os_type=self.op.os_type,
10424 minmem=self.be_full[constants.BE_MINMEM],
10425 maxmem=self.be_full[constants.BE_MAXMEM],
10426 vcpus=self.be_full[constants.BE_VCPUS],
10427 nics=_NICListToTuple(self, self.nics),
10428 disk_template=self.op.disk_template,
10429 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10430 for d in self.disks],
10433 hypervisor_name=self.op.hypervisor,
10439 def BuildHooksNodes(self):
10440 """Build hooks nodes.
10443 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10446 def _ReadExportInfo(self):
10447 """Reads the export information from disk.
10449 It will override the opcode source node and path with the actual
10450 information, if these two were not specified before.
10452 @return: the export information
10455 assert self.op.mode == constants.INSTANCE_IMPORT
10457 src_node = self.op.src_node
10458 src_path = self.op.src_path
10460 if src_node is None:
10461 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10462 exp_list = self.rpc.call_export_list(locked_nodes)
10464 for node in exp_list:
10465 if exp_list[node].fail_msg:
10467 if src_path in exp_list[node].payload:
10469 self.op.src_node = src_node = node
10470 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10474 raise errors.OpPrereqError("No export found for relative path %s" %
10475 src_path, errors.ECODE_INVAL)
10477 _CheckNodeOnline(self, src_node)
10478 result = self.rpc.call_export_info(src_node, src_path)
10479 result.Raise("No export or invalid export found in dir %s" % src_path)
10481 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10482 if not export_info.has_section(constants.INISECT_EXP):
10483 raise errors.ProgrammerError("Corrupted export config",
10484 errors.ECODE_ENVIRON)
10486 ei_version = export_info.get(constants.INISECT_EXP, "version")
10487 if (int(ei_version) != constants.EXPORT_VERSION):
10488 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10489 (ei_version, constants.EXPORT_VERSION),
10490 errors.ECODE_ENVIRON)
10493 def _ReadExportParams(self, einfo):
10494 """Use export parameters as defaults.
10496 If the opcode doesn't specify (i.e. override) some instance
10497 parameters, try to take them from the export information, if
10498 it declares them.
10501 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10503 if self.op.disk_template is None:
10504 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10505 self.op.disk_template = einfo.get(constants.INISECT_INS,
10507 if self.op.disk_template not in constants.DISK_TEMPLATES:
10508 raise errors.OpPrereqError("Disk template specified in configuration"
10509 " file is not one of the allowed values:"
10511 " ".join(constants.DISK_TEMPLATES),
10512 errors.ECODE_INVAL)
10514 raise errors.OpPrereqError("No disk template specified and the export"
10515 " is missing the disk_template information",
10516 errors.ECODE_INVAL)
10518 if not self.op.disks:
10520 # TODO: import the disk iv_name too
10521 for idx in range(constants.MAX_DISKS):
10522 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10523 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10524 disks.append({constants.IDISK_SIZE: disk_sz})
10525 self.op.disks = disks
10526 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10527 raise errors.OpPrereqError("No disk info specified and the export"
10528 " is missing the disk information",
10529 errors.ECODE_INVAL)
10531 if not self.op.nics:
10533 for idx in range(constants.MAX_NICS):
10534 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10536 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10537 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10542 self.op.nics = nics
10544 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10545 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10547 if (self.op.hypervisor is None and
10548 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10549 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10551 if einfo.has_section(constants.INISECT_HYP):
10552 # use the export parameters but do not override the ones
10553 # specified by the user
10554 for name, value in einfo.items(constants.INISECT_HYP):
10555 if name not in self.op.hvparams:
10556 self.op.hvparams[name] = value
10558 if einfo.has_section(constants.INISECT_BEP):
10559 # use the parameters, without overriding
10560 for name, value in einfo.items(constants.INISECT_BEP):
10561 if name not in self.op.beparams:
10562 self.op.beparams[name] = value
10563 # Compatibility for the old "memory" be param
10564 if name == constants.BE_MEMORY:
10565 if constants.BE_MAXMEM not in self.op.beparams:
10566 self.op.beparams[constants.BE_MAXMEM] = value
10567 if constants.BE_MINMEM not in self.op.beparams:
10568 self.op.beparams[constants.BE_MINMEM] = value
10570 # try to read the parameters old style, from the main section
10571 for name in constants.BES_PARAMETERS:
10572 if (name not in self.op.beparams and
10573 einfo.has_option(constants.INISECT_INS, name)):
10574 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10576 if einfo.has_section(constants.INISECT_OSP):
10577 # use the parameters, without overriding
10578 for name, value in einfo.items(constants.INISECT_OSP):
10579 if name not in self.op.osparams:
10580 self.op.osparams[name] = value
10582 def _RevertToDefaults(self, cluster):
10583 """Revert the instance parameters to the default values.
10587 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10588 for name in self.op.hvparams.keys():
10589 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10590 del self.op.hvparams[name]
10592 be_defs = cluster.SimpleFillBE({})
10593 for name in self.op.beparams.keys():
10594 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10595 del self.op.beparams[name]
10597 nic_defs = cluster.SimpleFillNIC({})
10598 for nic in self.op.nics:
10599 for name in constants.NICS_PARAMETERS:
10600 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10603 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10604 for name in self.op.osparams.keys():
10605 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10606 del self.op.osparams[name]
10608 def _CalculateFileStorageDir(self):
10609 """Calculate final instance file storage dir.
10612 # file storage dir calculation/check
10613 self.instance_file_storage_dir = None
10614 if self.op.disk_template in constants.DTS_FILEBASED:
10615 # build the full file storage dir path
10618 if self.op.disk_template == constants.DT_SHARED_FILE:
10619 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10621 get_fsd_fn = self.cfg.GetFileStorageDir
10623 cfg_storagedir = get_fsd_fn()
10624 if not cfg_storagedir:
10625 raise errors.OpPrereqError("Cluster file storage dir not defined",
10626 errors.ECODE_STATE)
10627 joinargs.append(cfg_storagedir)
10629 if self.op.file_storage_dir is not None:
10630 joinargs.append(self.op.file_storage_dir)
10632 joinargs.append(self.op.instance_name)
10634 # pylint: disable=W0142
10635 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10637 def CheckPrereq(self): # pylint: disable=R0914
10638 """Check prerequisites.
10641 self._CalculateFileStorageDir()
10643 if self.op.mode == constants.INSTANCE_IMPORT:
10644 export_info = self._ReadExportInfo()
10645 self._ReadExportParams(export_info)
10646 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10648 self._old_instance_name = None
10650 if (not self.cfg.GetVGName() and
10651 self.op.disk_template not in constants.DTS_NOT_LVM):
10652 raise errors.OpPrereqError("Cluster does not support lvm-based"
10653 " instances", errors.ECODE_STATE)
10655 if (self.op.hypervisor is None or
10656 self.op.hypervisor == constants.VALUE_AUTO):
10657 self.op.hypervisor = self.cfg.GetHypervisorType()
10659 cluster = self.cfg.GetClusterInfo()
10660 enabled_hvs = cluster.enabled_hypervisors
10661 if self.op.hypervisor not in enabled_hvs:
10662 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10664 (self.op.hypervisor, ",".join(enabled_hvs)),
10665 errors.ECODE_STATE)
10667 # Check tag validity
10668 for tag in self.op.tags:
10669 objects.TaggableObject.ValidateTag(tag)
10671 # check hypervisor parameter syntax (locally)
10672 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10673 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10675 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10676 hv_type.CheckParameterSyntax(filled_hvp)
10677 self.hv_full = filled_hvp
10678 # check that we don't specify global parameters on an instance
10679 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10680 "instance", "cluster")
10682 # fill and remember the beparams dict
10683 self.be_full = _ComputeFullBeParams(self.op, cluster)
10685 # build os parameters
10686 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10688 # now that hvp/bep are in final format, let's reset to defaults, if requested
10690 if self.op.identify_defaults:
10691 self._RevertToDefaults(cluster)
10694 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10695 self.proc.GetECId())
10697 # disk checks/pre-build
10698 default_vg = self.cfg.GetVGName()
10699 self.disks = _ComputeDisks(self.op, default_vg)
10701 if self.op.mode == constants.INSTANCE_IMPORT:
10703 for idx in range(len(self.disks)):
10704 option = "disk%d_dump" % idx
10705 if export_info.has_option(constants.INISECT_INS, option):
10706 # FIXME: are the old os-es, disk sizes, etc. useful?
10707 export_name = export_info.get(constants.INISECT_INS, option)
10708 image = utils.PathJoin(self.op.src_path, export_name)
10709 disk_images.append(image)
10711 disk_images.append(False)
10713 self.src_images = disk_images
10715 if self.op.instance_name == self._old_instance_name:
10716 for idx, nic in enumerate(self.nics):
10717 if nic.mac == constants.VALUE_AUTO:
10718 nic_mac_ini = "nic%d_mac" % idx
10719 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10721 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10723 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10724 if self.op.ip_check:
10725 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10726 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10727 (self.check_ip, self.op.instance_name),
10728 errors.ECODE_NOTUNIQUE)
10730 #### mac address generation
10731 # By generating here the mac address both the allocator and the hooks get
10732 # the real final mac address rather than the 'auto' or 'generate' value.
10733 # There is a race condition between the generation and the instance object
10734 # creation, which means that we know the mac is valid now, but we're not
10735 # sure it will be when we actually add the instance. If things go bad
10736 # adding the instance will abort because of a duplicate mac, and the
10737 # creation job will fail.
10738 for nic in self.nics:
10739 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10740 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10744 if self.op.iallocator is not None:
10745 self._RunAllocator()
10747 # Release all unneeded node locks
10748 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10749 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10750 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10751 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10753 assert (self.owned_locks(locking.LEVEL_NODE) ==
10754 self.owned_locks(locking.LEVEL_NODE_RES)), \
10755 "Node locks differ from node resource locks"
10757 #### node related checks
10759 # check primary node
10760 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10761 assert self.pnode is not None, \
10762 "Cannot retrieve locked node %s" % self.op.pnode
10764 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10765 pnode.name, errors.ECODE_STATE)
10767 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10768 pnode.name, errors.ECODE_STATE)
10769 if not pnode.vm_capable:
10770 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10771 " '%s'" % pnode.name, errors.ECODE_STATE)
10773 self.secondaries = []
10775 # Fill in any IPs from IP pools. This must happen here, because we need to
10776 # know the nic's primary node, as specified by the iallocator
10777 for idx, nic in enumerate(self.nics):
10778 net_uuid = nic.network
10779 if net_uuid is not None:
10780 nobj = self.cfg.GetNetwork(net_uuid)
10781 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10782 if netparams is None:
10783 raise errors.OpPrereqError("No netparams found for network"
10784 " %s. Probably not connected to"
10785 " node's %s nodegroup" %
10786 (nobj.name, self.pnode.name),
10787 errors.ECODE_INVAL)
10788 self.LogInfo("NIC/%d inherits netparams %s" %
10789 (idx, netparams.values()))
10790 nic.nicparams = dict(netparams)
10791 if nic.ip is not None:
10792 if nic.ip.lower() == constants.NIC_IP_POOL:
10794 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10795 except errors.ReservationError:
10796 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10797 " from the address pool" % idx,
10798 errors.ECODE_STATE)
10799 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10802 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10803 except errors.ReservationError:
10804 raise errors.OpPrereqError("IP address %s already in use"
10805 " or does not belong to network %s" %
10806 (nic.ip, nobj.name),
10807 errors.ECODE_NOTUNIQUE)
10809 # net is None, ip None or given
10810 elif self.op.conflicts_check:
10811 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10813 # mirror node verification
10814 if self.op.disk_template in constants.DTS_INT_MIRROR:
10815 if self.op.snode == pnode.name:
10816 raise errors.OpPrereqError("The secondary node cannot be the"
10817 " primary node", errors.ECODE_INVAL)
10818 _CheckNodeOnline(self, self.op.snode)
10819 _CheckNodeNotDrained(self, self.op.snode)
10820 _CheckNodeVmCapable(self, self.op.snode)
10821 self.secondaries.append(self.op.snode)
10823 snode = self.cfg.GetNodeInfo(self.op.snode)
10824 if pnode.group != snode.group:
10825 self.LogWarning("The primary and secondary nodes are in two"
10826 " different node groups; the disk parameters"
10827 " from the first disk's node group will be"
10830 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10832 if self.op.disk_template in constants.DTS_INT_MIRROR:
10833 nodes.append(snode)
10834 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10835 if compat.any(map(has_es, nodes)):
10836 raise errors.OpPrereqError("Disk template %s not supported with"
10837 " exclusive storage" % self.op.disk_template,
10838 errors.ECODE_STATE)
10840 nodenames = [pnode.name] + self.secondaries
10842 if not self.adopt_disks:
10843 if self.op.disk_template == constants.DT_RBD:
10844 # _CheckRADOSFreeSpace() is just a placeholder.
10845 # Any function that checks prerequisites can be placed here.
10846 # Check if there is enough space on the RADOS cluster.
10847 _CheckRADOSFreeSpace()
10848 elif self.op.disk_template == constants.DT_EXT:
10849 # FIXME: Function that checks prereqs if needed
10852 # Check lv size requirements, if not adopting
10853 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10854 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10856 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10857 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10858 disk[constants.IDISK_ADOPT])
10859 for disk in self.disks])
10860 if len(all_lvs) != len(self.disks):
10861 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10862 errors.ECODE_INVAL)
10863 for lv_name in all_lvs:
10865 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10866 # to ReserveLV use the same syntax
10867 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10868 except errors.ReservationError:
10869 raise errors.OpPrereqError("LV named %s used by another instance" %
10870 lv_name, errors.ECODE_NOTUNIQUE)
10872 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10873 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10875 node_lvs = self.rpc.call_lv_list([pnode.name],
10876 vg_names.payload.keys())[pnode.name]
10877 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10878 node_lvs = node_lvs.payload
10880 delta = all_lvs.difference(node_lvs.keys())
10882 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10883 utils.CommaJoin(delta),
10884 errors.ECODE_INVAL)
10885 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10887 raise errors.OpPrereqError("Online logical volumes found, cannot"
10888 " adopt: %s" % utils.CommaJoin(online_lvs),
10889 errors.ECODE_STATE)
10890 # update the size of disk based on what is found
10891 for dsk in self.disks:
10892 dsk[constants.IDISK_SIZE] = \
10893 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10894 dsk[constants.IDISK_ADOPT])][0]))
10896 elif self.op.disk_template == constants.DT_BLOCK:
10897 # Normalize and de-duplicate device paths
10898 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10899 for disk in self.disks])
10900 if len(all_disks) != len(self.disks):
10901 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10902 errors.ECODE_INVAL)
10903 baddisks = [d for d in all_disks
10904 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10906 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10907 " cannot be adopted" %
10908 (utils.CommaJoin(baddisks),
10909 constants.ADOPTABLE_BLOCKDEV_ROOT),
10910 errors.ECODE_INVAL)
10912 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10913 list(all_disks))[pnode.name]
10914 node_disks.Raise("Cannot get block device information from node %s" %
10916 node_disks = node_disks.payload
10917 delta = all_disks.difference(node_disks.keys())
10919 raise errors.OpPrereqError("Missing block device(s): %s" %
10920 utils.CommaJoin(delta),
10921 errors.ECODE_INVAL)
10922 for dsk in self.disks:
10923 dsk[constants.IDISK_SIZE] = \
10924 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10926 # Verify instance specs
10927 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10929 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10930 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10931 constants.ISPEC_DISK_COUNT: len(self.disks),
10932 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10933 for disk in self.disks],
10934 constants.ISPEC_NIC_COUNT: len(self.nics),
10935 constants.ISPEC_SPINDLE_USE: spindle_use,
10938 group_info = self.cfg.GetNodeGroup(pnode.group)
10939 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10940 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
10941 self.op.disk_template)
10942 if not self.op.ignore_ipolicy and res:
10943 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10944 (pnode.group, group_info.name, utils.CommaJoin(res)))
10945 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10947 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10949 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10950 # check OS parameters (remotely)
10951 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10953 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10955 #TODO: _CheckExtParams (remotely)
10956 # Check parameters for extstorage
10958 # memory check on primary node
10959 #TODO(dynmem): use MINMEM for checking
10961 _CheckNodeFreeMemory(self, self.pnode.name,
10962 "creating instance %s" % self.op.instance_name,
10963 self.be_full[constants.BE_MAXMEM],
10964 self.op.hypervisor)
10966 self.dry_run_result = list(nodenames)
10968 def Exec(self, feedback_fn):
10969 """Create and add the instance to the cluster.
10972 instance = self.op.instance_name
10973 pnode_name = self.pnode.name
10975 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10976 self.owned_locks(locking.LEVEL_NODE)), \
10977 "Node locks differ from node resource locks"
10978 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10980 ht_kind = self.op.hypervisor
10981 if ht_kind in constants.HTS_REQ_PORT:
10982 network_port = self.cfg.AllocatePort()
10983 else:
10984 network_port = None
10986 # This is ugly but we got a chicken-egg problem here
10987 # We can only take the group disk parameters, as the instance
10988 # has no disks yet (we are generating them right here).
10989 node = self.cfg.GetNodeInfo(pnode_name)
10990 nodegroup = self.cfg.GetNodeGroup(node.group)
10991 disks = _GenerateDiskTemplate(self,
10992 self.op.disk_template,
10993 instance, pnode_name,
10994 self.secondaries,
10995 self.disks,
10996 self.instance_file_storage_dir,
10997 self.op.file_driver,
10998 0,
10999 feedback_fn,
11000 self.cfg.GetGroupDiskParams(nodegroup))
11002 iobj = objects.Instance(name=instance, os=self.op.os_type,
11003 primary_node=pnode_name,
11004 nics=self.nics, disks=disks,
11005 disk_template=self.op.disk_template,
11006 admin_state=constants.ADMINST_DOWN,
11007 network_port=network_port,
11008 beparams=self.op.beparams,
11009 hvparams=self.op.hvparams,
11010 hypervisor=self.op.hypervisor,
11011 osparams=self.op.osparams,
11012 )
11014 if self.op.tags:
11015 for tag in self.op.tags:
11016 iobj.AddTag(tag)
11018 if self.adopt_disks:
11019 if self.op.disk_template == constants.DT_PLAIN:
11020 # rename LVs to the newly-generated names; we need to construct
11021 # 'fake' LV disks with the old data, plus the new unique_id
11022 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
11023 rename_to = []
11024 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
11025 rename_to.append(t_dsk.logical_id)
11026 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
11027 self.cfg.SetDiskID(t_dsk, pnode_name)
11028 result = self.rpc.call_blockdev_rename(pnode_name,
11029 zip(tmp_disks, rename_to))
11030 result.Raise("Failed to rename adoped LVs")
11032 feedback_fn("* creating instance disks...")
11034 _CreateDisks(self, iobj)
11035 except errors.OpExecError:
11036 self.LogWarning("Device creation failed, reverting...")
11038 _RemoveDisks(self, iobj)
11040 self.cfg.ReleaseDRBDMinors(instance)
11043 feedback_fn("adding instance %s to cluster config" % instance)
11045 self.cfg.AddInstance(iobj, self.proc.GetECId())
11047 # Declare that we don't want to remove the instance lock anymore, as we've
11048 # added the instance to the config
11049 del self.remove_locks[locking.LEVEL_INSTANCE]
11051 if self.op.mode == constants.INSTANCE_IMPORT:
11052 # Release unused nodes
11053 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
11055 # Release all nodes
11056 _ReleaseLocks(self, locking.LEVEL_NODE)
11058 disk_abort = False
11059 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
11060 feedback_fn("* wiping instance disks...")
11062 _WipeDisks(self, iobj)
11063 except errors.OpExecError, err:
11064 logging.exception("Wiping disks failed")
11065 self.LogWarning("Wiping instance disks failed (%s)", err)
11069 # Something is already wrong with the disks, don't do anything else
11071 elif self.op.wait_for_sync:
11072 disk_abort = not _WaitForSync(self, iobj)
11073 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11074 # make sure the disks are not degraded (still sync-ing is ok)
11075 feedback_fn("* checking mirrors status")
11076 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11080 if disk_abort:
11081 _RemoveDisks(self, iobj)
11082 self.cfg.RemoveInstance(iobj.name)
11083 # Make sure the instance lock gets removed
11084 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11085 raise errors.OpExecError("There are some degraded disks for"
11086 " this instance")
11088 # Release all node resource locks
11089 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11091 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11092 # we need to set the disks ID to the primary node, since the
11093 # preceding code might or might not have done it, depending on
11094 # disk template and other options
11095 for disk in iobj.disks:
11096 self.cfg.SetDiskID(disk, pnode_name)
11097 if self.op.mode == constants.INSTANCE_CREATE:
11098 if not self.op.no_install:
11099 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11100 not self.op.wait_for_sync)
11102 feedback_fn("* pausing disk sync to install instance OS")
11103 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11104 (iobj.disks, iobj), True)
11106 for idx, success in enumerate(result.payload):
11107 if not success:
11108 logging.warn("pause-sync of instance %s for disk %d failed",
11109 instance, idx)
11111 feedback_fn("* running the instance OS create scripts...")
11112 # FIXME: pass debug option from opcode to backend
11113 os_add_result = \
11114 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11115 self.op.debug_level)
11117 feedback_fn("* resuming disk sync")
11118 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11121 for idx, success in enumerate(result.payload):
11123 logging.warn("resume-sync of instance %s for disk %d failed",
11126 os_add_result.Raise("Could not add os for instance %s"
11127 " on node %s" % (instance, pnode_name))
11130 if self.op.mode == constants.INSTANCE_IMPORT:
11131 feedback_fn("* running the instance OS import scripts...")
11135 for idx, image in enumerate(self.src_images):
11139 # FIXME: pass debug option from opcode to backend
11140 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11141 constants.IEIO_FILE, (image, ),
11142 constants.IEIO_SCRIPT,
11143 (iobj.disks[idx], idx),
11145 transfers.append(dt)
11148 masterd.instance.TransferInstanceData(self, feedback_fn,
11149 self.op.src_node, pnode_name,
11150 self.pnode.secondary_ip,
11152 if not compat.all(import_result):
11153 self.LogWarning("Some disks for instance %s on node %s were not"
11154 " imported successfully" % (instance, pnode_name))
11156 rename_from = self._old_instance_name
11158 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11159 feedback_fn("* preparing remote import...")
11160 # The source cluster will stop the instance before attempting to make
11161 # a connection. In some cases stopping an instance can take a long
11162 # time, hence the shutdown timeout is added to the connection
11164 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11165 self.op.source_shutdown_timeout)
11166 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11168 assert iobj.primary_node == self.pnode.name
11169 disk_results = \
11170 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11171 self.source_x509_ca,
11172 self._cds, timeouts)
11173 if not compat.all(disk_results):
11174 # TODO: Should the instance still be started, even if some disks
11175 # failed to import (valid for local imports, too)?
11176 self.LogWarning("Some disks for instance %s on node %s were not"
11177 " imported successfully" % (instance, pnode_name))
11179 rename_from = self.source_instance_name
11181 else:
11182 # also checked in the prereq part
11183 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11184 % self.op.mode)
11186 # Run rename script on newly imported instance
11187 assert iobj.name == instance
11188 feedback_fn("Running rename script for %s" % instance)
11189 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11190 rename_from,
11191 self.op.debug_level)
11192 if result.fail_msg:
11193 self.LogWarning("Failed to run rename script for %s on node"
11194 " %s: %s" % (instance, pnode_name, result.fail_msg))
11196 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11198 if self.op.start:
11199 iobj.admin_state = constants.ADMINST_UP
11200 self.cfg.Update(iobj, feedback_fn)
11201 logging.info("Starting instance %s on node %s", instance, pnode_name)
11202 feedback_fn("* starting instance...")
11203 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11205 result.Raise("Could not start instance")
11207 return list(iobj.all_nodes)
11210 class LUInstanceMultiAlloc(NoHooksLU):
11211 """Allocates multiple instances at the same time.
11216 def CheckArguments(self):
11217 """Check arguments.
11221 for inst in self.op.instances:
11222 if inst.iallocator is not None:
11223 raise errors.OpPrereqError("iallocator are not allowed to be set on"
11224 " instance objects", errors.ECODE_INVAL)
11225 nodes.append(bool(inst.pnode))
11226 if inst.disk_template in constants.DTS_INT_MIRROR:
11227 nodes.append(bool(inst.snode))
11229 has_nodes = compat.any(nodes)
11230 if compat.all(nodes) ^ has_nodes:
11231 raise errors.OpPrereqError("There are instance objects providing"
11232 " pnode/snode while others do not",
11233 errors.ECODE_INVAL)
11235 if self.op.iallocator is None:
11236 default_iallocator = self.cfg.GetDefaultIAllocator()
11237 if default_iallocator and has_nodes:
11238 self.op.iallocator = default_iallocator
11240 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11241 " given and no cluster-wide default"
11242 " iallocator found; please specify either"
11243 " an iallocator or nodes on the instances"
11244 " or set a cluster-wide default iallocator",
11245 errors.ECODE_INVAL)
11247 _CheckOpportunisticLocking(self.op)
11249 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11250 if dups:
11251 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11252 utils.CommaJoin(dups), errors.ECODE_INVAL)
11254 def ExpandNames(self):
11255 """Calculate the locks.
11258 self.share_locks = _ShareAll()
11259 self.needed_locks = {
11260 # iallocator will select nodes and even if no iallocator is used,
11261 # collisions with LUInstanceCreate should be avoided
11262 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11263 }
11265 if self.op.iallocator:
11266 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11267 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11269 if self.op.opportunistic_locking:
11270 self.opportunistic_locks[locking.LEVEL_NODE] = True
11271 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11272 else:
11273 nodeslist = []
11274 for inst in self.op.instances:
11275 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11276 nodeslist.append(inst.pnode)
11277 if inst.snode is not None:
11278 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11279 nodeslist.append(inst.snode)
11281 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11282 # Lock resources of instance's primary and secondary nodes (copy to
11283 # prevent accidental modification)
11284 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11286 def CheckPrereq(self):
11287 """Check prerequisite.
11290 cluster = self.cfg.GetClusterInfo()
11291 default_vg = self.cfg.GetVGName()
11292 ec_id = self.proc.GetECId()
11294 if self.op.opportunistic_locking:
11295 # Only consider nodes for which a lock is held
11296 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11297 else:
11298 node_whitelist = None
11300 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11301 _ComputeNics(op, cluster, None,
11302 self.cfg, ec_id),
11303 _ComputeFullBeParams(op, cluster),
11304 node_whitelist)
11305 for op in self.op.instances]
11307 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11308 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11310 ial.Run(self.op.iallocator)
11312 if not ial.success:
11313 raise errors.OpPrereqError("Can't compute nodes using"
11314 " iallocator '%s': %s" %
11315 (self.op.iallocator, ial.info),
11316 errors.ECODE_NORES)
11318 self.ia_result = ial.result
11320 if self.op.dry_run:
11321 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11322 constants.JOB_IDS_KEY: [],
11323 })
11325 def _ConstructPartialResult(self):
11326 """Contructs the partial result.
11329 (allocatable, failed) = self.ia_result
11331 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11332 map(compat.fst, allocatable),
11333 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11336 def Exec(self, feedback_fn):
11337 """Executes the opcode.
11340 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11341 (allocatable, failed) = self.ia_result
11342 jobs = []
11344 for (name, nodes) in allocatable:
11345 op = op2inst.pop(name)
11347 if len(nodes) > 1:
11348 (op.pnode, op.snode) = nodes
11349 else:
11350 (op.pnode,) = nodes
11352 jobs.append([op])
11354 missing = set(op2inst.keys()) - set(failed)
11355 assert not missing, \
11356 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11358 return ResultWithJobs(jobs, **self._ConstructPartialResult())
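# Illustrative sketch (hypothetical instance names): the dictionary merged
# into the result above, and used directly as dry_run_result in CheckPrereq,
# looks roughly like
#   {opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: ["inst1.example.com"],
#    opcodes.OpInstanceMultiAlloc.FAILED_KEY: ["inst2.example.com"],
#    constants.JOB_IDS_KEY: []}
# with JOB_IDS_KEY only left empty in dry-run mode; otherwise it is filled in
# by the job submission layer for the jobs returned here.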
11361 def _CheckRADOSFreeSpace():
11362 """Compute disk size requirements inside the RADOS cluster.
11365 # For the RADOS cluster we assume there is always enough space.
11369 class LUInstanceConsole(NoHooksLU):
11370 """Connect to an instance's console.
11372 This is somewhat special in that it returns the command line that
11373 you need to run on the master node in order to connect to the
11374 console.

11376 """
11378 REQ_BGL = False
11379 def ExpandNames(self):
11380 self.share_locks = _ShareAll()
11381 self._ExpandAndLockInstance()
11383 def CheckPrereq(self):
11384 """Check prerequisites.
11386 This checks that the instance is in the cluster.
11389 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11390 assert self.instance is not None, \
11391 "Cannot retrieve locked instance %s" % self.op.instance_name
11392 _CheckNodeOnline(self, self.instance.primary_node)
11394 def Exec(self, feedback_fn):
11395 """Connect to the console of an instance
11398 instance = self.instance
11399 node = instance.primary_node
11401 node_insts = self.rpc.call_instance_list([node],
11402 [instance.hypervisor])[node]
11403 node_insts.Raise("Can't get node information from %s" % node)
11405 if instance.name not in node_insts.payload:
11406 if instance.admin_state == constants.ADMINST_UP:
11407 state = constants.INSTST_ERRORDOWN
11408 elif instance.admin_state == constants.ADMINST_DOWN:
11409 state = constants.INSTST_ADMINDOWN
11410 else:
11411 state = constants.INSTST_ADMINOFFLINE
11412 raise errors.OpExecError("Instance %s is not running (state %s)" %
11413 (instance.name, state))
11415 logging.debug("Connecting to console of %s on %s", instance.name, node)
11417 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11420 def _GetInstanceConsole(cluster, instance):
11421 """Returns console information for an instance.
11423 @type cluster: L{objects.Cluster}
11424 @type instance: L{objects.Instance}
11428 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11429 # beparams and hvparams are passed separately, to avoid editing the
11430 # instance and then saving the defaults in the instance itself.
11431 hvparams = cluster.FillHV(instance)
11432 beparams = cluster.FillBE(instance)
11433 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11435 assert console.instance == instance.name
11436 assert console.Validate()
11438 return console.ToDict()
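# Illustrative note (not part of the original code): the dictionary returned
# above is the serialized form of the hypervisor-specific console object
# validated just before; its exact keys (console kind, host, command, ...)
# depend on the hypervisor, and it is what the "gnt-instance console" client
# side evaluates after LUInstanceConsole.Exec returns it.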
11441 class LUInstanceReplaceDisks(LogicalUnit):
11442 """Replace the disks of an instance.
11445 HPATH = "mirrors-replace"
11446 HTYPE = constants.HTYPE_INSTANCE
11449 def CheckArguments(self):
11450 """Check arguments.
11453 remote_node = self.op.remote_node
11454 ialloc = self.op.iallocator
11455 if self.op.mode == constants.REPLACE_DISK_CHG:
11456 if remote_node is None and ialloc is None:
11457 raise errors.OpPrereqError("When changing the secondary either an"
11458 " iallocator script must be used or the"
11459 " new node given", errors.ECODE_INVAL)
11461 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11463 elif remote_node is not None or ialloc is not None:
11464 # Not replacing the secondary
11465 raise errors.OpPrereqError("The iallocator and new node options can"
11466 " only be used when changing the"
11467 " secondary node", errors.ECODE_INVAL)
11469 def ExpandNames(self):
11470 self._ExpandAndLockInstance()
11472 assert locking.LEVEL_NODE not in self.needed_locks
11473 assert locking.LEVEL_NODE_RES not in self.needed_locks
11474 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11476 assert self.op.iallocator is None or self.op.remote_node is None, \
11477 "Conflicting options"
11479 if self.op.remote_node is not None:
11480 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11482 # Warning: do not remove the locking of the new secondary here
11483 # unless DRBD8.AddChildren is changed to work in parallel;
11484 # currently it doesn't since parallel invocations of
11485 # FindUnusedMinor will conflict
11486 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11487 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11488 else:
11489 self.needed_locks[locking.LEVEL_NODE] = []
11490 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11492 if self.op.iallocator is not None:
11493 # iallocator will select a new node in the same group
11494 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11495 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11497 self.needed_locks[locking.LEVEL_NODE_RES] = []
11499 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11500 self.op.iallocator, self.op.remote_node,
11501 self.op.disks, self.op.early_release,
11502 self.op.ignore_ipolicy)
11504 self.tasklets = [self.replacer]
11506 def DeclareLocks(self, level):
11507 if level == locking.LEVEL_NODEGROUP:
11508 assert self.op.remote_node is None
11509 assert self.op.iallocator is not None
11510 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11512 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11513 # Lock all groups used by instance optimistically; this requires going
11514 # via the node before it's locked, requiring verification later on
11515 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11516 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11518 elif level == locking.LEVEL_NODE:
11519 if self.op.iallocator is not None:
11520 assert self.op.remote_node is None
11521 assert not self.needed_locks[locking.LEVEL_NODE]
11522 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11524 # Lock member nodes of all locked groups
11525 self.needed_locks[locking.LEVEL_NODE] = \
11526 [node_name
11527 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11528 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11529 else:
11530 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11532 self._LockInstancesNodes()
11534 elif level == locking.LEVEL_NODE_RES:
11536 self.needed_locks[locking.LEVEL_NODE_RES] = \
11537 self.needed_locks[locking.LEVEL_NODE]
11539 def BuildHooksEnv(self):
11540 """Build hooks env.
11542 This runs on the master, the primary and all the secondaries.
11545 instance = self.replacer.instance
11547 "MODE": self.op.mode,
11548 "NEW_SECONDARY": self.op.remote_node,
11549 "OLD_SECONDARY": instance.secondary_nodes[0],
11551 env.update(_BuildInstanceHookEnvByObject(self, instance))
11554 def BuildHooksNodes(self):
11555 """Build hooks nodes.
11558 instance = self.replacer.instance
11559 nl = [
11560 self.cfg.GetMasterNode(),
11561 instance.primary_node,
11562 ]
11563 if self.op.remote_node is not None:
11564 nl.append(self.op.remote_node)
11566 return nl, nl
11567 def CheckPrereq(self):
11568 """Check prerequisites.
11571 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11572 self.op.iallocator is None)
11574 # Verify if node group locks are still correct
11575 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11576 if owned_groups:
11577 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11579 return LogicalUnit.CheckPrereq(self)
11582 class TLReplaceDisks(Tasklet):
11583 """Replaces disks for an instance.
11585 Note: Locking is not within the scope of this class.
11588 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11589 disks, early_release, ignore_ipolicy):
11590 """Initializes this class.
11593 Tasklet.__init__(self, lu)
11596 self.instance_name = instance_name
11597 self.mode = mode
11598 self.iallocator_name = iallocator_name
11599 self.remote_node = remote_node
11600 self.disks = disks
11601 self.early_release = early_release
11602 self.ignore_ipolicy = ignore_ipolicy
11605 self.instance = None
11606 self.new_node = None
11607 self.target_node = None
11608 self.other_node = None
11609 self.remote_node_info = None
11610 self.node_secondary_ip = None
11612 @staticmethod
11613 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11614 """Compute a new secondary node using an IAllocator.
11617 req = iallocator.IAReqRelocate(name=instance_name,
11618 relocate_from=list(relocate_from))
11619 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11621 ial.Run(iallocator_name)
11623 if not ial.success:
11624 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11625 " %s" % (iallocator_name, ial.info),
11626 errors.ECODE_NORES)
11628 remote_node_name = ial.result[0]
11630 lu.LogInfo("Selected new secondary for instance '%s': %s",
11631 instance_name, remote_node_name)
11633 return remote_node_name
11635 def _FindFaultyDisks(self, node_name):
11636 """Wrapper for L{_FindFaultyInstanceDisks}.
11639 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11642 def _CheckDisksActivated(self, instance):
11643 """Checks if the instance disks are activated.
11645 @param instance: The instance to check disks
11646 @return: True if they are activated, False otherwise
11649 nodes = instance.all_nodes
11651 for idx, dev in enumerate(instance.disks):
11652 for node in nodes:
11653 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11654 self.cfg.SetDiskID(dev, node)
11656 result = _BlockdevFind(self, node, dev, instance)
11658 if result.offline:
11659 continue
11660 elif result.fail_msg or not result.payload:
11661 return False
11663 return True
11665 def CheckPrereq(self):
11666 """Check prerequisites.
11668 This checks that the instance is in the cluster.
11671 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11672 assert instance is not None, \
11673 "Cannot retrieve locked instance %s" % self.instance_name
11675 if instance.disk_template != constants.DT_DRBD8:
11676 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11677 " instances", errors.ECODE_INVAL)
11679 if len(instance.secondary_nodes) != 1:
11680 raise errors.OpPrereqError("The instance has a strange layout,"
11681 " expected one secondary but found %d" %
11682 len(instance.secondary_nodes),
11683 errors.ECODE_FAULT)
11685 instance = self.instance
11686 secondary_node = instance.secondary_nodes[0]
11688 if self.iallocator_name is None:
11689 remote_node = self.remote_node
11690 else:
11691 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11692 instance.name, instance.secondary_nodes)
11694 if remote_node is None:
11695 self.remote_node_info = None
11696 else:
11697 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11698 "Remote node '%s' is not locked" % remote_node
11700 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11701 assert self.remote_node_info is not None, \
11702 "Cannot retrieve locked node %s" % remote_node
11704 if remote_node == self.instance.primary_node:
11705 raise errors.OpPrereqError("The specified node is the primary node of"
11706 " the instance", errors.ECODE_INVAL)
11708 if remote_node == secondary_node:
11709 raise errors.OpPrereqError("The specified node is already the"
11710 " secondary node of the instance",
11711 errors.ECODE_INVAL)
11713 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11714 constants.REPLACE_DISK_CHG):
11715 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11716 errors.ECODE_INVAL)
11718 if self.mode == constants.REPLACE_DISK_AUTO:
11719 if not self._CheckDisksActivated(instance):
11720 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11721 " first" % self.instance_name,
11722 errors.ECODE_STATE)
11723 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11724 faulty_secondary = self._FindFaultyDisks(secondary_node)
11726 if faulty_primary and faulty_secondary:
11727 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11728 " one node and can not be repaired"
11729 " automatically" % self.instance_name,
11730 errors.ECODE_STATE)
11732 if faulty_primary:
11733 self.disks = faulty_primary
11734 self.target_node = instance.primary_node
11735 self.other_node = secondary_node
11736 check_nodes = [self.target_node, self.other_node]
11737 elif faulty_secondary:
11738 self.disks = faulty_secondary
11739 self.target_node = secondary_node
11740 self.other_node = instance.primary_node
11741 check_nodes = [self.target_node, self.other_node]
11742 else:
11743 self.disks = []
11744 check_nodes = []
11746 else:
11747 # Non-automatic modes
11748 if self.mode == constants.REPLACE_DISK_PRI:
11749 self.target_node = instance.primary_node
11750 self.other_node = secondary_node
11751 check_nodes = [self.target_node, self.other_node]
11753 elif self.mode == constants.REPLACE_DISK_SEC:
11754 self.target_node = secondary_node
11755 self.other_node = instance.primary_node
11756 check_nodes = [self.target_node, self.other_node]
11758 elif self.mode == constants.REPLACE_DISK_CHG:
11759 self.new_node = remote_node
11760 self.other_node = instance.primary_node
11761 self.target_node = secondary_node
11762 check_nodes = [self.new_node, self.other_node]
11764 _CheckNodeNotDrained(self.lu, remote_node)
11765 _CheckNodeVmCapable(self.lu, remote_node)
11767 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11768 assert old_node_info is not None
11769 if old_node_info.offline and not self.early_release:
11770 # doesn't make sense to delay the release
11771 self.early_release = True
11772 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11773 " early-release mode", secondary_node)
11776 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11779 # If not specified all disks should be replaced
11781 self.disks = range(len(self.instance.disks))
11783 # TODO: This is ugly, but right now we can't distinguish between internal
11784 # submitted opcode and external one. We should fix that.
11785 if self.remote_node_info:
11786 # We change the node, lets verify it still meets instance policy
11787 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11788 cluster = self.cfg.GetClusterInfo()
11789 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11790 new_group_info)
11791 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11792 self.cfg, ignore=self.ignore_ipolicy)
11794 for node in check_nodes:
11795 _CheckNodeOnline(self.lu, node)
11797 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11798 self.other_node,
11799 self.target_node]
11800 if node_name is not None)
11802 # Release unneeded node and node resource locks
11803 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11804 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11805 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11807 # Release any owned node group
11808 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11810 # Check whether disks are valid
11811 for disk_idx in self.disks:
11812 instance.FindDisk(disk_idx)
11814 # Get secondary node IP addresses
11815 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11816 in self.cfg.GetMultiNodeInfo(touched_nodes))
11818 def Exec(self, feedback_fn):
11819 """Execute disk replacement.
11821 This dispatches the disk replacement to the appropriate handler.
11825 # Verify owned locks before starting operation
11826 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11827 assert set(owned_nodes) == set(self.node_secondary_ip), \
11828 ("Incorrect node locks, owning %s, expected %s" %
11829 (owned_nodes, self.node_secondary_ip.keys()))
11830 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11831 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11832 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11834 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11835 assert list(owned_instances) == [self.instance_name], \
11836 "Instance '%s' not locked" % self.instance_name
11838 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11839 "Should not own any node group lock at this point"
11842 feedback_fn("No disks need replacement for instance '%s'" %
11843 self.instance.name)
11846 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11847 (utils.CommaJoin(self.disks), self.instance.name))
11848 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11849 feedback_fn("Current seconary node: %s" %
11850 utils.CommaJoin(self.instance.secondary_nodes))
11852 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11854 # Activate the instance disks if we're replacing them on a down instance
11855 if activate_disks:
11856 _StartInstanceDisks(self.lu, self.instance, True)
11859 # Should we replace the secondary node?
11860 if self.new_node is not None:
11861 fn = self._ExecDrbd8Secondary
11862 else:
11863 fn = self._ExecDrbd8DiskOnly
11865 result = fn(feedback_fn)
11867 # Deactivate the instance disks if we're replacing them on a
11868 # down instance
11869 if activate_disks:
11870 _SafeShutdownInstanceDisks(self.lu, self.instance)
11872 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11875 # Verify owned locks
11876 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11877 nodes = frozenset(self.node_secondary_ip)
11878 assert ((self.early_release and not owned_nodes) or
11879 (not self.early_release and not (set(owned_nodes) - nodes))), \
11880 ("Not owning the correct locks, early_release=%s, owned=%r,"
11881 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11885 def _CheckVolumeGroup(self, nodes):
11886 self.lu.LogInfo("Checking volume groups")
11888 vgname = self.cfg.GetVGName()
11890 # Make sure volume group exists on all involved nodes
11891 results = self.rpc.call_vg_list(nodes)
11892 if not results:
11893 raise errors.OpExecError("Can't list volume groups on the nodes")
11895 for node in nodes:
11896 res = results[node]
11897 res.Raise("Error checking node %s" % node)
11898 if vgname not in res.payload:
11899 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11902 def _CheckDisksExistence(self, nodes):
11903 # Check disk existence
11904 for idx, dev in enumerate(self.instance.disks):
11905 if idx not in self.disks:
11906 continue
11908 for node in nodes:
11909 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11910 self.cfg.SetDiskID(dev, node)
11912 result = _BlockdevFind(self, node, dev, self.instance)
11914 msg = result.fail_msg
11915 if msg or not result.payload:
11917 msg = "disk not found"
11918 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11921 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11922 for idx, dev in enumerate(self.instance.disks):
11923 if idx not in self.disks:
11924 continue
11926 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11927 (idx, node_name))
11929 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11930 on_primary, ldisk=ldisk):
11931 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11932 " replace disks for instance %s" %
11933 (node_name, self.instance.name))
11935 def _CreateNewStorage(self, node_name):
11936 """Create new storage on the primary or secondary node.
11938 This is only used for same-node replaces, not for changing the
11939 secondary node, hence we don't want to modify the existing disk.
11941 """
11943 iv_names = {}
11944 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11945 for idx, dev in enumerate(disks):
11946 if idx not in self.disks:
11947 continue
11949 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11951 self.cfg.SetDiskID(dev, node_name)
11953 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11954 names = _GenerateUniqueNames(self.lu, lv_names)
11956 (data_disk, meta_disk) = dev.children
11957 vg_data = data_disk.logical_id[0]
11958 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11959 logical_id=(vg_data, names[0]),
11960 params=data_disk.params)
11961 vg_meta = meta_disk.logical_id[0]
11962 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11963 size=constants.DRBD_META_SIZE,
11964 logical_id=(vg_meta, names[1]),
11965 params=meta_disk.params)
11967 new_lvs = [lv_data, lv_meta]
11968 old_lvs = [child.Copy() for child in dev.children]
11969 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11970 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11972 # we pass force_create=True to force the LVM creation
11973 for new_lv in new_lvs:
11974 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11975 _GetInstanceInfoText(self.instance), False,
11976 excl_stor)
11978 return iv_names
11980 def _CheckDevices(self, node_name, iv_names):
11981 for name, (dev, _, _) in iv_names.iteritems():
11982 self.cfg.SetDiskID(dev, node_name)
11984 result = _BlockdevFind(self, node_name, dev, self.instance)
11986 msg = result.fail_msg
11987 if msg or not result.payload:
11989 msg = "disk not found"
11990 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11993 if result.payload.is_degraded:
11994 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11996 def _RemoveOldStorage(self, node_name, iv_names):
11997 for name, (_, old_lvs, _) in iv_names.iteritems():
11998 self.lu.LogInfo("Remove logical volumes for %s", name)
12000 for lv in old_lvs:
12001 self.cfg.SetDiskID(lv, node_name)
12003 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
12004 if msg:
12005 self.lu.LogWarning("Can't remove old LV: %s", msg,
12006 hint="remove unused LVs manually")
12008 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
12009 """Replace a disk on the primary or secondary for DRBD 8.
12011 The algorithm for replace is quite complicated:
12013 1. for each disk to be replaced:
12015 1. create new LVs on the target node with unique names
12016 1. detach old LVs from the drbd device
12017 1. rename old LVs to name_replaced.<time_t>
12018 1. rename new LVs to old LVs
12019 1. attach the new LVs (with the old names now) to the drbd device
12021 1. wait for sync across all devices
12023 1. for each modified disk:
12025 1. remove old LVs (which have the name name_replaced.<time_t>)
12027 Failures are not very well handled.

12029 """
12031 steps_total = 6
12032 # Step: check device activation
12033 self.lu.LogStep(1, steps_total, "Check device existence")
12034 self._CheckDisksExistence([self.other_node, self.target_node])
12035 self._CheckVolumeGroup([self.target_node, self.other_node])
12037 # Step: check other node consistency
12038 self.lu.LogStep(2, steps_total, "Check peer consistency")
12039 self._CheckDisksConsistency(self.other_node,
12040 self.other_node == self.instance.primary_node,
12043 # Step: create new storage
12044 self.lu.LogStep(3, steps_total, "Allocate new storage")
12045 iv_names = self._CreateNewStorage(self.target_node)
12047 # Step: for each lv, detach+rename*2+attach
12048 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12049 for dev, old_lvs, new_lvs in iv_names.itervalues():
12050 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
12052 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
12053 old_lvs)
12054 result.Raise("Can't detach drbd from local storage on node"
12055 " %s for device %s" % (self.target_node, dev.iv_name))
12057 #cfg.Update(instance)
12059 # ok, we created the new LVs, so now we know we have the needed
12060 # storage; as such, we proceed on the target node to rename
12061 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12062 # using the assumption that logical_id == physical_id (which in
12063 # turn is the unique_id on that node)
12065 # FIXME(iustin): use a better name for the replaced LVs
12066 temp_suffix = int(time.time())
12067 ren_fn = lambda d, suff: (d.physical_id[0],
12068 d.physical_id[1] + "_replaced-%s" % suff)
12070 # Build the rename list based on what LVs exist on the node
12071 rename_old_to_new = []
12072 for to_ren in old_lvs:
12073 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12074 if not result.fail_msg and result.payload:
12076 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12078 self.lu.LogInfo("Renaming the old LVs on the target node")
12079 result = self.rpc.call_blockdev_rename(self.target_node,
12080 rename_old_to_new)
12081 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12083 # Now we rename the new LVs to the old LVs
12084 self.lu.LogInfo("Renaming the new LVs on the target node")
12085 rename_new_to_old = [(new, old.physical_id)
12086 for old, new in zip(old_lvs, new_lvs)]
12087 result = self.rpc.call_blockdev_rename(self.target_node,
12088 rename_new_to_old)
12089 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12091 # Intermediate steps of in memory modifications
12092 for old, new in zip(old_lvs, new_lvs):
12093 new.logical_id = old.logical_id
12094 self.cfg.SetDiskID(new, self.target_node)
12096 # We need to modify old_lvs so that removal later removes the
12097 # right LVs, not the newly added ones; note that old_lvs is a
12099 for disk in old_lvs:
12100 disk.logical_id = ren_fn(disk, temp_suffix)
12101 self.cfg.SetDiskID(disk, self.target_node)
12103 # Now that the new lvs have the old name, we can add them to the device
12104 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12105 result = self.rpc.call_blockdev_addchildren(self.target_node,
12106 (dev, self.instance), new_lvs)
12107 msg = result.fail_msg
12108 if msg:
12109 for new_lv in new_lvs:
12110 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12111 new_lv).fail_msg
12112 if msg2:
12113 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12114 hint=("cleanup manually the unused logical"
12115 " volumes"))
12116 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12118 cstep = itertools.count(5)
12120 if self.early_release:
12121 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12122 self._RemoveOldStorage(self.target_node, iv_names)
12123 # TODO: Check if releasing locks early still makes sense
12124 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12126 # Release all resource locks except those used by the instance
12127 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12128 keep=self.node_secondary_ip.keys())
12130 # Release all node locks while waiting for sync
12131 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12133 # TODO: Can the instance lock be downgraded here? Take the optional disk
12134 # shutdown in the caller into consideration.
12137 # This can fail as the old devices are degraded and _WaitForSync
12138 # does a combined result over all disks, so we don't check its return value
12139 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12140 _WaitForSync(self.lu, self.instance)
12142 # Check all devices manually
12143 self._CheckDevices(self.instance.primary_node, iv_names)
12145 # Step: remove old storage
12146 if not self.early_release:
12147 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12148 self._RemoveOldStorage(self.target_node, iv_names)
12150 def _ExecDrbd8Secondary(self, feedback_fn):
12151 """Replace the secondary node for DRBD 8.
12153 The algorithm for replace is quite complicated:
12154 - for all disks of the instance:
12155 - create new LVs on the new node with same names
12156 - shutdown the drbd device on the old secondary
12157 - disconnect the drbd network on the primary
12158 - create the drbd device on the new secondary
12159 - network attach the drbd on the primary, using an artifice:
12160 the drbd code for Attach() will connect to the network if it
12161 finds a device which is connected to the good local disks but
12162 not network enabled
12163 - wait for sync across all devices
12164 - remove all disks from the old secondary
12166 Failures are not very well handled.

12168 """
12170 steps_total = 6
12171 pnode = self.instance.primary_node
12173 # Step: check device activation
12174 self.lu.LogStep(1, steps_total, "Check device existence")
12175 self._CheckDisksExistence([self.instance.primary_node])
12176 self._CheckVolumeGroup([self.instance.primary_node])
12178 # Step: check other node consistency
12179 self.lu.LogStep(2, steps_total, "Check peer consistency")
12180 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12182 # Step: create new storage
12183 self.lu.LogStep(3, steps_total, "Allocate new storage")
12184 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12185 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12186 for idx, dev in enumerate(disks):
12187 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12188 (self.new_node, idx))
12189 # we pass force_create=True to force LVM creation
12190 for new_lv in dev.children:
12191 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12192 True, _GetInstanceInfoText(self.instance), False,
12193 excl_stor)
12195 # Step 4: drbd minors and drbd setups changes
12196 # after this, we must manually remove the drbd minors on both the
12197 # error and the success paths
12198 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12199 minors = self.cfg.AllocateDRBDMinor([self.new_node
12200 for dev in self.instance.disks],
12201 self.instance.name)
12202 logging.debug("Allocated minors %r", minors)
12204 iv_names = {}
12205 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12206 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12207 (self.new_node, idx))
12208 # create new devices on new_node; note that we create two IDs:
12209 # one without port, so the drbd will be activated without
12210 # networking information on the new node at this stage, and one
12211 # with network, for the latter activation in step 4
12212 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12213 if self.instance.primary_node == o_node1:
12214 p_minor = o_minor1
12215 else:
12216 assert self.instance.primary_node == o_node2, "Three-node instance?"
12217 p_minor = o_minor2
12219 new_alone_id = (self.instance.primary_node, self.new_node, None,
12220 p_minor, new_minor, o_secret)
12221 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12222 p_minor, new_minor, o_secret)
12224 iv_names[idx] = (dev, dev.children, new_net_id)
12225 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12227 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12228 logical_id=new_alone_id,
12229 children=dev.children,
12232 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12235 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12237 _GetInstanceInfoText(self.instance), False,
12239 except errors.GenericError:
12240 self.cfg.ReleaseDRBDMinors(self.instance.name)
12243 # We have new devices, shutdown the drbd on the old secondary
12244 for idx, dev in enumerate(self.instance.disks):
12245 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12246 self.cfg.SetDiskID(dev, self.target_node)
12247 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12248 (dev, self.instance)).fail_msg
12250 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12251 "node: %s" % (idx, msg),
12252 hint=("Please cleanup this device manually as"
12253 " soon as possible"))
12255 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12256 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12257 self.instance.disks)[pnode]
12259 msg = result.fail_msg
12260 if msg:
12261 # detaches didn't succeed (unlikely)
12262 self.cfg.ReleaseDRBDMinors(self.instance.name)
12263 raise errors.OpExecError("Can't detach the disks from the network on"
12264 " old node: %s" % (msg,))
12266 # if we managed to detach at least one, we update all the disks of
12267 # the instance to point to the new secondary
12268 self.lu.LogInfo("Updating instance configuration")
12269 for dev, _, new_logical_id in iv_names.itervalues():
12270 dev.logical_id = new_logical_id
12271 self.cfg.SetDiskID(dev, self.instance.primary_node)
12273 self.cfg.Update(self.instance, feedback_fn)
12275 # Release all node locks (the configuration has been updated)
12276 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12278 # and now perform the drbd attach
12279 self.lu.LogInfo("Attaching primary drbds to new secondary"
12280 " (standalone => connected)")
12281 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12282 self.new_node],
12283 self.node_secondary_ip,
12284 (self.instance.disks, self.instance),
12285 self.instance.name,
12286 False)
12287 for to_node, to_result in result.items():
12288 msg = to_result.fail_msg
12289 if msg:
12290 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12291 to_node, msg,
12292 hint=("please do a gnt-instance info to see the"
12293 " status of disks"))
12295 cstep = itertools.count(5)
12297 if self.early_release:
12298 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12299 self._RemoveOldStorage(self.target_node, iv_names)
12300 # TODO: Check if releasing locks early still makes sense
12301 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12303 # Release all resource locks except those used by the instance
12304 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12305 keep=self.node_secondary_ip.keys())
12307 # TODO: Can the instance lock be downgraded here? Take the optional disk
12308 # shutdown in the caller into consideration.
12311 # This can fail as the old devices are degraded and _WaitForSync
12312 # does a combined result over all disks, so we don't check its return value
12313 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12314 _WaitForSync(self.lu, self.instance)
12316 # Check all devices manually
12317 self._CheckDevices(self.instance.primary_node, iv_names)
12319 # Step: remove old storage
12320 if not self.early_release:
12321 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12322 self._RemoveOldStorage(self.target_node, iv_names)
12325 class LURepairNodeStorage(NoHooksLU):
12326 """Repairs the volume group on a node.
12331 def CheckArguments(self):
12332 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12334 storage_type = self.op.storage_type
12336 if (constants.SO_FIX_CONSISTENCY not in
12337 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12338 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12339 " repaired" % storage_type,
12340 errors.ECODE_INVAL)
12342 def ExpandNames(self):
12343 self.needed_locks = {
12344 locking.LEVEL_NODE: [self.op.node_name],
12345 }
12347 def _CheckFaultyDisks(self, instance, node_name):
12348 """Ensure faulty disks abort the opcode or at least warn."""
12349 try:
12350 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12351 node_name, True):
12352 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12353 " node '%s'" % (instance.name, node_name),
12354 errors.ECODE_STATE)
12355 except errors.OpPrereqError, err:
12356 if self.op.ignore_consistency:
12357 self.LogWarning(str(err.args[0]))
12358 else:
12359 raise
12361 def CheckPrereq(self):
12362 """Check prerequisites.
12365 # Check whether any instance on this node has faulty disks
12366 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12367 if inst.admin_state != constants.ADMINST_UP:
12368 continue
12369 check_nodes = set(inst.all_nodes)
12370 check_nodes.discard(self.op.node_name)
12371 for inst_node_name in check_nodes:
12372 self._CheckFaultyDisks(inst, inst_node_name)
12374 def Exec(self, feedback_fn):
12375 feedback_fn("Repairing storage unit '%s' on %s ..." %
12376 (self.op.name, self.op.node_name))
12378 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12379 result = self.rpc.call_storage_execute(self.op.node_name,
12380 self.op.storage_type, st_args,
12381 self.op.name,
12382 constants.SO_FIX_CONSISTENCY)
12383 result.Raise("Failed to repair storage unit '%s' on %s" %
12384 (self.op.name, self.op.node_name))
12387 class LUNodeEvacuate(NoHooksLU):
12388 """Evacuates instances off a list of nodes.
12393 _MODE2IALLOCATOR = {
12394 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12395 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12396 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12397 }
12398 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12399 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12400 constants.IALLOCATOR_NEVAC_MODES)
12402 def CheckArguments(self):
12403 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12405 def ExpandNames(self):
12406 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12408 if self.op.remote_node is not None:
12409 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12410 assert self.op.remote_node
12412 if self.op.remote_node == self.op.node_name:
12413 raise errors.OpPrereqError("Can not use evacuated node as a new"
12414 " secondary node", errors.ECODE_INVAL)
12416 if self.op.mode != constants.NODE_EVAC_SEC:
12417 raise errors.OpPrereqError("Without the use of an iallocator only"
12418 " secondary instances can be evacuated",
12419 errors.ECODE_INVAL)
12422 self.share_locks = _ShareAll()
12423 self.needed_locks = {
12424 locking.LEVEL_INSTANCE: [],
12425 locking.LEVEL_NODEGROUP: [],
12426 locking.LEVEL_NODE: [],
12429 # Determine nodes (via group) optimistically, needs verification once locks
12430 # have been acquired
12431 self.lock_nodes = self._DetermineNodes()
12433 def _DetermineNodes(self):
12434 """Gets the list of nodes to operate on.
12437 if self.op.remote_node is None:
12438 # Iallocator will choose any node(s) in the same group
12439 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12440 else:
12441 group_nodes = frozenset([self.op.remote_node])
12443 # Determine nodes to be locked
12444 return set([self.op.node_name]) | group_nodes
12446 def _DetermineInstances(self):
12447 """Builds list of instances to operate on.
12450 assert self.op.mode in constants.NODE_EVAC_MODES
12452 if self.op.mode == constants.NODE_EVAC_PRI:
12453 # Primary instances only
12454 inst_fn = _GetNodePrimaryInstances
12455 assert self.op.remote_node is None, \
12456 "Evacuating primary instances requires iallocator"
12457 elif self.op.mode == constants.NODE_EVAC_SEC:
12458 # Secondary instances only
12459 inst_fn = _GetNodeSecondaryInstances
12461 else:
12462 assert self.op.mode == constants.NODE_EVAC_ALL
12463 inst_fn = _GetNodeInstances
12464 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12466 raise errors.OpPrereqError("Due to an issue with the iallocator"
12467 " interface it is not possible to evacuate"
12468 " all instances at once; specify explicitly"
12469 " whether to evacuate primary or secondary"
12471 errors.ECODE_INVAL)
12473 return inst_fn(self.cfg, self.op.node_name)
12475 def DeclareLocks(self, level):
12476 if level == locking.LEVEL_INSTANCE:
12477 # Lock instances optimistically, needs verification once node and group
12478 # locks have been acquired
12479 self.needed_locks[locking.LEVEL_INSTANCE] = \
12480 set(i.name for i in self._DetermineInstances())
12482 elif level == locking.LEVEL_NODEGROUP:
12483 # Lock node groups for all potential target nodes optimistically, needs
12484 # verification once nodes have been acquired
12485 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12486 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12488 elif level == locking.LEVEL_NODE:
12489 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12491 def CheckPrereq(self):
12493 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12494 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12495 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12497 need_nodes = self._DetermineNodes()
12499 if not owned_nodes.issuperset(need_nodes):
12500 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12501 " locks were acquired, current nodes are"
12502 " are '%s', used to be '%s'; retry the"
12504 (self.op.node_name,
12505 utils.CommaJoin(need_nodes),
12506 utils.CommaJoin(owned_nodes)),
12507 errors.ECODE_STATE)
12509 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12510 if owned_groups != wanted_groups:
12511 raise errors.OpExecError("Node groups changed since locks were acquired,"
12512 " current groups are '%s', used to be '%s';"
12513 " retry the operation" %
12514 (utils.CommaJoin(wanted_groups),
12515 utils.CommaJoin(owned_groups)))
12517 # Determine affected instances
12518 self.instances = self._DetermineInstances()
12519 self.instance_names = [i.name for i in self.instances]
12521 if set(self.instance_names) != owned_instances:
12522 raise errors.OpExecError("Instances on node '%s' changed since locks"
12523 " were acquired, current instances are '%s',"
12524 " used to be '%s'; retry the operation" %
12525 (self.op.node_name,
12526 utils.CommaJoin(self.instance_names),
12527 utils.CommaJoin(owned_instances)))
12529 if self.instance_names:
12530 self.LogInfo("Evacuating instances from node '%s': %s",
12532 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12534 self.LogInfo("No instances to evacuate from node '%s'",
12537 if self.op.remote_node is not None:
12538 for i in self.instances:
12539 if i.primary_node == self.op.remote_node:
12540 raise errors.OpPrereqError("Node %s is the primary node of"
12541 " instance %s, cannot use it as"
12543 (self.op.remote_node, i.name),
12544 errors.ECODE_INVAL)
12546 def Exec(self, feedback_fn):
12547 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12549 if not self.instance_names:
12550 # No instances to evacuate
12551 jobs = []
12553 elif self.op.iallocator is not None:
12554 # TODO: Implement relocation to other group
12555 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12556 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12557 instances=list(self.instance_names))
12558 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12560 ial.Run(self.op.iallocator)
12562 if not ial.success:
12563 raise errors.OpPrereqError("Can't compute node evacuation using"
12564 " iallocator '%s': %s" %
12565 (self.op.iallocator, ial.info),
12566 errors.ECODE_NORES)
12568 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12570 elif self.op.remote_node is not None:
12571 assert self.op.mode == constants.NODE_EVAC_SEC
12572 jobs = [
12573 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12574 remote_node=self.op.remote_node,
12575 disks=[],
12576 mode=constants.REPLACE_DISK_CHG,
12577 early_release=self.op.early_release)]
12578 for instance_name in self.instance_names]
12581 raise errors.ProgrammerError("No iallocator or remote node")
12583 return ResultWithJobs(jobs)
12586 def _SetOpEarlyRelease(early_release, op):
12587 """Sets C{early_release} flag on opcodes if available.
12591 op.early_release = early_release
12592 except AttributeError:
12593 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
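# Illustrative use, mirroring the call in _LoadNodeEvacResult below
# (hypothetical opcode list "ops"): applying the helper over a job's opcodes
# sets the flag only on opcodes that actually define it:
#   ops = map(compat.partial(_SetOpEarlyRelease, True), ops)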
12598 def _NodeEvacDest(use_nodes, group, nodes):
12599 """Returns group or nodes depending on caller's choice.
12603 return utils.CommaJoin(nodes)
12608 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12609 """Unpacks the result of change-group and node-evacuate iallocator requests.
12611 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12612 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12614 @type lu: L{LogicalUnit}
12615 @param lu: Logical unit instance
12616 @type alloc_result: tuple/list
12617 @param alloc_result: Result from iallocator
12618 @type early_release: bool
12619 @param early_release: Whether to release locks early if possible
12620 @type use_nodes: bool
12621 @param use_nodes: Whether to display node names instead of groups
12624 (moved, failed, jobs) = alloc_result
12626 if failed:
12627 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12628 for (name, reason) in failed)
12629 lu.LogWarning("Unable to evacuate instances %s", failreason)
12630 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12633 lu.LogInfo("Instances to be moved: %s",
12634 utils.CommaJoin("%s (to %s)" %
12635 (name, _NodeEvacDest(use_nodes, group, nodes))
12636 for (name, group, nodes) in moved))
12638 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12639 map(opcodes.OpCode.LoadOpCode, ops))
12640 for ops in jobs]
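# Illustrative shape of C{alloc_result} as unpacked above (hypothetical
# values): "moved" pairs each instance with its target group and nodes,
# "failed" carries (name, reason) tuples and "jobs" holds, per job, the
# serialized opcodes that opcodes.OpCode.LoadOpCode restores, e.g.
#   moved  = [("inst1.example.com", "group1", ["node2.example.com"])]
#   failed = [("inst2.example.com", "not enough resources")]
#   jobs   = [[serialized_opcode_dict, ...]]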
12643 def _DiskSizeInBytesToMebibytes(lu, size):
12644 """Converts a disk size in bytes to mebibytes.
12646 Warns and rounds up if the size isn't an even multiple of 1 MiB.

12648 """
12649 (mib, remainder) = divmod(size, 1024 * 1024)
12651 if remainder != 0:
12652 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12653 " to not overwrite existing data (%s bytes will not be"
12654 " wiped)", (1024 * 1024) - remainder)
12655 mib += 1
12657 return mib
12660 class LUInstanceGrowDisk(LogicalUnit):
12661 """Grow a disk of an instance.
12664 HPATH = "disk-grow"
12665 HTYPE = constants.HTYPE_INSTANCE
12668 def ExpandNames(self):
12669 self._ExpandAndLockInstance()
12670 self.needed_locks[locking.LEVEL_NODE] = []
12671 self.needed_locks[locking.LEVEL_NODE_RES] = []
12672 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12673 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12675 def DeclareLocks(self, level):
12676 if level == locking.LEVEL_NODE:
12677 self._LockInstancesNodes()
12678 elif level == locking.LEVEL_NODE_RES:
12680 self.needed_locks[locking.LEVEL_NODE_RES] = \
12681 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12683 def BuildHooksEnv(self):
12684 """Build hooks env.
12686 This runs on the master, the primary and all the secondaries.
12690 "DISK": self.op.disk,
12691 "AMOUNT": self.op.amount,
12692 "ABSOLUTE": self.op.absolute,
12694 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12697 def BuildHooksNodes(self):
12698 """Build hooks nodes.
12701 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12704 def CheckPrereq(self):
12705 """Check prerequisites.
12707 This checks that the instance is in the cluster.
12710 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12711 assert instance is not None, \
12712 "Cannot retrieve locked instance %s" % self.op.instance_name
12713 nodenames = list(instance.all_nodes)
12714 for node in nodenames:
12715 _CheckNodeOnline(self, node)
12717 self.instance = instance
12719 if instance.disk_template not in constants.DTS_GROWABLE:
12720 raise errors.OpPrereqError("Instance's disk layout does not support"
12721 " growing", errors.ECODE_INVAL)
12723 self.disk = instance.FindDisk(self.op.disk)
12725 if self.op.absolute:
12726 self.target = self.op.amount
12727 self.delta = self.target - self.disk.size
12729 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12730 "current disk size (%s)" %
12731 (utils.FormatUnit(self.target, "h"),
12732 utils.FormatUnit(self.disk.size, "h")),
12733 errors.ECODE_STATE)
12735 self.delta = self.op.amount
12736 self.target = self.disk.size + self.delta
12738 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12739 utils.FormatUnit(self.delta, "h"),
12740 errors.ECODE_INVAL)
12742 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
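# Worked example (added by the editor, not part of the original module):
# for a 10240 MiB disk, absolute and relative requests differ only in how
# delta and target are derived from op.amount (all values in MiB); roughly
# the difference between "gnt-instance grow-disk --absolute" and the plain
# form, assuming the usual CLI wrapper:
#
#   # absolute: op.amount is the new total size
#   target = 20480               # op.amount
#   delta = 20480 - 10240        # 10240; negative values are rejected
#
#   # relative: op.amount is the increment
#   delta = 10240                # op.amount; negative values are rejected
#   target = 10240 + 10240       # 20480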
12744 def _CheckDiskSpace(self, nodenames, req_vgspace):
12745 template = self.instance.disk_template
12746 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12747 # TODO: check the free disk space for file, when that feature will be
12749 nodes = map(self.cfg.GetNodeInfo, nodenames)
12750 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12753 # With exclusive storage we need to do something smarter than just looking
12754 # at free space; for now, let's simply abort the operation.
12755 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12756 " is enabled", errors.ECODE_STATE)
12757 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12759 def Exec(self, feedback_fn):
12760 """Execute disk grow.
12763 instance = self.instance
12766 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12767 assert (self.owned_locks(locking.LEVEL_NODE) ==
12768 self.owned_locks(locking.LEVEL_NODE_RES))
12770 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12772 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12774 raise errors.OpExecError("Cannot activate block device to grow")
12776 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12777 (self.op.disk, instance.name,
12778 utils.FormatUnit(self.delta, "h"),
12779 utils.FormatUnit(self.target, "h")))
12781 # First run all grow ops in dry-run mode
12782 for node in instance.all_nodes:
12783 self.cfg.SetDiskID(disk, node)
12784 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12786 result.Raise("Dry-run grow request failed to node %s" % node)
12789 # Get disk size from primary node for wiping
12790 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12791 result.Raise("Failed to retrieve disk size from node '%s'" %
12792 instance.primary_node)
12794 (disk_size_in_bytes, ) = result.payload
12796 if disk_size_in_bytes is None:
12797 raise errors.OpExecError("Failed to retrieve disk size from primary"
12798 " node '%s'" % instance.primary_node)
12800 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12802 assert old_disk_size >= disk.size, \
12803 ("Retrieved disk size too small (got %s, should be at least %s)" %
12804 (old_disk_size, disk.size))
12806 old_disk_size = None
12808 # We know that (as far as we can test) operations across different
12809 # nodes will succeed, time to run it for real on the backing storage
12810 for node in instance.all_nodes:
12811 self.cfg.SetDiskID(disk, node)
12812 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12814 result.Raise("Grow request failed to node %s" % node)
12816 # And now execute it for logical storage, on the primary node
12817 node = instance.primary_node
12818 self.cfg.SetDiskID(disk, node)
12819 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12821 result.Raise("Grow request failed to node %s" % node)
12823 disk.RecordGrow(self.delta)
12824 self.cfg.Update(instance, feedback_fn)
12826 # Changes have been recorded, release node lock
12827 _ReleaseLocks(self, locking.LEVEL_NODE)
12829 # Downgrade lock while waiting for sync
12830 self.glm.downgrade(locking.LEVEL_INSTANCE)
12832 assert wipe_disks ^ (old_disk_size is None)
12835 assert instance.disks[self.op.disk] == disk
12837 # Wipe newly added disk space
12838 _WipeDisks(self, instance,
12839 disks=[(self.op.disk, disk, old_disk_size)])
12841 if self.op.wait_for_sync:
12842 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12844 self.LogWarning("Disk syncing has not returned a good status; check"
12846 if instance.admin_state != constants.ADMINST_UP:
12847 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12848 elif instance.admin_state != constants.ADMINST_UP:
12849 self.LogWarning("Not shutting down the disk even though the instance"
12850 " is not supposed to be running, because wait for"
12851 " sync mode was not requested")
12853 assert self.owned_locks(locking.LEVEL_NODE_RES)
12854 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12857 class LUInstanceQueryData(NoHooksLU):
12858 """Query runtime instance data.
12863 def ExpandNames(self):
12864 self.needed_locks = {}
12866 # Use locking if requested or when non-static information is wanted
12867 if not (self.op.static or self.op.use_locking):
12868 self.LogWarning("Non-static data requested, locks need to be acquired")
12869 self.op.use_locking = True
12871 if self.op.instances or not self.op.use_locking:
12872 # Expand instance names right here
12873 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12875 # Will use acquired locks
12876 self.wanted_names = None
12878 if self.op.use_locking:
12879 self.share_locks = _ShareAll()
12881 if self.wanted_names is None:
12882 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12884 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12886 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12887 self.needed_locks[locking.LEVEL_NODE] = []
12888 self.needed_locks[locking.LEVEL_NETWORK] = []
12889 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12891 def DeclareLocks(self, level):
12892 if self.op.use_locking:
12893 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12894 if level == locking.LEVEL_NODEGROUP:
12896 # Lock all groups used by instances optimistically; this requires going
12897 # via the node before it's locked, requiring verification later on
12898 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12899 frozenset(group_uuid
12900 for instance_name in owned_instances
12902 self.cfg.GetInstanceNodeGroups(instance_name))
12904 elif level == locking.LEVEL_NODE:
12905 self._LockInstancesNodes()
12907 elif level == locking.LEVEL_NETWORK:
12908 self.needed_locks[locking.LEVEL_NETWORK] = \
12910 for instance_name in owned_instances
12912 self.cfg.GetInstanceNetworks(instance_name))
12914 def CheckPrereq(self):
12915 """Check prerequisites.
12917 This only checks the optional instance list against the existing names.
12920 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12921 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12922 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12923 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12925 if self.wanted_names is None:
12926 assert self.op.use_locking, "Locking was not used"
12927 self.wanted_names = owned_instances
12929 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12931 if self.op.use_locking:
12932 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12935 assert not (owned_instances or owned_groups or
12936 owned_nodes or owned_networks)
12938 self.wanted_instances = instances.values()
12940 def _ComputeBlockdevStatus(self, node, instance, dev):
12941 """Returns the status of a block device
12944 if self.op.static or not node:
12947 self.cfg.SetDiskID(dev, node)
12949 result = self.rpc.call_blockdev_find(node, dev)
12953 result.Raise("Can't compute disk status for %s" % instance.name)
12955 status = result.payload
12959 return (status.dev_path, status.major, status.minor,
12960 status.sync_percent, status.estimated_time,
12961 status.is_degraded, status.ldisk_status)
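#
# Illustrative sketch (added by the editor, not part of the original module):
# the tuple returned above, in order (dev_path, major, minor, sync_percent,
# estimated_time, is_degraded, ldisk_status); the values shown are
# hypothetical for a syncing DRBD device:
#
#   ("/dev/drbd0", 147, 0, 98.5, 12, False, constants.LDS_OKAY)
#
# In the static case, or when no node is given, no live status is queried.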
12963 def _ComputeDiskStatus(self, instance, snode, dev):
12964 """Compute block device status.
12967 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12969 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12971 def _ComputeDiskStatusInner(self, instance, snode, dev):
12972 """Compute block device status.
12974 @attention: The device has to be annotated already.
12977 if dev.dev_type in constants.LDS_DRBD:
12978 # we change the snode then (otherwise we use the one passed in)
12979 if dev.logical_id[0] == instance.primary_node:
12980 snode = dev.logical_id[1]
12982 snode = dev.logical_id[0]
12984 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12986 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12989 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12996 "iv_name": dev.iv_name,
12997 "dev_type": dev.dev_type,
12998 "logical_id": dev.logical_id,
12999 "physical_id": dev.physical_id,
13000 "pstatus": dev_pstatus,
13001 "sstatus": dev_sstatus,
13002 "children": dev_children,
13007 def Exec(self, feedback_fn):
13008 """Gather and return data"""
13011 cluster = self.cfg.GetClusterInfo()
13013 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
13014 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
13016 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
13017 for node in nodes.values()))
13019 group2name_fn = lambda uuid: groups[uuid].name
13020 for instance in self.wanted_instances:
13021 pnode = nodes[instance.primary_node]
13023 if self.op.static or pnode.offline:
13024 remote_state = None
13026 self.LogWarning("Primary node %s is marked offline, returning static"
13027 " information only for instance %s" %
13028 (pnode.name, instance.name))
13030 remote_info = self.rpc.call_instance_info(instance.primary_node,
13032 instance.hypervisor)
13033 remote_info.Raise("Error checking node %s" % instance.primary_node)
13034 remote_info = remote_info.payload
13035 if remote_info and "state" in remote_info:
13036 remote_state = "up"
13038 if instance.admin_state == constants.ADMINST_UP:
13039 remote_state = "down"
13041 remote_state = instance.admin_state
13043 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
13046 snodes_group_uuids = [nodes[snode_name].group
13047 for snode_name in instance.secondary_nodes]
13049 result[instance.name] = {
13050 "name": instance.name,
13051 "config_state": instance.admin_state,
13052 "run_state": remote_state,
13053 "pnode": instance.primary_node,
13054 "pnode_group_uuid": pnode.group,
13055 "pnode_group_name": group2name_fn(pnode.group),
13056 "snodes": instance.secondary_nodes,
13057 "snodes_group_uuids": snodes_group_uuids,
13058 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
13060 # this happens to be the same format used for hooks
13061 "nics": _NICListToTuple(self, instance.nics),
13062 "disk_template": instance.disk_template,
13064 "hypervisor": instance.hypervisor,
13065 "network_port": instance.network_port,
13066 "hv_instance": instance.hvparams,
13067 "hv_actual": cluster.FillHV(instance, skip_globals=True),
13068 "be_instance": instance.beparams,
13069 "be_actual": cluster.FillBE(instance),
13070 "os_instance": instance.osparams,
13071 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
13072 "serial_no": instance.serial_no,
13073 "mtime": instance.mtime,
13074 "ctime": instance.ctime,
13075 "uuid": instance.uuid,
13081 def PrepareContainerMods(mods, private_fn):
13082 """Prepares a list of container modifications by adding a private data field.
13084 @type mods: list of tuples; (operation, index, parameters)
13085 @param mods: List of modifications
13086 @type private_fn: callable or None
13087 @param private_fn: Callable for constructing a private data field for a
13092 if private_fn is None:
13097 return [(op, idx, params, fn()) for (op, idx, params) in mods]
13100 #: Type description for changes as returned by L{ApplyContainerMods}'s
13102 _TApplyContModsCbChanges = \
13103 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13104 ht.TNonEmptyString,
13109 def ApplyContainerMods(kind, container, chgdesc, mods,
13110 create_fn, modify_fn, remove_fn):
13111 """Applies descriptions in C{mods} to C{container}.
13114 @param kind: One-word item description
13115 @type container: list
13116 @param container: Container to modify
13117 @type chgdesc: None or list
13118 @param chgdesc: List of applied changes
13120 @param mods: Modifications as returned by L{PrepareContainerMods}
13121 @type create_fn: callable
13122 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13123 receives absolute item index, parameters and private data object as added
13124 by L{PrepareContainerMods}, returns tuple containing new item and changes
13126 @type modify_fn: callable
13127 @param modify_fn: Callback for modifying an existing item
13128 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13129 and private data object as added by L{PrepareContainerMods}, returns
13131 @type remove_fn: callable
13132 @param remove_fn: Callback on removing item; receives absolute item index,
13133 item and private data object as added by L{PrepareContainerMods}
13136 for (op, idx, params, private) in mods:
13139 absidx = len(container) - 1
13141 raise IndexError("Not accepting negative indices other than -1")
13142 elif idx > len(container):
13143 raise IndexError("Got %s index %s, but there are only %s" %
13144 (kind, idx, len(container)))
13150 if op == constants.DDM_ADD:
13151 # Calculate where item will be added
13153 addidx = len(container)
13157 if create_fn is None:
13160 (item, changes) = create_fn(addidx, params, private)
13163 container.append(item)
13166 assert idx <= len(container)
13167 # list.insert does so before the specified index
13168 container.insert(idx, item)
13170 # Retrieve existing item
13172 item = container[absidx]
13174 raise IndexError("Invalid %s index %s" % (kind, idx))
13176 if op == constants.DDM_REMOVE:
13179 if remove_fn is not None:
13180 remove_fn(absidx, item, private)
13182 changes = [("%s/%s" % (kind, absidx), "remove")]
13184 assert container[absidx] == item
13185 del container[absidx]
13186 elif op == constants.DDM_MODIFY:
13187 if modify_fn is not None:
13188 changes = modify_fn(absidx, item, params, private)
13190 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13192 assert _TApplyContModsCbChanges(changes)
13194 if not (chgdesc is None or changes is None):
13195 chgdesc.extend(changes)
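

# Illustrative usage sketch added by the editor, not part of the original
# module.  It shows how PrepareContainerMods and ApplyContainerMods cooperate
# on a plain Python list; the inline callbacks are simplified, hypothetical
# stand-ins for the real disk/NIC callbacks in LUInstanceSetParams below.
def _ExampleApplyContainerModsUsage():
  """Hypothetical helper demonstrating container modifications.

  """
  container = ["first", "second"]
  chgdesc = []

  # Append one item, then remove the item at index 0; no private data needed
  mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"value": "third"}),
                               (constants.DDM_REMOVE, 0, {})], None)

  ApplyContainerMods("item", container, chgdesc, mods,
                     # create_fn builds the new item from its parameters
                     lambda idx, params, private: (params["value"], []),
                     None, None)

  # container is now ["second", "third"], chgdesc holds ("item/0", "remove")
  return (container, chgdesc)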
13198 def _UpdateIvNames(base_index, disks):
13199 """Updates the C{iv_name} attribute of disks.
13201 @type disks: list of L{objects.Disk}
13204 for (idx, disk) in enumerate(disks):
13205 disk.iv_name = "disk/%s" % (base_index + idx, )
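#
# Illustrative example (added by the editor, not part of the original
# module): after disk removals or insertions the remaining disks are
# renumbered from the base index:
#
#   _UpdateIvNames(0, instance.disks)
#   # [d.iv_name for d in instance.disks] is now ["disk/0", "disk/1", ...]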
13208 class _InstNicModPrivate:
13209 """Data structure for network interface modifications.
13211 Used by L{LUInstanceSetParams}.
13214 def __init__(self):
13219 class LUInstanceSetParams(LogicalUnit):
13220 """Modifies an instances's parameters.
13223 HPATH = "instance-modify"
13224 HTYPE = constants.HTYPE_INSTANCE
13228 def _UpgradeDiskNicMods(kind, mods, verify_fn):
13229 assert ht.TList(mods)
13230 assert not mods or len(mods[0]) in (2, 3)
13232 if mods and len(mods[0]) == 2:
13236 for op, params in mods:
13237 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13238 result.append((op, -1, params))
13242 raise errors.OpPrereqError("Only one %s add or remove operation is"
13243 " supported at a time" % kind,
13244 errors.ECODE_INVAL)
13246 result.append((constants.DDM_MODIFY, op, params))
13248 assert verify_fn(result)
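#
# Illustrative sketch (added by the editor, not part of the original module):
# how the legacy 2-tuple format is upgraded to the (op, index, params)
# 3-tuple format.  Add/remove operations receive index -1 (append/last item),
# while a bare numeric index becomes a modify operation:
#
#   [(constants.DDM_ADD, {"size": 1024}), (2, {"mode": "ro"})]
#   # becomes
#   [(constants.DDM_ADD, -1, {"size": 1024}),
#    (constants.DDM_MODIFY, 2, {"mode": "ro"})]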
13255 def _CheckMods(kind, mods, key_types, item_fn):
13256 """Ensures requested disk/NIC modifications are valid.
13259 for (op, _, params) in mods:
13260 assert ht.TDict(params)
13262 # If 'key_types' is an empty dict, we assume we have an
13263 # 'ext' template and thus do not ForceDictType
13265 utils.ForceDictType(params, key_types)
13267 if op == constants.DDM_REMOVE:
13269 raise errors.OpPrereqError("No settings should be passed when"
13270 " removing a %s" % kind,
13271 errors.ECODE_INVAL)
13272 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13273 item_fn(op, params)
13275 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13278 def _VerifyDiskModification(op, params):
13279 """Verifies a disk modification.
13282 if op == constants.DDM_ADD:
13283 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13284 if mode not in constants.DISK_ACCESS_SET:
13285 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13286 errors.ECODE_INVAL)
13288 size = params.get(constants.IDISK_SIZE, None)
13290 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13291 constants.IDISK_SIZE, errors.ECODE_INVAL)
13295 except (TypeError, ValueError), err:
13296 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13297 errors.ECODE_INVAL)
13299 params[constants.IDISK_SIZE] = size
13301 elif op == constants.DDM_MODIFY:
13302 if constants.IDISK_SIZE in params:
13303 raise errors.OpPrereqError("Disk size change not possible, use"
13304 " grow-disk", errors.ECODE_INVAL)
13305 if constants.IDISK_MODE not in params:
13306 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13307 " modification supported, but missing",
13308 errors.ECODE_NOENT)
13309 if len(params) > 1:
13310 raise errors.OpPrereqError("Disk modification doesn't support"
13311 " additional arbitrary parameters",
13312 errors.ECODE_INVAL)
13315 def _VerifyNicModification(op, params):
13316 """Verifies a network interface modification.
13319 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13320 ip = params.get(constants.INIC_IP, None)
13321 req_net = params.get(constants.INIC_NETWORK, None)
13322 link = params.get(constants.NIC_LINK, None)
13323 mode = params.get(constants.NIC_MODE, None)
13324 if req_net is not None:
13325 if req_net.lower() == constants.VALUE_NONE:
13326 params[constants.INIC_NETWORK] = None
13328 elif link is not None or mode is not None:
13329 raise errors.OpPrereqError("If a network is given, mode or link"
13330 " should not be set",
13331 errors.ECODE_INVAL)
13333 if op == constants.DDM_ADD:
13334 macaddr = params.get(constants.INIC_MAC, None)
13335 if macaddr is None:
13336 params[constants.INIC_MAC] = constants.VALUE_AUTO
13339 if ip.lower() == constants.VALUE_NONE:
13340 params[constants.INIC_IP] = None
13342 if ip.lower() == constants.NIC_IP_POOL:
13343 if op == constants.DDM_ADD and req_net is None:
13344 raise errors.OpPrereqError("If ip=pool, parameter network"
13346 errors.ECODE_INVAL)
13348 if not netutils.IPAddress.IsValid(ip):
13349 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13350 errors.ECODE_INVAL)
13352 if constants.INIC_MAC in params:
13353 macaddr = params[constants.INIC_MAC]
13354 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13355 macaddr = utils.NormalizeAndValidateMac(macaddr)
13357 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13358 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13359 " modifying an existing NIC",
13360 errors.ECODE_INVAL)
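#
# Illustrative examples (added by the editor, not part of the original
# module) of NIC modifications accepted by the checks above; "net1" is a
# hypothetical network name:
#
#   # add a NIC with an IP from a pool; a network is then mandatory
#   (constants.DDM_ADD, -1, {constants.INIC_IP: constants.NIC_IP_POOL,
#                            constants.INIC_NETWORK: "net1"})
#
#   # modify NIC 0: clear its IP address
#   (constants.DDM_MODIFY, 0, {constants.INIC_IP: constants.VALUE_NONE})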
13362 def CheckArguments(self):
13363 if not (self.op.nics or self.op.disks or self.op.disk_template or
13364 self.op.hvparams or self.op.beparams or self.op.os_name or
13365 self.op.offline is not None or self.op.runtime_mem or
13367 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13369 if self.op.hvparams:
13370 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13371 "hypervisor", "instance", "cluster")
13373 self.op.disks = self._UpgradeDiskNicMods(
13374 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13375 self.op.nics = self._UpgradeDiskNicMods(
13376 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13378 if self.op.disks and self.op.disk_template is not None:
13379 raise errors.OpPrereqError("Disk template conversion and other disk"
13380 " changes not supported at the same time",
13381 errors.ECODE_INVAL)
13383 if (self.op.disk_template and
13384 self.op.disk_template in constants.DTS_INT_MIRROR and
13385 self.op.remote_node is None):
13386 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13387 " one requires specifying a secondary node",
13388 errors.ECODE_INVAL)
13390 # Check NIC modifications
13391 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13392 self._VerifyNicModification)
13395 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
13397 def ExpandNames(self):
13398 self._ExpandAndLockInstance()
13399 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13400 # Can't even acquire node locks in shared mode as upcoming changes in
13401 # Ganeti 2.6 will start to modify the node object on disk conversion
13402 self.needed_locks[locking.LEVEL_NODE] = []
13403 self.needed_locks[locking.LEVEL_NODE_RES] = []
13404 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13405 # Lock node group to look up the ipolicy
13406 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13408 def DeclareLocks(self, level):
13409 if level == locking.LEVEL_NODEGROUP:
13410 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13411 # Acquire locks for the instance's nodegroups optimistically. Needs
13412 # to be verified in CheckPrereq
13413 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13414 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13415 elif level == locking.LEVEL_NODE:
13416 self._LockInstancesNodes()
13417 if self.op.disk_template and self.op.remote_node:
13418 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13419 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13420 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13422 self.needed_locks[locking.LEVEL_NODE_RES] = \
13423 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13425 def BuildHooksEnv(self):
13426 """Build hooks env.
13428 This runs on the master, primary and secondaries.
13432 if constants.BE_MINMEM in self.be_new:
13433 args["minmem"] = self.be_new[constants.BE_MINMEM]
13434 if constants.BE_MAXMEM in self.be_new:
13435 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13436 if constants.BE_VCPUS in self.be_new:
13437 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13438 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13439 # information at all.
13441 if self._new_nics is not None:
13444 for nic in self._new_nics:
13445 n = copy.deepcopy(nic)
13446 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13447 n.nicparams = nicparams
13448 nics.append(_NICToTuple(self, n))
13450 args["nics"] = nics
13452 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13453 if self.op.disk_template:
13454 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13455 if self.op.runtime_mem:
13456 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13460 def BuildHooksNodes(self):
13461 """Build hooks nodes.
13464 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13467 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13468 old_params, cluster, pnode):
13470 update_params_dict = dict([(key, params[key])
13471 for key in constants.NICS_PARAMETERS
13474 req_link = update_params_dict.get(constants.NIC_LINK, None)
13475 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13477 new_net_uuid = None
13478 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13479 if new_net_uuid_or_name:
13480 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13481 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13484 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
13487 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13489 raise errors.OpPrereqError("No netparams found for the network"
13490 " %s, probably not connected" %
13491 new_net_obj.name, errors.ECODE_INVAL)
13492 new_params = dict(netparams)
13494 new_params = _GetUpdatedParams(old_params, update_params_dict)
13496 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13498 new_filled_params = cluster.SimpleFillNIC(new_params)
13499 objects.NIC.CheckParameterSyntax(new_filled_params)
13501 new_mode = new_filled_params[constants.NIC_MODE]
13502 if new_mode == constants.NIC_MODE_BRIDGED:
13503 bridge = new_filled_params[constants.NIC_LINK]
13504 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13506 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13508 self.warn.append(msg)
13510 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13512 elif new_mode == constants.NIC_MODE_ROUTED:
13513 ip = params.get(constants.INIC_IP, old_ip)
13515 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13516 " on a routed NIC", errors.ECODE_INVAL)
13518 elif new_mode == constants.NIC_MODE_OVS:
13519 # TODO: check OVS link
13520 self.LogInfo("OVS links are currently not checked for correctness")
13522 if constants.INIC_MAC in params:
13523 mac = params[constants.INIC_MAC]
13525 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13526 errors.ECODE_INVAL)
13527 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13528 # otherwise generate the MAC address
13529 params[constants.INIC_MAC] = \
13530 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13532 # or validate/reserve the current one
13534 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13535 except errors.ReservationError:
13536 raise errors.OpPrereqError("MAC address '%s' already in use"
13537 " in cluster" % mac,
13538 errors.ECODE_NOTUNIQUE)
13539 elif new_net_uuid != old_net_uuid:
13541 def get_net_prefix(net_uuid):
13544 nobj = self.cfg.GetNetwork(net_uuid)
13545 mac_prefix = nobj.mac_prefix
13549 new_prefix = get_net_prefix(new_net_uuid)
13550 old_prefix = get_net_prefix(old_net_uuid)
13551 if old_prefix != new_prefix:
13552 params[constants.INIC_MAC] = \
13553 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13555 # if there is a change in (ip, network) tuple
13556 new_ip = params.get(constants.INIC_IP, old_ip)
13557 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13559 # if IP is pool then require a network and generate one IP
13560 if new_ip.lower() == constants.NIC_IP_POOL:
13563 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13564 except errors.ReservationError:
13565 raise errors.OpPrereqError("Unable to get a free IP"
13566 " from the address pool",
13567 errors.ECODE_STATE)
13568 self.LogInfo("Chose IP %s from network %s",
13571 params[constants.INIC_IP] = new_ip
13573 raise errors.OpPrereqError("ip=pool, but no network found",
13574 errors.ECODE_INVAL)
13575 # Reserve the new IP in the new network, if any
13578 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13579 self.LogInfo("Reserving IP %s in network %s",
13580 new_ip, new_net_obj.name)
13581 except errors.ReservationError:
13582 raise errors.OpPrereqError("IP %s not available in network %s" %
13583 (new_ip, new_net_obj.name),
13584 errors.ECODE_NOTUNIQUE)
13585 # new network is None so check if new IP is a conflicting IP
13586 elif self.op.conflicts_check:
13587 _CheckForConflictingIp(self, new_ip, pnode)
13589 # release old IP if old network is not None
13590 if old_ip and old_net_uuid:
13592 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13593 except errors.AddressPoolError:
13594 logging.warning("Could not release IP %s, not contained in network %s",
13595 old_ip, old_net_obj.name)
13597 # there are no changes in (ip, network) tuple and old network is not None
13598 elif (old_net_uuid is not None and
13599 (req_link is not None or req_mode is not None)):
13600 raise errors.OpPrereqError("Not allowed to change link or mode of"
13601 " a NIC that is connected to a network",
13602 errors.ECODE_INVAL)
13604 private.params = new_params
13605 private.filled = new_filled_params
13607 def _PreCheckDiskTemplate(self, pnode_info):
13608 """CheckPrereq checks related to a new disk template."""
13609 # Arguments are passed to avoid configuration lookups
13610 instance = self.instance
13611 pnode = instance.primary_node
13612 cluster = self.cluster
13613 if instance.disk_template == self.op.disk_template:
13614 raise errors.OpPrereqError("Instance already has disk template %s" %
13615 instance.disk_template, errors.ECODE_INVAL)
13617 if (instance.disk_template,
13618 self.op.disk_template) not in self._DISK_CONVERSIONS:
13619 raise errors.OpPrereqError("Unsupported disk template conversion from"
13620 " %s to %s" % (instance.disk_template,
13621 self.op.disk_template),
13622 errors.ECODE_INVAL)
13623 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13624 msg="cannot change disk template")
13625 if self.op.disk_template in constants.DTS_INT_MIRROR:
13626 if self.op.remote_node == pnode:
13627 raise errors.OpPrereqError("Given new secondary node %s is the same"
13628 " as the primary node of the instance" %
13629 self.op.remote_node, errors.ECODE_STATE)
13630 _CheckNodeOnline(self, self.op.remote_node)
13631 _CheckNodeNotDrained(self, self.op.remote_node)
13632 # FIXME: here we assume that the old instance type is DT_PLAIN
13633 assert instance.disk_template == constants.DT_PLAIN
13634 disks = [{constants.IDISK_SIZE: d.size,
13635 constants.IDISK_VG: d.logical_id[0]}
13636 for d in instance.disks]
13637 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13638 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13640 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13641 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13642 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13644 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
13645 ignore=self.op.ignore_ipolicy)
13646 if pnode_info.group != snode_info.group:
13647 self.LogWarning("The primary and secondary nodes are in two"
13648 " different node groups; the disk parameters"
13649 " from the first disk's node group will be"
13652 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
13653 # Make sure none of the nodes require exclusive storage
13654 nodes = [pnode_info]
13655 if self.op.disk_template in constants.DTS_INT_MIRROR:
13657 nodes.append(snode_info)
13658 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13659 if compat.any(map(has_es, nodes)):
13660 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13661 " storage is enabled" % (instance.disk_template,
13662 self.op.disk_template))
13663 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13665 def CheckPrereq(self):
13666 """Check prerequisites.
13668 This only checks the instance list against the existing names.
13671 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13672 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13674 cluster = self.cluster = self.cfg.GetClusterInfo()
13675 assert self.instance is not None, \
13676 "Cannot retrieve locked instance %s" % self.op.instance_name
13678 pnode = instance.primary_node
13682 if (self.op.pnode is not None and self.op.pnode != pnode and
13683 not self.op.force):
13684 # verify that the instance is not up
13685 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13686 instance.hypervisor)
13687 if instance_info.fail_msg:
13688 self.warn.append("Can't get instance runtime information: %s" %
13689 instance_info.fail_msg)
13690 elif instance_info.payload:
13691 raise errors.OpPrereqError("Instance is still running on %s" % pnode,
13692 errors.ECODE_STATE)
13694 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13695 nodelist = list(instance.all_nodes)
13696 pnode_info = self.cfg.GetNodeInfo(pnode)
13697 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13699 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13700 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13701 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13703 # dictionary with instance information after the modification
13706 # Check disk modifications. This is done here and not in CheckArguments
13707 # (as with NICs), because we need to know the instance's disk template
13708 if instance.disk_template == constants.DT_EXT:
13709 self._CheckMods("disk", self.op.disks, {},
13710 self._VerifyDiskModification)
13712 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13713 self._VerifyDiskModification)
13715 # Prepare disk/NIC modifications
13716 self.diskmod = PrepareContainerMods(self.op.disks, None)
13717 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13719 # Check the validity of the `provider' parameter
13720 if instance.disk_template == constants.DT_EXT:
13721 for mod in self.diskmod:
13722 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13723 if mod[0] == constants.DDM_ADD:
13724 if ext_provider is None:
13725 raise errors.OpPrereqError("Instance template is '%s' but parameter"
13726 " '%s' is missing during disk add" %
13728 constants.IDISK_PROVIDER),
13729 errors.ECODE_NOENT)
13730 elif mod[0] == constants.DDM_MODIFY:
13732 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13734 constants.IDISK_PROVIDER,
13735 errors.ECODE_INVAL)
13737 for mod in self.diskmod:
13738 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13739 if ext_provider is not None:
13740 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13741 " instances of type '%s'" %
13742 (constants.IDISK_PROVIDER,
13744 errors.ECODE_INVAL)
13747 if self.op.os_name and not self.op.force:
13748 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13749 self.op.force_variant)
13750 instance_os = self.op.os_name
13752 instance_os = instance.os
13754 assert not (self.op.disk_template and self.op.disks), \
13755 "Can't modify disk template and apply disk changes at the same time"
13757 if self.op.disk_template:
13758 self._PreCheckDiskTemplate(pnode_info)
13760 # hvparams processing
13761 if self.op.hvparams:
13762 hv_type = instance.hypervisor
13763 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13764 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13765 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13768 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13769 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13770 self.hv_proposed = self.hv_new = hv_new # the new actual values
13771 self.hv_inst = i_hvdict # the new dict (without defaults)
13773 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13775 self.hv_new = self.hv_inst = {}
13777 # beparams processing
13778 if self.op.beparams:
13779 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13781 objects.UpgradeBeParams(i_bedict)
13782 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13783 be_new = cluster.SimpleFillBE(i_bedict)
13784 self.be_proposed = self.be_new = be_new # the new actual values
13785 self.be_inst = i_bedict # the new dict (without defaults)
13787 self.be_new = self.be_inst = {}
13788 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13789 be_old = cluster.FillBE(instance)
13791 # CPU param validation -- checking every time a parameter is
13792 # changed to cover all cases where either CPU mask or vcpus have
13794 if (constants.BE_VCPUS in self.be_proposed and
13795 constants.HV_CPU_MASK in self.hv_proposed):
13797 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13798 # Verify mask is consistent with number of vCPUs. Can skip this
13799 # test if only 1 entry in the CPU mask, which means same mask
13800 # is applied to all vCPUs.
13801 if (len(cpu_list) > 1 and
13802 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13803 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13805 (self.be_proposed[constants.BE_VCPUS],
13806 self.hv_proposed[constants.HV_CPU_MASK]),
13807 errors.ECODE_INVAL)
13809 # Only perform this test if a new CPU mask is given
13810 if constants.HV_CPU_MASK in self.hv_new:
13811 # Calculate the largest CPU number requested
13812 max_requested_cpu = max(map(max, cpu_list))
13813 # Check that all of the instance's nodes have enough physical CPUs to
13814 # satisfy the requested CPU mask
13815 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13816 max_requested_cpu + 1, instance.hypervisor)
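#
# Illustrative example (added by the editor, not part of the original
# module; the exact mask syntax is defined by utils.ParseMultiCpuMask):
# with BE_VCPUS = 3, a per-vCPU mask such as "1:2-3:4,6" yields one entry
# per vCPU and passes the length check above, while the highest CPU number
# used (6) requires at least 7 physical CPUs on every node:
#
#   cpu_list = utils.ParseMultiCpuMask("1:2-3:4,6")
#   # e.g. [[1], [2, 3], [4, 6]]; len(cpu_list) == 3, max of maxes == 6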
13818 # osparams processing
13819 if self.op.osparams:
13820 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13821 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13822 self.os_inst = i_osdict # the new dict (without defaults)
13826 #TODO(dynmem): do the appropriate check involving MINMEM
13827 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13828 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13829 mem_check_list = [pnode]
13830 if be_new[constants.BE_AUTO_BALANCE]:
13831 # either we changed auto_balance to yes or it was enabled before
13832 mem_check_list.extend(instance.secondary_nodes)
13833 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13834 instance.hypervisor)
13835 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13836 [instance.hypervisor], False)
13837 pninfo = nodeinfo[pnode]
13838 msg = pninfo.fail_msg
13840 # Assume the primary node is unreachable and go ahead
13841 self.warn.append("Can't get info from primary node %s: %s" %
13844 (_, _, (pnhvinfo, )) = pninfo.payload
13845 if not isinstance(pnhvinfo.get("memory_free", None), int):
13846 self.warn.append("Node data from primary node %s doesn't contain"
13847 " free memory information" % pnode)
13848 elif instance_info.fail_msg:
13849 self.warn.append("Can't get instance runtime information: %s" %
13850 instance_info.fail_msg)
13852 if instance_info.payload:
13853 current_mem = int(instance_info.payload["memory"])
13855 # Assume instance not running
13856 # (there is a slight race condition here, but it's not very
13857 # probable, and we have no other way to check)
13858 # TODO: Describe race condition
13860 #TODO(dynmem): do the appropriate check involving MINMEM
13861 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13862 pnhvinfo["memory_free"])
13864 raise errors.OpPrereqError("This change will prevent the instance"
13865 " from starting, due to %d MB of memory"
13866 " missing on its primary node" %
13867 miss_mem, errors.ECODE_NORES)
13869 if be_new[constants.BE_AUTO_BALANCE]:
13870 for node, nres in nodeinfo.items():
13871 if node not in instance.secondary_nodes:
13873 nres.Raise("Can't get info from secondary node %s" % node,
13874 prereq=True, ecode=errors.ECODE_STATE)
13875 (_, _, (nhvinfo, )) = nres.payload
13876 if not isinstance(nhvinfo.get("memory_free", None), int):
13877 raise errors.OpPrereqError("Secondary node %s didn't return free"
13878 " memory information" % node,
13879 errors.ECODE_STATE)
13880 #TODO(dynmem): do the appropriate check involving MINMEM
13881 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13882 raise errors.OpPrereqError("This change will prevent the instance"
13883 " from failover to its secondary node"
13884 " %s, due to not enough memory" % node,
13885 errors.ECODE_STATE)
13887 if self.op.runtime_mem:
13888 remote_info = self.rpc.call_instance_info(instance.primary_node,
13890 instance.hypervisor)
13891 remote_info.Raise("Error checking node %s" % instance.primary_node)
13892 if not remote_info.payload: # not running already
13893 raise errors.OpPrereqError("Instance %s is not running" %
13894 instance.name, errors.ECODE_STATE)
13896 current_memory = remote_info.payload["memory"]
13897 if (not self.op.force and
13898 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13899 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13900 raise errors.OpPrereqError("Instance %s must have between %d"
13901 " and %d MB of memory unless --force is"
13904 self.be_proposed[constants.BE_MINMEM],
13905 self.be_proposed[constants.BE_MAXMEM]),
13906 errors.ECODE_INVAL)
13908 delta = self.op.runtime_mem - current_memory
13910 _CheckNodeFreeMemory(self, instance.primary_node,
13911 "ballooning memory for instance %s" %
13912 instance.name, delta, instance.hypervisor)
13914 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13915 raise errors.OpPrereqError("Disk operations not supported for"
13916 " diskless instances", errors.ECODE_INVAL)
13918 def _PrepareNicCreate(_, params, private):
13919 self._PrepareNicModification(params, private, None, None,
13920 {}, cluster, pnode)
13921 return (None, None)
13923 def _PrepareNicMod(_, nic, params, private):
13924 self._PrepareNicModification(params, private, nic.ip, nic.network,
13925 nic.nicparams, cluster, pnode)
13928 def _PrepareNicRemove(_, params, __):
13930 net = params.network
13931 if net is not None and ip is not None:
13932 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13934 # Verify NIC changes (operating on a copy)
13935 nics = instance.nics[:]
13936 ApplyContainerMods("NIC", nics, None, self.nicmod,
13937 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13938 if len(nics) > constants.MAX_NICS:
13939 raise errors.OpPrereqError("Instance has too many network interfaces"
13940 " (%d), cannot add more" % constants.MAX_NICS,
13941 errors.ECODE_STATE)
13943 # Verify disk changes (operating on a copy)
13944 disks = instance.disks[:]
13945 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13946 if len(disks) > constants.MAX_DISKS:
13947 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13948 " more" % constants.MAX_DISKS,
13949 errors.ECODE_STATE)
13950 disk_sizes = [disk.size for disk in instance.disks]
13951 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13952 self.diskmod if op == constants.DDM_ADD)
13953 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13954 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13956 if self.op.offline is not None and self.op.offline:
13957 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13958 msg="can't change to offline")
13960 # Pre-compute NIC changes (necessary to use result in hooks)
13961 self._nic_chgdesc = []
13963 # Operate on copies as this is still in prereq
13964 nics = [nic.Copy() for nic in instance.nics]
13965 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13966 self._CreateNewNic, self._ApplyNicMods, None)
13967 self._new_nics = nics
13968 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13970 self._new_nics = None
13971 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13973 if not self.op.ignore_ipolicy:
13974 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13977 # Fill ispec with backend parameters
13978 ispec[constants.ISPEC_SPINDLE_USE] = \
13979 self.be_new.get(constants.BE_SPINDLE_USE, None)
13980 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13983 # Copy ispec to verify parameters with min/max values separately
13984 if self.op.disk_template:
13985 new_disk_template = self.op.disk_template
13987 new_disk_template = instance.disk_template
13988 ispec_max = ispec.copy()
13989 ispec_max[constants.ISPEC_MEM_SIZE] = \
13990 self.be_new.get(constants.BE_MAXMEM, None)
13991 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
13993 ispec_min = ispec.copy()
13994 ispec_min[constants.ISPEC_MEM_SIZE] = \
13995 self.be_new.get(constants.BE_MINMEM, None)
13996 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
13999 if (res_max or res_min):
14000 # FIXME: Improve error message by including information about whether
14001 # the upper or lower limit of the parameter fails the ipolicy.
14002 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
14003 (group_info, group_info.name,
14004 utils.CommaJoin(set(res_max + res_min))))
14005 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
14007 def _ConvertPlainToDrbd(self, feedback_fn):
14008 """Converts an instance from plain to drbd.
14011 feedback_fn("Converting template to drbd")
14012 instance = self.instance
14013 pnode = instance.primary_node
14014 snode = self.op.remote_node
14016 assert instance.disk_template == constants.DT_PLAIN
14018 # create a fake disk info for _GenerateDiskTemplate
14019 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
14020 constants.IDISK_VG: d.logical_id[0]}
14021 for d in instance.disks]
14022 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
14023 instance.name, pnode, [snode],
14024 disk_info, None, None, 0, feedback_fn,
14026 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
14028 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
14029 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
14030 info = _GetInstanceInfoText(instance)
14031 feedback_fn("Creating additional volumes...")
14032 # first, create the missing data and meta devices
14033 for disk in anno_disks:
14034 # unfortunately this is... not too nice
14035 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
14036 info, True, p_excl_stor)
14037 for child in disk.children:
14038 _CreateSingleBlockDev(self, snode, instance, child, info, True,
14040 # at this stage, all new LVs have been created, we can rename the
14042 feedback_fn("Renaming original volumes...")
14043 rename_list = [(o, n.children[0].logical_id)
14044 for (o, n) in zip(instance.disks, new_disks)]
14045 result = self.rpc.call_blockdev_rename(pnode, rename_list)
14046 result.Raise("Failed to rename original LVs")
14048 feedback_fn("Initializing DRBD devices...")
14049 # all child devices are in place, we can now create the DRBD devices
14050 for disk in anno_disks:
14051 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
14052 f_create = node == pnode
14053 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
14056 # at this point, the instance has been modified
14057 instance.disk_template = constants.DT_DRBD8
14058 instance.disks = new_disks
14059 self.cfg.Update(instance, feedback_fn)
14061 # Release node locks while waiting for sync
14062 _ReleaseLocks(self, locking.LEVEL_NODE)
14064 # disks are created, waiting for sync
14065 disk_abort = not _WaitForSync(self, instance,
14066 oneshot=not self.op.wait_for_sync)
14068 raise errors.OpExecError("There are some degraded disks for"
14069 " this instance, please cleanup manually")
14071 # Node resource locks will be released by caller
14073 def _ConvertDrbdToPlain(self, feedback_fn):
14074 """Converts an instance from drbd to plain.
14077 instance = self.instance
14079 assert len(instance.secondary_nodes) == 1
14080 assert instance.disk_template == constants.DT_DRBD8
14082 pnode = instance.primary_node
14083 snode = instance.secondary_nodes[0]
14084 feedback_fn("Converting template to plain")
14086 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
14087 new_disks = [d.children[0] for d in instance.disks]
14089 # copy over size and mode
14090 for parent, child in zip(old_disks, new_disks):
14091 child.size = parent.size
14092 child.mode = parent.mode
14094 # this is a DRBD disk, return its port to the pool
14095 # NOTE: this must be done right before the call to cfg.Update!
14096 for disk in old_disks:
14097 tcp_port = disk.logical_id[2]
14098 self.cfg.AddTcpUdpPort(tcp_port)
14100 # update instance structure
14101 instance.disks = new_disks
14102 instance.disk_template = constants.DT_PLAIN
14103 self.cfg.Update(instance, feedback_fn)
14105 # Release locks in case removing disks takes a while
14106 _ReleaseLocks(self, locking.LEVEL_NODE)
14108 feedback_fn("Removing volumes on the secondary node...")
14109 for disk in old_disks:
14110 self.cfg.SetDiskID(disk, snode)
14111 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14113 self.LogWarning("Could not remove block device %s on node %s,"
14114 " continuing anyway: %s", disk.iv_name, snode, msg)
14116 feedback_fn("Removing unneeded volumes on the primary node...")
14117 for idx, disk in enumerate(old_disks):
14118 meta = disk.children[1]
14119 self.cfg.SetDiskID(meta, pnode)
14120 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14122 self.LogWarning("Could not remove metadata for disk %d on node %s,"
14123 " continuing anyway: %s", idx, pnode, msg)
14125 def _CreateNewDisk(self, idx, params, _):
14126 """Creates a new disk.
14129 instance = self.instance
14132 if instance.disk_template in constants.DTS_FILEBASED:
14133 (file_driver, file_path) = instance.disks[0].logical_id
14134 file_path = os.path.dirname(file_path)
14136 file_driver = file_path = None
14139 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14140 instance.primary_node, instance.secondary_nodes,
14141 [params], file_path, file_driver, idx,
14142 self.Log, self.diskparams)[0]
14144 info = _GetInstanceInfoText(instance)
14146 logging.info("Creating volume %s for instance %s",
14147 disk.iv_name, instance.name)
14148 # Note: this needs to be kept in sync with _CreateDisks
14150 for node in instance.all_nodes:
14151 f_create = (node == instance.primary_node)
14153 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14154 except errors.OpExecError, err:
14155 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14156 disk.iv_name, disk, node, err)
14158 if self.cluster.prealloc_wipe_disks:
14160 _WipeDisks(self, instance,
14161 disks=[(idx, disk, 0)])
14164 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14168 def _ModifyDisk(idx, disk, params, _):
14169 """Modifies a disk.
14172 disk.mode = params[constants.IDISK_MODE]
14175 ("disk.mode/%d" % idx, disk.mode),
14178 def _RemoveDisk(self, idx, root, _):
14182 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14183 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14184 self.cfg.SetDiskID(disk, node)
14185 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14187 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14188 " continuing anyway", idx, node, msg)
14190 # if this is a DRBD disk, return its port to the pool
14191 if root.dev_type in constants.LDS_DRBD:
14192 self.cfg.AddTcpUdpPort(root.logical_id[2])
14194 def _CreateNewNic(self, idx, params, private):
14195 """Creates data structure for a new network interface.
14198 mac = params[constants.INIC_MAC]
14199 ip = params.get(constants.INIC_IP, None)
14200 net = params.get(constants.INIC_NETWORK, None)
14201 net_uuid = self.cfg.LookupNetwork(net)
14202 #TODO: not private.filled?? can a nic have no nicparams??
14203 nicparams = private.filled
14204 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams)
14208 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14209 (mac, ip, private.filled[constants.NIC_MODE],
14210 private.filled[constants.NIC_LINK],
14214 def _ApplyNicMods(self, idx, nic, params, private):
14215 """Modifies a network interface.
14220 for key in [constants.INIC_MAC, constants.INIC_IP]:
14222 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14223 setattr(nic, key, params[key])
14225 new_net = params.get(constants.INIC_NETWORK, nic.network)
14226 new_net_uuid = self.cfg.LookupNetwork(new_net)
14227 if new_net_uuid != nic.network:
14228 changes.append(("nic.network/%d" % idx, new_net))
14229 nic.network = new_net_uuid
14232 nic.nicparams = private.filled
14234 for (key, val) in nic.nicparams.items():
14235 changes.append(("nic.%s/%d" % (key, idx), val))
14239 def Exec(self, feedback_fn):
14240 """Modifies an instance.
14242 All parameters take effect only at the next restart of the instance.
14245 # Process here the warnings from CheckPrereq, as we don't have a
14246 # feedback_fn there.
14247 # TODO: Replace with self.LogWarning
14248 for warn in self.warn:
14249 feedback_fn("WARNING: %s" % warn)
14251 assert ((self.op.disk_template is None) ^
14252 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14253 "Not owning any node resource locks"
14256 instance = self.instance
14260 instance.primary_node = self.op.pnode
14263 if self.op.runtime_mem:
14264 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14266 self.op.runtime_mem)
14267 rpcres.Raise("Cannot modify instance runtime memory")
14268 result.append(("runtime_memory", self.op.runtime_mem))
14270 # Apply disk changes
14271 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14272 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14273 _UpdateIvNames(0, instance.disks)
14275 if self.op.disk_template:
14277 check_nodes = set(instance.all_nodes)
14278 if self.op.remote_node:
14279 check_nodes.add(self.op.remote_node)
14280 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14281 owned = self.owned_locks(level)
14282 assert not (check_nodes - owned), \
14283 ("Not owning the correct locks, owning %r, expected at least %r" %
14284 (owned, check_nodes))
14286 r_shut = _ShutdownInstanceDisks(self, instance)
14288 raise errors.OpExecError("Cannot shut down instance disks, unable to"
14289 " proceed with disk template conversion")
14290 mode = (instance.disk_template, self.op.disk_template)
14292 self._DISK_CONVERSIONS[mode](self, feedback_fn)
14294 self.cfg.ReleaseDRBDMinors(instance.name)
14296 result.append(("disk_template", self.op.disk_template))
14298 assert instance.disk_template == self.op.disk_template, \
14299 ("Expected disk template '%s', found '%s'" %
14300 (self.op.disk_template, instance.disk_template))
14302 # Release node and resource locks if there are any (they might already have
14303 # been released during disk conversion)
14304 _ReleaseLocks(self, locking.LEVEL_NODE)
14305 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14307 # Apply NIC changes
14308 if self._new_nics is not None:
14309 instance.nics = self._new_nics
14310 result.extend(self._nic_chgdesc)
14313 if self.op.hvparams:
14314 instance.hvparams = self.hv_inst
14315 for key, val in self.op.hvparams.iteritems():
14316 result.append(("hv/%s" % key, val))
14319 if self.op.beparams:
14320 instance.beparams = self.be_inst
14321 for key, val in self.op.beparams.iteritems():
14322 result.append(("be/%s" % key, val))
14325 if self.op.os_name:
14326 instance.os = self.op.os_name
14329 if self.op.osparams:
14330 instance.osparams = self.os_inst
14331 for key, val in self.op.osparams.iteritems():
14332 result.append(("os/%s" % key, val))
14334 if self.op.offline is None:
14337 elif self.op.offline:
14338 # Mark instance as offline
14339 self.cfg.MarkInstanceOffline(instance.name)
14340 result.append(("admin_state", constants.ADMINST_OFFLINE))
14342 # Mark instance as online, but stopped
14343 self.cfg.MarkInstanceDown(instance.name)
14344 result.append(("admin_state", constants.ADMINST_DOWN))
14346 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14348 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14349 self.owned_locks(locking.LEVEL_NODE)), \
14350 "All node locks should have been released by now"
14354 _DISK_CONVERSIONS = {
14355 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14356 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14360 class LUInstanceChangeGroup(LogicalUnit):
14361 HPATH = "instance-change-group"
14362 HTYPE = constants.HTYPE_INSTANCE
14365 def ExpandNames(self):
14366 self.share_locks = _ShareAll()
14368 self.needed_locks = {
14369 locking.LEVEL_NODEGROUP: [],
14370 locking.LEVEL_NODE: [],
14371 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14374 self._ExpandAndLockInstance()
14376 if self.op.target_groups:
14377 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14378 self.op.target_groups)
14379 else:
14380 self.req_target_uuids = None
14382 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14384 def DeclareLocks(self, level):
14385 if level == locking.LEVEL_NODEGROUP:
14386 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14388 if self.req_target_uuids:
14389 lock_groups = set(self.req_target_uuids)
14391 # Lock all groups used by instance optimistically; this requires going
14392 # via the node before it's locked, requiring verification later on
14393 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14394 lock_groups.update(instance_groups)
14395 else:
14396 # No target groups, need to lock all of them
14397 lock_groups = locking.ALL_SET
14399 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14401 elif level == locking.LEVEL_NODE:
14402 if self.req_target_uuids:
14403 # Lock all nodes used by instances
14404 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14405 self._LockInstancesNodes()
14407 # Lock all nodes in all potential target groups
14408 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14409 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14410 member_nodes = [node_name
14411 for group in lock_groups
14412 for node_name in self.cfg.GetNodeGroup(group).members]
14413 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14414 else:
14415 # Lock all nodes as all groups are potential targets
14416 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14418 def CheckPrereq(self):
14419 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14420 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14421 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14423 assert (self.req_target_uuids is None or
14424 owned_groups.issuperset(self.req_target_uuids))
14425 assert owned_instances == set([self.op.instance_name])
14427 # Get instance information
14428 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14430 # Check if node groups for locked instance are still correct
14431 assert owned_nodes.issuperset(self.instance.all_nodes), \
14432 ("Instance %s's nodes changed while we kept the lock" %
14433 self.op.instance_name)
14435 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14436 owned_groups)
14438 if self.req_target_uuids:
14439 # User requested specific target groups
14440 self.target_uuids = frozenset(self.req_target_uuids)
14441 else:
14442 # All groups except those used by the instance are potential targets
14443 self.target_uuids = owned_groups - inst_groups
14445 conflicting_groups = self.target_uuids & inst_groups
14446 if conflicting_groups:
14447 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14448 " used by the instance '%s'" %
14449 (utils.CommaJoin(conflicting_groups),
14450 self.op.instance_name),
14451 errors.ECODE_INVAL)
14453 if not self.target_uuids:
14454 raise errors.OpPrereqError("There are no possible target groups",
14455 errors.ECODE_INVAL)
14457 def BuildHooksEnv(self):
14458 """Build hooks env.
14461 assert self.target_uuids
14463 env = {
14464 "TARGET_GROUPS": " ".join(self.target_uuids),
14465 }
14467 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14469 return env
14471 def BuildHooksNodes(self):
14472 """Build hooks nodes.
14475 mn = self.cfg.GetMasterNode()
14476 return ([mn], [mn])
14478 def Exec(self, feedback_fn):
14479 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14481 assert instances == [self.op.instance_name], "Instance not locked"
14483 req = iallocator.IAReqGroupChange(instances=instances,
14484 target_groups=list(self.target_uuids))
14485 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14487 ial.Run(self.op.iallocator)
14489 if not ial.success:
14490 raise errors.OpPrereqError("Can't compute solution for changing group of"
14491 " instance '%s' using iallocator '%s': %s" %
14492 (self.op.instance_name, self.op.iallocator,
14493 ial.info), errors.ECODE_NORES)
14495 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14497 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14498 " instance '%s'", len(jobs), self.op.instance_name)
14500 return ResultWithJobs(jobs)
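# Hedged sketch (not in the original source): the opcode lists produced by
# _LoadNodeEvacResult are wrapped in ResultWithJobs, so the master processor
# submits them as follow-up jobs instead of executing them inline.  For
# example (instance name and opcodes are made-up illustrative values):
#
#   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1.example.com")],
#           [opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com")]]
#   return ResultWithJobs(jobs)   # the submitted job IDs end up in the result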
14503 class LUBackupQuery(NoHooksLU):
14504 """Query the exports list
14509 def CheckArguments(self):
14510 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14511 ["node", "export"], self.op.use_locking)
14513 def ExpandNames(self):
14514 self.expq.ExpandNames(self)
14516 def DeclareLocks(self, level):
14517 self.expq.DeclareLocks(self, level)
14519 def Exec(self, feedback_fn):
14520 result = {}
14522 for (node, expname) in self.expq.OldStyleQuery(self):
14523 if expname is None:
14524 result[node] = False
14525 else:
14526 result.setdefault(node, []).append(expname)
14528 return result
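# Example shape of the value returned above (hedged sketch, names are made
# up): nodes that failed to answer map to False, reachable nodes map to the
# list of export names found on them:
#
#   {
#     "node1.example.com": ["inst1.example.com"],
#     "node2.example.com": False,
#   }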
14531 class _ExportQuery(_QueryBase):
14532 FIELDS = query.EXPORT_FIELDS
14534 #: The node name is not a unique key for this query
14535 SORT_FIELD = "node"
14537 def ExpandNames(self, lu):
14538 lu.needed_locks = {}
14540 # The following variables interact with _QueryBase._GetNames
14541 if self.names:
14542 self.wanted = _GetWantedNodes(lu, self.names)
14543 else:
14544 self.wanted = locking.ALL_SET
14546 self.do_locking = self.use_locking
14548 if self.do_locking:
14549 lu.share_locks = _ShareAll()
14550 lu.needed_locks = {
14551 locking.LEVEL_NODE: self.wanted,
14552 }
14555 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14557 def DeclareLocks(self, lu, level):
14558 pass
14560 def _GetQueryData(self, lu):
14561 """Computes the list of nodes and their attributes.
14564 # Locking is not used
14566 assert not (compat.any(lu.glm.is_owned(level)
14567 for level in locking.LEVELS
14568 if level != locking.LEVEL_CLUSTER) or
14569 self.do_locking or self.use_locking)
14571 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14573 result = []
14575 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14576 if nres.fail_msg:
14577 result.append((node, None))
14578 else:
14579 result.extend((node, expname) for expname in nres.payload)
14581 return result
14584 class LUBackupPrepare(NoHooksLU):
14585 """Prepares an instance for an export and returns useful information.
14590 def ExpandNames(self):
14591 self._ExpandAndLockInstance()
14593 def CheckPrereq(self):
14594 """Check prerequisites.
14597 instance_name = self.op.instance_name
14599 self.instance = self.cfg.GetInstanceInfo(instance_name)
14600 assert self.instance is not None, \
14601 "Cannot retrieve locked instance %s" % self.op.instance_name
14602 _CheckNodeOnline(self, self.instance.primary_node)
14604 self._cds = _GetClusterDomainSecret()
14606 def Exec(self, feedback_fn):
14607 """Prepares an instance for an export.
14610 instance = self.instance
14612 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14613 salt = utils.GenerateSecret(8)
14615 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14616 result = self.rpc.call_x509_cert_create(instance.primary_node,
14617 constants.RIE_CERT_VALIDITY)
14618 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14620 (name, cert_pem) = result.payload
14622 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14623 cert_pem)
14625 return {
14626 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14627 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14628 salt),
14629 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14630 }
14632 return None
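# Hedged sketch (not part of the original source): the dictionary returned
# above roughly looks like the following; the concrete values are opaque
# handshake/HMAC material derived from the cluster domain secret and are
# consumed by the remote-export tooling on the destination side.
#
#   {
#     "handshake": (version, hmac, salt),
#     "x509_key_name": ("cert-name", "hmac-of-name", "salt"),
#     "x509_ca": "-----BEGIN CERTIFICATE----- ...",
#   }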
14635 class LUBackupExport(LogicalUnit):
14636 """Export an instance to an image in the cluster.
14639 HPATH = "instance-export"
14640 HTYPE = constants.HTYPE_INSTANCE
14643 def CheckArguments(self):
14644 """Check the arguments.
14647 self.x509_key_name = self.op.x509_key_name
14648 self.dest_x509_ca_pem = self.op.destination_x509_ca
14650 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14651 if not self.x509_key_name:
14652 raise errors.OpPrereqError("Missing X509 key name for encryption",
14653 errors.ECODE_INVAL)
14655 if not self.dest_x509_ca_pem:
14656 raise errors.OpPrereqError("Missing destination X509 CA",
14657 errors.ECODE_INVAL)
14659 def ExpandNames(self):
14660 self._ExpandAndLockInstance()
14662 # Lock all nodes for local exports
14663 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14664 # FIXME: lock only instance primary and destination node
14666 # Sad but true, for now we have do lock all nodes, as we don't know where
14667 # the previous export might be, and in this LU we search for it and
14668 # remove it from its current node. In the future we could fix this by:
14669 # - making a tasklet to search (share-lock all), then create the
14670 # new one, then one to remove, after
14671 # - removing the removal operation altogether
14672 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14674 # Allocations should be stopped while this LU runs with node locks, but
14675 # it doesn't have to be exclusive
14676 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14677 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14679 def DeclareLocks(self, level):
14680 """Last minute lock declaration."""
14681 # All nodes are locked anyway, so nothing to do here.
14683 def BuildHooksEnv(self):
14684 """Build hooks env.
14686 This will run on the master, primary node and target node.
14688 """
14689 env = {
14690 "EXPORT_MODE": self.op.mode,
14691 "EXPORT_NODE": self.op.target_node,
14692 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14693 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14694 # TODO: Generic function for boolean env variables
14695 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14696 }
14698 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14700 return env
14702 def BuildHooksNodes(self):
14703 """Build hooks nodes.
14706 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14708 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14709 nl.append(self.op.target_node)
14711 return (nl, nl)
14713 def CheckPrereq(self):
14714 """Check prerequisites.
14716 This checks that the instance and node names are valid.
14719 instance_name = self.op.instance_name
14721 self.instance = self.cfg.GetInstanceInfo(instance_name)
14722 assert self.instance is not None, \
14723 "Cannot retrieve locked instance %s" % self.op.instance_name
14724 _CheckNodeOnline(self, self.instance.primary_node)
14726 if (self.op.remove_instance and
14727 self.instance.admin_state == constants.ADMINST_UP and
14728 not self.op.shutdown):
14729 raise errors.OpPrereqError("Can not remove instance without shutting it"
14730 " down before", errors.ECODE_STATE)
14732 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14733 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14734 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14735 assert self.dst_node is not None
14737 _CheckNodeOnline(self, self.dst_node.name)
14738 _CheckNodeNotDrained(self, self.dst_node.name)
14741 self.dest_disk_info = None
14742 self.dest_x509_ca = None
14744 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14745 self.dst_node = None
14747 if len(self.op.target_node) != len(self.instance.disks):
14748 raise errors.OpPrereqError(("Received destination information for %s"
14749 " disks, but instance %s has %s disks") %
14750 (len(self.op.target_node), instance_name,
14751 len(self.instance.disks)),
14752 errors.ECODE_INVAL)
14754 cds = _GetClusterDomainSecret()
14756 # Check X509 key name
14757 try:
14758 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14759 except (TypeError, ValueError), err:
14760 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14761 errors.ECODE_INVAL)
14763 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14764 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14765 errors.ECODE_INVAL)
14767 # Load and verify CA
14768 try:
14769 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14770 except OpenSSL.crypto.Error, err:
14771 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14772 (err, ), errors.ECODE_INVAL)
14774 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14775 if errcode is not None:
14776 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14777 (msg, ), errors.ECODE_INVAL)
14779 self.dest_x509_ca = cert
14781 # Verify target information
14782 disk_info = []
14783 for idx, disk_data in enumerate(self.op.target_node):
14784 try:
14785 (host, port, magic) = \
14786 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14787 except errors.GenericError, err:
14788 raise errors.OpPrereqError("Target info for disk %s: %s" %
14789 (idx, err), errors.ECODE_INVAL)
14791 disk_info.append((host, port, magic))
14793 assert len(disk_info) == len(self.op.target_node)
14794 self.dest_disk_info = disk_info
14796 else:
14797 raise errors.ProgrammerError("Unhandled export mode %r" %
14798 self.op.mode)
14800 # instance disk type verification
14801 # TODO: Implement export support for file-based disks
14802 for disk in self.instance.disks:
14803 if disk.dev_type == constants.LD_FILE:
14804 raise errors.OpPrereqError("Export not supported for instances with"
14805 " file-based disks", errors.ECODE_INVAL)
14807 def _CleanupExports(self, feedback_fn):
14808 """Removes exports of current instance from all other nodes.
14810 If an instance in a cluster with nodes A..D was exported to node C, its
14811 exports will be removed from the nodes A, B and D.
14814 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14816 nodelist = self.cfg.GetNodeList()
14817 nodelist.remove(self.dst_node.name)
14819 # on one-node clusters nodelist will be empty after the removal
14820 # if we proceed the backup would be removed because OpBackupQuery
14821 # substitutes an empty list with the full cluster node list.
14822 iname = self.instance.name
14824 feedback_fn("Removing old exports for instance %s" % iname)
14825 exportlist = self.rpc.call_export_list(nodelist)
14826 for node in exportlist:
14827 if exportlist[node].fail_msg:
14828 continue
14829 if iname in exportlist[node].payload:
14830 msg = self.rpc.call_export_remove(node, iname).fail_msg
14831 if msg:
14832 self.LogWarning("Could not remove older export for instance %s"
14833 " on node %s: %s", iname, node, msg)
14835 def Exec(self, feedback_fn):
14836 """Export an instance to an image in the cluster.
14839 assert self.op.mode in constants.EXPORT_MODES
14841 instance = self.instance
14842 src_node = instance.primary_node
14844 if self.op.shutdown:
14845 # shutdown the instance, but not the disks
14846 feedback_fn("Shutting down instance %s" % instance.name)
14847 result = self.rpc.call_instance_shutdown(src_node, instance,
14848 self.op.shutdown_timeout)
14849 # TODO: Maybe ignore failures if ignore_remove_failures is set
14850 result.Raise("Could not shutdown instance %s on"
14851 " node %s" % (instance.name, src_node))
14853 # set the disks ID correctly since call_instance_start needs the
14854 # correct drbd minor to create the symlinks
14855 for disk in instance.disks:
14856 self.cfg.SetDiskID(disk, src_node)
14858 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14860 if activate_disks:
14861 # Activate the instance disks if we're exporting a stopped instance
14862 feedback_fn("Activating disks for %s" % instance.name)
14863 _StartInstanceDisks(self, instance, None)
14865 try:
14866 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14867 instance)
14869 helper.CreateSnapshots()
14870 try:
14871 if (self.op.shutdown and
14872 instance.admin_state == constants.ADMINST_UP and
14873 not self.op.remove_instance):
14874 assert not activate_disks
14875 feedback_fn("Starting instance %s" % instance.name)
14876 result = self.rpc.call_instance_start(src_node,
14877 (instance, None, None), False)
14878 msg = result.fail_msg
14879 if msg:
14880 feedback_fn("Failed to start instance: %s" % msg)
14881 _ShutdownInstanceDisks(self, instance)
14882 raise errors.OpExecError("Could not start instance: %s" % msg)
14884 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14885 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14886 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14887 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14888 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14890 (key_name, _, _) = self.x509_key_name
14892 dest_ca_pem = \
14893 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14894 self.dest_x509_ca)
14896 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14897 key_name, dest_ca_pem,
14898 timeouts)
14899 finally:
14900 helper.Cleanup()
14902 # Check for backwards compatibility
14903 assert len(dresults) == len(instance.disks)
14904 assert compat.all(isinstance(i, bool) for i in dresults), \
14905 "Not all results are boolean: %r" % dresults
14907 finally:
14908 if activate_disks:
14909 feedback_fn("Deactivating disks for %s" % instance.name)
14910 _ShutdownInstanceDisks(self, instance)
14912 if not (compat.all(dresults) and fin_resu):
14913 failures = []
14914 if not fin_resu:
14915 failures.append("export finalization")
14916 if not compat.all(dresults):
14917 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14918 if not dsk)
14919 failures.append("disk export: disk(s) %s" % fdsk)
14921 raise errors.OpExecError("Export failed, errors in %s" %
14922 utils.CommaJoin(failures))
14924 # At this point, the export was successful, we can cleanup/finish
14926 # Remove instance if requested
14927 if self.op.remove_instance:
14928 feedback_fn("Removing instance %s" % instance.name)
14929 _RemoveInstance(self, feedback_fn, instance,
14930 self.op.ignore_remove_failures)
14932 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14933 self._CleanupExports(feedback_fn)
14935 return fin_resu, dresults
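# Hedged note (not in the original source): the tuple returned above pairs the
# overall finalization status with one boolean per instance disk, e.g.
#
#   (True, [True, True])    # export finalized, both disks exported
#
# A False anywhere never reaches this return: the failure branch above turns
# it into an OpExecError first.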
14938 class LUBackupRemove(NoHooksLU):
14939 """Remove exports related to the named instance.
14944 def ExpandNames(self):
14945 self.needed_locks = {
14946 # We need all nodes to be locked in order for RemoveExport to work, but
14947 # we don't need to lock the instance itself, as nothing will happen to it
14948 # (and we can remove exports also for a removed instance)
14949 locking.LEVEL_NODE: locking.ALL_SET,
14951 # Removing backups is quick, so blocking allocations is justified
14952 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14953 }
14955 # Allocations should be stopped while this LU runs with node locks, but it
14956 # doesn't have to be exclusive
14957 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14959 def Exec(self, feedback_fn):
14960 """Remove any export.
14963 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14964 # If the instance was not found we'll try with the name that was passed in.
14965 # This will only work if it was an FQDN, though.
14966 fqdn_warn = False
14967 if not instance_name:
14968 fqdn_warn = True
14969 instance_name = self.op.instance_name
14971 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14972 exportlist = self.rpc.call_export_list(locked_nodes)
14973 found = False
14974 for node in exportlist:
14975 msg = exportlist[node].fail_msg
14976 if msg:
14977 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14978 continue
14979 if instance_name in exportlist[node].payload:
14980 found = True
14981 result = self.rpc.call_export_remove(node, instance_name)
14982 msg = result.fail_msg
14983 if msg:
14984 logging.error("Could not remove export for instance %s"
14985 " on node %s: %s", instance_name, node, msg)
14987 if fqdn_warn and not found:
14988 feedback_fn("Export not found. If trying to remove an export belonging"
14989 " to a deleted instance please use its Fully Qualified"
14990 " Domain Name.")
14993 class LUGroupAdd(LogicalUnit):
14994 """Logical unit for creating node groups.
14997 HPATH = "group-add"
14998 HTYPE = constants.HTYPE_GROUP
15001 def ExpandNames(self):
15002 # We need the new group's UUID here so that we can create and acquire the
15003 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
15004 # that it should not check whether the UUID exists in the configuration.
15005 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15006 self.needed_locks = {}
15007 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15009 def CheckPrereq(self):
15010 """Check prerequisites.
15012 This checks that the given group name is not an existing node group
15016 try:
15017 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15018 except errors.OpPrereqError:
15019 pass
15020 else:
15021 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
15022 " node group (UUID: %s)" %
15023 (self.op.group_name, existing_uuid),
15024 errors.ECODE_EXISTS)
15026 if self.op.ndparams:
15027 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
15029 if self.op.hv_state:
15030 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
15031 else:
15032 self.new_hv_state = None
15034 if self.op.disk_state:
15035 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
15036 else:
15037 self.new_disk_state = None
15039 if self.op.diskparams:
15040 for templ in constants.DISK_TEMPLATES:
15041 if templ in self.op.diskparams:
15042 utils.ForceDictType(self.op.diskparams[templ],
15043 constants.DISK_DT_TYPES)
15044 self.new_diskparams = self.op.diskparams
15045 try:
15046 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15047 except errors.OpPrereqError, err:
15048 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15049 errors.ECODE_INVAL)
15050 else:
15051 self.new_diskparams = {}
15053 if self.op.ipolicy:
15054 cluster = self.cfg.GetClusterInfo()
15055 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
15056 try:
15057 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
15058 except errors.ConfigurationError, err:
15059 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
15060 errors.ECODE_INVAL)
15062 def BuildHooksEnv(self):
15063 """Build hooks env.
15065 """
15066 return {
15067 "GROUP_NAME": self.op.group_name,
15068 }
15070 def BuildHooksNodes(self):
15071 """Build hooks nodes.
15074 mn = self.cfg.GetMasterNode()
15075 return ([mn], [mn])
15077 def Exec(self, feedback_fn):
15078 """Add the node group to the cluster.
15081 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
15082 uuid=self.group_uuid,
15083 alloc_policy=self.op.alloc_policy,
15084 ndparams=self.op.ndparams,
15085 diskparams=self.new_diskparams,
15086 ipolicy=self.op.ipolicy,
15087 hv_state_static=self.new_hv_state,
15088 disk_state_static=self.new_disk_state)
15090 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
15091 del self.remove_locks[locking.LEVEL_NODEGROUP]
15094 class LUGroupAssignNodes(NoHooksLU):
15095 """Logical unit for assigning nodes to groups.
15100 def ExpandNames(self):
15101 # These raise errors.OpPrereqError on their own:
15102 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15103 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15105 # We want to lock all the affected nodes and groups. We have readily
15106 # available the list of nodes, and the *destination* group. To gather the
15107 # list of "source" groups, we need to fetch node information later on.
15108 self.needed_locks = {
15109 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
15110 locking.LEVEL_NODE: self.op.nodes,
15111 }
15113 def DeclareLocks(self, level):
15114 if level == locking.LEVEL_NODEGROUP:
15115 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15117 # Try to get all affected nodes' groups without having the group or node
15118 # lock yet. Needs verification later in the code flow.
15119 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15121 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15123 def CheckPrereq(self):
15124 """Check prerequisites.
15127 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15128 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15129 frozenset(self.op.nodes))
15131 expected_locks = (set([self.group_uuid]) |
15132 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15133 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15134 if actual_locks != expected_locks:
15135 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15136 " current groups are '%s', used to be '%s'" %
15137 (utils.CommaJoin(expected_locks),
15138 utils.CommaJoin(actual_locks)))
15140 self.node_data = self.cfg.GetAllNodesInfo()
15141 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15142 instance_data = self.cfg.GetAllInstancesInfo()
15144 if self.group is None:
15145 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15146 (self.op.group_name, self.group_uuid))
15148 (new_splits, previous_splits) = \
15149 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15150 for node in self.op.nodes],
15151 self.node_data, instance_data)
15153 if new_splits:
15154 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15156 if not self.op.force:
15157 raise errors.OpExecError("The following instances get split by this"
15158 " change and --force was not given: %s" %
15159 fmt_new_splits)
15160 else:
15161 self.LogWarning("This operation will split the following instances: %s",
15162 fmt_new_splits)
15164 if previous_splits:
15165 self.LogWarning("In addition, these already-split instances continue"
15166 " to be split across groups: %s",
15167 utils.CommaJoin(utils.NiceSort(previous_splits)))
15169 def Exec(self, feedback_fn):
15170 """Assign nodes to a new group.
15173 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15175 self.cfg.AssignGroupNodes(mods)
15177 @staticmethod
15178 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15179 """Check for split instances after a node assignment.
15181 This method considers a series of node assignments as an atomic operation,
15182 and returns information about split instances after applying the set of
15185 In particular, it returns information about newly split instances, and
15186 instances that were already split, and remain so after the change.
15188 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15191 @type changes: list of (node_name, new_group_uuid) pairs.
15192 @param changes: list of node assignments to consider.
15193 @param node_data: a dict with data for all nodes
15194 @param instance_data: a dict with all instances to consider
15195 @rtype: a two-tuple
15196 @return: a list of instances that were previously okay and result split as a
15197 consequence of this change, and a list of instances that were previously
15198 split and this change does not fix.
15201 changed_nodes = dict((node, group) for node, group in changes
15202 if node_data[node].group != group)
15204 all_split_instances = set()
15205 previously_split_instances = set()
15207 def InstanceNodes(instance):
15208 return [instance.primary_node] + list(instance.secondary_nodes)
15210 for inst in instance_data.values():
15211 if inst.disk_template not in constants.DTS_INT_MIRROR:
15212 continue
15214 instance_nodes = InstanceNodes(inst)
15216 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15217 previously_split_instances.add(inst.name)
15219 if len(set(changed_nodes.get(node, node_data[node].group)
15220 for node in instance_nodes)) > 1:
15221 all_split_instances.add(inst.name)
15223 return (list(all_split_instances - previously_split_instances),
15224 list(previously_split_instances & all_split_instances))
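# Worked example (hedged, hypothetical data): moving node "n2" into group B
# while a DRBD instance spans "n1" and "n2" (both currently group A) makes
# that instance newly split; an instance already spanning two groups stays in
# the "previously split" bucket:
#
#   changes       = [("n2", "uuid-of-group-B")]
#   node_data     = {"n1": node in A, "n2": node in A, "n3": node in C}
#   instance_data = {"i1": DRBD on n1+n2, "i2": DRBD on n1+n3}
#
#   -> returns (["i1"], ["i2"])   # (newly split, still split)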
15227 class _GroupQuery(_QueryBase):
15228 FIELDS = query.GROUP_FIELDS
15230 def ExpandNames(self, lu):
15231 lu.needed_locks = {}
15233 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15234 self._cluster = lu.cfg.GetClusterInfo()
15235 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15237 if not self.names:
15238 self.wanted = [name_to_uuid[name]
15239 for name in utils.NiceSort(name_to_uuid.keys())]
15240 else:
15241 # Accept names to be either names or UUIDs.
15242 missing = []
15243 self.wanted = []
15244 all_uuid = frozenset(self._all_groups.keys())
15246 for name in self.names:
15247 if name in all_uuid:
15248 self.wanted.append(name)
15249 elif name in name_to_uuid:
15250 self.wanted.append(name_to_uuid[name])
15251 else:
15252 missing.append(name)
15254 if missing:
15255 raise errors.OpPrereqError("Some groups do not exist: %s" %
15256 utils.CommaJoin(missing),
15257 errors.ECODE_NOENT)
15259 def DeclareLocks(self, lu, level):
15260 pass
15262 def _GetQueryData(self, lu):
15263 """Computes the list of node groups and their attributes.
15266 do_nodes = query.GQ_NODE in self.requested_data
15267 do_instances = query.GQ_INST in self.requested_data
15269 group_to_nodes = None
15270 group_to_instances = None
15272 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15273 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15274 # latter GetAllInstancesInfo() is not enough, for we have to go through
15275 # instance->node. Hence, we will need to process nodes even if we only need
15276 # instance information.
15277 if do_nodes or do_instances:
15278 all_nodes = lu.cfg.GetAllNodesInfo()
15279 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15280 node_to_group = {}
15282 for node in all_nodes.values():
15283 if node.group in group_to_nodes:
15284 group_to_nodes[node.group].append(node.name)
15285 node_to_group[node.name] = node.group
15287 if do_instances:
15288 all_instances = lu.cfg.GetAllInstancesInfo()
15289 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15291 for instance in all_instances.values():
15292 node = instance.primary_node
15293 if node in node_to_group:
15294 group_to_instances[node_to_group[node]].append(instance.name)
15296 if not do_nodes:
15297 # Do not pass on node information if it was not requested.
15298 group_to_nodes = None
15300 return query.GroupQueryData(self._cluster,
15301 [self._all_groups[uuid]
15302 for uuid in self.wanted],
15303 group_to_nodes, group_to_instances,
15304 query.GQ_DISKPARAMS in self.requested_data)
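# Hedged sketch of the intermediate mappings built above (hypothetical
# names): group_to_nodes maps each requested group UUID to its node names,
# group_to_instances maps it to the instances whose primary node is in that
# group, e.g.
#
#   group_to_nodes     == {"uuid-1": ["node1", "node2"], "uuid-2": []}
#   group_to_instances == {"uuid-1": ["inst1"], "uuid-2": []}
#
# Either mapping is passed as None when the corresponding field set
# (GQ_NODE / GQ_INST) was not requested.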
15307 class LUGroupQuery(NoHooksLU):
15308 """Logical unit for querying node groups.
15313 def CheckArguments(self):
15314 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15315 self.op.output_fields, False)
15317 def ExpandNames(self):
15318 self.gq.ExpandNames(self)
15320 def DeclareLocks(self, level):
15321 self.gq.DeclareLocks(self, level)
15323 def Exec(self, feedback_fn):
15324 return self.gq.OldStyleQuery(self)
15327 class LUGroupSetParams(LogicalUnit):
15328 """Modifies the parameters of a node group.
15331 HPATH = "group-modify"
15332 HTYPE = constants.HTYPE_GROUP
15335 def CheckArguments(self):
15336 all_changes = [
15337 self.op.ndparams,
15338 self.op.diskparams,
15339 self.op.alloc_policy,
15340 self.op.hv_state,
15341 self.op.disk_state,
15342 self.op.ipolicy,
15343 ]
15345 if all_changes.count(None) == len(all_changes):
15346 raise errors.OpPrereqError("Please pass at least one modification",
15347 errors.ECODE_INVAL)
15349 def ExpandNames(self):
15350 # This raises errors.OpPrereqError on its own:
15351 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15353 self.needed_locks = {
15354 locking.LEVEL_INSTANCE: [],
15355 locking.LEVEL_NODEGROUP: [self.group_uuid],
15356 }
15358 self.share_locks[locking.LEVEL_INSTANCE] = 1
15360 def DeclareLocks(self, level):
15361 if level == locking.LEVEL_INSTANCE:
15362 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15364 # Lock instances optimistically, needs verification once group lock has
15365 # been acquired
15366 self.needed_locks[locking.LEVEL_INSTANCE] = \
15367 self.cfg.GetNodeGroupInstances(self.group_uuid)
15369 @staticmethod
15370 def _UpdateAndVerifyDiskParams(old, new):
15371 """Updates and verifies disk parameters.
15374 new_params = _GetUpdatedParams(old, new)
15375 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15376 return new_params
15378 def CheckPrereq(self):
15379 """Check prerequisites.
15382 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15384 # Check if locked instances are still correct
15385 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15387 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15388 cluster = self.cfg.GetClusterInfo()
15390 if self.group is None:
15391 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15392 (self.op.group_name, self.group_uuid))
15394 if self.op.ndparams:
15395 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15396 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15397 self.new_ndparams = new_ndparams
15399 if self.op.diskparams:
15400 diskparams = self.group.diskparams
15401 uavdp = self._UpdateAndVerifyDiskParams
15402 # For each disktemplate subdict update and verify the values
15403 new_diskparams = dict((dt,
15404 uavdp(diskparams.get(dt, {}),
15405 self.op.diskparams[dt]))
15406 for dt in constants.DISK_TEMPLATES
15407 if dt in self.op.diskparams)
15408 # As we've all subdicts of diskparams ready, lets merge the actual
15409 # dict with all updated subdicts
15410 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15412 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15413 except errors.OpPrereqError, err:
15414 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15415 errors.ECODE_INVAL)
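# Hedged example of the per-template merge performed above (made-up values):
# each subdict from self.op.diskparams is overlaid on the group's existing
# subdict, then the merged subdicts replace the corresponding keys via
# objects.FillDict, e.g.
#
#   group.diskparams    == {"drbd": {"resync-rate": 1024}}
#   op.diskparams       == {"drbd": {"metavg": "xenvg"}}
#   self.new_diskparams == {"drbd": {"resync-rate": 1024, "metavg": "xenvg"}}
#
# The result is then checked against DISK_DT_DEFAULTS so unknown option names
# are rejected here rather than at disk-creation time.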
15417 if self.op.hv_state:
15418 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15419 self.group.hv_state_static)
15421 if self.op.disk_state:
15422 self.new_disk_state = \
15423 _MergeAndVerifyDiskState(self.op.disk_state,
15424 self.group.disk_state_static)
15426 if self.op.ipolicy:
15427 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15428 self.op.ipolicy,
15429 group_policy=True)
15431 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15432 inst_filter = lambda inst: inst.name in owned_instances
15433 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15434 gmi = ganeti.masterd.instance
15435 violations = \
15436 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15437 self.group),
15438 new_ipolicy, instances, self.cfg)
15440 if violations:
15441 self.LogWarning("After the ipolicy change the following instances"
15442 " violate them: %s",
15443 utils.CommaJoin(violations))
15445 def BuildHooksEnv(self):
15446 """Build hooks env.
15448 """
15449 return {
15450 "GROUP_NAME": self.op.group_name,
15451 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15452 }
15454 def BuildHooksNodes(self):
15455 """Build hooks nodes.
15458 mn = self.cfg.GetMasterNode()
15459 return ([mn], [mn])
15461 def Exec(self, feedback_fn):
15462 """Modifies the node group.
15465 result = []
15467 if self.op.ndparams:
15468 self.group.ndparams = self.new_ndparams
15469 result.append(("ndparams", str(self.group.ndparams)))
15471 if self.op.diskparams:
15472 self.group.diskparams = self.new_diskparams
15473 result.append(("diskparams", str(self.group.diskparams)))
15475 if self.op.alloc_policy:
15476 self.group.alloc_policy = self.op.alloc_policy
15478 if self.op.hv_state:
15479 self.group.hv_state_static = self.new_hv_state
15481 if self.op.disk_state:
15482 self.group.disk_state_static = self.new_disk_state
15484 if self.op.ipolicy:
15485 self.group.ipolicy = self.new_ipolicy
15487 self.cfg.Update(self.group, feedback_fn)
15489 return result
15491 class LUGroupRemove(LogicalUnit):
15492 HPATH = "group-remove"
15493 HTYPE = constants.HTYPE_GROUP
15496 def ExpandNames(self):
15497 # This will raises errors.OpPrereqError on its own:
15498 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15499 self.needed_locks = {
15500 locking.LEVEL_NODEGROUP: [self.group_uuid],
15501 }
15503 def CheckPrereq(self):
15504 """Check prerequisites.
15506 This checks that the given group name exists as a node group, that is
15507 empty (i.e., contains no nodes), and that is not the last group of the
15511 # Verify that the group is empty.
15512 group_nodes = [node.name
15513 for node in self.cfg.GetAllNodesInfo().values()
15514 if node.group == self.group_uuid]
15516 if group_nodes:
15517 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15518 " nodes: %s" %
15519 (self.op.group_name,
15520 utils.CommaJoin(utils.NiceSort(group_nodes))),
15521 errors.ECODE_STATE)
15523 # Verify the cluster would not be left group-less.
15524 if len(self.cfg.GetNodeGroupList()) == 1:
15525 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15526 " removed" % self.op.group_name,
15527 errors.ECODE_STATE)
15529 def BuildHooksEnv(self):
15530 """Build hooks env.
15532 """
15533 return {
15534 "GROUP_NAME": self.op.group_name,
15535 }
15537 def BuildHooksNodes(self):
15538 """Build hooks nodes.
15541 mn = self.cfg.GetMasterNode()
15542 return ([mn], [mn])
15544 def Exec(self, feedback_fn):
15545 """Remove the node group.
15548 try:
15549 self.cfg.RemoveNodeGroup(self.group_uuid)
15550 except errors.ConfigurationError:
15551 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15552 (self.op.group_name, self.group_uuid))
15554 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15557 class LUGroupRename(LogicalUnit):
15558 HPATH = "group-rename"
15559 HTYPE = constants.HTYPE_GROUP
15562 def ExpandNames(self):
15563 # This raises errors.OpPrereqError on its own:
15564 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15566 self.needed_locks = {
15567 locking.LEVEL_NODEGROUP: [self.group_uuid],
15568 }
15570 def CheckPrereq(self):
15571 """Check prerequisites.
15573 Ensures requested new name is not yet used.
15576 try:
15577 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15578 except errors.OpPrereqError:
15579 pass
15580 else:
15581 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15582 " node group (UUID: %s)" %
15583 (self.op.new_name, new_name_uuid),
15584 errors.ECODE_EXISTS)
15586 def BuildHooksEnv(self):
15587 """Build hooks env.
15589 """
15590 return {
15591 "OLD_NAME": self.op.group_name,
15592 "NEW_NAME": self.op.new_name,
15593 }
15595 def BuildHooksNodes(self):
15596 """Build hooks nodes.
15599 mn = self.cfg.GetMasterNode()
15601 all_nodes = self.cfg.GetAllNodesInfo()
15602 all_nodes.pop(mn, None)
15604 run_nodes = [mn]
15605 run_nodes.extend(node.name for node in all_nodes.values()
15606 if node.group == self.group_uuid)
15608 return (run_nodes, run_nodes)
15610 def Exec(self, feedback_fn):
15611 """Rename the node group.
15614 group = self.cfg.GetNodeGroup(self.group_uuid)
15616 if group is None:
15617 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15618 (self.op.group_name, self.group_uuid))
15620 group.name = self.op.new_name
15621 self.cfg.Update(group, feedback_fn)
15623 return self.op.new_name
15626 class LUGroupEvacuate(LogicalUnit):
15627 HPATH = "group-evacuate"
15628 HTYPE = constants.HTYPE_GROUP
15631 def ExpandNames(self):
15632 # This raises errors.OpPrereqError on its own:
15633 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15635 if self.op.target_groups:
15636 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15637 self.op.target_groups)
15638 else:
15639 self.req_target_uuids = []
15641 if self.group_uuid in self.req_target_uuids:
15642 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15643 " as a target group (targets are %s)" %
15644 (self.group_uuid,
15645 utils.CommaJoin(self.req_target_uuids)),
15646 errors.ECODE_INVAL)
15648 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15650 self.share_locks = _ShareAll()
15651 self.needed_locks = {
15652 locking.LEVEL_INSTANCE: [],
15653 locking.LEVEL_NODEGROUP: [],
15654 locking.LEVEL_NODE: [],
15655 }
15657 def DeclareLocks(self, level):
15658 if level == locking.LEVEL_INSTANCE:
15659 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15661 # Lock instances optimistically, needs verification once node and group
15662 # locks have been acquired
15663 self.needed_locks[locking.LEVEL_INSTANCE] = \
15664 self.cfg.GetNodeGroupInstances(self.group_uuid)
15666 elif level == locking.LEVEL_NODEGROUP:
15667 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15669 if self.req_target_uuids:
15670 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15672 # Lock all groups used by instances optimistically; this requires going
15673 # via the node before it's locked, requiring verification later on
15674 lock_groups.update(group_uuid
15675 for instance_name in
15676 self.owned_locks(locking.LEVEL_INSTANCE)
15677 for group_uuid in
15678 self.cfg.GetInstanceNodeGroups(instance_name))
15679 else:
15680 # No target groups, need to lock all of them
15681 lock_groups = locking.ALL_SET
15683 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15685 elif level == locking.LEVEL_NODE:
15686 # This will only lock the nodes in the group to be evacuated which
15687 # contain actual instances
15688 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15689 self._LockInstancesNodes()
15691 # Lock all nodes in group to be evacuated and target groups
15692 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15693 assert self.group_uuid in owned_groups
15694 member_nodes = [node_name
15695 for group in owned_groups
15696 for node_name in self.cfg.GetNodeGroup(group).members]
15697 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15699 def CheckPrereq(self):
15700 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15701 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15702 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15704 assert owned_groups.issuperset(self.req_target_uuids)
15705 assert self.group_uuid in owned_groups
15707 # Check if locked instances are still correct
15708 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15710 # Get instance information
15711 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15713 # Check if node groups for locked instances are still correct
15714 _CheckInstancesNodeGroups(self.cfg, self.instances,
15715 owned_groups, owned_nodes, self.group_uuid)
15717 if self.req_target_uuids:
15718 # User requested specific target groups
15719 self.target_uuids = self.req_target_uuids
15721 # All groups except the one to be evacuated are potential targets
15722 self.target_uuids = [group_uuid for group_uuid in owned_groups
15723 if group_uuid != self.group_uuid]
15725 if not self.target_uuids:
15726 raise errors.OpPrereqError("There are no possible target groups",
15727 errors.ECODE_INVAL)
15729 def BuildHooksEnv(self):
15730 """Build hooks env.
15732 """
15733 return {
15734 "GROUP_NAME": self.op.group_name,
15735 "TARGET_GROUPS": " ".join(self.target_uuids),
15736 }
15738 def BuildHooksNodes(self):
15739 """Build hooks nodes.
15742 mn = self.cfg.GetMasterNode()
15744 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15746 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15748 return (run_nodes, run_nodes)
15750 def Exec(self, feedback_fn):
15751 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15753 assert self.group_uuid not in self.target_uuids
15755 req = iallocator.IAReqGroupChange(instances=instances,
15756 target_groups=self.target_uuids)
15757 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15759 ial.Run(self.op.iallocator)
15761 if not ial.success:
15762 raise errors.OpPrereqError("Can't compute group evacuation using"
15763 " iallocator '%s': %s" %
15764 (self.op.iallocator, ial.info),
15765 errors.ECODE_NORES)
15767 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15769 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15770 len(jobs), self.op.group_name)
15772 return ResultWithJobs(jobs)
15775 class TagsLU(NoHooksLU): # pylint: disable=W0223
15776 """Generic tags LU.
15778 This is an abstract class which is the parent of all the other tags LUs.
15781 def ExpandNames(self):
15782 self.group_uuid = None
15783 self.needed_locks = {}
15785 if self.op.kind == constants.TAG_NODE:
15786 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15787 lock_level = locking.LEVEL_NODE
15788 lock_name = self.op.name
15789 elif self.op.kind == constants.TAG_INSTANCE:
15790 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15791 lock_level = locking.LEVEL_INSTANCE
15792 lock_name = self.op.name
15793 elif self.op.kind == constants.TAG_NODEGROUP:
15794 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15795 lock_level = locking.LEVEL_NODEGROUP
15796 lock_name = self.group_uuid
15797 elif self.op.kind == constants.TAG_NETWORK:
15798 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15799 lock_level = locking.LEVEL_NETWORK
15800 lock_name = self.network_uuid
15801 else:
15802 lock_level = None
15803 lock_name = None
15805 if lock_level and getattr(self.op, "use_locking", True):
15806 self.needed_locks[lock_level] = lock_name
15808 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15809 # not possible to acquire the BGL based on opcode parameters)
15811 def CheckPrereq(self):
15812 """Check prerequisites.
15815 if self.op.kind == constants.TAG_CLUSTER:
15816 self.target = self.cfg.GetClusterInfo()
15817 elif self.op.kind == constants.TAG_NODE:
15818 self.target = self.cfg.GetNodeInfo(self.op.name)
15819 elif self.op.kind == constants.TAG_INSTANCE:
15820 self.target = self.cfg.GetInstanceInfo(self.op.name)
15821 elif self.op.kind == constants.TAG_NODEGROUP:
15822 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15823 elif self.op.kind == constants.TAG_NETWORK:
15824 self.target = self.cfg.GetNetwork(self.network_uuid)
15825 else:
15826 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15827 str(self.op.kind), errors.ECODE_INVAL)
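# Hedged usage sketch (not in the original source): the Tags LUs resolve
# self.op.kind to a single configuration object here, so the concrete
# subclasses only ever touch self.target.  For instance-level tags this
# roughly amounts to submitting (values are made-up examples):
#
#   op = opcodes.OpTagsSet(kind=constants.TAG_INSTANCE,
#                          name="inst1.example.com",
#                          tags=["env:prod"])
#
# which makes ExpandNames lock that instance and CheckPrereq set self.target
# to the corresponding Instance object.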
15830 class LUTagsGet(TagsLU):
15831 """Returns the tags of a given object.
15836 def ExpandNames(self):
15837 TagsLU.ExpandNames(self)
15839 # Share locks as this is only a read operation
15840 self.share_locks = _ShareAll()
15842 def Exec(self, feedback_fn):
15843 """Returns the tag list.
15846 return list(self.target.GetTags())
15849 class LUTagsSearch(NoHooksLU):
15850 """Searches the tags for a given pattern.
15855 def ExpandNames(self):
15856 self.needed_locks = {}
15858 def CheckPrereq(self):
15859 """Check prerequisites.
15861 This checks the pattern passed for validity by compiling it.
15864 try:
15865 self.re = re.compile(self.op.pattern)
15866 except re.error, err:
15867 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15868 (self.op.pattern, err), errors.ECODE_INVAL)
15870 def Exec(self, feedback_fn):
15871 """Returns the tag list.
15874 cfg = self.cfg
15875 tgts = [("/cluster", cfg.GetClusterInfo())]
15876 ilist = cfg.GetAllInstancesInfo().values()
15877 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15878 nlist = cfg.GetAllNodesInfo().values()
15879 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15880 tgts.extend(("/nodegroup/%s" % n.name, n)
15881 for n in cfg.GetAllNodeGroupsInfo().values())
15882 results = []
15883 for path, target in tgts:
15884 for tag in target.GetTags():
15885 if self.re.search(tag):
15886 results.append((path, tag))
15887 return results
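# Example of the result format built above (hedged, made-up names): each hit
# is a (path, tag) pair whose path encodes the owning object, e.g.
#
#   [("/cluster", "env:prod"),
#    ("/instances/inst1.example.com", "env:prod"),
#    ("/nodes/node1.example.com", "env:prod")]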
15890 class LUTagsSet(TagsLU):
15891 """Sets a tag on a given object.
15896 def CheckPrereq(self):
15897 """Check prerequisites.
15899 This checks the type and length of the tag name and value.
15902 TagsLU.CheckPrereq(self)
15903 for tag in self.op.tags:
15904 objects.TaggableObject.ValidateTag(tag)
15906 def Exec(self, feedback_fn):
15910 try:
15911 for tag in self.op.tags:
15912 self.target.AddTag(tag)
15913 except errors.TagError, err:
15914 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15915 self.cfg.Update(self.target, feedback_fn)
15918 class LUTagsDel(TagsLU):
15919 """Delete a list of tags from a given object.
15924 def CheckPrereq(self):
15925 """Check prerequisites.
15927 This checks that we have the given tag.
15930 TagsLU.CheckPrereq(self)
15931 for tag in self.op.tags:
15932 objects.TaggableObject.ValidateTag(tag)
15933 del_tags = frozenset(self.op.tags)
15934 cur_tags = self.target.GetTags()
15936 diff_tags = del_tags - cur_tags
15937 if diff_tags:
15938 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15939 raise errors.OpPrereqError("Tag(s) %s not found" %
15940 (utils.CommaJoin(diff_names), ),
15941 errors.ECODE_NOENT)
15943 def Exec(self, feedback_fn):
15944 """Remove the tag from the object.
15947 for tag in self.op.tags:
15948 self.target.RemoveTag(tag)
15949 self.cfg.Update(self.target, feedback_fn)
15952 class LUTestDelay(NoHooksLU):
15953 """Sleep for a specified amount of time.
15955 This LU sleeps on the master and/or nodes for a specified amount of
15961 def ExpandNames(self):
15962 """Expand names and set required locks.
15964 This expands the node list, if any.
15967 self.needed_locks = {}
15968 if self.op.on_nodes:
15969 # _GetWantedNodes can be used here, but is not always appropriate to use
15970 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15971 # more information.
15972 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15973 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15975 def _TestDelay(self):
15976 """Do the actual sleep.
15979 if self.op.on_master:
15980 if not utils.TestDelay(self.op.duration):
15981 raise errors.OpExecError("Error during master delay test")
15982 if self.op.on_nodes:
15983 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15984 for node, node_result in result.items():
15985 node_result.Raise("Failure during rpc call to node %s" % node)
15987 def Exec(self, feedback_fn):
15988 """Execute the test delay opcode, with the wanted repetitions.
15991 if self.op.repeat == 0:
15992 self._TestDelay()
15993 else:
15994 top_value = self.op.repeat - 1
15995 for i in range(self.op.repeat):
15996 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15997 self._TestDelay()
16000 class LURestrictedCommand(NoHooksLU):
16001 """Logical unit for executing restricted commands.
16006 def ExpandNames(self):
16007 if self.op.nodes:
16008 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
16010 self.needed_locks = {
16011 locking.LEVEL_NODE: self.op.nodes,
16012 }
16013 self.share_locks = {
16014 locking.LEVEL_NODE: not self.op.use_locking,
16015 }
16017 def CheckPrereq(self):
16018 """Check prerequisites.
16022 def Exec(self, feedback_fn):
16023 """Execute restricted command and return output.
16026 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
16028 # Check if correct locks are held
16029 assert set(self.op.nodes).issubset(owned_nodes)
16031 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
16033 result = []
16035 for node_name in self.op.nodes:
16036 nres = rpcres[node_name]
16037 if nres.fail_msg:
16038 msg = ("Command '%s' on node '%s' failed: %s" %
16039 (self.op.command, node_name, nres.fail_msg))
16040 result.append((False, msg))
16041 else:
16042 result.append((True, nres.payload))
16044 return result
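# Hedged sketch of the per-node result list returned above (made-up output):
# one (success, payload-or-message) pair per requested node, in opcode order:
#
#   [(True, "command output from node1"),
#    (False, "Command 'xyz' on node 'node2.example.com' failed: timeout")]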
16047 class LUTestJqueue(NoHooksLU):
16048 """Utility LU to test some aspects of the job queue.
16053 # Must be lower than default timeout for WaitForJobChange to see whether it
16054 # notices changed jobs
16055 _CLIENT_CONNECT_TIMEOUT = 20.0
16056 _CLIENT_CONFIRM_TIMEOUT = 60.0
16059 def _NotifyUsingSocket(cls, cb, errcls):
16060 """Opens a Unix socket and waits for another program to connect.
16063 @param cb: Callback to send socket name to client
16064 @type errcls: class
16065 @param errcls: Exception class to use for errors
16068 # Using a temporary directory as there's no easy way to create temporary
16069 # sockets without writing a custom loop around tempfile.mktemp and
16071 tmpdir = tempfile.mkdtemp()
16072 try:
16073 tmpsock = utils.PathJoin(tmpdir, "sock")
16075 logging.debug("Creating temporary socket at %s", tmpsock)
16076 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
16077 try:
16078 sock.bind(tmpsock)
16079 sock.listen(1)
16081 # Send details to client
16082 cb(tmpsock)
16084 # Wait for client to connect before continuing
16085 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
16086 try:
16087 (conn, _) = sock.accept()
16088 except socket.error, err:
16089 raise errcls("Client didn't connect in time (%s)" % err)
16090 finally:
16091 sock.close()
16092 finally:
16093 # Remove as soon as client is connected
16094 shutil.rmtree(tmpdir)
16096 # Wait for client to close
16097 try:
16098 try:
16099 # pylint: disable=E1101
16100 # Instance of '_socketobject' has no ... member
16101 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
16102 conn.recv(1)
16103 except socket.error, err:
16104 raise errcls("Client failed to confirm notification (%s)" % err)
16105 finally:
16106 conn.close()
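# Hedged client-side sketch (not part of the original source): the test
# client that receives the socket path through the callback is expected to
# connect and send a single confirmation byte, roughly:
#
#   import socket
#   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   s.connect(sockname)   # sockname as delivered through cb()
#   s.send("x")           # confirms the notification
#   s.close()
#
# Anything beyond connect-and-confirm is an assumption about the test tool.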
16108 def _SendNotification(self, test, arg, sockname):
16109 """Sends a notification to the client.
16112 @param test: Test name
16113 @param arg: Test argument (depends on test)
16114 @type sockname: string
16115 @param sockname: Socket path
16118 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16120 def _Notify(self, prereq, test, arg):
16121 """Notifies the client of a test.
16124 @param prereq: Whether this is a prereq-phase test
16126 @param test: Test name
16127 @param arg: Test argument (depends on test)
16130 if prereq:
16131 errcls = errors.OpPrereqError
16132 else:
16133 errcls = errors.OpExecError
16135 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16136 test, arg),
16137 errcls)
16139 def CheckArguments(self):
16140 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16141 self.expandnames_calls = 0
16143 def ExpandNames(self):
16144 checkargs_calls = getattr(self, "checkargs_calls", 0)
16145 if checkargs_calls < 1:
16146 raise errors.ProgrammerError("CheckArguments was not called")
16148 self.expandnames_calls += 1
16150 if self.op.notify_waitlock:
16151 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16153 self.LogInfo("Expanding names")
16155 # Get lock on master node (just to get a lock, not for a particular reason)
16156 self.needed_locks = {
16157 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16158 }
16160 def Exec(self, feedback_fn):
16161 if self.expandnames_calls < 1:
16162 raise errors.ProgrammerError("ExpandNames was not called")
16164 if self.op.notify_exec:
16165 self._Notify(False, constants.JQT_EXEC, None)
16167 self.LogInfo("Executing")
16169 if self.op.log_messages:
16170 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16171 for idx, msg in enumerate(self.op.log_messages):
16172 self.LogInfo("Sending log message %s", idx + 1)
16173 feedback_fn(constants.JQT_MSGPREFIX + msg)
16174 # Report how many test messages have been sent
16175 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16177 if self.op.fail:
16178 raise errors.OpExecError("Opcode failure was requested")
16180 return True
16183 class LUTestAllocator(NoHooksLU):
16184 """Run allocator tests.
16186 This LU runs the allocator tests
16189 def CheckPrereq(self):
16190 """Check prerequisites.
16192 This checks the opcode parameters depending on the director and mode test.
16195 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16196 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16197 for attr in ["memory", "disks", "disk_template",
16198 "os", "tags", "nics", "vcpus"]:
16199 if not hasattr(self.op, attr):
16200 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16201 attr, errors.ECODE_INVAL)
16202 iname = self.cfg.ExpandInstanceName(self.op.name)
16203 if iname is not None:
16204 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16205 iname, errors.ECODE_EXISTS)
16206 if not isinstance(self.op.nics, list):
16207 raise errors.OpPrereqError("Invalid parameter 'nics'",
16208 errors.ECODE_INVAL)
16209 if not isinstance(self.op.disks, list):
16210 raise errors.OpPrereqError("Invalid parameter 'disks'",
16211 errors.ECODE_INVAL)
16212 for row in self.op.disks:
16213 if (not isinstance(row, dict) or
16214 constants.IDISK_SIZE not in row or
16215 not isinstance(row[constants.IDISK_SIZE], int) or
16216 constants.IDISK_MODE not in row or
16217 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16218 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16219 " parameter", errors.ECODE_INVAL)
16220 if self.op.hypervisor is None:
16221 self.op.hypervisor = self.cfg.GetHypervisorType()
16222 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16223 fname = _ExpandInstanceName(self.cfg, self.op.name)
16224 self.op.name = fname
16225 self.relocate_from = \
16226 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16227 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16228 constants.IALLOCATOR_MODE_NODE_EVAC):
16229 if not self.op.instances:
16230 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16231 self.op.instances = _GetWantedInstances(self, self.op.instances)
16232 else:
16233 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16234 self.op.mode, errors.ECODE_INVAL)
16236 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16237 if self.op.iallocator is None:
16238 raise errors.OpPrereqError("Missing allocator name",
16239 errors.ECODE_INVAL)
16240 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16241 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16242 self.op.direction, errors.ECODE_INVAL)
16244 def Exec(self, feedback_fn):
16245 """Run the allocator test.
16248 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16249 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16250 memory=self.op.memory,
16251 disks=self.op.disks,
16252 disk_template=self.op.disk_template,
16253 os=self.op.os,
16254 tags=self.op.tags,
16255 nics=self.op.nics,
16256 vcpus=self.op.vcpus,
16257 spindle_use=self.op.spindle_use,
16258 hypervisor=self.op.hypervisor,
16259 node_whitelist=None)
16260 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16261 req = iallocator.IAReqRelocate(name=self.op.name,
16262 relocate_from=list(self.relocate_from))
16263 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16264 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16265 target_groups=self.op.target_groups)
16266 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16267 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16268 evac_mode=self.op.evac_mode)
16269 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16270 disk_template = self.op.disk_template
16271 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16272 memory=self.op.memory,
16273 disks=self.op.disks,
16274 disk_template=disk_template,
16275 os=self.op.os,
16276 tags=self.op.tags,
16277 nics=self.op.nics,
16278 vcpus=self.op.vcpus,
16279 spindle_use=self.op.spindle_use,
16280 hypervisor=self.op.hypervisor)
16281 for idx in range(self.op.count)]
16282 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16283 else:
16284 raise errors.ProgrammerError("Uncaught mode %s in"
16285 " LUTestAllocator.Exec", self.op.mode)
16287 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16288 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16289 result = ial.in_text
16290 else:
16291 ial.Run(self.op.iallocator, validate=False)
16292 result = ial.out_text
16294 return result
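# Illustrative sketch, not used by the code: the Exec branches above map each
# test-allocator mode onto one iallocator request class, roughly:
#
#   _MODE_TO_REQUEST = {
#     constants.IALLOCATOR_MODE_ALLOC: iallocator.IAReqInstanceAlloc,
#     constants.IALLOCATOR_MODE_RELOC: iallocator.IAReqRelocate,
#     constants.IALLOCATOR_MODE_CHG_GROUP: iallocator.IAReqGroupChange,
#     constants.IALLOCATOR_MODE_NODE_EVAC: iallocator.IAReqNodeEvac,
#     constants.IALLOCATOR_MODE_MULTI_ALLOC: iallocator.IAReqMultiInstanceAlloc,
#     }
#
# With IALLOCATOR_DIR_IN only the generated request text (ial.in_text) is
# returned; with IALLOCATOR_DIR_OUT the named allocator is run and its answer
# (ial.out_text) is returned instead.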
16296 class LUNetworkAdd(LogicalUnit):
16297 """Logical unit for creating networks.
16299 """
16300 HPATH = "network-add"
16301 HTYPE = constants.HTYPE_NETWORK
16302 REQ_BGL = False
16304 def BuildHooksNodes(self):
16305 """Build hooks nodes.
16307 """
16308 mn = self.cfg.GetMasterNode()
16309 return ([mn], [mn])
16311 def CheckArguments(self):
16312 if self.op.mac_prefix:
16313 self.op.mac_prefix = \
16314 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16316 def ExpandNames(self):
16317 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16319 if self.op.conflicts_check:
16320 self.share_locks[locking.LEVEL_NODE] = 1
16321 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16322 self.needed_locks = {
16323 locking.LEVEL_NODE: locking.ALL_SET,
16324 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16325 }
16326 else:
16327 self.needed_locks = {}
16329 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16331 def CheckPrereq(self):
16332 if self.op.network is None:
16333 raise errors.OpPrereqError("Network must be given",
16334 errors.ECODE_INVAL)
16336 try:
16337 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16338 except errors.OpPrereqError:
16339 pass
16340 else:
16341 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16342 " network (UUID: %s)" %
16343 (self.op.network_name, existing_uuid),
16344 errors.ECODE_EXISTS)
16346 # Check tag validity
16347 for tag in self.op.tags:
16348 objects.TaggableObject.ValidateTag(tag)
16350 def BuildHooksEnv(self):
16351 """Build hooks env.
16353 """
16354 args = {
16355 "name": self.op.network_name,
16356 "subnet": self.op.network,
16357 "gateway": self.op.gateway,
16358 "network6": self.op.network6,
16359 "gateway6": self.op.gateway6,
16360 "mac_prefix": self.op.mac_prefix,
16361 "tags": self.op.tags,
16362 }
16363 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16365 def Exec(self, feedback_fn):
16366 """Add the IP pool to the cluster.
16368 """
16369 nobj = objects.Network(name=self.op.network_name,
16370 network=self.op.network,
16371 gateway=self.op.gateway,
16372 network6=self.op.network6,
16373 gateway6=self.op.gateway6,
16374 mac_prefix=self.op.mac_prefix,
16375 uuid=self.network_uuid)
16376 # Initialize the associated address pool
16377 try:
16378 pool = network.AddressPool.InitializeNetwork(nobj)
16379 except errors.AddressPoolError, err:
16380 raise errors.OpExecError("Cannot create IP address pool for network"
16381 " '%s': %s" % (self.op.network_name, err))
16383 # Check if we need to reserve the nodes and the cluster master IP
16384 # These may not be allocated to any instances in routed mode, as
16385 # they wouldn't function anyway.
16386 if self.op.conflicts_check:
16387 for node in self.cfg.GetAllNodesInfo().values():
16388 for ip in [node.primary_ip, node.secondary_ip]:
16389 try:
16390 if pool.Contains(ip):
16391 pool.Reserve(ip, external=True)
16392 self.LogInfo("Reserved IP address of node '%s' (%s)",
16393 node.name, ip)
16394 except errors.AddressPoolError, err:
16395 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16396 ip, node.name, err)
16398 master_ip = self.cfg.GetClusterInfo().master_ip
16399 try:
16400 if pool.Contains(master_ip):
16401 pool.Reserve(master_ip)
16402 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16403 except errors.AddressPoolError, err:
16404 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
16405 master_ip, err)
16407 if self.op.add_reserved_ips:
16408 for ip in self.op.add_reserved_ips:
16409 try:
16410 pool.Reserve(ip, external=True)
16411 except errors.AddressPoolError, err:
16412 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16413 (ip, err))
16415 if self.op.tags:
16416 for tag in self.op.tags:
16417 nobj.AddTag(tag)
16419 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16420 del self.remove_locks[locking.LEVEL_NETWORK]
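# Illustrative sketch, not executed: the AddressPool calls used by Exec above,
# assuming 'nobj' is the objects.Network built there and "192.0.2.10" is a
# hypothetical address inside its subnet:
#
#   pool = network.AddressPool.InitializeNetwork(nobj)
#   if pool.Contains("192.0.2.10"):
#     # external=True marks the reservation as externally managed, as done
#     # above for node IPs and the cluster master IP
#     pool.Reserve("192.0.2.10", external=True)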
16423 class LUNetworkRemove(LogicalUnit):
16424 HPATH = "network-remove"
16425 HTYPE = constants.HTYPE_NETWORK
16426 REQ_BGL = False
16428 def ExpandNames(self):
16429 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16431 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16432 self.needed_locks = {
16433 locking.LEVEL_NETWORK: [self.network_uuid],
16434 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16435 }
16437 def CheckPrereq(self):
16438 """Check prerequisites.
16440 This checks that the given network exists and is not connected to any
16441 node group.
16444 """
16445 # Verify that the network is not connected.
16446 node_groups = [group.name
16447 for group in self.cfg.GetAllNodeGroupsInfo().values()
16448 if self.network_uuid in group.networks]
16450 if node_groups:
16451 self.LogWarning("Network '%s' is connected to the following"
16452 " node groups: %s" %
16453 (self.op.network_name,
16454 utils.CommaJoin(utils.NiceSort(node_groups))))
16455 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16457 def BuildHooksEnv(self):
16458 """Build hooks env.
16460 """
16461 return {
16462 "NETWORK_NAME": self.op.network_name,
16463 }
16465 def BuildHooksNodes(self):
16466 """Build hooks nodes.
16468 """
16469 mn = self.cfg.GetMasterNode()
16470 return ([mn], [mn])
16472 def Exec(self, feedback_fn):
16473 """Remove the network.
16475 """
16476 try:
16477 self.cfg.RemoveNetwork(self.network_uuid)
16478 except errors.ConfigurationError:
16479 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16480 (self.op.network_name, self.network_uuid))
16483 class LUNetworkSetParams(LogicalUnit):
16484 """Modifies the parameters of a network.
16486 """
16487 HPATH = "network-modify"
16488 HTYPE = constants.HTYPE_NETWORK
16489 REQ_BGL = False
16491 def CheckArguments(self):
16492 if (self.op.gateway and
16493 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16494 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16495 " at once", errors.ECODE_INVAL)
16497 def ExpandNames(self):
16498 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16500 self.needed_locks = {
16501 locking.LEVEL_NETWORK: [self.network_uuid],
16502 }
16504 def CheckPrereq(self):
16505 """Check prerequisites.
16507 """
16508 self.network = self.cfg.GetNetwork(self.network_uuid)
16509 self.gateway = self.network.gateway
16510 self.mac_prefix = self.network.mac_prefix
16511 self.network6 = self.network.network6
16512 self.gateway6 = self.network.gateway6
16513 self.tags = self.network.tags
16515 self.pool = network.AddressPool(self.network)
16517 if self.op.gateway:
16518 if self.op.gateway == constants.VALUE_NONE:
16519 self.gateway = None
16520 else:
16521 self.gateway = self.op.gateway
16522 if self.pool.IsReserved(self.gateway):
16523 raise errors.OpPrereqError("Gateway IP address '%s' is already"
16524 " reserved" % self.gateway,
16525 errors.ECODE_STATE)
16527 if self.op.mac_prefix:
16528 if self.op.mac_prefix == constants.VALUE_NONE:
16529 self.mac_prefix = None
16530 else:
16531 self.mac_prefix = \
16532 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16534 if self.op.gateway6:
16535 if self.op.gateway6 == constants.VALUE_NONE:
16536 self.gateway6 = None
16537 else:
16538 self.gateway6 = self.op.gateway6
16540 if self.op.network6:
16541 if self.op.network6 == constants.VALUE_NONE:
16542 self.network6 = None
16543 else:
16544 self.network6 = self.op.network6
16546 def BuildHooksEnv(self):
16547 """Build hooks env.
16549 """
16550 args = {
16551 "name": self.op.network_name,
16552 "subnet": self.network.network,
16553 "gateway": self.gateway,
16554 "network6": self.network6,
16555 "gateway6": self.gateway6,
16556 "mac_prefix": self.mac_prefix,
16557 "tags": self.tags,
16558 }
16559 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16561 def BuildHooksNodes(self):
16562 """Build hooks nodes.
16564 """
16565 mn = self.cfg.GetMasterNode()
16566 return ([mn], [mn])
16568 def Exec(self, feedback_fn):
16569 """Modifies the network.
16571 """
16572 #TODO: reserve/release via temporary reservation manager
16573 # extend cfg.ReserveIp/ReleaseIp with the external flag
16574 if self.op.gateway:
16575 if self.gateway == self.network.gateway:
16576 self.LogWarning("Gateway is already %s", self.gateway)
16577 else:
16578 if self.gateway:
16579 self.pool.Reserve(self.gateway, external=True)
16580 if self.network.gateway:
16581 self.pool.Release(self.network.gateway, external=True)
16582 self.network.gateway = self.gateway
16584 if self.op.add_reserved_ips:
16585 for ip in self.op.add_reserved_ips:
16586 try:
16587 if self.pool.IsReserved(ip):
16588 self.LogWarning("IP address %s is already reserved", ip)
16589 else:
16590 self.pool.Reserve(ip, external=True)
16591 except errors.AddressPoolError, err:
16592 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16594 if self.op.remove_reserved_ips:
16595 for ip in self.op.remove_reserved_ips:
16596 if ip == self.network.gateway:
16597 self.LogWarning("Cannot unreserve Gateway's IP")
16598 continue
16599 try:
16600 if not self.pool.IsReserved(ip):
16601 self.LogWarning("IP address %s is already unreserved", ip)
16602 else:
16603 self.pool.Release(ip, external=True)
16604 except errors.AddressPoolError, err:
16605 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16607 if self.op.mac_prefix:
16608 self.network.mac_prefix = self.mac_prefix
16610 if self.op.network6:
16611 self.network.network6 = self.network6
16613 if self.op.gateway6:
16614 self.network.gateway6 = self.gateway6
16616 self.pool.Validate()
16618 self.cfg.Update(self.network, feedback_fn)
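# Illustrative sketch, not executed: the reserve/release pattern used by Exec
# above, with 'pool' a network.AddressPool and 'ip' a hypothetical address:
#
#   if not pool.IsReserved(ip):
#     pool.Reserve(ip, external=True)    # add_reserved_ips path
#   else:
#     pool.Release(ip, external=True)    # remove_reserved_ips path
#
# The gateway is handled the same way: the old gateway address is released and
# the new one reserved before self.network.gateway is updated.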
16621 class _NetworkQuery(_QueryBase):
16622 FIELDS = query.NETWORK_FIELDS
16624 def ExpandNames(self, lu):
16625 lu.needed_locks = {}
16626 lu.share_locks = _ShareAll()
16628 self.do_locking = self.use_locking
16630 all_networks = lu.cfg.GetAllNetworksInfo()
16631 name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16633 if self.names:
16634 missing = []
16635 self.wanted = []
16637 for name in self.names:
16638 if name in name_to_uuid:
16639 self.wanted.append(name_to_uuid[name])
16640 else:
16641 missing.append(name)
16643 if missing:
16644 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16645 errors.ECODE_NOENT)
16646 else:
16647 self.wanted = locking.ALL_SET
16649 if self.do_locking:
16650 lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
16651 if query.NETQ_INST in self.requested_data:
16652 lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
16653 if query.NETQ_GROUP in self.requested_data:
16654 lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
16656 def DeclareLocks(self, lu, level):
16657 pass
16659 def _GetQueryData(self, lu):
16660 """Computes the list of networks and their attributes.
16662 """
16663 all_networks = lu.cfg.GetAllNetworksInfo()
16665 network_uuids = self._GetNames(lu, all_networks.keys(),
16666 locking.LEVEL_NETWORK)
16668 do_instances = query.NETQ_INST in self.requested_data
16669 do_groups = query.NETQ_GROUP in self.requested_data
16671 network_to_instances = None
16672 network_to_groups = None
16674 # For NETQ_GROUP, we need to map network->[groups]
16675 if do_groups:
16676 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16677 network_to_groups = dict((uuid, []) for uuid in network_uuids)
16678 for _, group in all_groups.iteritems():
16679 for net_uuid in network_uuids:
16680 netparams = group.networks.get(net_uuid, None)
16681 if netparams:
16682 info = (group.name, netparams[constants.NIC_MODE],
16683 netparams[constants.NIC_LINK])
16685 network_to_groups[net_uuid].append(info)
16687 if do_instances:
16688 all_instances = lu.cfg.GetAllInstancesInfo()
16689 network_to_instances = dict((uuid, []) for uuid in network_uuids)
16690 for instance in all_instances.values():
16691 for nic in instance.nics:
16692 if nic.network in network_uuids:
16693 network_to_instances[nic.network].append(instance.name)
16694 break
16696 if query.NETQ_STATS in self.requested_data:
16697 stats = \
16698 dict((uuid,
16699 self._GetStats(network.AddressPool(all_networks[uuid])))
16700 for uuid in network_uuids)
16701 else:
16702 stats = None
16704 return query.NetworkQueryData([all_networks[uuid]
16705 for uuid in network_uuids],
16706 network_to_groups,
16707 network_to_instances,
16708 stats)
16710 @staticmethod
16711 def _GetStats(pool):
16712 """Returns statistics for a network address pool.
16714 """
16715 return {
16716 "free_count": pool.GetFreeCount(),
16717 "reserved_count": pool.GetReservedCount(),
16718 "map": pool.GetMap(),
16719 "external_reservations":
16720 utils.CommaJoin(pool.GetExternalReservations()),
16721 }
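# Illustrative example, not part of the module: _GetStats returns a plain dict
# with the four keys built above; for a hypothetical 16-address pool with four
# reserved addresses it might look like:
#
#   {
#     "free_count": 12,
#     "reserved_count": 4,
#     "map": "XX...........X.X",
#     "external_reservations": "192.0.2.0, 192.0.2.1, 192.0.2.15",
#   }
#
# The values are made up; only the key names are fixed by the code above.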
16724 class LUNetworkQuery(NoHooksLU):
16725 """Logical unit for querying networks.
16727 """
16728 REQ_BGL = False
16730 def CheckArguments(self):
16731 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16732 self.op.output_fields, self.op.use_locking)
16734 def ExpandNames(self):
16735 self.nq.ExpandNames(self)
16737 def Exec(self, feedback_fn):
16738 return self.nq.OldStyleQuery(self)
16741 class LUNetworkConnect(LogicalUnit):
16742 """Connect a network to a nodegroup.
16744 """
16745 HPATH = "network-connect"
16746 HTYPE = constants.HTYPE_NETWORK
16747 REQ_BGL = False
16749 def ExpandNames(self):
16750 self.network_name = self.op.network_name
16751 self.group_name = self.op.group_name
16752 self.network_mode = self.op.network_mode
16753 self.network_link = self.op.network_link
16755 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16756 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16758 self.needed_locks = {
16759 locking.LEVEL_INSTANCE: [],
16760 locking.LEVEL_NODEGROUP: [self.group_uuid],
16761 }
16762 self.share_locks[locking.LEVEL_INSTANCE] = 1
16764 if self.op.conflicts_check:
16765 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16766 self.share_locks[locking.LEVEL_NETWORK] = 1
16768 def DeclareLocks(self, level):
16769 if level == locking.LEVEL_INSTANCE:
16770 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16772 # Lock instances optimistically, needs verification once group lock has
16773 # been acquired
16774 if self.op.conflicts_check:
16775 self.needed_locks[locking.LEVEL_INSTANCE] = \
16776 self.cfg.GetNodeGroupInstances(self.group_uuid)
16778 def BuildHooksEnv(self):
16779 ret = {
16780 "GROUP_NAME": self.group_name,
16781 "GROUP_NETWORK_MODE": self.network_mode,
16782 "GROUP_NETWORK_LINK": self.network_link,
16783 }
16784 return ret
16786 def BuildHooksNodes(self):
16787 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16788 return (nodes, nodes)
16790 def CheckPrereq(self):
16791 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16793 assert self.group_uuid in owned_groups
16795 # Check if locked instances are still correct
16796 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16797 if self.op.conflicts_check:
16798 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16800 self.netparams = {
16801 constants.NIC_MODE: self.network_mode,
16802 constants.NIC_LINK: self.network_link,
16803 }
16804 objects.NIC.CheckParameterSyntax(self.netparams)
16806 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16807 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16808 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16809 self.connected = False
16810 if self.network_uuid in self.group.networks:
16811 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16812 (self.network_name, self.group.name))
16813 self.connected = True
16815 # check only if not already connected
16816 elif self.op.conflicts_check:
16817 pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
16819 _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
16820 "connect to", owned_instances)
16822 def Exec(self, feedback_fn):
16823 # Connect the network and update the group only if not already connected
16824 if not self.connected:
16825 self.group.networks[self.network_uuid] = self.netparams
16826 self.cfg.Update(self.group, feedback_fn)
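# Illustrative sketch, not executed: the conflict check in CheckPrereq above
# passes a callable that decides, per NIC, whether it clashes with the network
# being connected:
#
#   pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
#   _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
#                         "connect to", owned_instances)
#
# LUNetworkDisconnect below uses the same helper with a different predicate
# (nic.network == self.network_uuid), since a NIC already attached to the
# network is what blocks disconnection.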
16829 def _NetworkConflictCheck(lu, check_fn, action, instances):
16830 """Checks for network interface conflicts with a network.
16832 @type lu: L{LogicalUnit}
16833 @type check_fn: callable receiving one parameter (L{objects.NIC}) and
16834 returning boolean
16835 @param check_fn: Function checking for conflict
16836 @type action: string
16837 @param action: Part of error message (see code)
16838 @raise errors.OpPrereqError: If conflicting IP addresses are found.
16840 """
16841 conflicts = []
16843 for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
16844 instconflicts = [(idx, nic.ip)
16845 for (idx, nic) in enumerate(instance.nics)
16846 if check_fn(nic)]
16848 if instconflicts:
16849 conflicts.append((instance.name, instconflicts))
16851 if conflicts:
16852 lu.LogWarning("IP addresses from network '%s', which is about to %s"
16853 " node group '%s', are in use: %s" %
16854 (lu.network_name, action, lu.group.name,
16855 utils.CommaJoin(("%s: %s" %
16856 (name, _FmtNetworkConflict(details)))
16857 for (name, details) in conflicts)))
16859 raise errors.OpPrereqError("Conflicting IP addresses found;"
16860 " remove/modify the corresponding network"
16861 " interfaces", errors.ECODE_STATE)
16864 def _FmtNetworkConflict(details):
16865 """Utility for L{_NetworkConflictCheck}.
16867 """
16868 return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
16869 for (idx, ipaddr) in details)
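# Illustrative example, not part of the module: given the per-instance detail
# list built by _NetworkConflictCheck, e.g. [(0, "198.51.100.4"),
# (2, "198.51.100.9")] (hypothetical addresses), _FmtNetworkConflict renders
# it as "nic0/198.51.100.4, nic2/198.51.100.9", which is what appears after
# the instance name in the warning message above.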
16872 class LUNetworkDisconnect(LogicalUnit):
16873 """Disconnect a network from a nodegroup.
16875 """
16876 HPATH = "network-disconnect"
16877 HTYPE = constants.HTYPE_NETWORK
16878 REQ_BGL = False
16880 def ExpandNames(self):
16881 self.network_name = self.op.network_name
16882 self.group_name = self.op.group_name
16884 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16885 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16887 self.needed_locks = {
16888 locking.LEVEL_INSTANCE: [],
16889 locking.LEVEL_NODEGROUP: [self.group_uuid],
16890 }
16891 self.share_locks[locking.LEVEL_INSTANCE] = 1
16893 def DeclareLocks(self, level):
16894 if level == locking.LEVEL_INSTANCE:
16895 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16897 # Lock instances optimistically, needs verification once group lock has
16898 # been acquired
16899 self.needed_locks[locking.LEVEL_INSTANCE] = \
16900 self.cfg.GetNodeGroupInstances(self.group_uuid)
16902 def BuildHooksEnv(self):
16903 ret = {
16904 "GROUP_NAME": self.group_name,
16905 }
16906 return ret
16908 def BuildHooksNodes(self):
16909 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16910 return (nodes, nodes)
16912 def CheckPrereq(self):
16913 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16915 assert self.group_uuid in owned_groups
16917 # Check if locked instances are still correct
16918 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16919 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16921 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16922 self.connected = True
16923 if self.network_uuid not in self.group.networks:
16924 self.LogWarning("Network '%s' is not mapped to group '%s'",
16925 self.network_name, self.group.name)
16926 self.connected = False
16928 # We need this check only if network is not already connected
16929 else:
16930 _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
16931 "disconnect from", owned_instances)
16933 def Exec(self, feedback_fn):
16934 # Disconnect the network and update the group only if network is connected
16935 if self.connected:
16936 del self.group.networks[self.network_uuid]
16937 self.cfg.Update(self.group, feedback_fn)
16940 #: Query type implementations
16941 _QUERY_IMPL = {
16942 constants.QR_CLUSTER: _ClusterQuery,
16943 constants.QR_INSTANCE: _InstanceQuery,
16944 constants.QR_NODE: _NodeQuery,
16945 constants.QR_GROUP: _GroupQuery,
16946 constants.QR_NETWORK: _NetworkQuery,
16947 constants.QR_OS: _OsQuery,
16948 constants.QR_EXTSTORAGE: _ExtStorageQuery,
16949 constants.QR_EXPORT: _ExportQuery,
16950 }
16952 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16955 def _GetQueryImplementation(name):
16956 """Returns the implementation for a query type.
16958 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16960 """
16961 try:
16962 return _QUERY_IMPL[name]
16963 except KeyError:
16964 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16965 errors.ECODE_INVAL)
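# Illustrative usage, not part of the module:
#
#   impl = _GetQueryImplementation(constants.QR_NETWORK)   # -> _NetworkQuery
#   _GetQueryImplementation("no-such-resource")            # raises OpPrereqError
#
# The assert above guarantees that every opcode-visible query resource in
# constants.QR_VIA_OP has an entry in _QUERY_IMPL.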
16968 def _CheckForConflictingIp(lu, ip, node):
16969 """In case of conflicting IP address raise error.
16971 @type ip: string
16972 @param ip: IP address
16973 @type node: string
16974 @param node: node name
16976 """
16977 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16978 if conf_net is not None:
16979 raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
16980 (ip, conf_net)),
16981 errors.ECODE_STATE)
16983 return (None, None)
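# Illustrative usage, not part of the module: callers pass an IP address and a
# node name; if the address falls into a network already connected to that
# node's group, cfg.CheckIPInNodeGroup reports the network and the helper
# raises OpPrereqError, otherwise it returns (None, None). Hypothetical call:
#
#   _CheckForConflictingIp(self, "203.0.113.7", instance.primary_node)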