# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module

import copy
import logging

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc
from ganeti import runtime
from ganeti import pathutils
from ganeti import vcluster
from ganeti import network
from ganeti.masterd import iallocator

import ganeti.masterd.instance # pylint: disable=W0611


INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
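
# Example (illustrative sketch, not from the original module): an LU's
# Exec() can hand follow-up work to the job queue by returning
# ResultWithJobs; OpTestDelay is used here only as a stand-in opcode.
#
#   def Exec(self, feedback_fn):
#     ...  # do the actual work first
#     return ResultWithJobs([[opcodes.OpTestDelay(duration=1)]],
#                           other_result="done")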


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner

    # Dictionaries used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)

    self.add_locks = {}
    self.remove_locks = {}

    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}

    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that
        level (note that in this case C{DeclareLocks} won't be called
        at all for that level)
      - if you need locks at a level, but you can't calculate it in
        this function, initialise that level with an empty list and do
        further processing in L{LogicalUnit.DeclareLocks} (see that
        function's docstring)
      - don't put anything for the BGL level
      - if you want all locks at a level use L{locking.ALL_SET} as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused arguments and the
    # "could be a function" warning
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
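
  # Example (illustrative sketch, not from the original module): a typical
  # instance LU sets self.recalculate_locks[locking.LEVEL_NODE] in
  # ExpandNames and then resolves its node locks here:
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes(primary_only=True)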


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)


def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns: The annotated disk copies
  @see: L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @rtype: bool
  @return: The effective value of exclusive_storage

  """
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]


def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @rtype: bool
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)


def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    return locking.ALL_SET
  else:
    return names[:]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
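
# Example (illustrative, not from the original module): resetting one key
# to its default and removing another via C{None}:
#
#   old = {"kernel_path": "/boot/vmlinuz", "vnc_bind_address": "1.2.3.4"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "vnc_bind_address": None}
#   _GetUpdatedParams(old, upd, use_none=True)
#   --> {} (both keys dropped; with use_none=False the C{None} value would
#       instead be stored as-is)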


def _UpdateMinMaxISpecs(ipolicy, new_minmax, group_policy):
  use_none = use_default = group_policy
  minmax = ipolicy.setdefault(constants.ISPECS_MINMAX, {})
  for (key, value) in new_minmax.items():
    if key not in constants.ISPECS_MINMAX_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy/%s: %s" %
                                 (constants.ISPECS_MINMAX, key),
                                 errors.ECODE_INVAL)
    old_spec = minmax.get(key, {})
    minmax[key] = _GetUpdatedParams(old_spec, value, use_none=use_none,
                                    use_default=use_default)
    utils.ForceDictType(minmax[key], constants.ISPECS_PARAMETER_TYPES)


def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key == constants.ISPECS_MINMAX:
      _UpdateMinMaxISpecs(ipolicy, value, group_policy)
    elif key == constants.ISPECS_STD:
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
      utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError), err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError, err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
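
# Example (illustrative, not from the original module; the parameter name
# "mem_node" is an assumption): merging per-hypervisor sub-dicts, where
# each value is itself a dict that gets type-checked:
#
#   base = {"xen-pvm": {"mem_node": 1024}}
#   upd = {"xen-pvm": {"mem_node": 2048}, "kvm": {"mem_node": 512}}
#   _UpdateAndVerifySubDict(base, upd, constants.HVSTS_PARAMETER_TYPES)
#   --> {"xen-pvm": {"mem_node": 2048}, "kvm": {"mem_node": 512}}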


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
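
# Example (illustrative, not from the original module): after narrowing an
# LU's scope to a single node, drop every node lock except that one:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# or release two specific instance locks:
#
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE,
#                 names=["inst1.example.com", "inst2.example.com"])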


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
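
# Example (illustrative, not from the original module): for an instance
# "inst1" with one LV on each of two nodes, the result looks like:
#
#   {("node1.example.com", "xenvg/disk0"): "inst1",
#    ("node2.example.com", "xenvg/disk0"): "inst1"}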


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception, err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
  """Make sure that none of the given parameters is global.

  If a global parameter is found, an L{errors.OpPrereqError} exception is
  raised. This is used to avoid setting global parameters for individual nodes.

  @type params: dictionary
  @param params: Parameters to check
  @type glob_pars: dictionary
  @param glob_pars: Forbidden parameters
  @type kind: string
  @param kind: Kind of parameters (e.g. "node")
  @type bad_levels: string
  @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
    "instance")
  @type good_levels: strings
  @param good_levels: Level(s) at which the parameters are allowed (e.g.
    "cluster" or "node")

  """
  used_globals = glob_pars.intersection(params)
  if used_globals:
    msg = ("The following %s parameters are global and cannot"
           " be customized at %s level, please modify them at"
           " %s level: %s" %
           (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _CheckNodePVs(nresult, exclusive_storage):
  """Check node PVs.

  """
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # use on the PV)
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")


def _ComputeMinMaxSpec(name, qualifier, ispecs, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ispecs: dictionary containing min and max values
  @param value: actual value that we want to use
  @return: None or an error string

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ispecs[constants.ISPECS_MAX].get(name, value)
  min_v = ispecs[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
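
# Example (illustrative, not from the original module): checking a 4 GiB
# memory value against a 1-2 GiB min/max spec yields an error string:
#
#   ispecs = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 1024},
#             constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 2048}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ispecs, 4096)
#   --> "memory-size value 4096 is not in range [1024, 2048]"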


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 disk_template,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @type disk_template: string
  @param disk_template: The disk template of the instance
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]
  if disk_template != constants.DT_DISKLESS:
    # This check doesn't make sense for diskless instances
    test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
  ret = []
  allowed_dts = ipolicy[constants.IPOLICY_DTS]
  if disk_template not in allowed_dts:
    ret.append("Disk template %s is not allowed (allowed templates: %s)" %
               (disk_template, utils.CommaJoin(allowed_dts)))

  minmax = ipolicy[constants.ISPECS_MINMAX]
  return ret + filter(None,
                      (_compute_fn(name, qualifier, minmax, value)
                       for (name, qualifier, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  be_full = cfg.GetClusterInfo().FillBE(instance)
  mem_size = be_full[constants.BE_MAXMEM]
  cpu_count = be_full[constants.BE_VCPUS]
  spindle_use = be_full[constants.BE_SPINDLE_USE]
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)
  disk_template = instance.disk_template

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use, disk_template)


def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, disk_template,
  _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use, disk_template)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group, cfg,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance, cfg)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @return: A list of instances which violates the new ipolicy but
    did not before

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
          _ComputeViolatingInstances(old_ipolicy, instances, cfg))


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
                         mac_prefix, tags):
  """Builds network related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the network
  @type subnet: string
  @param subnet: the ipv4 subnet
  @type gateway: string
  @param gateway: the ipv4 gateway
  @type network6: string
  @param network6: the ipv6 subnet
  @type gateway6: string
  @param gateway6: the ipv6 gateway
  @type mac_prefix: string
  @param mac_prefix: the mac_prefix
  @type tags: list
  @param tags: the tags of the network

  """
  env = {}

  if name:
    env["NETWORK_NAME"] = name
  if subnet:
    env["NETWORK_SUBNET"] = subnet
  if gateway:
    env["NETWORK_GATEWAY"] = gateway
  if network6:
    env["NETWORK_SUBNET6"] = network6
  if gateway6:
    env["NETWORK_GATEWAY6"] = gateway6
  if mac_prefix:
    env["NETWORK_MAC_PREFIX"] = mac_prefix
  if tags:
    env["NETWORK_TAGS"] = " ".join(tags)

  return env
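
# Example (illustrative, not from the original module): only the variables
# whose inputs are set end up in the environment:
#
#   _BuildNetworkHookEnv("net1", "192.0.2.0/24", "192.0.2.1",
#                        None, None, None, ["vlan100"])
#   --> {"NETWORK_NAME": "net1", "NETWORK_SUBNET": "192.0.2.0/24",
#        "NETWORK_GATEWAY": "192.0.2.1", "NETWORK_TAGS": "vlan100"}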


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo)
      representing the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: list of tuples (name, size, mode)
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.9) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (name, _, ip, mac, mode, link, net, netinfo) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_NAME" % idx] = name
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if netinfo:
        nobj = objects.Network.FromDict(netinfo)
        env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
      elif net:
        # FIXME: broken network reference: the instance NIC specifies a
        # network, but the relevant network entry was not in the config. This
        # should be made impossible.
        env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (name, size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_NAME" % idx] = name
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICToTuple(lu, nic):
  """Build a tuple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  netinfo = None
  if nic.network:
    nobj = lu.cfg.GetNetwork(nic.network)
    netinfo = objects.Network.ToDict(nobj)
  return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network, netinfo)


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.name, disk.size, disk.mode)
              for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
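
# Example (illustrative, not from the original module): with a candidate
# pool size of 10, 3 current candidates and 3 that should exist, adding a
# node bumps the target to min(3 + 1, 10) = 4; since 3 < 4, the new node
# promotes itself.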


def _ComputeViolatingInstances(ipolicy, instances, cfg):
  """Computes a set of instances that violate the given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc


def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
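
# Example (illustrative, not from the original module): asking for "inst1"
# and resolving to "inst1.example.com" passes (prefix match), while
# resolving to "server1.example.com" raises OpPrereqError.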
1892 class LUClusterPostInit(LogicalUnit):
1893 """Logical unit for running hooks after cluster initialization.
1896 HPATH = "cluster-init"
1897 HTYPE = constants.HTYPE_CLUSTER
1899 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
1907 def BuildHooksNodes(self):
1908 """Build hooks nodes.
1911 return ([], [self.cfg.GetMasterNode()])
  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
1920 class LUClusterDestroy(LogicalUnit):
1921 """Logical unit for destroying the cluster.
1924 HPATH = "cluster-destroy"
1925 HTYPE = constants.HTYPE_CLUSTER
1927 def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
1935 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])
1941 def CheckPrereq(self):
1942 """Check prerequisites.
1944 This checks whether the cluster is empty.
1946 Any errors are signaled by raising errors.OpPrereqError.
1949 master = self.cfg.GetMasterNode()
1951 nodelist = self.cfg.GetNodeList()
1952 if len(nodelist) != 1 or nodelist[0] != master:
1953 raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
1956 instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)
1962 def Exec(self, feedback_fn):
1963 """Destroys the cluster.
1966 master_params = self.cfg.GetMasterNetworkParameters()
1968 # Run post hooks on master node before it's removed
1969 _RunPostHook(self, master_params.name)
1971 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Error disabling the master IP address: %s", msg)

    return master_params.name
1981 def _VerifyCertificate(filename):
1982 """Verifies a certificate for L{LUClusterVerifyConfig}.
1984 @type filename: string
1985 @param filename: Path to PEM file
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
1991 except Exception, err: # pylint: disable=W0703
1992 return (LUClusterVerifyConfig.ETYPE_ERROR,
1993 "Failed to load X509 certificate %s: %s" % (filename, err))
  (errcode, msg) = utils.VerifyX509Certificate(cert,
                                               constants.SSL_CERT_EXPIRATION_WARN,
                                               constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
2006 elif errcode == utils.CERT_WARNING:
2007 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
2008 elif errcode == utils.CERT_ERROR:
2009 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
2011 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
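# Illustrative sketch of the (error-type, message) pairs produced above
# (path hypothetical):
#   _VerifyCertificate("/var/lib/ganeti/server.pem")
#   # -> (None, None)                               nothing to report
#   # -> (LUClusterVerifyConfig.ETYPE_WARNING, m)   e.g. certificate expires
#   #                                               soon
#   # -> (LUClusterVerifyConfig.ETYPE_ERROR, m)     e.g. expired or unloadable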
2014 def _GetAllHypervisorParameters(cluster, instances):
2015 """Compute the set of all hypervisor parameters.
2017 @type cluster: L{objects.Cluster}
2018 @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
2020 @param instances: additional instances from which to obtain parameters
2021 @rtype: list of (origin, hypervisor, parameters)
2022 @return: a list with all parameters found, indicating the hypervisor they
2023 apply to, and the origin (can be "cluster", "os X", or "instance Y")
2028 for hv_name in cluster.enabled_hypervisors:
2029 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2031 for os_name, os_hvp in cluster.os_hvp.items():
2032 for hv_name, hv_params in os_hvp.items():
2034 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2035 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2037 # TODO: collapse identical parameter values in a single one
2038 for instance in instances:
2039 if instance.hvparams:
2040 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
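  # Illustrative sketch of the resulting list (names hypothetical):
  #   [("cluster", "xen-pvm", {...cluster-wide defaults...}),
  #    ("os debian-image", "xen-pvm", {...defaults plus OS overrides...}),
  #    ("instance web1", "xen-pvm", {...fully-filled instance params...})]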
2046 class _VerifyErrors(object):
2047 """Mix-in for cluster/group verify LUs.
2049 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2050 self.op and self._feedback_fn to be available.)
2054 ETYPE_FIELD = "code"
2055 ETYPE_ERROR = "ERROR"
2056 ETYPE_WARNING = "WARNING"
2058 def _Error(self, ecode, item, msg, *args, **kwargs):
2059 """Format an error message.
2061 Based on the opcode's error_codes parameter, either format a
2062 parseable error code, or a simpler error string.
2064 This must be called only from Exec and functions called from Exec.
2067 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2068 itype, etxt, _ = ecode
    # If the error code is in the list of ignored errors, demote the error to
    # a warning
2071 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2072 ltype = self.ETYPE_WARNING
    # first complete the msg
    if args:
      msg = msg % args
2076 # then format the whole message
2077 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2078 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2085 # and finally report it via the feedback_fn
2086 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2087 # do not mark the operation as failed for WARN cases only
    if ltype == self.ETYPE_ERROR:
      self.bad = True
2091 def _ErrorIf(self, cond, *args, **kwargs):
2092 """Log an error message if the passed condition is True.
    if (bool(cond)
        or self.op.debug_simulate_errors): # pylint: disable=E1101
2097 self._Error(*args, **kwargs)
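  # Illustrative sketch of the two formats produced by _Error (values
  # hypothetical):
  #   with op.error_codes set:
  #     "ERROR:ENODESSH:node:node1.example.com:ssh communication failed"
  #   without:
  #     "ERROR: node node1.example.com: ssh communication failed"
  # Either way the line reaches the user through feedback_fn, as a list item.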
2100 class LUClusterVerify(NoHooksLU):
2101 """Submits all jobs necessary to verify the cluster.
2106 def ExpandNames(self):
2107 self.needed_locks = {}
2109 def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
2127 [opcodes.OpClusterVerifyGroup(group_name=group,
2128 ignore_errors=self.op.ignore_errors,
2129 depends=depends_fn())]
2130 for group in groups)
2132 # Fix up all parameters
2133 for op in itertools.chain(*jobs): # pylint: disable=W0142
2134 op.debug_simulate_errors = self.op.debug_simulate_errors
2135 op.verbose = self.op.verbose
2136 op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
2139 except AttributeError:
2140 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2142 return ResultWithJobs(jobs)
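  # Illustrative sketch (group names hypothetical): with node groups
  # "default" and "rack2" and no group_name in the opcode, the submitted
  # jobs end up as
  #   [[OpClusterVerifyConfig(...)],
  #    [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])], ...)],
  #    [OpClusterVerifyGroup(group_name="rack2", depends=[(-2, [])], ...)]]
  # where the relative job dependencies all point back at the configuration
  # verification job submitted first.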
2145 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2146 """Verifies the cluster config.
2151 def _VerifyHVP(self, hvp_data):
2152 """Verifies locally the syntax of the hypervisor parameters.
2155 for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisorClass(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
2162 except errors.GenericError, err:
2163 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2165 def ExpandNames(self):
2166 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2167 self.share_locks = _ShareAll()
2169 def CheckPrereq(self):
2170 """Check prerequisites.
2173 # Retrieve all information
2174 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2175 self.all_node_info = self.cfg.GetAllNodesInfo()
2176 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2178 def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.
2183 self._feedback_fn = feedback_fn
2185 feedback_fn("* Verifying cluster config")
2187 for msg in self.cfg.VerifyConfig():
2188 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2190 feedback_fn("* Verifying cluster certificate files")
2192 for cert_filename in pathutils.ALL_CERT_FILES:
2193 (errcode, msg) = _VerifyCertificate(cert_filename)
2194 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2196 feedback_fn("* Verifying hypervisor parameters")
2198 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2199 self.all_inst_info.values()))
2201 feedback_fn("* Verifying all nodes belong to an existing group")
2203 # We do this verification here because, should this bogus circumstance
2204 # occur, it would never be caught by VerifyGroup, which only acts on
2205 # nodes/instances reachable from existing node groups.
2207 dangling_nodes = set(node.name for node in self.all_node_info.values()
2208 if node.group not in self.all_group_info)
2210 dangling_instances = {}
2211 no_node_instances = []
2213 for inst in self.all_inst_info.values():
2214 if inst.primary_node in dangling_nodes:
2215 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2216 elif inst.primary_node not in self.all_node_info:
2217 no_node_instances.append(inst.name)
    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]
2226 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
2229 " existing group: %s", utils.CommaJoin(pretty_dangling))
2231 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
2234 " %s", utils.CommaJoin(no_node_instances))
2239 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2240 """Verifies the status of a node group.
2243 HPATH = "cluster-verify"
2244 HTYPE = constants.HTYPE_CLUSTER
2247 _HOOKS_INDENT_RE = re.compile("^", re.M)
2249 class NodeImage(object):
2250 """A class representing the logical and physical status of a node.
2253 @ivar name: the node name to which this object refers
2254 @ivar volumes: a structure as returned from
2255 L{ganeti.backend.GetVolumeList} (runtime)
2256 @ivar instances: a list of running instances (runtime)
2257 @ivar pinst: list of configured primary instances (config)
2258 @ivar sinst: list of configured secondary instances (config)
2259 @ivar sbp: dictionary of {primary-node: list of instances} for all
2260 instances for which this node is secondary (config)
2261 @ivar mfree: free memory, as reported by hypervisor (runtime)
2262 @ivar dfree: free disk, as reported by the node (runtime)
2263 @ivar offline: the offline status (config)
2264 @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
2266 not whether the individual keys were correct) (runtime)
2267 @type lvm_fail: boolean
2268 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2269 @type hyp_fail: boolean
2270 @ivar hyp_fail: whether the RPC call didn't return the instance list
2271 @type ghost: boolean
2272 @ivar ghost: whether this is a known node or not (config)
2273 @type os_fail: boolean
2274 @ivar os_fail: whether the RPC call didn't return valid OS data
2276 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2277 @type vm_capable: boolean
2278 @ivar vm_capable: whether the node can host instances
2280 @ivar pv_min: size in MiB of the smallest PVs
2282 @ivar pv_max: size in MiB of the biggest PVs
2285 def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
      self.pv_min = None
      self.pv_max = None
2305 def ExpandNames(self):
2306 # This raises errors.OpPrereqError on its own:
2307 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2309 # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2313 self.needed_locks = {
2314 locking.LEVEL_INSTANCE: inst_names,
2315 locking.LEVEL_NODEGROUP: [self.group_uuid],
2316 locking.LEVEL_NODE: [],
2318 # This opcode is run by watcher every five minutes and acquires all nodes
2319 # for a group. It doesn't run for a long time, so it's better to acquire
2320 # the node allocation lock as well.
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }
2324 self.share_locks = _ShareAll()
2326 def DeclareLocks(self, level):
2327 if level == locking.LEVEL_NODE:
2328 # Get members of node group; this is unsafe and needs verification later
2329 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2331 all_inst_info = self.cfg.GetAllInstancesInfo()
2333 # In Exec(), we warn about mirrored instances that have primary and
2334 # secondary living in separate node groups. To fully verify that
2335 # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
2338 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2339 # Important: access only the instances whose lock is owned
2340 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2341 nodes.update(all_inst_info[inst].secondary_nodes)
2343 self.needed_locks[locking.LEVEL_NODE] = nodes
2345 def CheckPrereq(self):
2346 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2347 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2349 group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2356 unlocked_instances = \
2357 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
                                 errors.ECODE_STATE)
2364 if unlocked_instances:
2365 raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
                                 errors.ECODE_STATE)
2369 self.all_node_info = self.cfg.GetAllNodesInfo()
2370 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2372 self.my_node_names = utils.NiceSort(group_nodes)
2373 self.my_inst_names = utils.NiceSort(group_instances)
2375 self.my_node_info = dict((name, self.all_node_info[name])
2376 for name in self.my_node_names)
2378 self.my_inst_info = dict((name, self.all_inst_info[name])
2379 for name in self.my_inst_names)
2381 # We detect here the nodes that will need the extra RPC calls for verifying
2382 # split LV volumes; they should be locked.
2383 extra_lv_nodes = set()
2385 for inst in self.my_inst_info.values():
2386 if inst.disk_template in constants.DTS_INT_MIRROR:
2387 for nname in inst.all_nodes:
2388 if self.all_node_info[nname].group != self.group_uuid:
2389 extra_lv_nodes.add(nname)
2391 unlocked_lv_nodes = \
2392 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2394 if unlocked_lv_nodes:
2395 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
2398 self.extra_lv_nodes = list(extra_lv_nodes)
2400 def _VerifyNode(self, ninfo, nresult):
2401 """Perform some basic validation on data returned from a node.
    - check the result data structure is well formed and has all the
      mandatory fields
2405 - check ganeti version
2407 @type ninfo: L{objects.Node}
2408 @param ninfo: the node to check
2409 @param nresult: the results from the node
2411 @return: whether overall this call was successful (and we can expect
      reasonable values in the response)
2416 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2418 # main result, nresult should be a non-empty dict
2419 test = not nresult or not isinstance(nresult, dict)
2420 _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False
2425 # compares ganeti version
2426 local_version = constants.PROTOCOL_VERSION
2427 remote_version = nresult.get("version", None)
2428 test = not (remote_version and
2429 isinstance(remote_version, (list, tuple)) and
2430 len(remote_version) == 2)
2431 _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False
2436 test = local_version != remote_version[0]
2437 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2438 "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False
2443 # node seems compatible, we can actually try to look into its results
2445 # full package version
2446 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2447 constants.CV_ENODEVERSION, node,
2448 "software version mismatch: master %s, node %s",
2449 constants.RELEASE_VERSION, remote_version[1],
2450 code=self.ETYPE_WARNING)
2452 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2453 if ninfo.vm_capable and isinstance(hyp_result, dict):
2454 for hv_name, hv_result in hyp_result.iteritems():
2455 test = hv_result is not None
2456 _ErrorIf(test, constants.CV_ENODEHV, node,
2457 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2459 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2460 if ninfo.vm_capable and isinstance(hvp_result, list):
2461 for item, hv_name, hv_result in hvp_result:
2462 _ErrorIf(True, constants.CV_ENODEHV, node,
2463 "hypervisor %s parameter verify failure (source %s): %s",
2464 hv_name, item, hv_result)
2466 test = nresult.get(constants.NV_NODESETUP,
2467 ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
2473 def _VerifyNodeTime(self, ninfo, nresult,
2474 nvinfo_starttime, nvinfo_endtime):
2475 """Check the node time.
2477 @type ninfo: L{objects.Node}
2478 @param ninfo: the node to check
2479 @param nresult: the remote results for the node
2480 @param nvinfo_starttime: the start time of the RPC call
2481 @param nvinfo_endtime: the end time of the RPC call
2485 _ErrorIf = self._ErrorIf # pylint: disable=C0103
    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node,
               "Node returned invalid time")
      return
2494 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2495 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2496 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
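    # Illustrative sketch (numbers hypothetical): with an allowed skew of
    # 150 seconds, a node clock 200 seconds behind the RPC start time yields
    # ntime_diff = "200.0s" and a CV_ENODETIME error; a clock within the
    # window leaves ntime_diff as None and passes.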
2505 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2506 """Check the node LVM results and update info for cross-node checks.
2508 @type ninfo: L{objects.Node}
2509 @param ninfo: the node to check
2510 @param nresult: the remote results for the node
2511 @param vg_name: the configured VG name
2512 @type nimg: L{NodeImage}
2513 @param nimg: node image
2520 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2522 # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node,
             "unable to check volume groups")
2527 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2528 constants.MIN_VG_SIZE)
2529 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2532 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
    for em in errmsgs:
      self._Error(constants.CV_ENODELVM, node, em)
2535 if pvminmax is not None:
2536 (nimg.pv_min, nimg.pv_max) = pvminmax
2538 def _VerifyGroupLVM(self, node_image, vg_name):
2539 """Check cross-node consistency in LVM.
2541 @type node_image: dict
    @param node_image: info about nodes, mapping from node names to
2543 L{NodeImage} objects
2544 @param vg_name: the configured VG name
    # Only exclusive storage needs this kind of check
2551 if not self._exclusive_storage:
2554 # exclusive_storage wants all PVs to have the same size (approximately),
2555 # if the smallest and the biggest ones are okay, everything is fine.
2556 # pv_min is None iff pv_max is None
    vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
    if not vals:
      return
2560 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2561 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2562 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2563 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2564 "PV sizes differ too much in the group; smallest (%s MB) is"
2565 " on %s, biggest (%s MB) is on %s",
2566 pvmin, minnode, pvmax, maxnode)
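    # Illustrative sketch (sizes hypothetical): if node1 reports its smallest
    # PV as 10240 MiB while node3 reports its biggest as 102400 MiB, the
    # spread would be flagged for the group, since exclusive storage
    # allocates whole PVs and expects them to be of (roughly) equal size.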
2568 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2569 """Check the node bridges.
2571 @type ninfo: L{objects.Node}
2572 @param ninfo: the node to check
2573 @param nresult: the remote results for the node
2574 @param bridges: the expected list of bridges
2581 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2583 missing = nresult.get(constants.NV_BRIDGES, None)
2584 test = not isinstance(missing, list)
2585 _ErrorIf(test, constants.CV_ENODENET, node,
2586 "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2591 def _VerifyNodeUserScripts(self, ninfo, nresult):
2592 """Check the results of user scripts presence and executability on the node
2594 @type ninfo: L{objects.Node}
2595 @param ninfo: the node to check
2596 @param nresult: the remote results for the node
    test = constants.NV_USERSCRIPTS not in nresult
2602 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2603 "did not return user scripts information")
2605 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))
2611 def _VerifyNodeNetwork(self, ninfo, nresult):
2612 """Check the node network connectivity results.
2614 @type ninfo: L{objects.Node}
2615 @param ninfo: the node to check
2616 @param nresult: the remote results for the node
2620 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2622 test = constants.NV_NODELIST not in nresult
2623 _ErrorIf(test, constants.CV_ENODESSH, node,
2624 "node hasn't returned node ssh connectivity data")
2626 if nresult[constants.NV_NODELIST]:
2627 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2628 _ErrorIf(True, constants.CV_ENODESSH, node,
2629 "ssh communication with node '%s': %s", a_node, a_msg)
2631 test = constants.NV_NODENETTEST not in nresult
2632 _ErrorIf(test, constants.CV_ENODENET, node,
2633 "node hasn't returned node tcp connectivity data")
2635 if nresult[constants.NV_NODENETTEST]:
      nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      for anode in nlist:
        _ErrorIf(True, constants.CV_ENODENET, node,
                 "tcp communication with node '%s': %s",
                 anode, nresult[constants.NV_NODENETTEST][anode])
2642 test = constants.NV_MASTERIP not in nresult
2643 _ErrorIf(test, constants.CV_ENODENET, node,
2644 "node hasn't returned node master IP reachability data")
2646 if not nresult[constants.NV_MASTERIP]:
2647 if node == self.master_node:
2648 msg = "the master node cannot reach the master IP (not configured?)"
2650 msg = "cannot reach the master IP"
2651 _ErrorIf(True, constants.CV_ENODENET, node, msg)
  def _VerifyInstance(self, instance, inst_config, node_image,
                      diskstatus):
2655 """Verify an instance.
2657 This function checks to see if the required block devices are
    available on the instance's node, and that the nodes are in the correct
    state.
2662 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2663 pnode = inst_config.primary_node
2664 pnode_img = node_image[pnode]
2665 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2667 node_vol_should = {}
2668 inst_config.MapLVsByNode(node_vol_should)
2670 cluster = self.cfg.GetClusterInfo()
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                            self.group_info)
    err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
2674 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2675 code=self.ETYPE_WARNING)
2677 for node in node_vol_should:
2678 n_img = node_image[node]
2679 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
2682 for volume in node_vol_should[node]:
2683 test = volume not in n_img.volumes
2684 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2685 "volume %s missing on node %s", volume, node)
2687 if inst_config.admin_state == constants.ADMINST_UP:
2688 test = instance not in pnode_img.instances and not pnode_img.offline
2689 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               pnode)
      _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               pnode)
2696 diskdata = [(nname, success, status, idx)
2697 for (nname, disks) in diskstatus.items()
2698 for idx, (success, status) in enumerate(disks)]
2700 for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
2703 snode = node_image[nname]
2704 bad_snode = snode.ghost or snode.offline
2705 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2706 not success and not bad_snode,
2707 constants.CV_EINSTANCEFAULTYDISK, instance,
2708 "couldn't retrieve status for disk/%s on %s: %s",
2709 idx, nname, bdev_status)
2710 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2711 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2712 constants.CV_EINSTANCEFAULTYDISK, instance,
2713 "disk/%s on %s is faulty", idx, nname)
2715 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2716 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2717 " primary node failed", instance)
2719 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2720 constants.CV_EINSTANCELAYOUT,
2721 instance, "instance has multiple secondary nodes: %s",
2722 utils.CommaJoin(inst_config.secondary_nodes),
2723 code=self.ETYPE_WARNING)
2725 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2726 # Disk template not compatible with exclusive_storage: no instance
2727 # node should have the flag set
2728 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2729 inst_config.all_nodes)
      es_nodes = [n for (n, es) in es_flags.items()
                  if es]
2732 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2733 "instance has template %s, which is not supported on nodes"
2734 " that have exclusive storage set: %s",
2735 inst_config.disk_template, utils.CommaJoin(es_nodes))
2737 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2738 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2739 instance_groups = {}
2741 for node in instance_nodes:
        instance_groups.setdefault(self.all_node_info[node].group,
                                   []).append(node)

      pretty_list = [
        "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
        # Sort so that we always list the primary node first.
        for group, nodes in sorted(instance_groups.items(),
                                   key=lambda (_, nodes): pnode in nodes,
                                   reverse=True)]
2752 self._ErrorIf(len(instance_groups) > 1,
2753 constants.CV_EINSTANCESPLITGROUPS,
2754 instance, "instance has primary and secondary nodes in"
2755 " different groups: %s", utils.CommaJoin(pretty_list),
2756 code=self.ETYPE_WARNING)
2758 inst_nodes_offline = []
2759 for snode in inst_config.secondary_nodes:
2760 s_img = node_image[snode]
2761 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
               snode, "instance %s, connection to secondary node failed",
               instance)

      if s_img.offline:
        inst_nodes_offline.append(snode)
2768 # warn that the instance lives on offline nodes
2769 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2770 "instance has offline secondary node(s) %s",
2771 utils.CommaJoin(inst_nodes_offline))
2772 # ... or ghost/non-vm_capable nodes
2773 for node in inst_config.all_nodes:
2774 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2775 instance, "instance lives on ghost node %s", node)
2776 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2777 instance, "instance lives on non-vm_capable node %s", node)
2779 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2780 """Verify if there are any unknown volumes in the cluster.
2782 The .os, .swap and backup volumes are ignored. All other volumes are
2783 reported as unknown.
2785 @type reserved: L{ganeti.utils.FieldSet}
2786 @param reserved: a FieldSet of reserved volume names
2789 for node, n_img in node_image.items():
2790 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2791 self.all_node_info[node].group != self.group_uuid):
        # skip non-healthy nodes
        continue
2794 for volume in n_img.volumes:
2795 test = ((node not in node_vol_should or
2796 volume not in node_vol_should[node]) and
2797 not reserved.Matches(volume))
2798 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2799 "volume %s is unknown", volume)
2801 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2802 """Verify N+1 Memory Resilience.
2804 Check that if one single node dies we can still start all the
2805 instances it was primary for.
2808 cluster_info = self.cfg.GetClusterInfo()
2809 for node, n_img in node_image.items():
2810 # This code checks that every node which is now listed as
2811 # secondary has enough memory to host all instances it is
2812 # supposed to should a single other node in the cluster fail.
2813 # FIXME: not ready for failover to an arbitrary node
2814 # FIXME: does not support file-backed instances
2815 # WARNING: we currently take into account down instances as well
2816 # as up ones, considering that even if they're down someone
2817 # might want to start them even in the event of a node failure.
2818 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2819 # we're skipping nodes marked offline and nodes in other groups from
2820 # the N+1 warning, since most likely we don't have good memory
        # information from them; we already list instances living on such
        # nodes, and that's enough warning
        continue
2824 #TODO(dynmem): also consider ballooning out other instances
2825 for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
2828 bep = cluster_info.FillBE(instance_cfg[instance])
2829 if bep[constants.BE_AUTO_BALANCE]:
2830 needed_mem += bep[constants.BE_MINMEM]
2831 test = n_img.mfree < needed_mem
2832 self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
2834 " should node %s fail (%dMiB needed, %dMiB available)",
2835 prinode, needed_mem, n_img.mfree)
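    # Illustrative sketch (numbers hypothetical): if node2 is secondary for
    # two auto-balanced instances whose primary is node1, with minimum
    # memory of 2048 MiB and 4096 MiB, then node2 must report at least
    # 6144 MiB free, or a CV_ENODEN1 error is raised against it.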
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2839 (files_all, files_opt, files_mc, files_vm)):
2840 """Verifies file checksums collected from all nodes.
2842 @param errorif: Callback for reporting errors
2843 @param nodeinfo: List of L{objects.Node} objects
2844 @param master_node: Name of master node
2845 @param all_nvinfo: RPC results
2848 # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]
2856 # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
2863 nodefiles.update((filename,
2864 frozenset(map(operator.attrgetter("name"), filenodes)))
2865 for filename in files)
2867 assert set(nodefiles) == (files_all | files_mc | files_vm)
2869 fileinfo = dict((filename, {}) for filename in nodefiles)
2870 ignore_nodes = set()
2872 for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
        node_files = dict((vcluster.LocalizeVirtualPath(key), value)
                          for (key, value) in fingerprints.items())
2887 test = not (node_files and isinstance(node_files, dict))
2888 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2889 "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue
2894 # Build per-checksum mapping from filename to nodes having it
2895 for (filename, checksum) in node_files.items():
2896 assert filename in nodefiles
2897 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2899 for (filename, checksums) in fileinfo.items():
2900 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2902 # Nodes having the file
2903 with_file = frozenset(node_name
2904 for nodes in fileinfo[filename].values()
2905 for node_name in nodes) - ignore_nodes
2907 expected_nodes = nodefiles[filename] - ignore_nodes
2909 # Nodes missing file
2910 missing_file = expected_nodes - with_file
2912 if filename in files_opt:
2914 errorif(missing_file and missing_file != expected_nodes,
2915 constants.CV_ECLUSTERFILECHECK, None,
2916 "File %s is optional, but it must exist on all or no"
2917 " nodes (not found on %s)",
2918 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))
2924 # Warn if a node has a file it shouldn't
2925 unexpected = with_file - expected_nodes
      errorif(unexpected,
              constants.CV_ECLUSTERFILECHECK, None,
2928 "File %s should not exist on node(s) %s",
2929 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2931 # See if there are multiple versions of the file
2932 test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []
2941 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2942 "File %s found with %s different checksums (%s)",
2943 filename, len(checksums), "; ".join(variants))
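    # Illustrative sketch of the bookkeeping above (values hypothetical):
    #   nodefiles["/var/lib/ganeti/config.data"] = frozenset of the nodes
    #     expected to hold the file
    #   fileinfo["/var/lib/ganeti/config.data"] = {
    #     "abc123...": set(["node1", "node2"]),
    #     "def456...": set(["node3"]),   # second checksum => variant error
    #   }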
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
2949 @type ninfo: L{objects.Node}
2950 @param ninfo: the node to check
2951 @param nresult: the remote results for the node
2952 @param instanceinfo: the dict of instances
2953 @param drbd_helper: the configured DRBD usermode helper
2954 @param drbd_map: the DRBD map as returned by
2955 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2959 _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node = ninfo.name

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2963 test = (helper_result is None)
2964 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2965 "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2970 "drbd usermode helper check unsuccessful: %s", payload)
2971 test = status and (payload != drbd_helper)
2972 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2973 "wrong drbd usermode helper: %s", payload)
2975 # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
2978 test = instance not in instanceinfo
2979 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2980 "ghost instance '%s' in temporary DRBD map", instance)
2981 # ghost instance should not be running, but otherwise we
2982 # don't give double warnings (both ghost instance and
2983 # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
2988 node_drbd[minor] = (instance.name,
2989 instance.admin_state == constants.ADMINST_UP)
2991 # and now check them
2992 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2993 test = not isinstance(used_minors, (tuple, list))
2994 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2995 "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return
3000 for minor, (iname, must_exist) in node_drbd.items():
3001 test = minor not in used_minors and must_exist
3002 _ErrorIf(test, constants.CV_ENODEDRBD, node,
3003 "drbd minor %d of instance %s is not active", minor, iname)
3004 for minor in used_minors:
3005 test = minor not in node_drbd
3006 _ErrorIf(test, constants.CV_ENODEDRBD, node,
3007 "unallocated drbd minor %d is in use", minor)
3009 def _UpdateNodeOS(self, ninfo, nresult, nimg):
3010 """Builds the node OS structures.
3012 @type ninfo: L{objects.Node}
3013 @param ninfo: the node to check
3014 @param nresult: the remote results for the node
3015 @param nimg: the node image object
3019 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3021 remote_os = nresult.get(constants.NV_OSLIST, None)
3022 test = (not isinstance(remote_os, list) or
3023 not compat.all(isinstance(v, list) and len(v) == 7
3024 for v in remote_os))
3026 _ErrorIf(test, constants.CV_ENODEOS, node,
3027 "node hasn't returned valid OS data")
    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []
3042 # parameters is a list of lists instead of list of tuples due to
3043 # JSON lacking a real tuple type, fix it:
3044 parameters = [tuple(v) for v in parameters]
3045 os_dict[name].append((os_path, status, diagnose,
3046 set(variants), set(parameters), set(api_ver)))
3048 nimg.oslist = os_dict
3050 def _VerifyNodeOS(self, ninfo, nimg, base):
3051 """Verifies the node OS list.
3053 @type ninfo: L{objects.Node}
3054 @param ninfo: the node to check
3055 @param nimg: the node image object
3056 @param base: the 'template' node we match against (e.g. from the master)
3060 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3062 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3064 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3065 for os_name, os_data in nimg.oslist.items():
3066 assert os_data, "Empty OS status for OS %s?!" % os_name
3067 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3068 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3069 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3070 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3071 "OS '%s' has multiple entries (first one shadows the rest): %s",
3072 os_name, utils.CommaJoin([v[0] for v in os_data]))
3073 # comparisons with the 'base' image
3074 test = os_name not in base.oslist
3075 _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
3080 assert base.oslist[os_name], "Base node has empty OS status?"
3081 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
3085 for kind, a, b in [("API version", f_api, b_api),
3086 ("variants list", f_var, b_var),
3087 ("parameters", beautify_params(f_param),
3088 beautify_params(b_param))]:
3089 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3090 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3091 kind, os_name, base.name,
3092 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3094 # check any missing OSes
3095 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3096 _ErrorIf(missing, constants.CV_ENODEOS, node,
3097 "OSes present on reference node %s but missing on this node: %s",
3098 base.name, utils.CommaJoin(missing))
3100 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3101 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3103 @type ninfo: L{objects.Node}
3104 @param ninfo: the node to check
3105 @param nresult: the remote results for the node
3106 @type is_master: bool
3107 @param is_master: Whether node is the master node
    node = ninfo.name

    if (is_master and
        (constants.ENABLE_FILE_STORAGE or
         constants.ENABLE_SHARED_FILE_STORAGE)):
      try:
        fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
      except KeyError:
        # This should never happen
        self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
                      "Node did not return forbidden file storage paths")
      else:
        self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
                      "Found forbidden file storage paths: %s",
                      utils.CommaJoin(fspaths))
    else:
      self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
                    constants.CV_ENODEFILESTORAGEPATHS, node,
                    "Node should not have returned forbidden file storage"
                    " paths")
3131 def _VerifyOob(self, ninfo, nresult):
3132 """Verifies out of band functionality of a node.
3134 @type ninfo: L{objects.Node}
3135 @param ninfo: the node to check
3136 @param nresult: the remote results for the node
3140 # We just have to verify the paths on master and/or master candidates
3141 # as the oob helper is invoked on the master
3142 if ((ninfo.master_candidate or ninfo.master_capable) and
3143 constants.NV_OOB_PATHS in nresult):
3144 for path_result in nresult[constants.NV_OOB_PATHS]:
3145 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3147 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3148 """Verifies and updates the node volume data.
3150 This function will update a L{NodeImage}'s internal structures
3151 with data from the remote call.
3153 @type ninfo: L{objects.Node}
3154 @param ninfo: the node to check
3155 @param nresult: the remote results for the node
3156 @param nimg: the node image object
3157 @param vg_name: the configured VG name
3161 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3163 nimg.lvm_fail = True
3164 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
3168 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3169 utils.SafeEncode(lvdata))
3170 elif not isinstance(lvdata, dict):
3171 _ErrorIf(True, constants.CV_ENODELVM, node,
3172 "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
3177 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3178 """Verifies and updates the node instance list.
3180 If the listing was successful, then updates this node's instance
3181 list. Otherwise, it marks the RPC call as failed for the instance
3184 @type ninfo: L{objects.Node}
3185 @param ninfo: the node to check
3186 @param nresult: the remote results for the node
3187 @param nimg: the node image object
3190 idata = nresult.get(constants.NV_INSTANCELIST, None)
3191 test = not isinstance(idata, list)
3192 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3193 "rpc call to node failed (instancelist): %s",
3194 utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
3200 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3201 """Verifies and computes a node information map
3203 @type ninfo: L{objects.Node}
3204 @param ninfo: the node to check
3205 @param nresult: the remote results for the node
3206 @param nimg: the node image object
3207 @param vg_name: the configured VG name
3211 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3213 # try to read free memory (from the hypervisor)
3214 hv_info = nresult.get(constants.NV_HVINFO, None)
3215 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3216 _ErrorIf(test, constants.CV_ENODEHV, node,
3217 "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
3221 except (ValueError, TypeError):
3222 _ErrorIf(True, constants.CV_ENODERPC, node,
3223 "node returned invalid nodeinfo, check hypervisor")
3225 # FIXME: devise a free space model for file based instances as well
3226 if vg_name is not None:
3227 test = (constants.NV_VGLIST not in nresult or
3228 vg_name not in nresult[constants.NV_VGLIST])
3229 _ErrorIf(test, constants.CV_ENODELVM, node,
3230 "node didn't return data for the volume group '%s'"
3231 " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3235 except (ValueError, TypeError):
3236 _ErrorIf(True, constants.CV_ENODERPC, node,
3237 "node returned invalid LVM info, check LVM status")
3239 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3240 """Gets per-disk status information for all instances.
3242 @type nodelist: list of strings
3243 @param nodelist: Node names
3244 @type node_image: dict of (name, L{objects.Node})
3245 @param node_image: Node objects
3246 @type instanceinfo: dict of (name, L{objects.Instance})
3247 @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
3249 @return: a dictionary of per-instance dictionaries with nodes as
3250 keys and disk information as values; the disk information is a
3251 list of tuples (success, payload)
3254 _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_disks = {}
    node_disks_devonly = {}
3258 diskless_instances = set()
3259 diskless = constants.DT_DISKLESS
3261 for nname in nodelist:
3262 node_instances = list(itertools.chain(node_image[nname].pinst,
3263 node_image[nname].sinst))
3264 diskless_instances.update(inst for inst in node_instances
3265 if instanceinfo[inst].disk_template == diskless)
3266 disks = [(inst, disk)
3267 for inst in node_instances
3268 for disk in instanceinfo[inst].disks]
      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks
      # _AnnotateDiskParams already makes copies of the disks
      devonly = []
      for (inst, dev) in disks:
3279 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3280 self.cfg.SetDiskID(anno_disk, nname)
3281 devonly.append(anno_disk)
3283 node_disks_devonly[nname] = devonly
3285 assert len(node_disks) == len(node_disks_devonly)
3287 # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)
3291 assert len(result) == len(node_disks)
    instdisk = {}

    for (nname, nres) in result.items():
3296 disks = node_disks[nname]
      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))
3318 for ((inst, _), status) in zip(disks, data):
3319 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3321 # Add empty entries for diskless instances.
3322 for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}
3326 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3327 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3328 compat.all(isinstance(s, (tuple, list)) and
3329 len(s) == 2 for s in statuses)
3330 for inst, nnames in instdisk.items()
3331 for nname, statuses in nnames.items())
3333 instdisk_keys = set(instdisk)
3334 instanceinfo_keys = set(instanceinfo)
3335 assert instdisk_keys == instanceinfo_keys, \
3336 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
      ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
       (instdisk_keys, instanceinfo_keys))

    return instdisk
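    # Illustrative sketch of the returned mapping (names hypothetical):
    #   {"web1": {"node1": [(True, status_disk0), (True, status_disk1)]},
    #    "db1": {"node2": [(False, "node offline")]}}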
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
3343 """Create endless iterators for all potential SSH check hosts.
3346 nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
3349 keyfunc = operator.attrgetter("group")
3351 return map(itertools.cycle,
3352 [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])
  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3358 """Choose which nodes should talk to which other nodes.
    We will make nodes contact all nodes in their group, and one node from
    every other group.
3363 @warning: This algorithm has a known issue if one node group is much
3364 smaller than others (e.g. just one node). In such a case all other
3365 nodes will talk to the single node.
3368 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3369 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3371 return (online_nodes,
3372 dict((name, sorted([i.next() for i in sel]))
3373 for name in online_nodes))
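    # Illustrative sketch (names hypothetical): verifying group "default" in
    # a three-group cluster might produce
    #   (["node1", "node2"],
    #    {"node1": ["rack2-n1", "rack3-n1"],
    #     "node2": ["rack2-n2", "rack3-n2"]})
    # i.e. every online node in the verified group also contacts one node
    # from each other group, with the selector cycling through candidates to
    # spread the load.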
3375 def BuildHooksEnv(self):
    Cluster-Verify hooks run only in the post phase; when they fail, their
    output is logged in the verify output and the verification fails.
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env
3391 def BuildHooksNodes(self):
3392 """Build hooks nodes.
3395 return ([], self.my_node_names)
3397 def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.
3401 # This method has too many local variables. pylint: disable=R0914
3402 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3404 if not self.my_node_names:
      feedback_fn("* Empty node group, skipping verification")
      return True
3410 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3411 verbose = self.op.verbose
3412 self._feedback_fn = feedback_fn
3414 vg_name = self.cfg.GetVGName()
3415 drbd_helper = self.cfg.GetDRBDHelper()
3416 cluster = self.cfg.GetClusterInfo()
3417 hypervisors = cluster.enabled_hypervisors
3418 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3420 i_non_redundant = [] # Non redundant instances
3421 i_non_a_balanced = [] # Non auto-balanced instances
3422 i_offline = 0 # Count of offline instances
3423 n_offline = 0 # Count of offline nodes
3424 n_drained = 0 # Count of nodes being drained
3425 node_vol_should = {}
3427 # FIXME: verify OS list
3430 filemap = _ComputeAncillaryFiles(cluster, False)
3432 # do local checksums
3433 master_node = self.master_node = self.cfg.GetMasterNode()
3434 master_ip = self.cfg.GetMasterIP()
3436 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
3440 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3442 node_verify_param = {
3443 constants.NV_FILELIST:
3444 map(vcluster.MakeVirtualPath,
3445 utils.UniqueSequence(filename
3446 for files in filemap
3447 for filename in files)),
3448 constants.NV_NODELIST:
3449 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3450 self.all_node_info.values()),
3451 constants.NV_HYPERVISOR: hypervisors,
3452 constants.NV_HVPARAMS:
3453 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3454 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3455 for node in node_data_list
3456 if not node.offline],
3457 constants.NV_INSTANCELIST: hypervisors,
3458 constants.NV_VERSION: None,
3459 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3460 constants.NV_NODESETUP: None,
3461 constants.NV_TIME: None,
3462 constants.NV_MASTERIP: (master_node, master_ip),
3463 constants.NV_OSLIST: None,
3464 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }
3468 if vg_name is not None:
3469 node_verify_param[constants.NV_VGLIST] = None
3470 node_verify_param[constants.NV_LVLIST] = vg_name
3471 node_verify_param[constants.NV_PVLIST] = [vg_name]
    if drbd_helper:
      node_verify_param[constants.NV_DRBDLIST] = None
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3477 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3478 # Load file storage paths only from master node
3479 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3482 # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3485 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3486 bridges.add(default_nicpp[constants.NIC_LINK])
3487 for instance in self.my_inst_info.values():
3488 for nic in instance.nics:
3489 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3490 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3491 bridges.add(full_nic[constants.NIC_LINK])
    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
3496 # Build our expected cluster state
3497 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3499 vm_capable=node.vm_capable))
3500 for node in node_data_list)
    oob_paths = []
    for node in self.all_node_info.values():
3505 path = _SupportsOob(self.cfg, node)
3506 if path and path not in oob_paths:
3507 oob_paths.append(path)
    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3512 for instance in self.my_inst_names:
3513 inst_config = self.my_inst_info[instance]
      if inst_config.admin_state == constants.ADMINST_OFFLINE:
        i_offline += 1
3517 for nname in inst_config.all_nodes:
3518 if nname not in node_image:
3519 gnode = self.NodeImage(name=nname)
3520 gnode.ghost = (nname not in self.all_node_info)
3521 node_image[nname] = gnode
3523 inst_config.MapLVsByNode(node_vol_should)
3525 pnode = inst_config.primary_node
3526 node_image[pnode].pinst.append(instance)
3528 for snode in inst_config.secondary_nodes:
3529 nimg = node_image[snode]
3530 nimg.sinst.append(instance)
3531 if pnode not in nimg.sbp:
3532 nimg.sbp[pnode] = []
3533 nimg.sbp[pnode].append(instance)
3535 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3536 # The value of exclusive_storage should be the same across the group, so if
    # it's True for at least one node, we act as if it were set for all nodes
3538 self._exclusive_storage = compat.any(es_flags.values())
3539 if self._exclusive_storage:
3540 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3542 # At this point, we have the in-memory data structures complete,
3543 # except for the runtime information, which we'll gather next
3545 # Due to the way our RPC system works, exact response times cannot be
3546 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
3549 nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
3553 nvinfo_endtime = time.time()
3555 if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}
3563 all_drbd_map = self.cfg.ComputeDRBDMap()
3565 feedback_fn("* Gathering disk information (%s nodes)" %
3566 len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)
3570 feedback_fn("* Verifying configuration file consistency")
3572 # If not all nodes are being checked, we need to make sure the master node
3573 # and a non-checked vm_capable node are in the list.
3574 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
3577 vf_node_info = list(self.my_node_info.values())
3578 additional_nodes = []
3579 if master_node not in self.my_node_info:
3580 additional_nodes.append(master_node)
3581 vf_node_info.append(self.all_node_info[master_node])
3582 # Add the first vm_capable node we find which is not included,
3583 # excluding the master node (which we already have)
3584 for node in absent_nodes:
3585 nodeinfo = self.all_node_info[node]
3586 if (nodeinfo.vm_capable and not nodeinfo.offline and
3587 node != master_node):
3588 additional_nodes.append(node)
3589 vf_node_info.append(self.all_node_info[node])
3591 key = constants.NV_FILELIST
3592 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3593 {key: node_verify_param[key]},
3594 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()
3599 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3601 feedback_fn("* Verifying node status")
    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue
3634 nresult = all_nvinfo[node].payload
3636 nimg.call_ok = self._VerifyNode(node_i, nresult)
3637 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3638 self._VerifyNodeNetwork(node_i, nresult)
3639 self._VerifyNodeUserScripts(node_i, nresult)
3640 self._VerifyOob(node_i, nresult)
3641 self._VerifyFileStoragePaths(node_i, nresult,
3642 node == master_node)
3645 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
      self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                           all_drbd_map)
3649 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3650 self._UpdateNodeInstances(node_i, nresult, nimg)
3651 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3652 self._UpdateNodeOS(node_i, nresult, nimg)
3654 if not nimg.os_fail:
        if refos_img is None:
          refos_img = nimg
3657 self._VerifyNodeOS(node_i, nimg, refos_img)
3658 self._VerifyNodeBridges(node_i, nresult, bridges)
      # Check whether all running instances are primary for the node. (This
3661 # can no longer be done from _VerifyInstance below, since some of the
3662 # wrong instances could be from other node groups.)
3663 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3665 for inst in non_primary_inst:
3666 test = inst in self.all_inst_info
3667 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3668 "instance should not run on node %s", node_i.name)
3669 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3670 "node is running unknown instance %s", inst)
3672 self._VerifyGroupLVM(node_image, vg_name)
3674 for node, result in extra_lv_nvinfo.items():
3675 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3676 node_image[node], vg_name)
3678 feedback_fn("* Verifying instance status")
3679 for instance in self.my_inst_names:
3681 feedback_fn("* Verifying instance %s" % instance)
3682 inst_config = self.my_inst_info[instance]
self._VerifyInstance(instance, inst_config, node_image,
                     instdisk[instance])
3686 # If the instance is non-redundant we cannot survive losing its primary
3687 # node, so we are not N+1 compliant.
3688 if inst_config.disk_template not in constants.DTS_MIRRORED:
3689 i_non_redundant.append(instance)
3691 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3692 i_non_a_balanced.append(instance)
3694 feedback_fn("* Verifying orphan volumes")
3695 reserved = utils.FieldSet(*cluster.reserved_lvs)
3697 # We will get spurious "unknown volume" warnings if any node of this group
3698 # is secondary for an instance whose primary is in another group. To avoid
3699 # them, we find these instances and add their volumes to node_vol_should.
3700 for inst in self.all_inst_info.values():
3701 for secondary in inst.secondary_nodes:
3702 if (secondary in self.my_node_info
3703 and inst.name not in self.my_inst_info):
3704 inst.MapLVsByNode(node_vol_should)
3707 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3709 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3710 feedback_fn("* Verifying N+1 Memory redundancy")
3711 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3713 feedback_fn("* Other Notes")
if i_non_redundant:
  feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3716 % len(i_non_redundant))
3718 if i_non_a_balanced:
3719 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3720 % len(i_non_a_balanced))
if i_offline:
  feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)

if n_offline:
  feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)

if n_drained:
  feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)

return not self.bad
3733 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3734 """Analyze the post-hooks' result
3736 This method analyses the hook result, handles it, and sends some
3737 nicely-formatted feedback back to the user.
3739 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3740 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3741 @param hooks_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
3743 @param lu_result: previous Exec result
3744 @return: the new Exec result, based on the previous result
3748 # We only really run POST phase hooks, only for non-empty groups,
3749 # and are only interested in their results
if not self.my_node_names:
  # empty node group
  pass
3753 elif phase == constants.HOOKS_PHASE_POST:
3754 # Used to change hooks' output to proper indentation
3755 feedback_fn("* Hooks Results")
3756 assert hooks_results, "invalid result from hooks"
3758 for node_name in hooks_results:
res = hooks_results[node_name]
msg = res.fail_msg
3761 test = msg and not res.offline
3762 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3763 "Communication failure in hooks execution: %s", msg)
3764 if res.offline or msg:
# No need to investigate payload if node is offline or gave
# an error message
continue
3768 for script, hkr, output in res.payload:
3769 test = hkr == constants.HKR_FAIL
3770 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3771 "Script %s failed, output:", script)
if test:
  output = self._HOOKS_INDENT_RE.sub("      ", output)
  feedback_fn("%s" % output)
  lu_result = False

return lu_result
3780 class LUClusterVerifyDisks(NoHooksLU):
3781 """Verifies the cluster disks status.
3786 def ExpandNames(self):
3787 self.share_locks = _ShareAll()
3788 self.needed_locks = {
3789 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3792 def Exec(self, feedback_fn):
3793 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3795 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3796 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3797 for group in group_names])
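# Illustrative note (added; not in the original source): ResultWithJobs
# wraps a list of jobs, each job being a list of opcodes.  With two node
# groups, "default" and "backup", the value built above is equivalent to:
#
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="backup")]])
#
# i.e. one single-opcode job per group, so the groups are verified in
# parallel.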
3800 class LUGroupVerifyDisks(NoHooksLU):
3801 """Verifies the status of all disks in a node group.
3806 def ExpandNames(self):
3807 # Raises errors.OpPrereqError on its own if group can't be found
3808 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3810 self.share_locks = _ShareAll()
3811 self.needed_locks = {
3812 locking.LEVEL_INSTANCE: [],
3813 locking.LEVEL_NODEGROUP: [],
3814 locking.LEVEL_NODE: [],
# This opcode acquires all node locks in a group. LUClusterVerifyDisks
3817 # starts one instance of this opcode for every group, which means all
3818 # nodes will be locked for a short amount of time, so it's better to
3819 # acquire the node allocation lock as well.
3820 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3823 def DeclareLocks(self, level):
3824 if level == locking.LEVEL_INSTANCE:
3825 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3827 # Lock instances optimistically, needs verification once node and group
3828 # locks have been acquired
3829 self.needed_locks[locking.LEVEL_INSTANCE] = \
3830 self.cfg.GetNodeGroupInstances(self.group_uuid)
3832 elif level == locking.LEVEL_NODEGROUP:
3833 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3835 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3836 set([self.group_uuid] +
3837 # Lock all groups used by instances optimistically; this requires
# going via the node before it's locked, requiring verification
# later on
[group_uuid
3841 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3842 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3844 elif level == locking.LEVEL_NODE:
# This will only lock the nodes in the group to be verified which contain
# actual instances
3847 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3848 self._LockInstancesNodes()
3850 # Lock all nodes in group to be verified
3851 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3852 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3853 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3855 def CheckPrereq(self):
3856 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3857 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3858 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3860 assert self.group_uuid in owned_groups
3862 # Check if locked instances are still correct
3863 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3865 # Get instance information
3866 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3868 # Check if node groups for locked instances are still correct
3869 _CheckInstancesNodeGroups(self.cfg, self.instances,
3870 owned_groups, owned_nodes, self.group_uuid)
3872 def Exec(self, feedback_fn):
3873 """Verify integrity of cluster disks.
3875 @rtype: tuple of three items
3876 @return: a tuple of (dict of node-to-node_error, list of instances
which need activate-disks, dict of instance: (node, volume) for
missing volumes)
res_nodes = {}
res_instances = set()
res_missing = {}
3885 nv_dict = _MapInstanceDisksToNodes(
3886 [inst for inst in self.instances.values()
3887 if inst.admin_state == constants.ADMINST_UP])
3890 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3891 set(self.cfg.GetVmCapableNodeList()))
3893 node_lvs = self.rpc.call_lv_list(nodes, [])
3895 for (node, node_res) in node_lvs.items():
if node_res.offline:
  continue

msg = node_res.fail_msg
if msg:
  logging.warning("Error enumerating LVs on node %s: %s", node, msg)
  res_nodes[node] = msg
  continue
3905 for lv_name, (_, _, lv_online) in node_res.payload.items():
3906 inst = nv_dict.pop((node, lv_name), None)
3907 if not (lv_online or inst is None):
3908 res_instances.add(inst)
3910 # any leftover items in nv_dict are missing LVs, let's arrange the data
3912 for key, inst in nv_dict.iteritems():
3913 res_missing.setdefault(inst, []).append(list(key))
3915 return (res_nodes, list(res_instances), res_missing)
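# Illustrative sketch of the return value (added; all names hypothetical):
#
#   ({"node1.example.com": "error enumerating LVs"},  # per-node errors
#    ["instance1"],                       # instances needing activate-disks
#    {"instance2": [["node2.example.com", "xenvg/disk0"]]})  # missing LVs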
3918 class LUClusterRepairDiskSizes(NoHooksLU):
3919 """Verifies the cluster disks sizes.
3924 def ExpandNames(self):
3925 if self.op.instances:
3926 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3927 # Not getting the node allocation lock as only a specific set of
3928 # instances (and their nodes) is going to be acquired
3929 self.needed_locks = {
3930 locking.LEVEL_NODE_RES: [],
3931 locking.LEVEL_INSTANCE: self.wanted_names,
3933 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
else:
  self.wanted_names = None
3936 self.needed_locks = {
3937 locking.LEVEL_NODE_RES: locking.ALL_SET,
3938 locking.LEVEL_INSTANCE: locking.ALL_SET,
# This opcode acquires the node locks for all instances
3941 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3944 self.share_locks = {
3945 locking.LEVEL_NODE_RES: 1,
3946 locking.LEVEL_INSTANCE: 0,
3947 locking.LEVEL_NODE_ALLOC: 1,
3950 def DeclareLocks(self, level):
3951 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3952 self._LockInstancesNodes(primary_only=True, level=level)
3954 def CheckPrereq(self):
3955 """Check prerequisites.
3957 This only checks the optional instance list against the existing names.
3960 if self.wanted_names is None:
3961 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3963 self.wanted_instances = \
3964 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3966 def _EnsureChildSizes(self, disk):
3967 """Ensure children of the disk have the needed disk size.
3969 This is valid mainly for DRBD8 and fixes an issue where the
children have a smaller disk size than the parent.
3972 @param disk: an L{ganeti.objects.Disk} object
3975 if disk.dev_type == constants.LD_DRBD8:
3976 assert disk.children, "Empty children for DRBD8?"
3977 fchild = disk.children[0]
3978 mismatch = fchild.size < disk.size
if mismatch:
  self.LogInfo("Child disk has size %d, parent %d, fixing",
3981 fchild.size, disk.size)
3982 fchild.size = disk.size
3984 # and we recurse on this child only, not on the metadev
return self._EnsureChildSizes(fchild) or mismatch
else:
  return False
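# A minimal illustration (added; sizes are hypothetical, in MiB): for a
# DRBD8 disk of size 10240 whose data child (children[0]) reports 10000,
# the call grows the child to 10240 and returns True; if the sizes already
# match, it returns False and changes nothing.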
3989 def Exec(self, feedback_fn):
3990 """Verify the size of cluster disks.
3993 # TODO: check child disks too
3994 # TODO: check differences in size between primary/secondary nodes
changed = []
per_node_disks = {}
for instance in self.wanted_instances:
3997 pnode = instance.primary_node
3998 if pnode not in per_node_disks:
3999 per_node_disks[pnode] = []
4000 for idx, disk in enumerate(instance.disks):
4001 per_node_disks[pnode].append((instance, idx, disk))
4003 assert not (frozenset(per_node_disks.keys()) -
4004 self.owned_locks(locking.LEVEL_NODE_RES)), \
4005 "Not owning correct locks"
4006 assert not self.owned_locks(locking.LEVEL_NODE)
4009 for node, dskl in per_node_disks.items():
4010 newl = [v[2].Copy() for v in dskl]
for dsk in newl:
  self.cfg.SetDiskID(dsk, node)
4013 result = self.rpc.call_blockdev_getsize(node, newl)
if result.fail_msg:
  self.LogWarning("Failure in blockdev_getsize call to node"
                  " %s, ignoring", node)
  continue
4018 if len(result.payload) != len(dskl):
logging.warning("Invalid result from node %s: len(dskl)=%d,"
4020 " result.payload=%s", node, len(dskl), result.payload)
self.LogWarning("Invalid result from node %s, ignoring node results",
                node)
continue
4024 for ((instance, idx, disk), size) in zip(dskl, result.payload):
if size is None:
  self.LogWarning("Disk %d of instance %s did not return size"
                  " information, ignoring", idx, instance.name)
  continue
4029 if not isinstance(size, (int, long)):
4030 self.LogWarning("Disk %d of instance %s did not return valid"
4031 " size information, ignoring", idx, instance.name)
4034 if size != disk.size:
4035 self.LogInfo("Disk %d of instance %s has mismatched size,"
4036 " correcting: recorded %d, actual %d", idx,
instance.name, disk.size, size)
disk.size = size
4039 self.cfg.Update(instance, feedback_fn)
4040 changed.append((instance.name, idx, size))
4041 if self._EnsureChildSizes(disk):
4042 self.cfg.Update(instance, feedback_fn)
changed.append((instance.name, idx, disk.size))

return changed
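# Sketch of the returned "changed" list (added; values hypothetical):
#
#   [("instance1.example.com", 0, 10240)]
#
# meaning disk 0 of instance1 was corrected to 10240 MiB in the
# configuration.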
4047 class LUClusterRename(LogicalUnit):
4048 """Rename the cluster.
4051 HPATH = "cluster-rename"
4052 HTYPE = constants.HTYPE_CLUSTER
def BuildHooksEnv(self):
  return {
    "OP_TARGET": self.cfg.GetClusterName(),
    "NEW_NAME": self.op.name,
    }
4063 def BuildHooksNodes(self):
4064 """Build hooks nodes.
4067 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4069 def CheckPrereq(self):
4070 """Verify that the passed name is a valid one.
4073 hostname = netutils.GetHostname(name=self.op.name,
4074 family=self.cfg.GetPrimaryIPFamily())
4076 new_name = hostname.name
4077 self.ip = new_ip = hostname.ip
4078 old_name = self.cfg.GetClusterName()
4079 old_ip = self.cfg.GetMasterIP()
4080 if new_name == old_name and new_ip == old_ip:
4081 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4082 " cluster has changed",
4084 if new_ip != old_ip:
4085 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4086 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4087 " reachable on the network" %
4088 new_ip, errors.ECODE_NOTUNIQUE)
4090 self.op.name = new_name
4092 def Exec(self, feedback_fn):
4093 """Rename the cluster.
clustername = self.op.name
new_ip = self.ip
4099 # shutdown the master IP
4100 master_params = self.cfg.GetMasterNetworkParameters()
4101 ems = self.cfg.GetUseExternalMipScript()
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                 master_params, ems)
4104 result.Raise("Could not disable the master role")
4107 cluster = self.cfg.GetClusterInfo()
4108 cluster.cluster_name = clustername
4109 cluster.master_ip = new_ip
4110 self.cfg.Update(cluster, feedback_fn)
4112 # update the known hosts file
4113 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4114 node_list = self.cfg.GetOnlineNodeList()
try:
  node_list.remove(master_params.name)
except ValueError:
  pass
4119 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4121 master_params.ip = new_ip
result = self.rpc.call_node_activate_master_ip(master_params.name,
                                               master_params, ems)
4124 msg = result.fail_msg
if msg:
  self.LogWarning("Could not re-enable the master role on"
                  " the master, please restart manually: %s", msg)

return clustername
4132 def _ValidateNetmask(cfg, netmask):
4133 """Checks if a netmask is valid.
4135 @type cfg: L{config.ConfigWriter}
4136 @param cfg: The cluster configuration
4138 @param netmask: the netmask to be verified
4139 @raise errors.OpPrereqError: if the validation fails
4142 ip_family = cfg.GetPrimaryIPFamily()
try:
  ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4145 except errors.ProgrammerError:
4146 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4147 ip_family, errors.ECODE_INVAL)
4148 if not ipcls.ValidateNetmask(netmask):
4149 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4150 (netmask), errors.ECODE_INVAL)
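# Example (added; values illustrative): the master netmask is stored as a
# prefix length, so for an IPv4 cluster
#
#   _ValidateNetmask(cfg, 24)   # a /24, accepted
#   _ValidateNetmask(cfg, 99)   # out of range, raises errors.OpPrereqError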
4153 class LUClusterSetParams(LogicalUnit):
4154 """Change the parameters of the cluster.
4157 HPATH = "cluster-modify"
4158 HTYPE = constants.HTYPE_CLUSTER
4161 def CheckArguments(self):
4165 if self.op.uid_pool:
4166 uidpool.CheckUidPool(self.op.uid_pool)
4168 if self.op.add_uids:
4169 uidpool.CheckUidPool(self.op.add_uids)
4171 if self.op.remove_uids:
4172 uidpool.CheckUidPool(self.op.remove_uids)
4174 if self.op.master_netmask is not None:
4175 _ValidateNetmask(self.cfg, self.op.master_netmask)
4177 if self.op.diskparams:
4178 for dt_params in self.op.diskparams.values():
4179 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
try:
  utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4182 except errors.OpPrereqError, err:
raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4186 def ExpandNames(self):
4187 # FIXME: in the future maybe other cluster params won't require checking on
4188 # all nodes to be modified.
4189 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4190 # resource locks the right thing, shouldn't it be the BGL instead?
4191 self.needed_locks = {
4192 locking.LEVEL_NODE: locking.ALL_SET,
4193 locking.LEVEL_INSTANCE: locking.ALL_SET,
4194 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4195 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4197 self.share_locks = _ShareAll()
def BuildHooksEnv(self):
  return {
    "OP_TARGET": self.cfg.GetClusterName(),
    "NEW_VG_NAME": self.op.vg_name,
    }
4208 def BuildHooksNodes(self):
4209 """Build hooks nodes.
mn = self.cfg.GetMasterNode()
return ([mn], [mn])
4215 def _CheckVgName(self, node_list, enabled_disk_templates,
4216 new_enabled_disk_templates):
4217 """Check the consistency of the vg name on all nodes and in case it gets
4218 unset whether there are instances still using it.
4221 if self.op.vg_name is not None and not self.op.vg_name:
4222 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4223 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4224 " instances exist", errors.ECODE_INVAL)
4226 if (self.op.vg_name is not None and
4227 utils.IsLvmEnabled(enabled_disk_templates)) or \
4228 (self.cfg.GetVGName() is not None and
4229 utils.LvmGetsEnabled(enabled_disk_templates,
4230 new_enabled_disk_templates)):
4231 self._CheckVgNameOnNodes(node_list)
4233 def _CheckVgNameOnNodes(self, node_list):
4234 """Check the status of the volume group on each node.
4237 vglist = self.rpc.call_vg_list(node_list)
4238 for node in node_list:
4239 msg = vglist[node].fail_msg
if msg:
  # ignoring down node
  self.LogWarning("Error while gathering data on node %s"
                  " (ignoring node): %s", node, msg)
  continue
vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                      self.op.vg_name,
                                      constants.MIN_VG_SIZE)
if vgstatus:
  raise errors.OpPrereqError("Error on node '%s': %s" %
                             (node, vgstatus), errors.ECODE_ENVIRON)
4252 def _GetEnabledDiskTemplates(self, cluster):
4253 """Determines the enabled disk templates and the subset of disk templates
4254 that are newly enabled by this operation.
4257 enabled_disk_templates = None
4258 new_enabled_disk_templates = []
4259 if self.op.enabled_disk_templates:
4260 enabled_disk_templates = self.op.enabled_disk_templates
4261 new_enabled_disk_templates = \
4262 list(set(enabled_disk_templates)
4263 - set(cluster.enabled_disk_templates))
4265 enabled_disk_templates = cluster.enabled_disk_templates
4266 return (enabled_disk_templates, new_enabled_disk_templates)
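# Worked example (added; template names illustrative): if the cluster
# currently enables ["drbd"] and the opcode passes
# enabled_disk_templates=["drbd", "plain"], this returns
# (["drbd", "plain"], ["plain"]): the full new list plus the newly
# enabled subset.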
4268 def CheckPrereq(self):
4269 """Check prerequisites.
This checks that the given parameters do not conflict and
that the given volume group is valid.
4275 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4276 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4277 raise errors.OpPrereqError("Cannot disable drbd helper while"
4278 " drbd-based instances exist",
4281 node_list = self.owned_locks(locking.LEVEL_NODE)
4282 self.cluster = cluster = self.cfg.GetClusterInfo()
4284 (enabled_disk_templates, new_enabled_disk_templates) = \
4285 self._GetEnabledDiskTemplates(cluster)
4287 self._CheckVgName(node_list, enabled_disk_templates,
4288 new_enabled_disk_templates)
4290 if self.op.drbd_helper:
4291 # checks given drbd helper on all nodes
4292 helpers = self.rpc.call_drbd_helper(node_list)
4293 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
if ninfo.offline:
  self.LogInfo("Not checking drbd helper on offline node %s", node)
  continue
4297 msg = helpers[node].fail_msg
if msg:
  raise errors.OpPrereqError("Error checking drbd helper on node"
4300 " '%s': %s" % (node, msg),
4301 errors.ECODE_ENVIRON)
4302 node_helper = helpers[node].payload
4303 if node_helper != self.op.drbd_helper:
4304 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4305 (node, node_helper), errors.ECODE_ENVIRON)
4307 # validate params changes
4308 if self.op.beparams:
4309 objects.UpgradeBeParams(self.op.beparams)
4310 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4311 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4313 if self.op.ndparams:
4314 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4315 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4317 # TODO: we need a more general way to handle resetting
4318 # cluster-level parameters to default values
4319 if self.new_ndparams["oob_program"] == "":
4320 self.new_ndparams["oob_program"] = \
4321 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4323 if self.op.hv_state:
4324 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4325 self.cluster.hv_state_static)
4326 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4327 for hv, values in new_hv_state.items())
4329 if self.op.disk_state:
4330 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4331 self.cluster.disk_state_static)
4332 self.new_disk_state = \
4333 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4334 for name, values in svalues.items()))
4335 for storage, svalues in new_disk_state.items())
if self.op.ipolicy:
  self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
                                        group_policy=False)
all_instances = self.cfg.GetAllInstancesInfo().values()
violations = set()
4343 for group in self.cfg.GetAllNodeGroupsInfo().values():
4344 instances = frozenset([inst for inst in all_instances
4345 if compat.any(node in group.members
4346 for node in inst.all_nodes)])
4347 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4348 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4349 new = _ComputeNewInstanceViolations(ipol,
4350 new_ipolicy, instances, self.cfg)
if new:
  violations.update(new)

if violations:
  self.LogWarning("After the ipolicy change the following instances"
4356 " violate them: %s",
4357 utils.CommaJoin(utils.NiceSort(violations)))
4359 if self.op.nicparams:
4360 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4361 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4362 objects.NIC.CheckParameterSyntax(self.new_nicparams)
nic_errors = []

# check all instances for consistency
4366 for instance in self.cfg.GetAllInstancesInfo().values():
4367 for nic_idx, nic in enumerate(instance.nics):
4368 params_copy = copy.deepcopy(nic.nicparams)
4369 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4371 # check parameter syntax
try:
  objects.NIC.CheckParameterSyntax(params_filled)
4374 except errors.ConfigurationError, err:
4375 nic_errors.append("Instance %s, nic/%d: %s" %
4376 (instance.name, nic_idx, err))
4378 # if we're moving instances to routed, check that they have an ip
4379 target_mode = params_filled[constants.NIC_MODE]
4380 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4381 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4382 " address" % (instance.name, nic_idx))
if nic_errors:
  raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4385 "\n".join(nic_errors), errors.ECODE_INVAL)
4387 # hypervisor list/parameters
4388 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4389 if self.op.hvparams:
4390 for hv_name, hv_dict in self.op.hvparams.items():
4391 if hv_name not in self.new_hvparams:
4392 self.new_hvparams[hv_name] = hv_dict
else:
  self.new_hvparams[hv_name].update(hv_dict)
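# Sketch of the merge semantics above (added; values hypothetical):
#
#   cluster.hvparams = {"xen-pvm": {"kernel_path": "/boot/vmlinuz"}}
#   self.op.hvparams = {"xen-pvm": {"root_path": "/dev/xvda1"}}
#
# yields new_hvparams["xen-pvm"] containing both keys: per-hypervisor
# dicts passed in the opcode are merged into, not substituted for, the
# existing cluster values.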
4396 # disk template parameters
4397 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4398 if self.op.diskparams:
4399 for dt_name, dt_params in self.op.diskparams.items():
if dt_name not in self.new_diskparams:
  self.new_diskparams[dt_name] = dt_params
else:
  self.new_diskparams[dt_name].update(dt_params)
4405 # os hypervisor parameters
4406 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
if self.op.os_hvp:
  for os_name, hvs in self.op.os_hvp.items():
4409 if os_name not in self.new_os_hvp:
4410 self.new_os_hvp[os_name] = hvs
4412 for hv_name, hv_dict in hvs.items():
if hv_dict is None:
  # Delete if it exists
4415 self.new_os_hvp[os_name].pop(hv_name, None)
4416 elif hv_name not in self.new_os_hvp[os_name]:
4417 self.new_os_hvp[os_name][hv_name] = hv_dict
else:
  self.new_os_hvp[os_name][hv_name].update(hv_dict)
4422 self.new_osp = objects.FillDict(cluster.osparams, {})
4423 if self.op.osparams:
4424 for os_name, osp in self.op.osparams.items():
4425 if os_name not in self.new_osp:
4426 self.new_osp[os_name] = {}
self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                          use_none=True)
4431 if not self.new_osp[os_name]:
4432 # we removed all parameters
4433 del self.new_osp[os_name]
else:
  # check the parameter validity (remote check)
4436 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4437 os_name, self.new_osp[os_name])
4439 # changes to the hypervisor list
4440 if self.op.enabled_hypervisors is not None:
4441 self.hv_list = self.op.enabled_hypervisors
4442 for hv in self.hv_list:
4443 # if the hypervisor doesn't already exist in the cluster
4444 # hvparams, we initialize it to empty, and then (in both
4445 # cases) we make sure to fill the defaults, as we might not
# have a complete defaults list if the hypervisor wasn't
# enabled before
if hv not in new_hvp:
  new_hvp[hv] = {}
4450 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4451 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
else:
  self.hv_list = cluster.enabled_hypervisors
4455 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4456 # either the enabled list has changed, or the parameters have, validate
4457 for hv_name, hv_params in self.new_hvparams.items():
4458 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4459 (self.op.enabled_hypervisors and
4460 hv_name in self.op.enabled_hypervisors)):
4461 # either this is a new hypervisor, or its parameters have changed
4462 hv_class = hypervisor.GetHypervisorClass(hv_name)
4463 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4464 hv_class.CheckParameterSyntax(hv_params)
4465 _CheckHVParams(self, node_list, hv_name, hv_params)
4467 self._CheckDiskTemplateConsistency()
if self.op.os_hvp:
  # no need to check any newly-enabled hypervisors, since the
4471 # defaults have already been checked in the above code-block
4472 for os_name, os_hvp in self.new_os_hvp.items():
4473 for hv_name, hv_params in os_hvp.items():
4474 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4475 # we need to fill in the new os_hvp on top of the actual hv_p
4476 cluster_defaults = self.new_hvparams.get(hv_name, {})
4477 new_osp = objects.FillDict(cluster_defaults, hv_params)
4478 hv_class = hypervisor.GetHypervisorClass(hv_name)
4479 hv_class.CheckParameterSyntax(new_osp)
4480 _CheckHVParams(self, node_list, hv_name, new_osp)
4482 if self.op.default_iallocator:
4483 alloc_script = utils.FindFile(self.op.default_iallocator,
constants.IALLOCATOR_SEARCH_PATH,
os.path.isfile)
4486 if alloc_script is None:
4487 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4488 " specified" % self.op.default_iallocator,
4491 def _CheckDiskTemplateConsistency(self):
4492 """Check whether the disk templates that are going to be disabled
4493 are still in use by some instances.
4496 if self.op.enabled_disk_templates:
4497 cluster = self.cfg.GetClusterInfo()
4498 instances = self.cfg.GetAllInstancesInfo()
4500 disk_templates_to_remove = set(cluster.enabled_disk_templates) \
4501 - set(self.op.enabled_disk_templates)
4502 for instance in instances.itervalues():
4503 if instance.disk_template in disk_templates_to_remove:
4504 raise errors.OpPrereqError("Cannot disable disk template '%s',"
4505 " because instance '%s' is using it." %
4506 (instance.disk_template, instance.name))
4508 def _SetVgName(self, feedback_fn):
4509 """Determines and sets the new volume group name.
4512 if self.op.vg_name is not None:
4513 if self.op.vg_name and not \
4514 utils.IsLvmEnabled(self.cluster.enabled_disk_templates):
4515 feedback_fn("Note that you specified a volume group, but did not"
4516 " enable any lvm disk template.")
4517 new_volume = self.op.vg_name
4519 if utils.IsLvmEnabled(self.cluster.enabled_disk_templates):
4520 raise errors.OpPrereqError("Cannot unset volume group if lvm-based"
4521 " disk templates are enabled.")
4523 if new_volume != self.cfg.GetVGName():
4524 self.cfg.SetVGName(new_volume)
else:
  feedback_fn("Cluster LVM configuration already in desired"
4527 " state, not changing")
4529 if utils.IsLvmEnabled(self.cluster.enabled_disk_templates) and \
4530 not self.cfg.GetVGName():
4531 raise errors.OpPrereqError("Please specify a volume group when"
4532 " enabling lvm-based disk-templates.")
4534 def Exec(self, feedback_fn):
4535 """Change the parameters of the cluster.
4538 if self.op.enabled_disk_templates:
4539 self.cluster.enabled_disk_templates = \
4540 list(set(self.op.enabled_disk_templates))
4542 self._SetVgName(feedback_fn)
4544 if self.op.drbd_helper is not None:
4545 if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates:
feedback_fn("Note that you specified a drbd user helper, but did"
            " not enable the drbd disk template.")
new_helper = self.op.drbd_helper
if not new_helper:
  new_helper = None
4551 if new_helper != self.cfg.GetDRBDHelper():
4552 self.cfg.SetDRBDHelper(new_helper)
else:
  feedback_fn("Cluster DRBD helper already in desired state,"
              " not changing")
4556 if self.op.hvparams:
4557 self.cluster.hvparams = self.new_hvparams
if self.op.os_hvp:
  self.cluster.os_hvp = self.new_os_hvp
4560 if self.op.enabled_hypervisors is not None:
4561 self.cluster.hvparams = self.new_hvparams
4562 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4563 if self.op.beparams:
4564 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4565 if self.op.nicparams:
4566 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
if self.op.ipolicy:
  self.cluster.ipolicy = self.new_ipolicy
4569 if self.op.osparams:
4570 self.cluster.osparams = self.new_osp
4571 if self.op.ndparams:
4572 self.cluster.ndparams = self.new_ndparams
4573 if self.op.diskparams:
4574 self.cluster.diskparams = self.new_diskparams
4575 if self.op.hv_state:
4576 self.cluster.hv_state_static = self.new_hv_state
4577 if self.op.disk_state:
4578 self.cluster.disk_state_static = self.new_disk_state
4580 if self.op.candidate_pool_size is not None:
4581 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4582 # we need to update the pool size here, otherwise the save will fail
4583 _AdjustCandidatePool(self, [])
4585 if self.op.maintain_node_health is not None:
4586 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4587 feedback_fn("Note: CONFD was disabled at build time, node health"
4588 " maintenance is not useful (still enabling it)")
4589 self.cluster.maintain_node_health = self.op.maintain_node_health
4591 if self.op.prealloc_wipe_disks is not None:
4592 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4594 if self.op.add_uids is not None:
4595 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4597 if self.op.remove_uids is not None:
4598 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4600 if self.op.uid_pool is not None:
4601 self.cluster.uid_pool = self.op.uid_pool
4603 if self.op.default_iallocator is not None:
4604 self.cluster.default_iallocator = self.op.default_iallocator
4606 if self.op.reserved_lvs is not None:
4607 self.cluster.reserved_lvs = self.op.reserved_lvs
4609 if self.op.use_external_mip_script is not None:
4610 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4612 def helper_os(aname, mods, desc):
4614 lst = getattr(self.cluster, aname)
4615 for key, val in mods:
4616 if key == constants.DDM_ADD:
if val in lst:
  feedback_fn("OS %s already in %s, ignoring" % (val, desc))
else:
  lst.append(val)
4621 elif key == constants.DDM_REMOVE:
if val in lst:
  lst.remove(val)
else:
  feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
else:
  raise errors.ProgrammerError("Invalid modification '%s'" % key)
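# Usage sketch for helper_os (added; OS name hypothetical): "mods" is a
# list of (action, os_name) pairs, e.g.
#
#   helper_os("hidden_os", [(constants.DDM_ADD, "debian-image")], "hidden")
#
# adds "debian-image" to cluster.hidden_os unless it is already present.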
4629 if self.op.hidden_os:
4630 helper_os("hidden_os", self.op.hidden_os, "hidden")
4632 if self.op.blacklisted_os:
4633 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4635 if self.op.master_netdev:
4636 master_params = self.cfg.GetMasterNetworkParameters()
4637 ems = self.cfg.GetUseExternalMipScript()
4638 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4639 self.cluster.master_netdev)
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                 master_params, ems)
4642 result.Raise("Could not disable the master ip")
4643 feedback_fn("Changing master_netdev from %s to %s" %
4644 (master_params.netdev, self.op.master_netdev))
4645 self.cluster.master_netdev = self.op.master_netdev
4647 if self.op.master_netmask:
4648 master_params = self.cfg.GetMasterNetworkParameters()
4649 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4650 result = self.rpc.call_node_change_master_netmask(master_params.name,
4651 master_params.netmask,
self.op.master_netmask,
master_params.ip,
master_params.netdev)
if result.fail_msg:
  msg = "Could not change the master IP netmask: %s" % result.fail_msg
  feedback_fn(msg)
4659 self.cluster.master_netmask = self.op.master_netmask
4661 self.cfg.Update(self.cluster, feedback_fn)
4663 if self.op.master_netdev:
4664 master_params = self.cfg.GetMasterNetworkParameters()
4665 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4666 self.op.master_netdev)
4667 ems = self.cfg.GetUseExternalMipScript()
result = self.rpc.call_node_activate_master_ip(master_params.name,
                                               master_params, ems)
if result.fail_msg:
  self.LogWarning("Could not re-enable the master ip on"
                  " the master, please restart manually: %s",
                  result.fail_msg)
4676 def _UploadHelper(lu, nodes, fname):
4677 """Helper for uploading a file and showing warnings.
4680 if os.path.exists(fname):
4681 result = lu.rpc.call_upload_file(nodes, fname)
4682 for to_node, to_result in result.items():
4683 msg = to_result.fail_msg
if msg:
  msg = ("Copy of file %s to node %s failed: %s" %
         (fname, to_node, msg))
  lu.LogWarning(msg)
4690 def _ComputeAncillaryFiles(cluster, redist):
4691 """Compute files external to Ganeti which need to be consistent.
4693 @type redist: boolean
4694 @param redist: Whether to include files which need to be redistributed
4697 # Compute files for all nodes
files_all = set([
  pathutils.SSH_KNOWN_HOSTS_FILE,
  pathutils.CONFD_HMAC_KEY,
  pathutils.CLUSTER_DOMAIN_SECRET_FILE,
  pathutils.SPICE_CERT_FILE,
  pathutils.SPICE_CACERT_FILE,
  pathutils.RAPI_USERS_FILE,
  ])
if redist:
  # we need to ship at least the RAPI certificate
  files_all.add(pathutils.RAPI_CERT_FILE)
else:
  files_all.update(pathutils.ALL_CERT_FILES)
  files_all.update(ssconf.SimpleStore().GetFileList())
4714 if cluster.modify_etc_hosts:
4715 files_all.add(pathutils.ETC_HOSTS)
4717 if cluster.use_external_mip_script:
4718 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4720 # Files which are optional, these must:
4721 # - be present in one other category as well
4722 # - either exist or not exist on all nodes of that category (mc, vm all)
files_opt = set([
  pathutils.RAPI_USERS_FILE,
  ])
4727 # Files which should only be on master candidates
files_mc = set()

if not redist:
  files_mc.add(pathutils.CLUSTER_CONF_FILE)
if (not redist and
    (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4736 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4737 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4739 # Files which should only be on VM-capable nodes
files_vm = set(
  filename
  for hv_name in cluster.enabled_hypervisors
  for filename in
    hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])

files_opt |= set(
  filename
  for hv_name in cluster.enabled_hypervisors
  for filename in
    hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4752 # Filenames in each category must be unique
4753 all_files_set = files_all | files_mc | files_vm
4754 assert (len(all_files_set) ==
4755 sum(map(len, [files_all, files_mc, files_vm]))), \
4756 "Found file listed in more than one file list"
4758 # Optional files must be present in one other category
4759 assert all_files_set.issuperset(files_opt), \
4760 "Optional file not in a different required list"
4762 # This one file should never ever be re-distributed via RPC
4763 assert not (redist and
4764 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4766 return (files_all, files_opt, files_mc, files_vm)
4769 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4770 """Distribute additional files which are part of the cluster configuration.
4772 ConfigWriter takes care of distributing the config and ssconf files, but
4773 there are more files which should be distributed to all nodes. This function
4774 makes sure those are copied.
4776 @param lu: calling logical unit
4777 @param additional_nodes: list of nodes not in the config to distribute to
4778 @type additional_vm: boolean
4779 @param additional_vm: whether the additional nodes are vm-capable or not
4782 # Gather target nodes
4783 cluster = lu.cfg.GetClusterInfo()
4784 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4786 online_nodes = lu.cfg.GetOnlineNodeList()
4787 online_set = frozenset(online_nodes)
4788 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4790 if additional_nodes is not None:
4791 online_nodes.extend(additional_nodes)
if additional_vm:
  vm_nodes.extend(additional_nodes)
4795 # Never distribute to master node
4796 for nodelist in [online_nodes, vm_nodes]:
4797 if master_info.name in nodelist:
4798 nodelist.remove(master_info.name)
4801 (files_all, _, files_mc, files_vm) = \
4802 _ComputeAncillaryFiles(cluster, True)
4804 # Never re-distribute configuration file from here
4805 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4806 pathutils.CLUSTER_CONF_FILE in files_vm)
4807 assert not files_mc, "Master candidates not handled in this function"
filemap = [
  (online_nodes, files_all),
  (vm_nodes, files_vm),
  ]
4815 for (node_list, files) in filemap:
for fname in files:
  _UploadHelper(lu, node_list, fname)
4820 class LUClusterRedistConf(NoHooksLU):
4821 """Force the redistribution of cluster configuration.
4823 This is a very simple LU.
4828 def ExpandNames(self):
4829 self.needed_locks = {
4830 locking.LEVEL_NODE: locking.ALL_SET,
4831 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4833 self.share_locks = _ShareAll()
4835 def Exec(self, feedback_fn):
4836 """Redistribute the configuration.
4839 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4840 _RedistributeAncillaryFiles(self)
4843 class LUClusterActivateMasterIp(NoHooksLU):
4844 """Activate the master IP on the master node.
4847 def Exec(self, feedback_fn):
4848 """Activate the master IP.
4851 master_params = self.cfg.GetMasterNetworkParameters()
4852 ems = self.cfg.GetUseExternalMipScript()
4853 result = self.rpc.call_node_activate_master_ip(master_params.name,
4855 result.Raise("Could not activate the master IP")
4858 class LUClusterDeactivateMasterIp(NoHooksLU):
4859 """Deactivate the master IP on the master node.
4862 def Exec(self, feedback_fn):
4863 """Deactivate the master IP.
4866 master_params = self.cfg.GetMasterNetworkParameters()
4867 ems = self.cfg.GetUseExternalMipScript()
4868 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4870 result.Raise("Could not deactivate the master IP")
4873 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4874 """Sleep and poll for an instance's disk to sync.
if not instance.disks or disks is not None and not disks:
  return True
4880 disks = _ExpandCheckDisks(instance, disks)
4883 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4885 node = instance.primary_node
for dev in disks:
  lu.cfg.SetDiskID(dev, node)
4890 # TODO: Convert to utils.Retry
retries = 0
degr_retries = 10 # in seconds, as we sleep 1 second each time
while True:
  max_time = 0
  done = True
  cumul_degraded = False
4898 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4899 msg = rstats.fail_msg
if msg:
  lu.LogWarning("Can't get any data from node %s: %s", node, msg)
  retries += 1
  if retries >= 10:
    raise errors.RemoteError("Can't contact node %s for mirror data,"
                             " aborting." % node)
  time.sleep(6)
  continue
retries = 0
rstats = rstats.payload
4910 for i, mstat in enumerate(rstats):
if mstat is None:
  lu.LogWarning("Can't compute data for node %s/%s",
                node, disks[i].iv_name)
  continue
4916 cumul_degraded = (cumul_degraded or
4917 (mstat.is_degraded and mstat.sync_percent is None))
if mstat.sync_percent is not None:
  done = False
4920 if mstat.estimated_time is not None:
4921 rem_time = ("%s remaining (estimated)" %
4922 utils.FormatSeconds(mstat.estimated_time))
4923 max_time = mstat.estimated_time
else:
  rem_time = "no time estimate"
4926 lu.LogInfo("- device %s: %5.2f%% done, %s",
4927 disks[i].iv_name, mstat.sync_percent, rem_time)
4929 # if we're done but degraded, let's do a few small retries, to
4930 # make sure we see a stable and not transient situation; therefore
4931 # we force restart of the loop
4932 if (done or oneshot) and cumul_degraded and degr_retries > 0:
logging.info("Degraded disks found, %d retries left", degr_retries)
degr_retries -= 1
time.sleep(1)
continue

if done or oneshot:
  break

time.sleep(min(60, max_time))
if done:
  lu.LogInfo("Instance %s's disks are in sync", instance.name)
4946 return not cumul_degraded
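# Typical call pattern (added sketch; the caller shown is hypothetical,
# real callers differ in how they treat a False result):
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)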
4949 def _BlockdevFind(lu, node, dev, instance):
4950 """Wrapper around call_blockdev_find to annotate diskparams.
4952 @param lu: A reference to the lu object
4953 @param node: The node to call out
4954 @param dev: The device to find
4955 @param instance: The instance object the device belongs to
4956 @returns The result of the rpc call
4959 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4960 return lu.rpc.call_blockdev_find(node, disk)
4963 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4964 """Wrapper around L{_CheckDiskConsistencyInner}.
4967 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                  ldisk=ldisk)
4972 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4974 """Check that mirrors are not degraded.
4976 @attention: The device has to be annotated already.
4978 The ldisk parameter, if True, will change the test from the
4979 is_degraded attribute (which represents overall non-ok status for
4980 the device(s)) to the ldisk (representing the local storage status).
lu.cfg.SetDiskID(dev, node)

result = True
4987 if on_primary or dev.AssembleOnSecondary():
4988 rstats = lu.rpc.call_blockdev_find(node, dev)
4989 msg = rstats.fail_msg
if msg:
  lu.LogWarning("Can't find disk on node %s: %s", node, msg)
  result = False
4993 elif not rstats.payload:
lu.LogWarning("Can't find disk on node %s", node)
result = False
else:
  if ldisk:
    result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
  else:
    result = result and not rstats.payload.is_degraded
if dev.children:
  for child in dev.children:
    result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                   on_primary)

return result
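# Behavior note with a usage sketch (added): with the default ldisk=False
# the check consults only is_degraded, so a disk that is merely resyncing
# still passes; with ldisk=True the local storage must be LDS_OKAY:
#
#   _CheckDiskConsistency(self, instance, dev, node, False)              # overall
#   _CheckDiskConsistency(self, instance, dev, node, False, ldisk=True)  # local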
5010 class LUOobCommand(NoHooksLU):
5011 """Logical unit for OOB handling.
5015 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
5017 def ExpandNames(self):
5018 """Gather locks we need.
5021 if self.op.node_names:
5022 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
5023 lock_names = self.op.node_names
else:
  lock_names = locking.ALL_SET
5027 self.needed_locks = {
5028 locking.LEVEL_NODE: lock_names,
5031 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
5033 if not self.op.node_names:
5034 # Acquire node allocation lock only if all nodes are affected
5035 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5037 def CheckPrereq(self):
5038 """Check prerequisites.
5041 - the node exists in the configuration
5044 Any errors are signaled by raising errors.OpPrereqError.
5048 self.master_node = self.cfg.GetMasterNode()
5050 assert self.op.power_delay >= 0.0
5052 if self.op.node_names:
5053 if (self.op.command in self._SKIP_MASTER and
5054 self.master_node in self.op.node_names):
5055 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
5056 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
5058 if master_oob_handler:
5059 additional_text = ("run '%s %s %s' if you want to operate on the"
5060 " master regardless") % (master_oob_handler,
5064 additional_text = "it does not support out-of-band operations"
5066 raise errors.OpPrereqError(("Operating on the master node %s is not"
5067 " allowed for %s; %s") %
5068 (self.master_node, self.op.command,
5069 additional_text), errors.ECODE_INVAL)
else:
  self.op.node_names = self.cfg.GetNodeList()
5072 if self.op.command in self._SKIP_MASTER:
5073 self.op.node_names.remove(self.master_node)
5075 if self.op.command in self._SKIP_MASTER:
5076 assert self.master_node not in self.op.node_names
self.nodes = []
for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
  if node is None:
    raise errors.OpPrereqError("Node %s not found" % node_name,
                               errors.ECODE_NOENT)
  self.nodes.append(node)
5085 if (not self.op.ignore_status and
5086 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
5087 raise errors.OpPrereqError(("Cannot power off node %s because it is"
5088 " not marked offline") % node_name,
5091 def Exec(self, feedback_fn):
5092 """Execute OOB and return result if we expect any.
master_node = self.master_node
ret = []
5098 for idx, node in enumerate(utils.NiceSort(self.nodes,
5099 key=lambda node: node.name)):
5100 node_entry = [(constants.RS_NORMAL, node.name)]
5101 ret.append(node_entry)
5103 oob_program = _SupportsOob(self.cfg, node)
if not oob_program:
  node_entry.append((constants.RS_UNAVAIL, None))
  continue
5109 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5110 self.op.command, oob_program, node.name)
5111 result = self.rpc.call_run_oob(master_node, oob_program,
self.op.command, node.name,
self.op.timeout)

if result.fail_msg:
  self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5117 node.name, result.fail_msg)
5118 node_entry.append((constants.RS_NODATA, None))
else:
  try:
    self._CheckPayload(result)
5122 except errors.OpExecError, err:
5123 self.LogWarning("Payload returned by node '%s' is not valid: %s",
node.name, err)
node_entry.append((constants.RS_NODATA, None))
else:
  if self.op.command == constants.OOB_HEALTH:
5128 # For health we should log important events
5129 for item, status in result.payload:
5130 if status in [constants.OOB_STATUS_WARNING,
5131 constants.OOB_STATUS_CRITICAL]:
5132 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5133 item, node.name, status)
if self.op.command == constants.OOB_POWER_ON:
  node.powered = True
5137 elif self.op.command == constants.OOB_POWER_OFF:
5138 node.powered = False
5139 elif self.op.command == constants.OOB_POWER_STATUS:
5140 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5141 if powered != node.powered:
5142 logging.warning(("Recorded power state (%s) of node '%s' does not"
5143 " match actual power state (%s)"), node.powered,
5146 # For configuration changing commands we should update the node
5147 if self.op.command in (constants.OOB_POWER_ON,
5148 constants.OOB_POWER_OFF):
5149 self.cfg.Update(node, feedback_fn)
5151 node_entry.append((constants.RS_NORMAL, result.payload))
5153 if (self.op.command == constants.OOB_POWER_ON and
5154 idx < len(self.nodes) - 1):
time.sleep(self.op.power_delay)

return ret
5159 def _CheckPayload(self, result):
5160 """Checks if the payload is valid.
5162 @param result: RPC result
5163 @raises errors.OpExecError: If payload is not valid
errs = []
if self.op.command == constants.OOB_HEALTH:
5168 if not isinstance(result.payload, list):
5169 errs.append("command 'health' is expected to return a list but got %s" %
5170 type(result.payload))
else:
  for item, status in result.payload:
5173 if status not in constants.OOB_STATUSES:
errs.append("health item '%s' has invalid status '%s'" %
            (item, status))
5177 if self.op.command == constants.OOB_POWER_STATUS:
5178 if not isinstance(result.payload, dict):
5179 errs.append("power-status is expected to return a dict but got %s" %
5180 type(result.payload))
5182 if self.op.command in [
5183 constants.OOB_POWER_ON,
5184 constants.OOB_POWER_OFF,
5185 constants.OOB_POWER_CYCLE,
5187 if result.payload is not None:
5188 errs.append("%s is expected to not return payload but got '%s'" %
5189 (self.op.command, result.payload))
if errs:
  raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5193 utils.CommaJoin(errs))
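# Summary of the payload shapes accepted above (added; example values are
# illustrative):
#
#   OOB_HEALTH       -> [["disk0", "OK"], ["psu1", "CRITICAL"]]
#   OOB_POWER_STATUS -> {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON / OOB_POWER_OFF / OOB_POWER_CYCLE -> None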
5196 class _OsQuery(_QueryBase):
5197 FIELDS = query.OS_FIELDS
5199 def ExpandNames(self, lu):
5200 # Lock all nodes in shared mode
5201 # Temporary removal of locks, should be reverted later
5202 # TODO: reintroduce locks when they are lighter-weight
5203 lu.needed_locks = {}
5204 #self.share_locks[locking.LEVEL_NODE] = 1
5205 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5207 # The following variables interact with _QueryBase._GetNames
if self.names:
  self.wanted = self.names
else:
  self.wanted = locking.ALL_SET
5213 self.do_locking = self.use_locking
def DeclareLocks(self, lu, level):
  pass
@staticmethod
def _DiagnoseByOS(rlist):
5220 """Remaps a per-node return list into an a per-os per-node dictionary
5222 @param rlist: a map with node names as keys and OS objects as values
5225 @return: a dictionary with osnames as keys and as value another
5226 map, with nodes as keys and tuples of (path, status, diagnose,
5227 variants, parameters, api_versions) as values, eg::
5229 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5230 (/srv/..., False, "invalid api")],
5231 "node2": [(/srv/..., True, "", [], [])]}
all_os = {}
# we build here the list of nodes that didn't fail the RPC (at RPC
5237 # level), so that nodes with a non-responding node daemon don't
5238 # make all OSes invalid
5239 good_nodes = [node_name for node_name in rlist
5240 if not rlist[node_name].fail_msg]
5241 for node_name, nr in rlist.items():
if nr.fail_msg or not nr.payload:
  continue
5244 for (name, path, status, diagnose, variants,
5245 params, api_versions) in nr.payload:
5246 if name not in all_os:
5247 # build a list of nodes for this os containing empty lists
5248 # for each node in node_list
all_os[name] = {}
for nname in good_nodes:
5251 all_os[name][nname] = []
5252 # convert params from [name, help] to (name, help)
5253 params = [tuple(v) for v in params]
5254 all_os[name][node_name].append((path, status, diagnose,
variants, params, api_versions))

return all_os
5258 def _GetQueryData(self, lu):
5259 """Computes the list of nodes and their attributes.
5262 # Locking is not used
5263 assert not (compat.any(lu.glm.is_owned(level)
5264 for level in locking.LEVELS
5265 if level != locking.LEVEL_CLUSTER) or
5266 self.do_locking or self.use_locking)
5268 valid_nodes = [node.name
5269 for node in lu.cfg.GetAllNodesInfo().values()
5270 if not node.offline and node.vm_capable]
5271 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
cluster = lu.cfg.GetClusterInfo()

data = {}
5276 for (os_name, os_data) in pol.items():
5277 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5278 hidden=(os_name in cluster.hidden_os),
5279 blacklisted=(os_name in cluster.blacklisted_os))
variants = set()
parameters = set()
api_versions = set()
5285 for idx, osl in enumerate(os_data.values()):
5286 info.valid = bool(info.valid and osl and osl[0][1])
(node_variants, node_params, node_api) = osl[0][3:6]
if idx == 0:
  variants.update(node_variants)
  parameters.update(node_params)
  api_versions.update(node_api)
else:
5297 # Filter out inconsistent values
5298 variants.intersection_update(node_variants)
5299 parameters.intersection_update(node_params)
5300 api_versions.intersection_update(node_api)
5302 info.variants = list(variants)
5303 info.parameters = list(parameters)
5304 info.api_versions = list(api_versions)
5306 data[os_name] = info
5308 # Prepare data in requested order
return [data[name] for name in self._GetNames(lu, pol.keys(), None)
        if name in data]
5313 class LUOsDiagnose(NoHooksLU):
5314 """Logical unit for OS diagnose/query.
5320 def _BuildFilter(fields, names):
5321 """Builds a filter for querying OSes.
5324 name_filter = qlang.MakeSimpleFilter("name", names)
5326 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5327 # respective field is not requested
5328 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5329 for fname in ["hidden", "blacklisted"]
5330 if fname not in fields]
5331 if "valid" not in fields:
5332 status_filter.append([qlang.OP_TRUE, "valid"])
if status_filter:
  status_filter.insert(0, qlang.OP_AND)
else:
  status_filter = None
5339 if name_filter and status_filter:
5340 return [qlang.OP_AND, name_filter, status_filter]
elif name_filter:
  return name_filter
else:
  return status_filter
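# Worked example (added; the exact shape of the name sub-filter comes from
# qlang.MakeSimpleFilter and is shown here as an assumption):
#
#   _BuildFilter(["name", "valid"], ["lenny-image"])
#
# would yield roughly
#
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "lenny-image"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]]]]
#
# since "valid" is requested (no validity filter) while "hidden" and
# "blacklisted" are not (both get filtered out).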
5346 def CheckArguments(self):
5347 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5348 self.op.output_fields, False)
5350 def ExpandNames(self):
5351 self.oq.ExpandNames(self)
5353 def Exec(self, feedback_fn):
5354 return self.oq.OldStyleQuery(self)
5357 class _ExtStorageQuery(_QueryBase):
5358 FIELDS = query.EXTSTORAGE_FIELDS
5360 def ExpandNames(self, lu):
5361 # Lock all nodes in shared mode
5362 # Temporary removal of locks, should be reverted later
5363 # TODO: reintroduce locks when they are lighter-weight
5364 lu.needed_locks = {}
5365 #self.share_locks[locking.LEVEL_NODE] = 1
5366 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5368 # The following variables interact with _QueryBase._GetNames
if self.names:
  self.wanted = self.names
else:
  self.wanted = locking.ALL_SET
5374 self.do_locking = self.use_locking
def DeclareLocks(self, lu, level):
  pass
@staticmethod
def _DiagnoseByProvider(rlist):
5381 """Remaps a per-node return list into an a per-provider per-node dictionary
5383 @param rlist: a map with node names as keys and ExtStorage objects as values
5386 @return: a dictionary with extstorage providers as keys and as
5387 value another map, with nodes as keys and tuples of
5388 (path, status, diagnose, parameters) as values, eg::
5390 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5391 "node2": [(/srv/..., False, "missing file")]
5392 "node3": [(/srv/..., True, "", [])]
all_es = {}
# we build here the list of nodes that didn't fail the RPC (at RPC
# level), so that nodes with a non-responding node daemon don't
# make all providers invalid
5400 good_nodes = [node_name for node_name in rlist
5401 if not rlist[node_name].fail_msg]
5402 for node_name, nr in rlist.items():
if nr.fail_msg or not nr.payload:
  continue
5405 for (name, path, status, diagnose, params) in nr.payload:
5406 if name not in all_es:
# build a list of nodes for this provider containing empty lists
# for each node in node_list
all_es[name] = {}
for nname in good_nodes:
5411 all_es[name][nname] = []
5412 # convert params from [name, help] to (name, help)
5413 params = [tuple(v) for v in params]
all_es[name][node_name].append((path, status, diagnose, params))

return all_es
5417 def _GetQueryData(self, lu):
5418 """Computes the list of nodes and their attributes.
5421 # Locking is not used
5422 assert not (compat.any(lu.glm.is_owned(level)
5423 for level in locking.LEVELS
5424 if level != locking.LEVEL_CLUSTER) or
5425 self.do_locking or self.use_locking)
5427 valid_nodes = [node.name
5428 for node in lu.cfg.GetAllNodesInfo().values()
5429 if not node.offline and node.vm_capable]
pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))

data = {}
5434 nodegroup_list = lu.cfg.GetNodeGroupList()
5436 for (es_name, es_data) in pol.items():
5437 # For every provider compute the nodegroup validity.
5438 # To do this we need to check the validity of each node in es_data
5439 # and then construct the corresponding nodegroup dict:
5440 # { nodegroup1: status
5441 # nodegroup2: status
# }
ndgrp_data = {}
for nodegroup in nodegroup_list:
5445 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5447 nodegroup_nodes = ndgrp.members
nodegroup_name = ndgrp.name
node_statuses = []

for node in nodegroup_nodes:
5452 if node in valid_nodes:
5453 if es_data[node] != []:
5454 node_status = es_data[node][0][1]
5455 node_statuses.append(node_status)
else:
  node_statuses.append(False)
5459 if False in node_statuses:
5460 ndgrp_data[nodegroup_name] = False
else:
  ndgrp_data[nodegroup_name] = True
5464 # Compute the provider's parameters
parameters = set()
for idx, esl in enumerate(es_data.values()):
valid = bool(esl and esl[0][1])
node_params = esl[0][3]
if idx == 0:
  parameters.update(node_params)
else:
  # Filter out inconsistent values
  parameters.intersection_update(node_params)
5479 params = list(parameters)
5481 # Now fill all the info for this provider
5482 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
nodegroup_status=ndgrp_data,
parameters=params)
5486 data[es_name] = info
5488 # Prepare data in requested order
return [data[name] for name in self._GetNames(lu, pol.keys(), None)
        if name in data]
5493 class LUExtStorageDiagnose(NoHooksLU):
5494 """Logical unit for ExtStorage diagnose/query.
5499 def CheckArguments(self):
5500 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5501 self.op.output_fields, False)
5503 def ExpandNames(self):
5504 self.eq.ExpandNames(self)
5506 def Exec(self, feedback_fn):
5507 return self.eq.OldStyleQuery(self)
5510 class LUNodeRemove(LogicalUnit):
5511 """Logical unit for removing a node.
5514 HPATH = "node-remove"
5515 HTYPE = constants.HTYPE_NODE
def BuildHooksEnv(self):
  return {
    "OP_TARGET": self.op.node_name,
    "NODE_NAME": self.op.node_name,
    }
5526 def BuildHooksNodes(self):
5527 """Build hooks nodes.
5529 This doesn't run on the target node in the pre phase as a failed
5530 node would then be impossible to remove.
5533 all_nodes = self.cfg.GetNodeList()
try:
  all_nodes.remove(self.op.node_name)
except ValueError:
  pass
5538 return (all_nodes, all_nodes)
5540 def CheckPrereq(self):
5541 """Check prerequisites.
5544 - the node exists in the configuration
5545 - it does not have primary or secondary instances
5546 - it's not the master
5548 Any errors are signaled by raising errors.OpPrereqError.
5551 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5552 node = self.cfg.GetNodeInfo(self.op.node_name)
5553 assert node is not None
5555 masternode = self.cfg.GetMasterNode()
5556 if node.name == masternode:
5557 raise errors.OpPrereqError("Node is the master node, failover to another"
5558 " node is required", errors.ECODE_INVAL)
5560 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5561 if node.name in instance.all_nodes:
5562         raise errors.OpPrereqError("Instance %s is still running on the node,"
5563                                    " please remove it first" % instance_name,
5564                                    errors.ECODE_INVAL)
5565     self.op.node_name = node.name
5566     self.node = node
5568 def Exec(self, feedback_fn):
5569     """Removes the node from the cluster.
5571     """
5572     node = self.node
5573     logging.info("Stopping the node daemon and removing configs from node %s",
5574                  node.name)
5576 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5578     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5579       "Not owning BGL"
5581 # Promote nodes to master candidate as needed
5582 _AdjustCandidatePool(self, exceptions=[node.name])
5583 self.context.RemoveNode(node.name)
5585 # Run post hooks on the node before it's removed
5586 _RunPostHook(self, node.name)
5588 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5589 msg = result.fail_msg
5590     if msg:
5591       self.LogWarning("Errors encountered on the remote node while leaving"
5592 " the cluster: %s", msg)
5594 # Remove node from our /etc/hosts
5595 if self.cfg.GetClusterInfo().modify_etc_hosts:
5596 master_node = self.cfg.GetMasterNode()
5597 result = self.rpc.call_etc_hosts_modify(master_node,
5598                                               constants.ETC_HOSTS_REMOVE,
5599                                               node.name, None)
5600 result.Raise("Can't update hosts file with new host data")
5601 _RedistributeAncillaryFiles(self)
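# Example (minimal sketch; the node name is a made-up placeholder): node
# removal is requested through the opcode layer and ends up in LUNodeRemove
# above, which adjusts the candidate pool, runs the post hooks while the
# node is still known, and only then tells its daemon to leave the cluster.
#
#   op = opcodes.OpNodeRemove(node_name="node3.example.com")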
5604 class _NodeQuery(_QueryBase):
5605 FIELDS = query.NODE_FIELDS
5607 def ExpandNames(self, lu):
5608 lu.needed_locks = {}
5609 lu.share_locks = _ShareAll()
5611     if self.names:
5612       self.wanted = _GetWantedNodes(lu, self.names)
5613     else:
5614       self.wanted = locking.ALL_SET
5616 self.do_locking = (self.use_locking and
5617 query.NQ_LIVE in self.requested_data)
5619     if self.do_locking:
5620       # If any non-static field is requested we need to lock the nodes
5621 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5622 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5624   def DeclareLocks(self, lu, level):
5625     pass
5627 def _GetQueryData(self, lu):
5628 """Computes the list of nodes and their attributes.
5631 all_info = lu.cfg.GetAllNodesInfo()
5633 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5635 # Gather data as requested
5636 if query.NQ_LIVE in self.requested_data:
5637 # filter out non-vm_capable nodes
5638 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5640 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5641 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5642 [lu.cfg.GetHypervisorType()], es_flags)
5643 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5644 for (name, nresult) in node_data.items()
5645                        if not nresult.fail_msg and nresult.payload)
5646     else:
5647       live_data = None
5649 if query.NQ_INST in self.requested_data:
5650 node_to_primary = dict([(name, set()) for name in nodenames])
5651 node_to_secondary = dict([(name, set()) for name in nodenames])
5653 inst_data = lu.cfg.GetAllInstancesInfo()
5655 for inst in inst_data.values():
5656 if inst.primary_node in node_to_primary:
5657 node_to_primary[inst.primary_node].add(inst.name)
5658 for secnode in inst.secondary_nodes:
5659 if secnode in node_to_secondary:
5660 node_to_secondary[secnode].add(inst.name)
5661     else:
5662       node_to_primary = None
5663 node_to_secondary = None
5665 if query.NQ_OOB in self.requested_data:
5666 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5667                          for name, node in all_info.iteritems())
5668     else:
5669       oob_support = None
5671 if query.NQ_GROUP in self.requested_data:
5672       groups = lu.cfg.GetAllNodeGroupsInfo()
5673     else:
5674       groups = {}
5676 return query.NodeQueryData([all_info[name] for name in nodenames],
5677 live_data, lu.cfg.GetMasterNode(),
5678 node_to_primary, node_to_secondary, groups,
5679 oob_support, lu.cfg.GetClusterInfo())
5682 class LUNodeQuery(NoHooksLU):
5683 """Logical unit for querying nodes.
5686 # pylint: disable=W0142
5689 def CheckArguments(self):
5690 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5691 self.op.output_fields, self.op.use_locking)
5693 def ExpandNames(self):
5694 self.nq.ExpandNames(self)
5696 def DeclareLocks(self, level):
5697 self.nq.DeclareLocks(self, level)
5699 def Exec(self, feedback_fn):
5700 return self.nq.OldStyleQuery(self)
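# Example (minimal sketch; node name and fields are illustrative): an
# old-style node query as used by "gnt-node list". Without use_locking the
# data may be slightly stale, but the query cannot block on running jobs.
#
#   op = opcodes.OpNodeQuery(names=["node1.example.com"],
#                            output_fields=["name", "dtotal", "dfree"],
#                            use_locking=False)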
5703 class LUNodeQueryvols(NoHooksLU):
5704 """Logical unit for getting volumes on node(s).
5708 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5709 _FIELDS_STATIC = utils.FieldSet("node")
5711 def CheckArguments(self):
5712 _CheckOutputFields(static=self._FIELDS_STATIC,
5713 dynamic=self._FIELDS_DYNAMIC,
5714 selected=self.op.output_fields)
5716 def ExpandNames(self):
5717 self.share_locks = _ShareAll()
5719     if self.op.nodes:
5720       self.needed_locks = {
5721         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5722         }
5723     else:
5724       self.needed_locks = {
5725         locking.LEVEL_NODE: locking.ALL_SET,
5726         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5727         }
5729 def Exec(self, feedback_fn):
5730     """Computes the list of volumes and their attributes.
5733 nodenames = self.owned_locks(locking.LEVEL_NODE)
5734 volumes = self.rpc.call_node_volumes(nodenames)
5736 ilist = self.cfg.GetAllInstancesInfo()
5737     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5739     output = []
5740     for node in nodenames:
5741       nresult = volumes[node]
5742       if nresult.offline:
5743         continue
5744       msg = nresult.fail_msg
5745       if msg:
5746         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5747         continue
5749       node_vols = sorted(nresult.payload,
5750                          key=operator.itemgetter("dev"))
5752       for vol in node_vols:
5753         node_output = []
5754         for field in self.op.output_fields:
5755           if field == "node":
5756             val = node
5757           elif field == "phys":
5758             val = vol["dev"]
5759           elif field == "vg":
5760             val = vol["vg"]
5761           elif field == "name":
5762             val = vol["name"]
5763           elif field == "size":
5764             val = int(float(vol["size"]))
5765           elif field == "instance":
5766             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5767           else:
5768             raise errors.ParameterError(field)
5769           node_output.append(str(val))
5771         output.append(node_output)
5773     return output
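# Example (sketch; values are made up): every returned row carries the
# requested fields as strings, so for output_fields=["node", "name", "size"]
# a single row could look like:
#
#   ["node1.example.com", "disk0", "10240"]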
5776 class LUNodeQueryStorage(NoHooksLU):
5777 """Logical unit for getting information on storage units on node(s).
5780 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5783 def CheckArguments(self):
5784 _CheckOutputFields(static=self._FIELDS_STATIC,
5785 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5786 selected=self.op.output_fields)
5788 def ExpandNames(self):
5789 self.share_locks = _ShareAll()
5791     if self.op.nodes:
5792       self.needed_locks = {
5793         locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5794         }
5795     else:
5796       self.needed_locks = {
5797         locking.LEVEL_NODE: locking.ALL_SET,
5798         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5799         }
5801 def Exec(self, feedback_fn):
5802     """Computes the list of storage units and their attributes.
5805 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5807 # Always get name to sort by
5808 if constants.SF_NAME in self.op.output_fields:
5809 fields = self.op.output_fields[:]
5810     else:
5811       fields = [constants.SF_NAME] + self.op.output_fields
5813 # Never ask for node or type as it's only known to the LU
5814 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5815 while extra in fields:
5816 fields.remove(extra)
5818 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5819 name_idx = field_idx[constants.SF_NAME]
5821 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5822 data = self.rpc.call_storage_list(self.nodes,
5823 self.op.storage_type, st_args,
5824 self.op.name, fields)
5826     result = []
5828     for node in utils.NiceSort(self.nodes):
5829       nresult = data[node]
5830       if nresult.offline:
5831         continue
5833       msg = nresult.fail_msg
5834       if msg:
5835         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5836         continue
5838       rows = dict([(row[name_idx], row) for row in nresult.payload])
5840       for name in utils.NiceSort(rows.keys()):
5841         row = rows[name]
5842         out = []
5845         for field in self.op.output_fields:
5846           if field == constants.SF_NODE:
5847             val = node
5848           elif field == constants.SF_TYPE:
5849             val = self.op.storage_type
5850           elif field in field_idx:
5851             val = row[field_idx[field]]
5852           else:
5853             raise errors.ParameterError(field)
5855           out.append(str(val))
5857         result.append(out)
5859     return result
5862 class _InstanceQuery(_QueryBase):
5863 FIELDS = query.INSTANCE_FIELDS
5865 def ExpandNames(self, lu):
5866 lu.needed_locks = {}
5867 lu.share_locks = _ShareAll()
5869     if self.names:
5870       self.wanted = _GetWantedInstances(lu, self.names)
5871     else:
5872       self.wanted = locking.ALL_SET
5874 self.do_locking = (self.use_locking and
5875 query.IQ_LIVE in self.requested_data)
5876     if self.do_locking:
5877       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5878 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5879 lu.needed_locks[locking.LEVEL_NODE] = []
5880 lu.needed_locks[locking.LEVEL_NETWORK] = []
5881 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5883 self.do_grouplocks = (self.do_locking and
5884 query.IQ_NODES in self.requested_data)
5886 def DeclareLocks(self, lu, level):
5888 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5889 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5891 # Lock all groups used by instances optimistically; this requires going
5892 # via the node before it's locked, requiring verification later on
5893 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5894         set(group_uuid
5895             for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5896 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5897 elif level == locking.LEVEL_NODE:
5898 lu._LockInstancesNodes() # pylint: disable=W0212
5900 elif level == locking.LEVEL_NETWORK:
5901 lu.needed_locks[locking.LEVEL_NETWORK] = \
5902         frozenset(net_uuid
5903                   for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5904 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5906   @staticmethod
5907   def _CheckGroupLocks(lu):
5908 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5909 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5911 # Check if node groups for locked instances are still correct
5912 for instance_name in owned_instances:
5913 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5915 def _GetQueryData(self, lu):
5916 """Computes the list of instances and their attributes.
5919 if self.do_grouplocks:
5920 self._CheckGroupLocks(lu)
5922 cluster = lu.cfg.GetClusterInfo()
5923 all_info = lu.cfg.GetAllInstancesInfo()
5925 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5927 instance_list = [all_info[name] for name in instance_names]
5928 nodes = frozenset(itertools.chain(*(inst.all_nodes
5929 for inst in instance_list)))
5930 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5931     bad_nodes = []
5932     offline_nodes = []
5933     wrongnode_inst = set()
5935 # Gather data as requested
5936     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5937       live_data = {}
5938       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5939       for name in nodes:
5940         result = node_data[name]
5941         if result.offline:
5942           # offline nodes will be in both lists
5943           assert result.fail_msg
5944           offline_nodes.append(name)
5945         if result.fail_msg:
5946           bad_nodes.append(name)
5947         elif result.payload:
5948           for inst in result.payload:
5949             if inst in all_info:
5950               if all_info[inst].primary_node == name:
5951                 live_data.update(result.payload)
5952               else:
5953                 wrongnode_inst.add(inst)
5954             else:
5955               # orphan instance; we don't list it here as we don't
5956               # handle this case yet in the output of instance listing
5957               logging.warning("Orphan instance '%s' found on node %s",
5958                               inst, name)
5959         # else no instance is alive
5960     else:
5961       live_data = {}
5963 if query.IQ_DISKUSAGE in self.requested_data:
5964 gmi = ganeti.masterd.instance
5965 disk_usage = dict((inst.name,
5966 gmi.ComputeDiskSize(inst.disk_template,
5967 [{constants.IDISK_SIZE: disk.size}
5968 for disk in inst.disks]))
5969                         for inst in instance_list)
5970     else:
5971       disk_usage = None
5973 if query.IQ_CONSOLE in self.requested_data:
5974       consinfo = {}
5975       for inst in instance_list:
5976 if inst.name in live_data:
5977 # Instance is running
5978 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5979         else:
5980           consinfo[inst.name] = None
5981       assert set(consinfo.keys()) == set(instance_names)
5982     else:
5983       consinfo = None
5985 if query.IQ_NODES in self.requested_data:
5986       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5987                                             instance_list)))
5988       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5989       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5990                     for uuid in set(map(operator.attrgetter("group"),
5991                                         nodes.values())))
5992     else:
5993       nodes = None
5994       groups = None
5996 if query.IQ_NETWORKS in self.requested_data:
5997 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5998 for i in instance_list))
5999       networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
6000     else:
6001       networks = None
6003 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
6004 disk_usage, offline_nodes, bad_nodes,
6005 live_data, wrongnode_inst, consinfo,
6006 nodes, groups, networks)
6009 class LUQuery(NoHooksLU):
6010 """Query for resources/items of a certain kind.
6013 # pylint: disable=W0142
6016 def CheckArguments(self):
6017 qcls = _GetQueryImplementation(self.op.what)
6019 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
6021 def ExpandNames(self):
6022 self.impl.ExpandNames(self)
6024 def DeclareLocks(self, level):
6025 self.impl.DeclareLocks(self, level)
6027 def Exec(self, feedback_fn):
6028 return self.impl.NewStyleQuery(self)
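# Example (minimal sketch; field names are illustrative): a new-style query
# over nodes, restricted with a simple name filter built via qlang.
#
#   op = opcodes.OpQuery(what=constants.QR_NODE,
#                        fields=["name", "master_candidate"],
#                        qfilter=qlang.MakeSimpleFilter("name",
#                                                       ["node1.example.com"]))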
6031 class LUQueryFields(NoHooksLU):
6032 """Query for resources/items of a certain kind.
6035 # pylint: disable=W0142
6038 def CheckArguments(self):
6039 self.qcls = _GetQueryImplementation(self.op.what)
6041 def ExpandNames(self):
6042 self.needed_locks = {}
6044 def Exec(self, feedback_fn):
6045 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
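# Example (minimal sketch): listing the available fields for a resource kind
# needs no locks at all, hence the empty needed_locks above.
#
#   op = opcodes.OpQueryFields(what=constants.QR_INSTANCE, fields=None)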
6048 class LUNodeModifyStorage(NoHooksLU):
6049 """Logical unit for modifying a storage volume on a node.
6054 def CheckArguments(self):
6055 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6057     storage_type = self.op.storage_type
6059     try:
6060       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
6061     except KeyError:
6062       raise errors.OpPrereqError("Storage units of type '%s' cannot be"
6063                                  " modified" % storage_type,
6064                                  errors.ECODE_INVAL)
6066     diff = set(self.op.changes.keys()) - modifiable
6067     if diff:
6068       raise errors.OpPrereqError("The following fields cannot be modified for"
6069                                  " storage units of type '%s': %r" %
6070                                  (storage_type, list(diff)),
6071                                  errors.ECODE_INVAL)
6073 def ExpandNames(self):
6074 self.needed_locks = {
6075       locking.LEVEL_NODE: self.op.node_name,
6076       }
6078 def Exec(self, feedback_fn):
6079     """Modifies a storage volume on a node.
6082 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
6083 result = self.rpc.call_storage_modify(self.op.node_name,
6084 self.op.storage_type, st_args,
6085 self.op.name, self.op.changes)
6086 result.Raise("Failed to modify storage unit '%s' on %s" %
6087 (self.op.name, self.op.node_name))
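# Example (minimal sketch; node and volume names are placeholders): for LVM
# physical volumes the allocatable flag is the typical modifiable field, and
# "changes" must stay within constants.MODIFIABLE_STORAGE_FIELDS.
#
#   op = opcodes.OpNodeModifyStorage(node_name="node2.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sda3",
#                                    changes={constants.SF_ALLOCATABLE: False})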
6090 class LUNodeAdd(LogicalUnit):
6091 """Logical unit for adding node to the cluster.
6094   HPATH = "node-add"
6095   HTYPE = constants.HTYPE_NODE
6096 _NFLAGS = ["master_capable", "vm_capable"]
6098 def CheckArguments(self):
6099 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
6100 # validate/normalize the node name
6101 self.hostname = netutils.GetHostname(name=self.op.node_name,
6102 family=self.primary_ip_family)
6103 self.op.node_name = self.hostname.name
6105 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
6106 raise errors.OpPrereqError("Cannot readd the master node",
6109 if self.op.readd and self.op.group:
6110 raise errors.OpPrereqError("Cannot pass a node group when a node is"
6111 " being readded", errors.ECODE_INVAL)
6113   def BuildHooksEnv(self):
6114     """Build hooks env.
6116     This will run on all nodes before, and on all nodes + the new node after.
6118     """
6119     return {
6120 "OP_TARGET": self.op.node_name,
6121 "NODE_NAME": self.op.node_name,
6122 "NODE_PIP": self.op.primary_ip,
6123 "NODE_SIP": self.op.secondary_ip,
6124 "MASTER_CAPABLE": str(self.op.master_capable),
6125       "VM_CAPABLE": str(self.op.vm_capable),
6126       }
6128 def BuildHooksNodes(self):
6129 """Build hooks nodes.
6132 # Exclude added node
6133 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6134 post_nodes = pre_nodes + [self.op.node_name, ]
6136 return (pre_nodes, post_nodes)
6138 def CheckPrereq(self):
6139 """Check prerequisites.
6142 - the new node is not already in the config
6144 - its parameters (single/dual homed) matches the cluster
6146 Any errors are signaled by raising errors.OpPrereqError.
6149     cfg = self.cfg
6150     hostname = self.hostname
6151 node = hostname.name
6152 primary_ip = self.op.primary_ip = hostname.ip
6153 if self.op.secondary_ip is None:
6154 if self.primary_ip_family == netutils.IP6Address.family:
6155         raise errors.OpPrereqError("When using an IPv6 primary address, a"
6156                                    " valid IPv4 address must be given as"
6157                                    " secondary", errors.ECODE_INVAL)
6158       self.op.secondary_ip = primary_ip
6160 secondary_ip = self.op.secondary_ip
6161 if not netutils.IP4Address.IsValid(secondary_ip):
6162 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6163 " address" % secondary_ip, errors.ECODE_INVAL)
6165 node_list = cfg.GetNodeList()
6166 if not self.op.readd and node in node_list:
6167 raise errors.OpPrereqError("Node %s is already in the configuration" %
6168 node, errors.ECODE_EXISTS)
6169 elif self.op.readd and node not in node_list:
6170       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6171                                  errors.ECODE_NOENT)
6173 self.changed_primary_ip = False
6175 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6176 if self.op.readd and node == existing_node_name:
6177 if existing_node.secondary_ip != secondary_ip:
6178 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6179                                      " address configuration as before",
6180                                      errors.ECODE_INVAL)
6181         if existing_node.primary_ip != primary_ip:
6182           self.changed_primary_ip = True
6184         continue
6186 if (existing_node.primary_ip == primary_ip or
6187 existing_node.secondary_ip == primary_ip or
6188 existing_node.primary_ip == secondary_ip or
6189 existing_node.secondary_ip == secondary_ip):
6190 raise errors.OpPrereqError("New node ip address(es) conflict with"
6191 " existing node %s" % existing_node.name,
6192 errors.ECODE_NOTUNIQUE)
6194 # After this 'if' block, None is no longer a valid value for the
6195 # _capable op attributes
6196     if self.op.readd:
6197       old_node = self.cfg.GetNodeInfo(node)
6198 assert old_node is not None, "Can't retrieve locked node %s" % node
6199 for attr in self._NFLAGS:
6200 if getattr(self.op, attr) is None:
6201 setattr(self.op, attr, getattr(old_node, attr))
6202     else:
6203       for attr in self._NFLAGS:
6204 if getattr(self.op, attr) is None:
6205 setattr(self.op, attr, True)
6207 if self.op.readd and not self.op.vm_capable:
6208 pri, sec = cfg.GetNodeInstances(node)
6209       if pri or sec:
6210         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6211                                    " flag set to false, but it already holds"
6212                                    " instances" % node,
6213                                    errors.ECODE_STATE)
6215 # check that the type of the node (single versus dual homed) is the
6216 # same as for the master
6217 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6218 master_singlehomed = myself.secondary_ip == myself.primary_ip
6219 newbie_singlehomed = secondary_ip == primary_ip
6220 if master_singlehomed != newbie_singlehomed:
6221 if master_singlehomed:
6222 raise errors.OpPrereqError("The master has no secondary ip but the"
6223                                    " new node has one",
6224                                    errors.ECODE_INVAL)
6225       else:
6226         raise errors.OpPrereqError("The master has a secondary ip but the"
6227                                    " new node doesn't have one",
6228                                    errors.ECODE_INVAL)
6230 # checks reachability
6231 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6232 raise errors.OpPrereqError("Node not reachable by ping",
6233 errors.ECODE_ENVIRON)
6235 if not newbie_singlehomed:
6236 # check reachability from my secondary ip to newbie's secondary ip
6237 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6238 source=myself.secondary_ip):
6239 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6240 " based ping to node daemon port",
6241 errors.ECODE_ENVIRON)
6248 if self.op.master_capable:
6249 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6251 self.master_candidate = False
6254 self.new_node = old_node
6256 node_group = cfg.LookupNodeGroup(self.op.group)
6257 self.new_node = objects.Node(name=node,
6258 primary_ip=primary_ip,
6259 secondary_ip=secondary_ip,
6260 master_candidate=self.master_candidate,
6261 offline=False, drained=False,
6262 group=node_group, ndparams={})
6264 if self.op.ndparams:
6265 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6266 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6267 "node", "cluster or group")
6269 if self.op.hv_state:
6270 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6272 if self.op.disk_state:
6273 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6275 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6276 # it a property on the base class.
6277 rpcrunner = rpc.DnsOnlyRunner()
6278 result = rpcrunner.call_version([node])[node]
6279 result.Raise("Can't get version information from node %s" % node)
6280 if constants.PROTOCOL_VERSION == result.payload:
6281 logging.info("Communication to node %s fine, sw version %s match",
6282 node, result.payload)
6283     else:
6284       raise errors.OpPrereqError("Version mismatch master version %s,"
6285 " node version %s" %
6286 (constants.PROTOCOL_VERSION, result.payload),
6287 errors.ECODE_ENVIRON)
6289 vg_name = cfg.GetVGName()
6290 if vg_name is not None:
6291 vparams = {constants.NV_PVLIST: [vg_name]}
6292 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6293 cname = self.cfg.GetClusterName()
6294 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6295 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6296       if errmsgs:
6297         raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6298 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6300 def Exec(self, feedback_fn):
6301 """Adds the new node to the cluster.
6304 new_node = self.new_node
6305 node = new_node.name
6307     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6308       "Not owning BGL"
6310     # We are adding a new node, so we assume it's powered
6311 new_node.powered = True
6313 # for re-adds, reset the offline/drained/master-candidate flags;
6314 # we need to reset here, otherwise offline would prevent RPC calls
6315 # later in the procedure; this also means that if the re-add
6316 # fails, we are left with a non-offlined, broken node
6317     if self.op.readd:
6318       new_node.drained = new_node.offline = False # pylint: disable=W0201
6319       self.LogInfo("Re-adding a node; the offline/drained flags were reset")
6320 # if we demote the node, we do cleanup later in the procedure
6321 new_node.master_candidate = self.master_candidate
6322 if self.changed_primary_ip:
6323 new_node.primary_ip = self.op.primary_ip
6325 # copy the master/vm_capable flags
6326 for attr in self._NFLAGS:
6327 setattr(new_node, attr, getattr(self.op, attr))
6329 # notify the user about any possible mc promotion
6330 if new_node.master_candidate:
6331 self.LogInfo("Node will be a master candidate")
6333 if self.op.ndparams:
6334 new_node.ndparams = self.op.ndparams
6335     else:
6336       new_node.ndparams = {}
6338 if self.op.hv_state:
6339 new_node.hv_state_static = self.new_hv_state
6341 if self.op.disk_state:
6342 new_node.disk_state_static = self.new_disk_state
6344 # Add node to our /etc/hosts, and add key to known_hosts
6345 if self.cfg.GetClusterInfo().modify_etc_hosts:
6346 master_node = self.cfg.GetMasterNode()
6347 result = self.rpc.call_etc_hosts_modify(master_node,
6348                                               constants.ETC_HOSTS_ADD,
6349                                               self.hostname.name,
6350                                               self.hostname.ip)
6351 result.Raise("Can't update hosts file with new host data")
6353 if new_node.secondary_ip != new_node.primary_ip:
6354       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6355                                False)
6357 node_verify_list = [self.cfg.GetMasterNode()]
6358 node_verify_param = {
6359 constants.NV_NODELIST: ([node], {}),
6360       # TODO: do a node-net-test as well?
6361       }
6363 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6364 self.cfg.GetClusterName())
6365 for verifier in node_verify_list:
6366 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6367 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6368       if nl_payload:
6369         for failed in nl_payload:
6370 feedback_fn("ssh/hostname verification failed"
6371 " (checking from %s): %s" %
6372 (verifier, nl_payload[failed]))
6373 raise errors.OpExecError("ssh/hostname verification failed")
6375     if self.op.readd:
6376       _RedistributeAncillaryFiles(self)
6377 self.context.ReaddNode(new_node)
6378 # make sure we redistribute the config
6379 self.cfg.Update(new_node, feedback_fn)
6380 # and make sure the new node will not have old files around
6381 if not new_node.master_candidate:
6382 result = self.rpc.call_node_demote_from_mc(new_node.name)
6383 msg = result.fail_msg
6384         if msg:
6385           self.LogWarning("Node failed to demote itself from master"
6386 " candidate status: %s" % msg)
6387     else:
6388       _RedistributeAncillaryFiles(self, additional_nodes=[node],
6389 additional_vm=self.op.vm_capable)
6390 self.context.AddNode(new_node, self.proc.GetECId())
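# Example (minimal sketch; the node name is a placeholder): adding a node,
# and later re-adding it. On re-add no group may be passed, and flags not
# given explicitly are taken over from the existing configuration entry.
#
#   add_op = opcodes.OpNodeAdd(node_name="node4.example.com")
#   readd_op = opcodes.OpNodeAdd(node_name="node4.example.com", readd=True)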
6393 class LUNodeSetParams(LogicalUnit):
6394 """Modifies the parameters of a node.
6396 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6397 to the node role (as _ROLE_*)
6398 @cvar _R2F: a dictionary from node role to tuples of flags
6399 @cvar _FLAGS: a list of attribute names corresponding to the flags
6402 HPATH = "node-modify"
6403 HTYPE = constants.HTYPE_NODE
6405 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6406   _F2R = {
6407     (True, False, False): _ROLE_CANDIDATE,
6408     (False, True, False): _ROLE_DRAINED,
6409     (False, False, True): _ROLE_OFFLINE,
6410     (False, False, False): _ROLE_REGULAR,
6411     }
6412 _R2F = dict((v, k) for k, v in _F2R.items())
6413 _FLAGS = ["master_candidate", "drained", "offline"]
6415 def CheckArguments(self):
6416 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6417 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6418 self.op.master_capable, self.op.vm_capable,
6419                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6420                 self.op.disk_state]
6421 if all_mods.count(None) == len(all_mods):
6422       raise errors.OpPrereqError("Please pass at least one modification",
6423                                  errors.ECODE_INVAL)
6424 if all_mods.count(True) > 1:
6425 raise errors.OpPrereqError("Can't set the node into more than one"
6426                                  " state at the same time",
6427                                  errors.ECODE_INVAL)
6429 # Boolean value that tells us whether we might be demoting from MC
6430 self.might_demote = (self.op.master_candidate is False or
6431 self.op.offline is True or
6432 self.op.drained is True or
6433 self.op.master_capable is False)
6435 if self.op.secondary_ip:
6436 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6437 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6438 " address" % self.op.secondary_ip,
6441 self.lock_all = self.op.auto_promote and self.might_demote
6442 self.lock_instances = self.op.secondary_ip is not None
6444 def _InstanceFilter(self, instance):
6445 """Filter for getting affected instances.
6448 return (instance.disk_template in constants.DTS_INT_MIRROR and
6449 self.op.node_name in instance.all_nodes)
6451 def ExpandNames(self):
6452     if self.lock_all:
6453       self.needed_locks = {
6454         locking.LEVEL_NODE: locking.ALL_SET,
6456         # Block allocations when all nodes are locked
6457         locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6458         }
6459     else:
6460       self.needed_locks = {
6461         locking.LEVEL_NODE: self.op.node_name,
6462         }
6464 # Since modifying a node can have severe effects on currently running
6465 # operations the resource lock is at least acquired in shared mode
6466 self.needed_locks[locking.LEVEL_NODE_RES] = \
6467 self.needed_locks[locking.LEVEL_NODE]
6469 # Get all locks except nodes in shared mode; they are not used for anything
6470 # but read-only access
6471 self.share_locks = _ShareAll()
6472 self.share_locks[locking.LEVEL_NODE] = 0
6473 self.share_locks[locking.LEVEL_NODE_RES] = 0
6474 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6476 if self.lock_instances:
6477 self.needed_locks[locking.LEVEL_INSTANCE] = \
6478 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6480 def BuildHooksEnv(self):
6481     """Build hooks env.
6483     This runs on the master node.
6485     """
6486     return {
6487 "OP_TARGET": self.op.node_name,
6488 "MASTER_CANDIDATE": str(self.op.master_candidate),
6489 "OFFLINE": str(self.op.offline),
6490 "DRAINED": str(self.op.drained),
6491 "MASTER_CAPABLE": str(self.op.master_capable),
6492       "VM_CAPABLE": str(self.op.vm_capable),
6493       }
6495 def BuildHooksNodes(self):
6496 """Build hooks nodes.
6499     nl = [self.cfg.GetMasterNode(), self.op.node_name]
6500     return (nl, nl)
6502 def CheckPrereq(self):
6503 """Check prerequisites.
6505 This only checks the instance list against the existing names.
6508 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6510 if self.lock_instances:
6511 affected_instances = \
6512 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6514 # Verify instance locks
6515 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6516 wanted_instances = frozenset(affected_instances.keys())
6517 if wanted_instances - owned_instances:
6518 raise errors.OpPrereqError("Instances affected by changing node %s's"
6519 " secondary IP address have changed since"
6520 " locks were acquired, wanted '%s', have"
6521 " '%s'; retry the operation" %
6522                                    (self.op.node_name,
6523                                     utils.CommaJoin(wanted_instances),
6524                                     utils.CommaJoin(owned_instances)),
6525                                    errors.ECODE_STATE)
6526     else:
6527       affected_instances = None
6529 if (self.op.master_candidate is not None or
6530 self.op.drained is not None or
6531 self.op.offline is not None):
6532 # we can't change the master's node flags
6533 if self.op.node_name == self.cfg.GetMasterNode():
6534 raise errors.OpPrereqError("The master role can be changed"
6535 " only via master-failover",
6538 if self.op.master_candidate and not node.master_capable:
6539 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6540                                  " it a master candidate" % node.name,
6541                                  errors.ECODE_STATE)
6543 if self.op.vm_capable is False:
6544 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6545       if ipri or isec:
6546         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6547 " the vm_capable flag" % node.name,
6550 if node.master_candidate and self.might_demote and not self.lock_all:
6551 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6552       # check if after removing the current node, we're missing master
6553       # candidates
6554 (mc_remaining, mc_should, _) = \
6555 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6556 if mc_remaining < mc_should:
6557 raise errors.OpPrereqError("Not enough master candidates, please"
6558 " pass auto promote option to allow"
6559 " promotion (--auto-promote or RAPI"
6560 " auto_promote=True)", errors.ECODE_STATE)
6562 self.old_flags = old_flags = (node.master_candidate,
6563 node.drained, node.offline)
6564 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6565 self.old_role = old_role = self._F2R[old_flags]
6567 # Check for ineffective changes
6568 for attr in self._FLAGS:
6569 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6570 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6571 setattr(self.op, attr, None)
6573 # Past this point, any flag change to False means a transition
6574 # away from the respective state, as only real changes are kept
6576 # TODO: We might query the real power state if it supports OOB
6577 if _SupportsOob(self.cfg, node):
6578 if self.op.offline is False and not (node.powered or
6579 self.op.powered is True):
6580 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6581 " offline status can be reset") %
6582 self.op.node_name, errors.ECODE_STATE)
6583 elif self.op.powered is not None:
6584 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6585 " as it does not support out-of-band"
6586                                   " handling") % self.op.node_name,
6587                                  errors.ECODE_STATE)
6589 # If we're being deofflined/drained, we'll MC ourself if needed
6590 if (self.op.drained is False or self.op.offline is False or
6591 (self.op.master_capable and not node.master_capable)):
6592 if _DecideSelfPromotion(self):
6593 self.op.master_candidate = True
6594 self.LogInfo("Auto-promoting node to master candidate")
6596 # If we're no longer master capable, we'll demote ourselves from MC
6597 if self.op.master_capable is False and node.master_candidate:
6598 self.LogInfo("Demoting from master candidate")
6599 self.op.master_candidate = False
6602 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6603 if self.op.master_candidate:
6604 new_role = self._ROLE_CANDIDATE
6605 elif self.op.drained:
6606 new_role = self._ROLE_DRAINED
6607 elif self.op.offline:
6608 new_role = self._ROLE_OFFLINE
6609 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6610       # False is still in new flags, which means we're un-setting (the
6611       # offline state, for example)
6612 new_role = self._ROLE_REGULAR
6613     else: # no new flags, nothing, keep old role
6614       new_role = old_role
6616 self.new_role = new_role
6618 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6619 # Trying to transition out of offline status
6620       result = self.rpc.call_version([node.name])[node.name]
6621       if result.fail_msg:
6622         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6623                                    " to report its version: %s" %
6624                                    (node.name, result.fail_msg),
6625                                    errors.ECODE_ENVIRON)
6626       else:
6627         self.LogWarning("Transitioning node from offline to online state"
6628                         " without using re-add. Please make sure the node"
6629                         " is healthy!")
6631 # When changing the secondary ip, verify if this is a single-homed to
6632     # multi-homed transition or vice versa, and apply the relevant
6633     # checks
6634 if self.op.secondary_ip:
6635 # Ok even without locking, because this can't be changed by any LU
6636 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6637 master_singlehomed = master.secondary_ip == master.primary_ip
6638 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6639 if self.op.force and node.name == master.name:
6640 self.LogWarning("Transitioning from single-homed to multi-homed"
6641                           " cluster; all nodes will require a secondary IP"
6642                           " address")
6643         else:
6644 raise errors.OpPrereqError("Changing the secondary ip on a"
6645 " single-homed cluster requires the"
6646 " --force option to be passed, and the"
6647                                      " target node to be the master",
6648                                      errors.ECODE_INVAL)
6649 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6650 if self.op.force and node.name == master.name:
6651 self.LogWarning("Transitioning from multi-homed to single-homed"
6652                           " cluster; secondary IP addresses will have to be"
6653                           " removed")
6654         else:
6655 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6656 " same as the primary IP on a multi-homed"
6657 " cluster, unless the --force option is"
6658 " passed, and the target node is the"
6659 " master", errors.ECODE_INVAL)
6661 assert not (frozenset(affected_instances) -
6662 self.owned_locks(locking.LEVEL_INSTANCE))
6664       if node.offline:
6665         if affected_instances:
6666 msg = ("Cannot change secondary IP address: offline node has"
6667 " instances (%s) configured to use it" %
6668 utils.CommaJoin(affected_instances.keys()))
6669 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6670       else:
6671         # On online nodes, check that no instances are running, and that
6672 # the node has the new ip and we can reach it.
6673 for instance in affected_instances.values():
6674 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6675 msg="cannot change secondary ip")
6677 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6678 if master.name != node.name:
6679 # check reachability from master secondary ip to new secondary ip
6680 if not netutils.TcpPing(self.op.secondary_ip,
6681 constants.DEFAULT_NODED_PORT,
6682 source=master.secondary_ip):
6683 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6684 " based ping to node daemon port",
6685 errors.ECODE_ENVIRON)
6687 if self.op.ndparams:
6688 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6689 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6690 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6691 "node", "cluster or group")
6692 self.new_ndparams = new_ndparams
6694 if self.op.hv_state:
6695 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6696 self.node.hv_state_static)
6698 if self.op.disk_state:
6699 self.new_disk_state = \
6700 _MergeAndVerifyDiskState(self.op.disk_state,
6701 self.node.disk_state_static)
6703   def Exec(self, feedback_fn):
6704     """Modifies a node.
6706     """
6707     node = self.node
6708     old_role = self.old_role
6709     new_role = self.new_role
6711     result = []
6713 if self.op.ndparams:
6714 node.ndparams = self.new_ndparams
6716 if self.op.powered is not None:
6717 node.powered = self.op.powered
6719 if self.op.hv_state:
6720 node.hv_state_static = self.new_hv_state
6722 if self.op.disk_state:
6723 node.disk_state_static = self.new_disk_state
6725 for attr in ["master_capable", "vm_capable"]:
6726       val = getattr(self.op, attr)
6727       if val is not None:
6728 setattr(node, attr, val)
6729 result.append((attr, str(val)))
6731 if new_role != old_role:
6732 # Tell the node to demote itself, if no longer MC and not offline
6733 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6734 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6735       if msg:
6736         self.LogWarning("Node failed to demote itself: %s", msg)
6738 new_flags = self._R2F[new_role]
6739 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6740       if of != nf:
6741         result.append((desc, str(nf)))
6742 (node.master_candidate, node.drained, node.offline) = new_flags
6744 # we locked all nodes, we adjust the CP before updating this node
6745     if self.lock_all:
6746       _AdjustCandidatePool(self, [node.name])
6748 if self.op.secondary_ip:
6749 node.secondary_ip = self.op.secondary_ip
6750 result.append(("secondary_ip", self.op.secondary_ip))
6752 # this will trigger configuration file update, if needed
6753 self.cfg.Update(node, feedback_fn)
6755     # this will trigger job queue propagation or cleanup if the mc
6756     # flag changed
6757 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6758       self.context.ReaddNode(node)
6760     return result
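# Example (minimal sketch; the node name is a placeholder): draining a node.
# Only one of the role flags may be True per invocation, and auto_promote
# lets the LU lock all nodes so it can promote replacement candidates.
#
#   op = opcodes.OpNodeSetParams(node_name="node2.example.com",
#                                drained=True, auto_promote=True)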
6763 class LUNodePowercycle(NoHooksLU):
6764 """Powercycles a node.
6769 def CheckArguments(self):
6770 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6771 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6772 raise errors.OpPrereqError("The node is the master and the force"
6773                                  " parameter was not set",
6774                                  errors.ECODE_INVAL)
6776 def ExpandNames(self):
6777 """Locking for PowercycleNode.
6779 This is a last-resort option and shouldn't block on other
6780 jobs. Therefore, we grab no locks.
6783 self.needed_locks = {}
6785 def Exec(self, feedback_fn):
6789 result = self.rpc.call_node_powercycle(self.op.node_name,
6790 self.cfg.GetHypervisorType())
6791 result.Raise("Failed to schedule the reboot")
6792 return result.payload
6795 class LUClusterQuery(NoHooksLU):
6796 """Query cluster configuration.
6801 def ExpandNames(self):
6802 self.needed_locks = {}
6804 def Exec(self, feedback_fn):
6805 """Return cluster config.
6808     cluster = self.cfg.GetClusterInfo()
6809     os_hvp = {}
6811 # Filter just for enabled hypervisors
6812 for os_name, hv_dict in cluster.os_hvp.items():
6813 os_hvp[os_name] = {}
6814 for hv_name, hv_params in hv_dict.items():
6815 if hv_name in cluster.enabled_hypervisors:
6816 os_hvp[os_name][hv_name] = hv_params
6818 # Convert ip_family to ip_version
6819 primary_ip_version = constants.IP4_VERSION
6820 if cluster.primary_ip_family == netutils.IP6Address.family:
6821 primary_ip_version = constants.IP6_VERSION
6823     result = {
6824       "software_version": constants.RELEASE_VERSION,
6825 "protocol_version": constants.PROTOCOL_VERSION,
6826 "config_version": constants.CONFIG_VERSION,
6827 "os_api_version": max(constants.OS_API_VERSIONS),
6828 "export_version": constants.EXPORT_VERSION,
6829 "architecture": runtime.GetArchInfo(),
6830 "name": cluster.cluster_name,
6831 "master": cluster.master_node,
6832 "default_hypervisor": cluster.primary_hypervisor,
6833 "enabled_hypervisors": cluster.enabled_hypervisors,
6834 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6835 for hypervisor_name in cluster.enabled_hypervisors]),
6836       "os_hvp": os_hvp,
6837       "beparams": cluster.beparams,
6838 "osparams": cluster.osparams,
6839 "ipolicy": cluster.ipolicy,
6840 "nicparams": cluster.nicparams,
6841 "ndparams": cluster.ndparams,
6842 "diskparams": cluster.diskparams,
6843 "candidate_pool_size": cluster.candidate_pool_size,
6844 "master_netdev": cluster.master_netdev,
6845 "master_netmask": cluster.master_netmask,
6846 "use_external_mip_script": cluster.use_external_mip_script,
6847 "volume_group_name": cluster.volume_group_name,
6848 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6849 "file_storage_dir": cluster.file_storage_dir,
6850 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6851 "maintain_node_health": cluster.maintain_node_health,
6852 "ctime": cluster.ctime,
6853 "mtime": cluster.mtime,
6854 "uuid": cluster.uuid,
6855 "tags": list(cluster.GetTags()),
6856 "uid_pool": cluster.uid_pool,
6857 "default_iallocator": cluster.default_iallocator,
6858 "reserved_lvs": cluster.reserved_lvs,
6859 "primary_ip_version": primary_ip_version,
6860 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6861 "hidden_os": cluster.hidden_os,
6862       "blacklisted_os": cluster.blacklisted_os,
6863       }
6865     return result
6868 class LUClusterConfigQuery(NoHooksLU):
6869 """Return configuration values.
6874 def CheckArguments(self):
6875 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6877 def ExpandNames(self):
6878 self.cq.ExpandNames(self)
6880 def DeclareLocks(self, level):
6881 self.cq.DeclareLocks(self, level)
6883 def Exec(self, feedback_fn):
6884 result = self.cq.OldStyleQuery(self)
6886     assert len(result) == 1
6888     return result[0]
6891 class _ClusterQuery(_QueryBase):
6892 FIELDS = query.CLUSTER_FIELDS
6894   #: Do not sort (there is only one item)
6895   SORT_FIELD = None
6897 def ExpandNames(self, lu):
6898 lu.needed_locks = {}
6900 # The following variables interact with _QueryBase._GetNames
6901 self.wanted = locking.ALL_SET
6902     self.do_locking = self.use_locking
6904     if self.do_locking:
6905       raise errors.OpPrereqError("Cannot use locking for cluster queries",
6906                                  errors.ECODE_INVAL)
6908   def DeclareLocks(self, lu, level):
6909     pass
6911 def _GetQueryData(self, lu):
6912     """Computes the cluster data.
6915 # Locking is not used
6916 assert not (compat.any(lu.glm.is_owned(level)
6917 for level in locking.LEVELS
6918 if level != locking.LEVEL_CLUSTER) or
6919 self.do_locking or self.use_locking)
6921 if query.CQ_CONFIG in self.requested_data:
6922 cluster = lu.cfg.GetClusterInfo()
6923     else:
6924       cluster = NotImplemented
6926 if query.CQ_QUEUE_DRAINED in self.requested_data:
6927 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6928     else:
6929       drain_flag = NotImplemented
6931 if query.CQ_WATCHER_PAUSE in self.requested_data:
6932 master_name = lu.cfg.GetMasterNode()
6934 result = lu.rpc.call_get_watcher_pause(master_name)
6935       result.Raise("Can't retrieve watcher pause from master node '%s'" %
6936                    master_name)
6938       watcher_pause = result.payload
6939     else:
6940       watcher_pause = NotImplemented
6942 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6945 class LUInstanceActivateDisks(NoHooksLU):
6946 """Bring up an instance's disks.
6951 def ExpandNames(self):
6952 self._ExpandAndLockInstance()
6953 self.needed_locks[locking.LEVEL_NODE] = []
6954 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6956 def DeclareLocks(self, level):
6957 if level == locking.LEVEL_NODE:
6958 self._LockInstancesNodes()
6960 def CheckPrereq(self):
6961 """Check prerequisites.
6963 This checks that the instance is in the cluster.
6966 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6967 assert self.instance is not None, \
6968 "Cannot retrieve locked instance %s" % self.op.instance_name
6969 _CheckNodeOnline(self, self.instance.primary_node)
6971 def Exec(self, feedback_fn):
6972 """Activate the disks.
6975 disks_ok, disks_info = \
6976 _AssembleInstanceDisks(self, self.instance,
6977 ignore_size=self.op.ignore_size)
6978     if not disks_ok:
6979       raise errors.OpExecError("Cannot activate block devices")
6981 if self.op.wait_for_sync:
6982 if not _WaitForSync(self, self.instance):
6983         raise errors.OpExecError("Some disks of the instance are degraded!")
6985     return disks_info
6988 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6989                            ignore_size=False):
6990 """Prepare the block devices for an instance.
6992 This sets up the block devices on all nodes.
6994 @type lu: L{LogicalUnit}
6995 @param lu: the logical unit on whose behalf we execute
6996 @type instance: L{objects.Instance}
6997 @param instance: the instance for whose disks we assemble
6998 @type disks: list of L{objects.Disk} or None
6999 @param disks: which disks to assemble (or all, if None)
7000 @type ignore_secondaries: boolean
7001 @param ignore_secondaries: if true, errors on secondary nodes
7002 won't result in an error return from the function
7003 @type ignore_size: boolean
7004 @param ignore_size: if true, the current known size of the disk
7005 will not be used during the disk activation, useful for cases
7006 when the size is wrong
7007 @return: False if the operation failed, otherwise a list of
7008 (host, instance_visible_name, node_visible_name)
7009 with the mapping from node devices to instance devices
7012   device_info = []
7013   disks_ok = True
7014   iname = instance.name
7015 disks = _ExpandCheckDisks(instance, disks)
7017   # With the two-pass mechanism we try to reduce the window of
7018   # opportunity for the race condition of switching DRBD to primary
7019   # before handshaking has occurred, but we do not eliminate it
7021 # The proper fix would be to wait (with some limits) until the
7022 # connection has been made and drbd transitions from WFConnection
7023 # into any other network-connected state (Connected, SyncTarget,
7026 # 1st pass, assemble on all nodes in secondary mode
7027 for idx, inst_disk in enumerate(disks):
7028     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
7029       if ignore_size:
7030         node_disk = node_disk.Copy()
7031         node_disk.UnsetSize()
7032       lu.cfg.SetDiskID(node_disk, node)
7033       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
7034                                              False, idx)
7035       msg = result.fail_msg
7036       if msg:
7037         is_offline_secondary = (node in instance.secondary_nodes and
7038                                 result.offline)
7039         lu.LogWarning("Could not prepare block device %s on node %s"
7040                       " (is_primary=False, pass=1): %s",
7041                       inst_disk.iv_name, node, msg)
7042         if not (ignore_secondaries or is_offline_secondary):
7043           disks_ok = False
7045 # FIXME: race condition on drbd migration to primary
7047 # 2nd pass, do only the primary node
7048   for idx, inst_disk in enumerate(disks):
7049     dev_path = None
7051     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
7052       if node != instance.primary_node:
7053         continue
7054       if ignore_size:
7055         node_disk = node_disk.Copy()
7056         node_disk.UnsetSize()
7057       lu.cfg.SetDiskID(node_disk, node)
7058       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
7059                                              True, idx)
7060       msg = result.fail_msg
7061       if msg:
7062         lu.LogWarning("Could not prepare block device %s on node %s"
7063                       " (is_primary=True, pass=2): %s",
7064                       inst_disk.iv_name, node, msg)
7065         disks_ok = False
7066       else:
7067         dev_path = result.payload
7069     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
7071 # leave the disks configured for the primary node
7072 # this is a workaround that would be fixed better by
7073 # improving the logical/physical id handling
7074   for disk in disks:
7075     lu.cfg.SetDiskID(disk, instance.primary_node)
7077 return disks_ok, device_info
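# Example (minimal sketch): how callers typically consume the return value
# of the helper above; "lu" and "instance" are assumed to be in scope.
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for (node, iv_name, dev_path) in device_info:
#     lu.LogInfo("%s on %s is visible as %s", iv_name, node, dev_path)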
7080 def _StartInstanceDisks(lu, instance, force):
7081 """Start the disks of an instance.
7084 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
7085 ignore_secondaries=force)
7086   if not disks_ok:
7087     _ShutdownInstanceDisks(lu, instance)
7088     if force is not None and not force:
7089       lu.LogWarning("",
7090                     hint=("If the message above refers to a secondary node,"
7091                           " you can retry the operation using '--force'"))
7092 raise errors.OpExecError("Disk consistency error")
7095 class LUInstanceDeactivateDisks(NoHooksLU):
7096 """Shutdown an instance's disks.
7101 def ExpandNames(self):
7102 self._ExpandAndLockInstance()
7103 self.needed_locks[locking.LEVEL_NODE] = []
7104 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7106 def DeclareLocks(self, level):
7107 if level == locking.LEVEL_NODE:
7108 self._LockInstancesNodes()
7110 def CheckPrereq(self):
7111 """Check prerequisites.
7113 This checks that the instance is in the cluster.
7116 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7117 assert self.instance is not None, \
7118 "Cannot retrieve locked instance %s" % self.op.instance_name
7120 def Exec(self, feedback_fn):
7121     """Deactivate the disks.
7123     """
7124     instance = self.instance
7125     if self.op.force:
7126       _ShutdownInstanceDisks(self, instance)
7127     else:
7128       _SafeShutdownInstanceDisks(self, instance)
7131 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7132 """Shutdown block devices of an instance.
7134 This function checks if an instance is running, before calling
7135 _ShutdownInstanceDisks.
7138 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7139 _ShutdownInstanceDisks(lu, instance, disks=disks)
7142 def _ExpandCheckDisks(instance, disks):
7143 """Return the instance disks selected by the disks list
7145 @type disks: list of L{objects.Disk} or None
7146 @param disks: selected disks
7147 @rtype: list of L{objects.Disk}
7148 @return: selected instance disks to act on
7151   if disks is None:
7152     return instance.disks
7154   if not set(disks).issubset(instance.disks):
7155     raise errors.ProgrammerError("Can only act on disks belonging to the"
7156                                  " target instance")
7158   return disks
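# Example (minimal sketch; "instance" is assumed to be an objects.Instance):
# passing None selects all of the instance's disks, while subsets must come
# from instance.disks itself or a ProgrammerError is raised.
#
#   all_disks = _ExpandCheckDisks(instance, None)
#   first_disk_only = _ExpandCheckDisks(instance, instance.disks[:1])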
7160 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7161 """Shutdown block devices of an instance.
7163 This does the shutdown on all nodes of the instance.
7165   If ignore_primary is false, errors on the primary node are reflected
7166   in the return value; if it is true, they are ignored.
7169   all_result = True
7170   disks = _ExpandCheckDisks(instance, disks)
7172   for disk in disks:
7173     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7174 lu.cfg.SetDiskID(top_disk, node)
7175 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7176 msg = result.fail_msg
7177       if msg:
7178         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7179 disk.iv_name, node, msg)
7180         if ((node == instance.primary_node and not ignore_primary) or
7181             (node != instance.primary_node and not result.offline)):
7182           all_result = False
7184   return all_result
7186 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7187 """Checks if a node has enough free memory.
7189 This function checks if a given node has the needed amount of free
7190 memory. In case the node has less memory or we cannot get the
7191 information from the node, this function raises an OpPrereqError
7194 @type lu: C{LogicalUnit}
7195 @param lu: a logical unit from which we get configuration data
7197 @param node: the node to check
7198 @type reason: C{str}
7199 @param reason: string to use in the error message
7200 @type requested: C{int}
7201 @param requested: the amount of memory in MiB to check for
7202 @type hypervisor_name: C{str}
7203 @param hypervisor_name: the hypervisor to ask for memory stats
7205 @return: node current free memory
7206 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7207 we cannot check the node
7210 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7211 nodeinfo[node].Raise("Can't get data from node %s" % node,
7212 prereq=True, ecode=errors.ECODE_ENVIRON)
7213 (_, _, (hv_info, )) = nodeinfo[node].payload
7215 free_mem = hv_info.get("memory_free", None)
7216 if not isinstance(free_mem, int):
7217 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7218 " was '%s'" % (node, free_mem),
7219 errors.ECODE_ENVIRON)
7220 if requested > free_mem:
7221 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7222 " needed %s MiB, available %s MiB" %
7223                                (node, reason, requested, free_mem),
7224                                errors.ECODE_NORES)
7226   return free_mem
7228 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7229 """Checks if nodes have enough free disk space in all the VGs.
7231 This function checks if all given nodes have the needed amount of
7232 free disk. In case any node has less disk or we cannot get the
7233 information from the node, this function raises an OpPrereqError
7236 @type lu: C{LogicalUnit}
7237 @param lu: a logical unit from which we get configuration data
7238 @type nodenames: C{list}
7239 @param nodenames: the list of node names to check
7240 @type req_sizes: C{dict}
7241 @param req_sizes: the hash of vg and corresponding amount of disk in
7243 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7244 or we cannot check the node
7247 for vg, req_size in req_sizes.items():
7248 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
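# Example (minimal sketch; names and sizes are made up): req_sizes maps each
# volume group to the amount of space, in MiB, that must be free on every
# node in the list.
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 2 * 1024})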
7251 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7252 """Checks if nodes have enough free disk space in the specified VG.
7254 This function checks if all given nodes have the needed amount of
7255 free disk. In case any node has less disk or we cannot get the
7256 information from the node, this function raises an OpPrereqError
7259 @type lu: C{LogicalUnit}
7260 @param lu: a logical unit from which we get configuration data
7261 @type nodenames: C{list}
7262 @param nodenames: the list of node names to check
7264 @param vg: the volume group to check
7265 @type requested: C{int}
7266 @param requested: the amount of disk in MiB to check for
7267 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7268 or we cannot check the node
7271 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7272 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7273 for node in nodenames:
7274 info = nodeinfo[node]
7275 info.Raise("Cannot get current information from node %s" % node,
7276 prereq=True, ecode=errors.ECODE_ENVIRON)
7277 (_, (vg_info, ), _) = info.payload
7278 vg_free = vg_info.get("vg_free", None)
7279 if not isinstance(vg_free, int):
7280 raise errors.OpPrereqError("Can't compute free disk space on node"
7281 " %s for vg %s, result was '%s'" %
7282 (node, vg, vg_free), errors.ECODE_ENVIRON)
7283 if requested > vg_free:
7284 raise errors.OpPrereqError("Not enough disk space on target node %s"
7285 " vg %s: required %d MiB, available %d MiB" %
7286                                (node, vg, requested, vg_free),
7287                                errors.ECODE_NORES)
7290 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7291 """Checks if nodes have enough physical CPUs
7293 This function checks if all given nodes have the needed number of
7294 physical CPUs. In case any node has less CPUs or we cannot get the
7295 information from the node, this function raises an OpPrereqError
7298 @type lu: C{LogicalUnit}
7299 @param lu: a logical unit from which we get configuration data
7300 @type nodenames: C{list}
7301 @param nodenames: the list of node names to check
7302 @type requested: C{int}
7303 @param requested: the minimum acceptable number of physical CPUs
7304 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7305 or we cannot check the node
7308 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7309 for node in nodenames:
7310 info = nodeinfo[node]
7311 info.Raise("Cannot get current information from node %s" % node,
7312 prereq=True, ecode=errors.ECODE_ENVIRON)
7313 (_, _, (hv_info, )) = info.payload
7314 num_cpus = hv_info.get("cpu_total", None)
7315 if not isinstance(num_cpus, int):
7316 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7317 " on node %s, result was '%s'" %
7318 (node, num_cpus), errors.ECODE_ENVIRON)
7319 if requested > num_cpus:
7320 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7321                                  "required" % (node, num_cpus, requested),
7322                                  errors.ECODE_NORES)
7325 class LUInstanceStartup(LogicalUnit):
7326 """Starts an instance.
7329 HPATH = "instance-start"
7330   HTYPE = constants.HTYPE_INSTANCE
7331   REQ_BGL = False
7333 def CheckArguments(self):
7335 if self.op.beparams:
7336 # fill the beparams dict
7337 objects.UpgradeBeParams(self.op.beparams)
7338 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7340 def ExpandNames(self):
7341 self._ExpandAndLockInstance()
7342 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7344 def DeclareLocks(self, level):
7345 if level == locking.LEVEL_NODE_RES:
7346 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7348   def BuildHooksEnv(self):
7349     """Build hooks env.
7351     This runs on master, primary and secondary nodes of the instance.
7353     """
7354     env = {
7355       "FORCE": self.op.force,
7356       }
7358     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7360     return env
7362 def BuildHooksNodes(self):
7363 """Build hooks nodes.
7366     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7367     return (nl, nl)
7369 def CheckPrereq(self):
7370 """Check prerequisites.
7372 This checks that the instance is in the cluster.
7375 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7376 assert self.instance is not None, \
7377 "Cannot retrieve locked instance %s" % self.op.instance_name
7380 if self.op.hvparams:
7381 # check hypervisor parameter syntax (locally)
7382 cluster = self.cfg.GetClusterInfo()
7383 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7384 filled_hvp = cluster.FillHV(instance)
7385 filled_hvp.update(self.op.hvparams)
7386 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7387 hv_type.CheckParameterSyntax(filled_hvp)
7388 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7390 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7392 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7394 if self.primary_offline and self.op.ignore_offline_nodes:
7395 self.LogWarning("Ignoring offline primary node")
7397 if self.op.hvparams or self.op.beparams:
7398 self.LogWarning("Overridden parameters are ignored")
7399     else:
7400       _CheckNodeOnline(self, instance.primary_node)
7402 bep = self.cfg.GetClusterInfo().FillBE(instance)
7403 bep.update(self.op.beparams)
7405 # check bridges existence
7406 _CheckInstanceBridgesExist(self, instance)
7408 remote_info = self.rpc.call_instance_info(instance.primary_node,
7409                                               instance.name,
7410                                               instance.hypervisor)
7411 remote_info.Raise("Error checking node %s" % instance.primary_node,
7412 prereq=True, ecode=errors.ECODE_ENVIRON)
7413 if not remote_info.payload: # not running already
7414 _CheckNodeFreeMemory(self, instance.primary_node,
7415 "starting instance %s" % instance.name,
7416 bep[constants.BE_MINMEM], instance.hypervisor)
7418 def Exec(self, feedback_fn):
7419 """Start the instance.
7422 instance = self.instance
7423 force = self.op.force
7425 if not self.op.no_remember:
7426 self.cfg.MarkInstanceUp(instance.name)
7428 if self.primary_offline:
7429 assert self.op.ignore_offline_nodes
7430 self.LogInfo("Primary node offline, marked instance as started")
7432 node_current = instance.primary_node
7434 _StartInstanceDisks(self, instance, force)
7437 self.rpc.call_instance_start(node_current,
7438 (instance, self.op.hvparams,
7440 self.op.startup_paused)
7441 msg = result.fail_msg
7443 _ShutdownInstanceDisks(self, instance)
7444 raise errors.OpExecError("Could not start instance: %s" % msg)
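# Illustrative sketch (the helper name is ours): the start-or-roll-back
# pattern used by LUInstanceStartup.Exec above, factored out. If the start
# RPC fails, the disks that were just activated are shut down again before
# the error is propagated, so no half-started state is left behind.
def _ExampleStartWithRollback(lu, instance, force, hvparams, beparams,
                              startup_paused):
  """Start an instance, deactivating its disks again on failure (sketch).

  """
  _StartInstanceDisks(lu, instance, force)
  result = lu.rpc.call_instance_start(instance.primary_node,
                                      (instance, hvparams, beparams),
                                      startup_paused)
  msg = result.fail_msg
  if msg:
    _ShutdownInstanceDisks(lu, instance)
    raise errors.OpExecError("Could not start instance: %s" % msg)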
7447 class LUInstanceReboot(LogicalUnit):
7448 """Reboot an instance.
7451 HPATH = "instance-reboot"
7452 HTYPE = constants.HTYPE_INSTANCE
7455 def ExpandNames(self):
7456 self._ExpandAndLockInstance()
7458 def BuildHooksEnv(self):
7461 This runs on master, primary and secondary nodes of the instance.
7465 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7466 "REBOOT_TYPE": self.op.reboot_type,
7467 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7470 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7474 def BuildHooksNodes(self):
7475 """Build hooks nodes.
7478 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7481 def CheckPrereq(self):
7482 """Check prerequisites.
7484 This checks that the instance is in the cluster.
7487 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7488 assert self.instance is not None, \
7489 "Cannot retrieve locked instance %s" % self.op.instance_name
7490 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7491 _CheckNodeOnline(self, instance.primary_node)
7493 # check bridges existence
7494 _CheckInstanceBridgesExist(self, instance)
7496 def Exec(self, feedback_fn):
7497 """Reboot the instance.
7500 instance = self.instance
7501 ignore_secondaries = self.op.ignore_secondaries
7502 reboot_type = self.op.reboot_type
7503 reason = self.op.reason
7505 remote_info = self.rpc.call_instance_info(instance.primary_node,
7507 instance.hypervisor)
7508 remote_info.Raise("Error checking node %s" % instance.primary_node)
7509 instance_running = bool(remote_info.payload)
7511 node_current = instance.primary_node
7513 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7514 constants.INSTANCE_REBOOT_HARD]:
7515 for disk in instance.disks:
7516 self.cfg.SetDiskID(disk, node_current)
7517 result = self.rpc.call_instance_reboot(node_current, instance,
7519 self.op.shutdown_timeout, reason)
7520 result.Raise("Could not reboot instance")
7522 if instance_running:
7523 result = self.rpc.call_instance_shutdown(node_current, instance,
7524 self.op.shutdown_timeout)
7525 result.Raise("Could not shutdown instance for full reboot")
7526 _ShutdownInstanceDisks(self, instance)
7528 self.LogInfo("Instance %s was already stopped, starting now",
7530 _StartInstanceDisks(self, instance, ignore_secondaries)
7531 result = self.rpc.call_instance_start(node_current,
7532 (instance, None, None), False)
7533 msg = result.fail_msg
7535 _ShutdownInstanceDisks(self, instance)
7536 raise errors.OpExecError("Could not start instance for"
7537 " full reboot: %s" % msg)
7539 self.cfg.MarkInstanceUp(instance.name)
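# Illustrative sketch (the function name is ours): the strategy choice made
# by LUInstanceReboot.Exec above. Soft and hard reboots of a running
# instance are delegated to the hypervisor; everything else, including an
# instance that turned out not to be running, becomes a full stop/start.
def _ExampleRebootStrategy(reboot_type, instance_running):
  """Return "hypervisor" or "full" for a reboot request (sketch).

  """
  if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                          constants.INSTANCE_REBOOT_HARD]:
    return "hypervisor"
  return "full"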
7542 class LUInstanceShutdown(LogicalUnit):
7543 """Shutdown an instance.
7546 HPATH = "instance-stop"
7547 HTYPE = constants.HTYPE_INSTANCE
7550 def ExpandNames(self):
7551 self._ExpandAndLockInstance()
7553 def BuildHooksEnv(self):
7556 This runs on master, primary and secondary nodes of the instance.
7559 env = _BuildInstanceHookEnvByObject(self, self.instance)
7560 env["TIMEOUT"] = self.op.timeout
7563 def BuildHooksNodes(self):
7564 """Build hooks nodes.
7567 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7570 def CheckPrereq(self):
7571 """Check prerequisites.
7573 This checks that the instance is in the cluster.
7576 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7577 assert self.instance is not None, \
7578 "Cannot retrieve locked instance %s" % self.op.instance_name
7580 if not self.op.force:
7581 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7583 self.LogWarning("Ignoring offline instance check")
7585 self.primary_offline = \
7586 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7588 if self.primary_offline and self.op.ignore_offline_nodes:
7589 self.LogWarning("Ignoring offline primary node")
7591 _CheckNodeOnline(self, self.instance.primary_node)
7593 def Exec(self, feedback_fn):
7594 """Shutdown the instance.
7597 instance = self.instance
7598 node_current = instance.primary_node
7599 timeout = self.op.timeout
7601 # If the instance is offline we shouldn't mark it as down, as that
7602 # resets the offline flag.
7603 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7604 self.cfg.MarkInstanceDown(instance.name)
7606 if self.primary_offline:
7607 assert self.op.ignore_offline_nodes
7608 self.LogInfo("Primary node offline, marked instance as stopped")
7610 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7611 msg = result.fail_msg
7613 self.LogWarning("Could not shutdown instance: %s", msg)
7615 _ShutdownInstanceDisks(self, instance)
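# Illustrative sketch (the function name is ours): the rule applied by
# LUInstanceShutdown.Exec above when deciding whether to record the new
# admin state. Offline instances are never marked down, since that would
# silently reset their offline flag.
def _ExampleShouldRecordShutdown(no_remember, admin_state):
  """Return whether the instance should be marked down in the config (sketch).

  """
  return not no_remember and admin_state in INSTANCE_ONLINE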
7618 class LUInstanceReinstall(LogicalUnit):
7619 """Reinstall an instance.
7622 HPATH = "instance-reinstall"
7623 HTYPE = constants.HTYPE_INSTANCE
7626 def ExpandNames(self):
7627 self._ExpandAndLockInstance()
7629 def BuildHooksEnv(self):
7632 This runs on master, primary and secondary nodes of the instance.
7635 return _BuildInstanceHookEnvByObject(self, self.instance)
7637 def BuildHooksNodes(self):
7638 """Build hooks nodes.
7641 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7644 def CheckPrereq(self):
7645 """Check prerequisites.
7647 This checks that the instance is in the cluster and is not running.
7650 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7651 assert instance is not None, \
7652 "Cannot retrieve locked instance %s" % self.op.instance_name
7653 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7654 " offline, cannot reinstall")
7656 if instance.disk_template == constants.DT_DISKLESS:
7657 raise errors.OpPrereqError("Instance '%s' has no disks" %
7658 self.op.instance_name,
7660 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7662 if self.op.os_type is not None:
7664 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7665 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7666 instance_os = self.op.os_type
7668 instance_os = instance.os
7670 nodelist = list(instance.all_nodes)
7672 if self.op.osparams:
7673 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7674 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7675 self.os_inst = i_osdict # the new dict (without defaults)
7679 self.instance = instance
7681 def Exec(self, feedback_fn):
7682 """Reinstall the instance.
7685 inst = self.instance
7687 if self.op.os_type is not None:
7688 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7689 inst.os = self.op.os_type
7690 # Write to configuration
7691 self.cfg.Update(inst, feedback_fn)
7693 _StartInstanceDisks(self, inst, None)
7695 feedback_fn("Running the instance OS create scripts...")
7696 # FIXME: pass debug option from opcode to backend
7697 result = self.rpc.call_instance_os_add(inst.primary_node,
7698 (inst, self.os_inst), True,
7699 self.op.debug_level)
7700 result.Raise("Could not install OS for instance %s on node %s" %
7701 (inst.name, inst.primary_node))
7703 _ShutdownInstanceDisks(self, inst)
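# Illustrative sketch (the helper name is ours, and the explicit try/finally
# is an assumption about the intended cleanup semantics): the
# activate-run-deactivate pattern of LUInstanceReinstall.Exec above, which
# guarantees the disks are shut down even if the OS scripts fail.
def _ExampleRunWithActivatedDisks(lu, instance, fn):
  """Run fn() while the instance's disks are activated (sketch).

  """
  _StartInstanceDisks(lu, instance, None)
  try:
    return fn()
  finally:
    _ShutdownInstanceDisks(lu, instance)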
7706 class LUInstanceRecreateDisks(LogicalUnit):
7707 """Recreate an instance's missing disks.
7710 HPATH = "instance-recreate-disks"
7711 HTYPE = constants.HTYPE_INSTANCE
7714 _MODIFYABLE = compat.UniqueFrozenset([
7715 constants.IDISK_SIZE,
7716 constants.IDISK_MODE,
7719 # New or changed disk parameters may have different semantics
7720 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7721 constants.IDISK_ADOPT,
7723 # TODO: Implement support changing VG while recreating
7725 constants.IDISK_METAVG,
7726 constants.IDISK_PROVIDER,
7727 constants.IDISK_NAME,
7730 def _RunAllocator(self):
7731 """Run the allocator based on input opcode.
7734 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7737 # The allocator should actually run in "relocate" mode, but current
7738 # allocators don't support relocating all the nodes of an instance at
7739 # the same time. As a workaround we use "allocate" mode, but this is
7740 # suboptimal for two reasons:
7741 # - The instance name passed to the allocator is present in the list of
7742 # existing instances, so there could be a conflict within the
7743 # internal structures of the allocator. This doesn't happen with the
7744 # current allocators, but it's a liability.
7745 # - The allocator counts the resources used by the instance twice: once
7746 # because the instance exists already, and once because it tries to
7747 # allocate a new instance.
7748 # The allocator could choose some of the nodes on which the instance is
7749 # running, but that's not a problem. If the instance nodes are broken,
7750 # they should already be marked as drained or offline, and hence
7751 # skipped by the allocator. If instance disks have been lost for other
7752 # reasons, then recreating the disks on the same nodes should be fine.
7753 disk_template = self.instance.disk_template
7754 spindle_use = be_full[constants.BE_SPINDLE_USE]
7755 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7756 disk_template=disk_template,
7757 tags=list(self.instance.GetTags()),
7758 os=self.instance.os,
7760 vcpus=be_full[constants.BE_VCPUS],
7761 memory=be_full[constants.BE_MAXMEM],
7762 spindle_use=spindle_use,
7763 disks=[{constants.IDISK_SIZE: d.size,
7764 constants.IDISK_MODE: d.mode}
7765 for d in self.instance.disks],
7766 hypervisor=self.instance.hypervisor,
7767 node_whitelist=None)
7768 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7770 ial.Run(self.op.iallocator)
7772 assert req.RequiredNodes() == len(self.instance.all_nodes)
7775 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7776 " %s" % (self.op.iallocator, ial.info),
7779 self.op.nodes = ial.result
7780 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7781 self.op.instance_name, self.op.iallocator,
7782 utils.CommaJoin(ial.result))
7784 def CheckArguments(self):
7785 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7786 # Normalize and convert deprecated list of disk indices
7787 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7789 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7791 raise errors.OpPrereqError("Some disks have been specified more than"
7792 " once: %s" % utils.CommaJoin(duplicates),
7795 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7796 # when neither iallocator nor nodes are specified
7797 if self.op.iallocator or self.op.nodes:
7798 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7800 for (idx, params) in self.op.disks:
7801 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7802 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7804 raise errors.OpPrereqError("Parameters for disk %s try to change"
7805 " unmodifiable parameter(s): %s" %
7806 (idx, utils.CommaJoin(unsupported)),
7809 def ExpandNames(self):
7810 self._ExpandAndLockInstance()
7811 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7814 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7815 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7817 self.needed_locks[locking.LEVEL_NODE] = []
7818 if self.op.iallocator:
7819 # iallocator will select a new node in the same group
7820 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7821 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7823 self.needed_locks[locking.LEVEL_NODE_RES] = []
7825 def DeclareLocks(self, level):
7826 if level == locking.LEVEL_NODEGROUP:
7827 assert self.op.iallocator is not None
7828 assert not self.op.nodes
7829 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7830 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7831 # Lock the primary group used by the instance optimistically; this
7832 # requires going via the node before it's locked, requiring
7833 # verification later on
7834 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7835 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7837 elif level == locking.LEVEL_NODE:
7838 # If an allocator is used, then we lock all the nodes in the current
7839 # instance group, as we don't know yet which ones will be selected;
7840 # if we replace the nodes without using an allocator, locks are
7841 # already declared in ExpandNames; otherwise, we need to lock all the
7842 # instance nodes for disk re-creation
7843 if self.op.iallocator:
7844 assert not self.op.nodes
7845 assert not self.needed_locks[locking.LEVEL_NODE]
7846 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7848 # Lock member nodes of the group of the primary node
7849 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7850 self.needed_locks[locking.LEVEL_NODE].extend(
7851 self.cfg.GetNodeGroup(group_uuid).members)
7853 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7854 elif not self.op.nodes:
7855 self._LockInstancesNodes(primary_only=False)
7856 elif level == locking.LEVEL_NODE_RES:
7858 self.needed_locks[locking.LEVEL_NODE_RES] = \
7859 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7861 def BuildHooksEnv(self):
7864 This runs on master, primary and secondary nodes of the instance.
7867 return _BuildInstanceHookEnvByObject(self, self.instance)
7869 def BuildHooksNodes(self):
7870 """Build hooks nodes.
7873 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7876 def CheckPrereq(self):
7877 """Check prerequisites.
7879 This checks that the instance is in the cluster and is not running.
7882 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7883 assert instance is not None, \
7884 "Cannot retrieve locked instance %s" % self.op.instance_name
7886 if len(self.op.nodes) != len(instance.all_nodes):
7887 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7888 " %d replacement nodes were specified" %
7889 (instance.name, len(instance.all_nodes),
7890 len(self.op.nodes)),
7892 assert instance.disk_template != constants.DT_DRBD8 or \
7893 len(self.op.nodes) == 2
7894 assert instance.disk_template != constants.DT_PLAIN or \
7895 len(self.op.nodes) == 1
7896 primary_node = self.op.nodes[0]
7898 primary_node = instance.primary_node
7899 if not self.op.iallocator:
7900 _CheckNodeOnline(self, primary_node)
7902 if instance.disk_template == constants.DT_DISKLESS:
7903 raise errors.OpPrereqError("Instance '%s' has no disks" %
7904 self.op.instance_name, errors.ECODE_INVAL)
7906 # Verify if node group locks are still correct
7907 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7909 # Node group locks are acquired only for the primary node (and only
7910 # when the allocator is used)
7911 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7914 # if we replace nodes *and* the old primary is offline, we don't
7915 # check the instance state
7916 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7917 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7918 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7919 msg="cannot recreate disks")
7922 self.disks = dict(self.op.disks)
7924 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7926 maxidx = max(self.disks.keys())
7927 if maxidx >= len(instance.disks):
7928 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7931 if ((self.op.nodes or self.op.iallocator) and
7932 sorted(self.disks.keys()) != range(len(instance.disks))):
7933 raise errors.OpPrereqError("Can't recreate disks partially and"
7934 " change the nodes at the same time",
7937 self.instance = instance
7939 if self.op.iallocator:
7940 self._RunAllocator()
7941 # Release unneeded node and node resource locks
7942 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7943 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7944 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7946 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7948 def Exec(self, feedback_fn):
7949 """Recreate the disks.
7952 instance = self.instance
7954 assert (self.owned_locks(locking.LEVEL_NODE) ==
7955 self.owned_locks(locking.LEVEL_NODE_RES))
7958 mods = [] # keeps track of needed changes
7960 for idx, disk in enumerate(instance.disks):
7962 changes = self.disks[idx]
7964 # Disk should not be recreated
7968 # update secondaries for disks, if needed
7969 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7970 # need to update the nodes and minors
7971 assert len(self.op.nodes) == 2
7972 assert len(disk.logical_id) == 6 # otherwise disk internals
7974 (_, _, old_port, _, _, old_secret) = disk.logical_id
7975 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7976 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7977 new_minors[0], new_minors[1], old_secret)
7978 assert len(disk.logical_id) == len(new_id)
7982 mods.append((idx, new_id, changes))
7984 # now that we have passed all asserts above, we can apply the mods
7985 # in a single run (to avoid partial changes)
7986 for idx, new_id, changes in mods:
7987 disk = instance.disks[idx]
7988 if new_id is not None:
7989 assert disk.dev_type == constants.LD_DRBD8
7990 disk.logical_id = new_id
7992 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7993 mode=changes.get(constants.IDISK_MODE, None))
7995 # change primary node, if needed
7997 instance.primary_node = self.op.nodes[0]
7998 self.LogWarning("Changing the instance's nodes; you will have to"
7999 " remove any disks left on the older nodes manually")
8002 self.cfg.Update(instance, feedback_fn)
8004 # All touched nodes must be locked
8005 mylocks = self.owned_locks(locking.LEVEL_NODE)
8006 assert mylocks.issuperset(frozenset(instance.all_nodes))
8007 _CreateDisks(self, instance, to_skip=to_skip)
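# Illustrative sketch (the function name is ours): how Exec above derives a
# replacement DRBD logical_id when the instance's nodes change. The TCP port
# and shared secret are preserved; the node pair and both minors are new.
def _ExampleNewDrbdLogicalId(old_logical_id, new_nodes, new_minors):
  """Return a replacement 6-tuple logical_id for a DRBD8 disk (sketch).

  """
  (_, _, old_port, _, _, old_secret) = old_logical_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)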
8010 class LUInstanceRename(LogicalUnit):
8011 """Rename an instance.
8014 HPATH = "instance-rename"
8015 HTYPE = constants.HTYPE_INSTANCE
8017 def CheckArguments(self):
8021 if self.op.ip_check and not self.op.name_check:
8022 # TODO: make the ip check more flexible and not depend on the name check
8023 raise errors.OpPrereqError("IP address check requires a name check",
8026 def BuildHooksEnv(self):
8029 This runs on master, primary and secondary nodes of the instance.
8032 env = _BuildInstanceHookEnvByObject(self, self.instance)
8033 env["INSTANCE_NEW_NAME"] = self.op.new_name
8036 def BuildHooksNodes(self):
8037 """Build hooks nodes.
8040 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8043 def CheckPrereq(self):
8044 """Check prerequisites.
8046 This checks that the instance is in the cluster and is not running.
8049 self.op.instance_name = _ExpandInstanceName(self.cfg,
8050 self.op.instance_name)
8051 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8052 assert instance is not None
8053 _CheckNodeOnline(self, instance.primary_node)
8054 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
8055 msg="cannot rename")
8056 self.instance = instance
8058 new_name = self.op.new_name
8059 if self.op.name_check:
8060 hostname = _CheckHostnameSane(self, new_name)
8061 new_name = self.op.new_name = hostname.name
8062 if (self.op.ip_check and
8063 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
8064 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8065 (hostname.ip, new_name),
8066 errors.ECODE_NOTUNIQUE)
8068 instance_list = self.cfg.GetInstanceList()
8069 if new_name in instance_list and new_name != instance.name:
8070 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8071 new_name, errors.ECODE_EXISTS)
8073 def Exec(self, feedback_fn):
8074 """Rename the instance.
8077 inst = self.instance
8078 old_name = inst.name
8080 rename_file_storage = False
8081 if (inst.disk_template in constants.DTS_FILEBASED and
8082 self.op.new_name != inst.name):
8083 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
8084 rename_file_storage = True
8086 self.cfg.RenameInstance(inst.name, self.op.new_name)
8087 # Change the instance lock. This is definitely safe while we hold the BGL.
8088 # Otherwise the new lock would have to be added in acquired mode.
8090 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
8091 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
8092 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
8094 # re-read the instance from the configuration after rename
8095 inst = self.cfg.GetInstanceInfo(self.op.new_name)
8097 if rename_file_storage:
8098 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
8099 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
8100 old_file_storage_dir,
8101 new_file_storage_dir)
8102 result.Raise("Could not rename on node %s directory '%s' to '%s'"
8103 " (but the instance has been renamed in Ganeti)" %
8104 (inst.primary_node, old_file_storage_dir,
8105 new_file_storage_dir))
8107 _StartInstanceDisks(self, inst, None)
8108 # update info on disks
8109 info = _GetInstanceInfoText(inst)
8110 for (idx, disk) in enumerate(inst.disks):
8111 for node in inst.all_nodes:
8112 self.cfg.SetDiskID(disk, node)
8113 result = self.rpc.call_blockdev_setinfo(node, disk, info)
8115 self.LogWarning("Error setting info on node %s for disk %s: %s",
8116 node, idx, result.fail_msg)
8118 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
8119 old_name, self.op.debug_level)
8120 msg = result.fail_msg
8122 msg = ("Could not run OS rename script for instance %s on node %s"
8123 " (but the instance has been renamed in Ganeti): %s" %
8124 (inst.name, inst.primary_node, msg))
8125 self.LogWarning(msg)
8127 _ShutdownInstanceDisks(self, inst)
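# Illustrative sketch (the function name is ours): how the rename code above
# locates the file-storage directory of a file-based instance. The directory
# is simply the dirname of the path kept in the first disk's logical_id.
def _ExampleFileStorageDir(instance):
  """Return the instance's file-storage directory, or None (sketch).

  """
  if instance.disk_template in constants.DTS_FILEBASED:
    return os.path.dirname(instance.disks[0].logical_id[1])
  return None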
8132 class LUInstanceRemove(LogicalUnit):
8133 """Remove an instance.
8136 HPATH = "instance-remove"
8137 HTYPE = constants.HTYPE_INSTANCE
8140 def ExpandNames(self):
8141 self._ExpandAndLockInstance()
8142 self.needed_locks[locking.LEVEL_NODE] = []
8143 self.needed_locks[locking.LEVEL_NODE_RES] = []
8144 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8146 def DeclareLocks(self, level):
8147 if level == locking.LEVEL_NODE:
8148 self._LockInstancesNodes()
8149 elif level == locking.LEVEL_NODE_RES:
8151 self.needed_locks[locking.LEVEL_NODE_RES] = \
8152 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8154 def BuildHooksEnv(self):
8157 This runs on master, primary and secondary nodes of the instance.
8160 env = _BuildInstanceHookEnvByObject(self, self.instance)
8161 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8164 def BuildHooksNodes(self):
8165 """Build hooks nodes.
8168 nl = [self.cfg.GetMasterNode()]
8169 nl_post = list(self.instance.all_nodes) + nl
8170 return (nl, nl_post)
8172 def CheckPrereq(self):
8173 """Check prerequisites.
8175 This checks that the instance is in the cluster.
8178 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8179 assert self.instance is not None, \
8180 "Cannot retrieve locked instance %s" % self.op.instance_name
8182 def Exec(self, feedback_fn):
8183 """Remove the instance.
8186 instance = self.instance
8187 logging.info("Shutting down instance %s on node %s",
8188 instance.name, instance.primary_node)
8190 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8191 self.op.shutdown_timeout)
8192 msg = result.fail_msg
8194 if self.op.ignore_failures:
8195 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8197 raise errors.OpExecError("Could not shutdown instance %s on"
8199 (instance.name, instance.primary_node, msg))
8201 assert (self.owned_locks(locking.LEVEL_NODE) ==
8202 self.owned_locks(locking.LEVEL_NODE_RES))
8203 assert not (set(instance.all_nodes) -
8204 self.owned_locks(locking.LEVEL_NODE)), \
8205 "Not owning correct locks"
8207 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8210 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8211 """Utility function to remove an instance.
8214 logging.info("Removing block devices for instance %s", instance.name)
8216 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8217 if not ignore_failures:
8218 raise errors.OpExecError("Can't remove instance's disks")
8219 feedback_fn("Warning: can't remove instance's disks")
8221 logging.info("Removing instance %s out of cluster config", instance.name)
8223 lu.cfg.RemoveInstance(instance.name)
8225 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8226 "Instance lock removal conflict"
8228 # Remove lock for the instance
8229 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8232 class LUInstanceQuery(NoHooksLU):
8233 """Logical unit for querying instances.
8236 # pylint: disable=W0142
8239 def CheckArguments(self):
8240 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8241 self.op.output_fields, self.op.use_locking)
8243 def ExpandNames(self):
8244 self.iq.ExpandNames(self)
8246 def DeclareLocks(self, level):
8247 self.iq.DeclareLocks(self, level)
8249 def Exec(self, feedback_fn):
8250 return self.iq.OldStyleQuery(self)
8253 def _ExpandNamesForMigration(lu):
8254 """Expands names for use with L{TLMigrateInstance}.
8256 @type lu: L{LogicalUnit}
8259 if lu.op.target_node is not None:
8260 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8262 lu.needed_locks[locking.LEVEL_NODE] = []
8263 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8265 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8266 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8268 # The node allocation lock is actually only needed for externally replicated
8269 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
8270 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8273 def _DeclareLocksForMigration(lu, level):
8274 """Declares locks for L{TLMigrateInstance}.
8276 @type lu: L{LogicalUnit}
8277 @param level: Lock level
8280 if level == locking.LEVEL_NODE_ALLOC:
8281 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8283 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8285 # Node locks are already declared here rather than at LEVEL_NODE as we need
8286 # the instance object anyway to declare the node allocation lock.
8287 if instance.disk_template in constants.DTS_EXT_MIRROR:
8288 if lu.op.target_node is None:
8289 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8290 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8292 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8294 del lu.recalculate_locks[locking.LEVEL_NODE]
8296 lu._LockInstancesNodes() # pylint: disable=W0212
8298 elif level == locking.LEVEL_NODE:
8299 # Node locks are declared together with the node allocation lock
8300 assert (lu.needed_locks[locking.LEVEL_NODE] or
8301 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8303 elif level == locking.LEVEL_NODE_RES:
8305 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8306 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
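# Illustrative sketch (the function name is ours): the node-lock choice made
# by _DeclareLocksForMigration above for externally mirrored disk templates.
# Without a fixed target node all node locks are taken (the iallocator may
# pick any node); with one, only the primary and the target are locked.
def _ExampleExtMirrorNodeLocks(instance, target_node):
  """Return the node locks needed for an ext-mirror migration (sketch).

  """
  if target_node is None:
    return locking.ALL_SET
  return [instance.primary_node, target_node]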
8309 class LUInstanceFailover(LogicalUnit):
8310 """Failover an instance.
8313 HPATH = "instance-failover"
8314 HTYPE = constants.HTYPE_INSTANCE
8317 def CheckArguments(self):
8318 """Check the arguments.
8321 self.iallocator = getattr(self.op, "iallocator", None)
8322 self.target_node = getattr(self.op, "target_node", None)
8324 def ExpandNames(self):
8325 self._ExpandAndLockInstance()
8326 _ExpandNamesForMigration(self)
8329 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8330 self.op.ignore_consistency, True,
8331 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8333 self.tasklets = [self._migrater]
8335 def DeclareLocks(self, level):
8336 _DeclareLocksForMigration(self, level)
8338 def BuildHooksEnv(self):
8341 This runs on master, primary and secondary nodes of the instance.
8344 instance = self._migrater.instance
8345 source_node = instance.primary_node
8346 target_node = self.op.target_node
8348 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8349 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8350 "OLD_PRIMARY": source_node,
8351 "NEW_PRIMARY": target_node,
8354 if instance.disk_template in constants.DTS_INT_MIRROR:
8355 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8356 env["NEW_SECONDARY"] = source_node
8358 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8360 env.update(_BuildInstanceHookEnvByObject(self, instance))
8364 def BuildHooksNodes(self):
8365 """Build hooks nodes.
8368 instance = self._migrater.instance
8369 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8370 return (nl, nl + [instance.primary_node])
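# Illustrative sketch (the function name is ours): the OLD_SECONDARY /
# NEW_SECONDARY computation from BuildHooksEnv above. For internally
# mirrored templates the old primary becomes the new secondary; other
# templates have no secondary to report.
def _ExampleFailoverSecondaryEnv(instance, source_node):
  """Return (old_secondary, new_secondary) for the hooks env (sketch).

  """
  if instance.disk_template in constants.DTS_INT_MIRROR:
    return (instance.secondary_nodes[0], source_node)
  return ("", "")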
8373 class LUInstanceMigrate(LogicalUnit):
8374 """Migrate an instance.
8376 This is migration without shutting down, compared to the failover,
8377 which is done with shutdown.
8380 HPATH = "instance-migrate"
8381 HTYPE = constants.HTYPE_INSTANCE
8384 def ExpandNames(self):
8385 self._ExpandAndLockInstance()
8386 _ExpandNamesForMigration(self)
8389 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8390 False, self.op.allow_failover, False,
8391 self.op.allow_runtime_changes,
8392 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8393 self.op.ignore_ipolicy)
8395 self.tasklets = [self._migrater]
8397 def DeclareLocks(self, level):
8398 _DeclareLocksForMigration(self, level)
8400 def BuildHooksEnv(self):
8403 This runs on master, primary and secondary nodes of the instance.
8406 instance = self._migrater.instance
8407 source_node = instance.primary_node
8408 target_node = self.op.target_node
8409 env = _BuildInstanceHookEnvByObject(self, instance)
8411 "MIGRATE_LIVE": self._migrater.live,
8412 "MIGRATE_CLEANUP": self.op.cleanup,
8413 "OLD_PRIMARY": source_node,
8414 "NEW_PRIMARY": target_node,
8415 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8418 if instance.disk_template in constants.DTS_INT_MIRROR:
8419 env["OLD_SECONDARY"] = target_node
8420 env["NEW_SECONDARY"] = source_node
8422 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8426 def BuildHooksNodes(self):
8427 """Build hooks nodes.
8430 instance = self._migrater.instance
8431 snodes = list(instance.secondary_nodes)
8432 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
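# Illustrative sketch (the function name is ours): how LUInstanceFailover
# and LUInstanceMigrate above parameterize the shared TLMigrateInstance
# tasklet. Reading the positional arguments of the two constructor calls:
# failover always passes cleanup=False, failover=True, fallback=False,
# while migrate takes cleanup and fallback (allow_failover) from the opcode.
def _ExampleTaskletFlags(op, failover):
  """Return the (cleanup, failover, fallback, ignore_consistency) flags
  passed to TLMigrateInstance by the two LUs (sketch).

  """
  if failover:
    return (False, True, False, op.ignore_consistency)
  return (op.cleanup, False, op.allow_failover, False)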
8436 class LUInstanceMove(LogicalUnit):
8437 """Move an instance by data-copying.
8440 HPATH = "instance-move"
8441 HTYPE = constants.HTYPE_INSTANCE
8444 def ExpandNames(self):
8445 self._ExpandAndLockInstance()
8446 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8447 self.op.target_node = target_node
8448 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8449 self.needed_locks[locking.LEVEL_NODE_RES] = []
8450 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8452 def DeclareLocks(self, level):
8453 if level == locking.LEVEL_NODE:
8454 self._LockInstancesNodes(primary_only=True)
8455 elif level == locking.LEVEL_NODE_RES:
8457 self.needed_locks[locking.LEVEL_NODE_RES] = \
8458 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8460 def BuildHooksEnv(self):
8463 This runs on master, primary and secondary nodes of the instance.
8467 "TARGET_NODE": self.op.target_node,
8468 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8470 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8473 def BuildHooksNodes(self):
8474 """Build hooks nodes.
8478 self.cfg.GetMasterNode(),
8479 self.instance.primary_node,
8480 self.op.target_node,
8484 def CheckPrereq(self):
8485 """Check prerequisites.
8487 This checks that the instance is in the cluster.
8490 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8491 assert self.instance is not None, \
8492 "Cannot retrieve locked instance %s" % self.op.instance_name
8494 if instance.disk_template not in constants.DTS_COPYABLE:
8495 raise errors.OpPrereqError("Disk template %s not suitable for copying" %
8496 instance.disk_template, errors.ECODE_STATE)
8498 node = self.cfg.GetNodeInfo(self.op.target_node)
8499 assert node is not None, \
8500 "Cannot retrieve locked node %s" % self.op.target_node
8502 self.target_node = target_node = node.name
8504 if target_node == instance.primary_node:
8505 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8506 (instance.name, target_node),
8509 bep = self.cfg.GetClusterInfo().FillBE(instance)
8511 for idx, dsk in enumerate(instance.disks):
8512 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8513 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8514 " cannot copy" % idx, errors.ECODE_STATE)
8516 _CheckNodeOnline(self, target_node)
8517 _CheckNodeNotDrained(self, target_node)
8518 _CheckNodeVmCapable(self, target_node)
8519 cluster = self.cfg.GetClusterInfo()
8520 group_info = self.cfg.GetNodeGroup(node.group)
8521 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8522 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
8523 ignore=self.op.ignore_ipolicy)
8525 if instance.admin_state == constants.ADMINST_UP:
8526 # check memory requirements on the secondary node
8527 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8528 instance.name, bep[constants.BE_MAXMEM],
8529 instance.hypervisor)
8531 self.LogInfo("Not checking memory on the secondary node as"
8532 " instance will not be started")
8534 # check bridge existence
8535 _CheckInstanceBridgesExist(self, instance, node=target_node)
8537 def Exec(self, feedback_fn):
8538 """Move an instance.
8540 The move is done by shutting it down on its present node, copying
8541 the data over (slow) and starting it on the new node.
8544 instance = self.instance
8546 source_node = instance.primary_node
8547 target_node = self.target_node
8549 self.LogInfo("Shutting down instance %s on source node %s",
8550 instance.name, source_node)
8552 assert (self.owned_locks(locking.LEVEL_NODE) ==
8553 self.owned_locks(locking.LEVEL_NODE_RES))
8555 result = self.rpc.call_instance_shutdown(source_node, instance,
8556 self.op.shutdown_timeout)
8557 msg = result.fail_msg
8559 if self.op.ignore_consistency:
8560 self.LogWarning("Could not shutdown instance %s on node %s."
8561 " Proceeding anyway. Please make sure node"
8562 " %s is down. Error details: %s",
8563 instance.name, source_node, source_node, msg)
8565 raise errors.OpExecError("Could not shutdown instance %s on"
8567 (instance.name, source_node, msg))
8569 # create the target disks
8571 _CreateDisks(self, instance, target_node=target_node)
8572 except errors.OpExecError:
8573 self.LogWarning("Device creation failed")
8574 self.cfg.ReleaseDRBDMinors(instance.name)
8577 cluster_name = self.cfg.GetClusterInfo().cluster_name
8580 # activate, get path, copy the data over
8581 for idx, disk in enumerate(instance.disks):
8582 self.LogInfo("Copying data for disk %d", idx)
8583 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8584 instance.name, True, idx)
8586 self.LogWarning("Can't assemble newly created disk %d: %s",
8587 idx, result.fail_msg)
8588 errs.append(result.fail_msg)
8590 dev_path = result.payload
8591 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8592 target_node, dev_path,
8595 self.LogWarning("Can't copy data over for disk %d: %s",
8596 idx, result.fail_msg)
8597 errs.append(result.fail_msg)
8601 self.LogWarning("Some disks failed to copy, aborting")
8603 _RemoveDisks(self, instance, target_node=target_node)
8605 self.cfg.ReleaseDRBDMinors(instance.name)
8606 raise errors.OpExecError("Errors during disk copy: %s" %
8609 instance.primary_node = target_node
8610 self.cfg.Update(instance, feedback_fn)
8612 self.LogInfo("Removing the disks on the original node")
8613 _RemoveDisks(self, instance, target_node=source_node)
8615 # Only start the instance if it's marked as up
8616 if instance.admin_state == constants.ADMINST_UP:
8617 self.LogInfo("Starting instance %s on node %s",
8618 instance.name, target_node)
8620 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8621 ignore_secondaries=True)
8623 _ShutdownInstanceDisks(self, instance)
8624 raise errors.OpExecError("Can't activate the instance's disks")
8626 result = self.rpc.call_instance_start(target_node,
8627 (instance, None, None), False)
8628 msg = result.fail_msg
8630 _ShutdownInstanceDisks(self, instance)
8631 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8632 (instance.name, target_node, msg))
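# Illustrative sketch (the helper name is ours; the final cluster_name
# argument to the export RPC is an assumption based on the cluster name
# fetched earlier in Exec): the per-disk copy step of LUInstanceMove.Exec
# above. Each disk is assembled on the target node, then exported from the
# source to the resulting device path; failures are collected in errs so
# the caller can abort and clean up once, after all disks were attempted.
def _ExampleCopyDisk(lu, instance, disk, idx, source_node, target_node,
                     cluster_name, errs):
  """Copy one disk from source_node to target_node (sketch).

  """
  result = lu.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                         instance.name, True, idx)
  if result.fail_msg:
    errs.append(result.fail_msg)
    return
  dev_path = result.payload
  result = lu.rpc.call_blockdev_export(source_node, (disk, instance),
                                       target_node, dev_path, cluster_name)
  if result.fail_msg:
    errs.append(result.fail_msg)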
8635 class LUNodeMigrate(LogicalUnit):
8636 """Migrate all instances from a node.
8639 HPATH = "node-migrate"
8640 HTYPE = constants.HTYPE_NODE
8643 def CheckArguments(self):
8646 def ExpandNames(self):
8647 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8649 self.share_locks = _ShareAll()
8650 self.needed_locks = {
8651 locking.LEVEL_NODE: [self.op.node_name],
8654 def BuildHooksEnv(self):
8657 This runs on the master, the primary and all the secondaries.
8661 "NODE_NAME": self.op.node_name,
8662 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8665 def BuildHooksNodes(self):
8666 """Build hooks nodes.
8669 nl = [self.cfg.GetMasterNode()]
8672 def CheckPrereq(self):
8675 def Exec(self, feedback_fn):
8676 # Prepare jobs for migration instances
8677 allow_runtime_changes = self.op.allow_runtime_changes
8679 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8682 iallocator=self.op.iallocator,
8683 target_node=self.op.target_node,
8684 allow_runtime_changes=allow_runtime_changes,
8685 ignore_ipolicy=self.op.ignore_ipolicy)]
8686 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8688 # TODO: Run iallocator in this opcode and pass correct placement options to
8689 # OpInstanceMigrate. Since other jobs can modify the cluster between
8690 # running the iallocator and the actual migration, a good consistency model
8691 # will have to be found.
8693 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8694 frozenset([self.op.node_name]))
8696 return ResultWithJobs(jobs)
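# Illustrative sketch (the function name is ours; for brevity all other
# opcode parameters are left at their defaults): the shape of the value
# returned by LUNodeMigrate.Exec above. Each inner list is one job, here
# holding a single OpInstanceMigrate opcode, so every instance is migrated
# in its own job and failures stay independent.
def _ExampleNodeMigrateJobs(instance_names):
  """Build a ResultWithJobs with one single-opcode job per instance (sketch).

  """
  jobs = [[opcodes.OpInstanceMigrate(instance_name=name)]
          for name in instance_names]
  return ResultWithJobs(jobs)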
8699 class TLMigrateInstance(Tasklet):
8700 """Tasklet class for instance migration.
8703 @ivar live: whether the migration will be done live or non-live;
8704 this variable is initialized only after CheckPrereq has run
8705 @type cleanup: boolean
8706 @ivar cleanup: Whether we clean up from a failed migration
8707 @type iallocator: string
8708 @ivar iallocator: The iallocator used to determine target_node
8709 @type target_node: string
8710 @ivar target_node: If given, the target_node to reallocate the instance to
8711 @type failover: boolean
8712 @ivar failover: Whether operation results in failover or migration
8713 @type fallback: boolean
8714 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
8716 @type ignore_consistency: boolean
8717 @ivar ignore_consistency: Whether we should ignore consistency between source and target nodes
8719 @type shutdown_timeout: int
8720 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8721 @type ignore_ipolicy: bool
8722 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8727 _MIGRATION_POLL_INTERVAL = 1 # seconds
8728 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8730 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8731 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8733 """Initializes this class.
8736 Tasklet.__init__(self, lu)
8739 self.instance_name = instance_name
8740 self.cleanup = cleanup
8741 self.live = False # will be overridden later
8742 self.failover = failover
8743 self.fallback = fallback
8744 self.ignore_consistency = ignore_consistency
8745 self.shutdown_timeout = shutdown_timeout
8746 self.ignore_ipolicy = ignore_ipolicy
8747 self.allow_runtime_changes = allow_runtime_changes
8749 def CheckPrereq(self):
8750 """Check prerequisites.
8752 This checks that the instance is in the cluster.
8755 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8756 instance = self.cfg.GetInstanceInfo(instance_name)
8757 assert instance is not None
8758 self.instance = instance
8759 cluster = self.cfg.GetClusterInfo()
8761 if (not self.cleanup and
8762 not instance.admin_state == constants.ADMINST_UP and
8763 not self.failover and self.fallback):
8764 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8765 " switching to failover")
8766 self.failover = True
8768 if instance.disk_template not in constants.DTS_MIRRORED:
8773 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8774 " %s" % (instance.disk_template, text),
8777 if instance.disk_template in constants.DTS_EXT_MIRROR:
8778 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8780 if self.lu.op.iallocator:
8781 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8782 self._RunAllocator()
8784 # We set self.target_node as it is required by _CheckTargetNodeIPolicy
8786 self.target_node = self.lu.op.target_node
8788 # Check that the target node is correct in terms of instance policy
8789 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8790 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8791 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8793 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8794 ignore=self.ignore_ipolicy)
8796 # self.target_node is already populated, either directly or by the
8798 target_node = self.target_node
8799 if self.target_node == instance.primary_node:
8800 raise errors.OpPrereqError("Cannot migrate instance %s"
8801 " to its primary (%s)" %
8802 (instance.name, instance.primary_node),
8805 if len(self.lu.tasklets) == 1:
8806 # It is safe to release locks only when we're the only tasklet
8808 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8809 keep=[instance.primary_node, self.target_node])
8810 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8813 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8815 secondary_nodes = instance.secondary_nodes
8816 if not secondary_nodes:
8817 raise errors.ConfigurationError("No secondary node but using"
8818 " %s disk template" %
8819 instance.disk_template)
8820 target_node = secondary_nodes[0]
8821 if self.lu.op.iallocator or (self.lu.op.target_node and
8822 self.lu.op.target_node != target_node):
8824 text = "failed over"
8827 raise errors.OpPrereqError("Instances with disk template %s cannot"
8828 " be %s to arbitrary nodes"
8829 " (neither an iallocator nor a target"
8830 " node can be passed)" %
8831 (instance.disk_template, text),
8833 nodeinfo = self.cfg.GetNodeInfo(target_node)
8834 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8835 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8837 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8838 ignore=self.ignore_ipolicy)
8840 i_be = cluster.FillBE(instance)
8842 # check memory requirements on the secondary node
8843 if (not self.cleanup and
8844 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8845 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8846 "migrating instance %s" %
8848 i_be[constants.BE_MINMEM],
8849 instance.hypervisor)
8851 self.lu.LogInfo("Not checking memory on the secondary node as"
8852 " instance will not be started")
8854 # check if failover must be forced instead of migration
8855 if (not self.cleanup and not self.failover and
8856 i_be[constants.BE_ALWAYS_FAILOVER]):
8857 self.lu.LogInfo("Instance configured to always failover; fallback"
8859 self.failover = True
8861 # check bridge existence
8862 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8864 if not self.cleanup:
8865 _CheckNodeNotDrained(self.lu, target_node)
8866 if not self.failover:
8867 result = self.rpc.call_instance_migratable(instance.primary_node,
8869 if result.fail_msg and self.fallback:
8870 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8872 self.failover = True
8874 result.Raise("Can't migrate, please use failover",
8875 prereq=True, ecode=errors.ECODE_STATE)
8877 assert not (self.failover and self.cleanup)
8879 if not self.failover:
8880 if self.lu.op.live is not None and self.lu.op.mode is not None:
8881 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8882 " parameters are accepted",
8884 if self.lu.op.live is not None:
8886 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8888 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8889 # reset the 'live' parameter to None so that repeated
8890 # invocations of CheckPrereq do not raise an exception
8891 self.lu.op.live = None
8892 elif self.lu.op.mode is None:
8893 # read the default value from the hypervisor
8894 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8895 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8897 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8899 # Failover is never live
8902 if not (self.failover or self.cleanup):
8903 remote_info = self.rpc.call_instance_info(instance.primary_node,
8905 instance.hypervisor)
8906 remote_info.Raise("Error checking instance on node %s" %
8907 instance.primary_node)
8908 instance_running = bool(remote_info.payload)
8909 if instance_running:
8910 self.current_mem = int(remote_info.payload["memory"])
8912 def _RunAllocator(self):
8913 """Run the allocator based on input opcode.
8916 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8918 # FIXME: add a self.ignore_ipolicy option
8919 req = iallocator.IAReqRelocate(name=self.instance_name,
8920 relocate_from=[self.instance.primary_node])
8921 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8923 ial.Run(self.lu.op.iallocator)
8926 raise errors.OpPrereqError("Can't compute nodes using"
8927 " iallocator '%s': %s" %
8928 (self.lu.op.iallocator, ial.info),
8930 self.target_node = ial.result[0]
8931 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8932 self.instance_name, self.lu.op.iallocator,
8933 utils.CommaJoin(ial.result))
8935 def _WaitUntilSync(self):
8936 """Poll with custom rpc for disk sync.
8938 This uses our own step-based rpc call.
8941 self.feedback_fn("* wait until resync is done")
8945 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8947 (self.instance.disks,
8950 for node, nres in result.items():
8951 nres.Raise("Cannot resync disks on node %s" % node)
8952 node_done, node_percent = nres.payload
8953 all_done = all_done and node_done
8954 if node_percent is not None:
8955 min_percent = min(min_percent, node_percent)
8957 if min_percent < 100:
8958 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8961 def _EnsureSecondary(self, node):
8962 """Demote a node to secondary.
8965 self.feedback_fn("* switching node %s to secondary mode" % node)
8967 for dev in self.instance.disks:
8968 self.cfg.SetDiskID(dev, node)
8970 result = self.rpc.call_blockdev_close(node, self.instance.name,
8971 self.instance.disks)
8972 result.Raise("Cannot change disk to secondary on node %s" % node)
8974 def _GoStandalone(self):
8975 """Disconnect from the network.
8978 self.feedback_fn("* changing into standalone mode")
8979 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8980 self.instance.disks)
8981 for node, nres in result.items():
8982 nres.Raise("Cannot disconnect disks node %s" % node)
8984 def _GoReconnect(self, multimaster):
8985 """Reconnect to the network.
8991 msg = "single-master"
8992 self.feedback_fn("* changing disks into %s mode" % msg)
8993 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8994 (self.instance.disks, self.instance),
8995 self.instance.name, multimaster)
8996 for node, nres in result.items():
8997 nres.Raise("Cannot change disks config on node %s" % node)
8999 def _ExecCleanup(self):
9000 """Try to cleanup after a failed migration.
9002 The cleanup is done by:
9003 - check that the instance is running only on one node
9004 (and update the config if needed)
9005 - change disks on its secondary node to secondary
9006 - wait until disks are fully synchronized
9007 - disconnect from the network
9008 - change disks into single-master mode
9009 - wait again until disks are fully synchronized
9012 instance = self.instance
9013 target_node = self.target_node
9014 source_node = self.source_node
9016 # check running on only one node
9017 self.feedback_fn("* checking where the instance actually runs"
9018 " (if this hangs, the hypervisor might be in"
9020 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
9021 for node, result in ins_l.items():
9022 result.Raise("Can't contact node %s" % node)
9024 runningon_source = instance.name in ins_l[source_node].payload
9025 runningon_target = instance.name in ins_l[target_node].payload
9027 if runningon_source and runningon_target:
9028 raise errors.OpExecError("Instance seems to be running on two nodes,"
9029 " or the hypervisor is confused; you will have"
9030 " to ensure manually that it runs only on one"
9031 " and restart this operation")
9033 if not (runningon_source or runningon_target):
9034 raise errors.OpExecError("Instance does not seem to be running at all;"
9035 " in this case it's safer to repair by"
9036 " running 'gnt-instance stop' to ensure disk"
9037 " shutdown, and then restarting it")
9039 if runningon_target:
9040 # the migration has actually succeeded, we need to update the config
9041 self.feedback_fn("* instance running on secondary node (%s),"
9042 " updating config" % target_node)
9043 instance.primary_node = target_node
9044 self.cfg.Update(instance, self.feedback_fn)
9045 demoted_node = source_node
9047 self.feedback_fn("* instance confirmed to be running on its"
9048 " primary node (%s)" % source_node)
9049 demoted_node = target_node
9051 if instance.disk_template in constants.DTS_INT_MIRROR:
9052 self._EnsureSecondary(demoted_node)
9054 self._WaitUntilSync()
9055 except errors.OpExecError:
9056 # we ignore here errors, since if the device is standalone, it
9057 # won't be able to sync
9059 self._GoStandalone()
9060 self._GoReconnect(False)
9061 self._WaitUntilSync()
9063 self.feedback_fn("* done")
9065 def _RevertDiskStatus(self):
9066 """Try to revert the disk status after a failed migration.
9069 target_node = self.target_node
9070 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
9074 self._EnsureSecondary(target_node)
9075 self._GoStandalone()
9076 self._GoReconnect(False)
9077 self._WaitUntilSync()
9078 except errors.OpExecError, err:
9079 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
9080 " please try to recover the instance manually;"
9081 " error '%s'" % str(err))
9083 def _AbortMigration(self):
9084 """Call the hypervisor code to abort a started migration.
9087 instance = self.instance
9088 target_node = self.target_node
9089 source_node = self.source_node
9090 migration_info = self.migration_info
9092 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
9096 abort_msg = abort_result.fail_msg
9098 logging.error("Aborting migration failed on target node %s: %s",
9099 target_node, abort_msg)
9100 # Don't raise an exception here, as we still have to try to revert the
9101 # disk status, even if this step failed.
9103 abort_result = self.rpc.call_instance_finalize_migration_src(
9104 source_node, instance, False, self.live)
9105 abort_msg = abort_result.fail_msg
9107 logging.error("Aborting migration failed on source node %s: %s",
9108 source_node, abort_msg)
9110 def _ExecMigration(self):
9111 """Migrate an instance.
9113 The migration is done by:
9114 - change the disks into dual-master mode
9115 - wait until disks are fully synchronized again
9116 - migrate the instance
9117 - change disks on the new secondary node (the old primary) to secondary
9118 - wait until disks are fully synchronized
9119 - change disks into single-master mode
9122 instance = self.instance
9123 target_node = self.target_node
9124 source_node = self.source_node
9126 # Check for hypervisor version mismatch and warn the user.
9127 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9128 None, [self.instance.hypervisor], False)
9129 for ninfo in nodeinfo.values():
9130 ninfo.Raise("Unable to retrieve node information from node '%s'" %
9132 (_, _, (src_info, )) = nodeinfo[source_node].payload
9133 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9135 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9136 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9137 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9138 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9139 if src_version != dst_version:
9140 self.feedback_fn("* warning: hypervisor version mismatch between"
9141 " source (%s) and target (%s) node" %
9142 (src_version, dst_version))
9144 self.feedback_fn("* checking disk consistency between source and target")
9145 for (idx, dev) in enumerate(instance.disks):
9146 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9147 raise errors.OpExecError("Disk %s is degraded or not fully"
9148 " synchronized on target node,"
9149 " aborting migration" % idx)
9151 if self.current_mem > self.tgt_free_mem:
9152 if not self.allow_runtime_changes:
9153 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9154 " free memory to fit instance %s on target"
9155 " node %s (have %dMB, need %dMB)" %
9156 (instance.name, target_node,
9157 self.tgt_free_mem, self.current_mem))
9158 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9159 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9162 rpcres.Raise("Cannot modify instance runtime memory")
9164 # First get the migration information from the remote node
9165 result = self.rpc.call_migration_info(source_node, instance)
9166 msg = result.fail_msg
9168 log_err = ("Failed fetching source migration information from %s: %s" %
9170 logging.error(log_err)
9171 raise errors.OpExecError(log_err)
9173 self.migration_info = migration_info = result.payload
9175 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9176 # Then switch the disks to master/master mode
9177 self._EnsureSecondary(target_node)
9178 self._GoStandalone()
9179 self._GoReconnect(True)
9180 self._WaitUntilSync()
9182 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9183 result = self.rpc.call_accept_instance(target_node,
9186 self.nodes_ip[target_node])
9188 msg = result.fail_msg
9190 logging.error("Instance pre-migration failed, trying to revert"
9191 " disk status: %s", msg)
9192 self.feedback_fn("Pre-migration failed, aborting")
9193 self._AbortMigration()
9194 self._RevertDiskStatus()
9195 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9196 (instance.name, msg))
9198 self.feedback_fn("* migrating instance to %s" % target_node)
9199 result = self.rpc.call_instance_migrate(source_node, instance,
9200 self.nodes_ip[target_node],
9202 msg = result.fail_msg
9204 logging.error("Instance migration failed, trying to revert"
9205 " disk status: %s", msg)
9206 self.feedback_fn("Migration failed, aborting")
9207 self._AbortMigration()
9208 self._RevertDiskStatus()
9209 raise errors.OpExecError("Could not migrate instance %s: %s" %
9210 (instance.name, msg))
9212 self.feedback_fn("* starting memory transfer")
9213 last_feedback = time.time()
9215 result = self.rpc.call_instance_get_migration_status(source_node,
9217 msg = result.fail_msg
9218 ms = result.payload # MigrationStatus instance
9219 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9220 logging.error("Instance migration failed, trying to revert"
9221 " disk status: %s", msg)
9222 self.feedback_fn("Migration failed, aborting")
9223 self._AbortMigration()
9224 self._RevertDiskStatus()
9226 msg = "hypervisor returned failure"
9227 raise errors.OpExecError("Could not migrate instance %s: %s" %
9228 (instance.name, msg))
9230 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9231 self.feedback_fn("* memory transfer complete")
9234 if (utils.TimeoutExpired(last_feedback,
9235 self._MIGRATION_FEEDBACK_INTERVAL) and
9236 ms.transferred_ram is not None):
9237 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9238 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9239 last_feedback = time.time()
9241 time.sleep(self._MIGRATION_POLL_INTERVAL)
9243 result = self.rpc.call_instance_finalize_migration_src(source_node,
9247 msg = result.fail_msg
9249 logging.error("Instance migration succeeded, but finalization failed"
9250 " on the source node: %s", msg)
9251 raise errors.OpExecError("Could not finalize instance migration: %s" %
9252 msg)
9254 instance.primary_node = target_node
9256 # distribute new instance config to the other nodes
9257 self.cfg.Update(instance, self.feedback_fn)
9259 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9260 instance,
9261 migration_info,
9262 True)
9263 msg = result.fail_msg
9264 if msg:
9265 logging.error("Instance migration succeeded, but finalization failed"
9266 " on the target node: %s", msg)
9267 raise errors.OpExecError("Could not finalize instance migration: %s" %
9268 msg)
9270 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9271 self._EnsureSecondary(source_node)
9272 self._WaitUntilSync()
9273 self._GoStandalone()
9274 self._GoReconnect(False)
9275 self._WaitUntilSync()
9277 # If the instance's disk template is `rbd' or `ext' and there was a
9278 # successful migration, unmap the device from the source node.
9279 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9280 disks = _ExpandCheckDisks(instance, instance.disks)
9281 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9282 for disk in disks:
9283 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9284 msg = result.fail_msg
9285 if msg:
9286 logging.error("Migration was successful, but couldn't unmap the"
9287 " block device %s on source node %s: %s",
9288 disk.iv_name, source_node, msg)
9289 logging.error("You need to unmap the device %s manually on %s",
9290 disk.iv_name, source_node)
9292 self.feedback_fn("* done")
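# Added commentary (not in the original module): for DTS_INT_MIRROR (DRBD)
# templates, the migration above brackets the hypervisor call with a disk
# state dance: _EnsureSecondary/_GoStandalone/_GoReconnect(True)/_WaitUntilSync
# put the DRBD pair into dual-primary mode before the memory copy, and the
# second pass with _GoReconnect(False) demotes the old primary again, so at
# no point do both nodes write to the disks without a live DRBD connection.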
9294 def _ExecFailover(self):
9295 """Failover an instance.
9297 The failover is done by shutting it down on its present node and
9298 starting it on the secondary.
9301 instance = self.instance
9302 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9304 source_node = instance.primary_node
9305 target_node = self.target_node
9307 if instance.admin_state == constants.ADMINST_UP:
9308 self.feedback_fn("* checking disk consistency between source and target")
9309 for (idx, dev) in enumerate(instance.disks):
9310 # for drbd, these are drbd over lvm
9311 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9312 False):
9313 if primary_node.offline:
9314 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9315 " target node %s" %
9316 (primary_node.name, idx, target_node))
9317 elif not self.ignore_consistency:
9318 raise errors.OpExecError("Disk %s is degraded on target node,"
9319 " aborting failover" % idx)
9320 else:
9321 self.feedback_fn("* not checking disk consistency as instance is not"
9322 " running")
9324 self.feedback_fn("* shutting down instance on source node")
9325 logging.info("Shutting down instance %s on node %s",
9326 instance.name, source_node)
9328 result = self.rpc.call_instance_shutdown(source_node, instance,
9329 self.shutdown_timeout)
9330 msg = result.fail_msg
9331 if msg:
9332 if self.ignore_consistency or primary_node.offline:
9333 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9334 " proceeding anyway; please make sure node"
9335 " %s is down; error details: %s",
9336 instance.name, source_node, source_node, msg)
9337 else:
9338 raise errors.OpExecError("Could not shutdown instance %s on"
9339 " node %s: %s" %
9340 (instance.name, source_node, msg))
9342 self.feedback_fn("* deactivating the instance's disks on source node")
9343 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9344 raise errors.OpExecError("Can't shut down the instance's disks")
9346 instance.primary_node = target_node
9347 # distribute new instance config to the other nodes
9348 self.cfg.Update(instance, self.feedback_fn)
9350 # Only start the instance if it's marked as up
9351 if instance.admin_state == constants.ADMINST_UP:
9352 self.feedback_fn("* activating the instance's disks on target node %s" %
9353 target_node)
9354 logging.info("Starting instance %s on node %s",
9355 instance.name, target_node)
9357 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9358 ignore_secondaries=True)
9359 if not disks_ok:
9360 _ShutdownInstanceDisks(self.lu, instance)
9361 raise errors.OpExecError("Can't activate the instance's disks")
9363 self.feedback_fn("* starting the instance on the target node %s" %
9364 target_node)
9365 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9366 False)
9367 msg = result.fail_msg
9368 if msg:
9369 _ShutdownInstanceDisks(self.lu, instance)
9370 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9371 (instance.name, target_node, msg))
9373 def Exec(self, feedback_fn):
9374 """Perform the migration.
9377 self.feedback_fn = feedback_fn
9378 self.source_node = self.instance.primary_node
9380 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9381 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9382 self.target_node = self.instance.secondary_nodes[0]
9383 # Otherwise self.target_node has been populated either
9384 # directly, or through an iallocator.
9386 self.all_nodes = [self.source_node, self.target_node]
9387 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9388 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9390 if self.failover:
9391 feedback_fn("Failover instance %s" % self.instance.name)
9392 self._ExecFailover()
9393 else:
9394 feedback_fn("Migrating instance %s" % self.instance.name)
9396 if self.cleanup:
9397 return self._ExecCleanup()
9398 else:
9399 return self._ExecMigration()
9402 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9403 force_open):
9404 """Wrapper around L{_CreateBlockDevInner}.
9406 This method annotates the root device first.
9409 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9410 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9411 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9412 force_open, excl_stor)
9415 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9416 info, force_open, excl_stor):
9417 """Create a tree of block devices on a given node.
9419 If this device type has to be created on secondaries, create it and
9420 all its children.
9422 If not, just recurse to children keeping the same 'force' value.
9424 @attention: The device has to be annotated already.
9426 @param lu: the lu on whose behalf we execute
9427 @param node: the node on which to create the device
9428 @type instance: L{objects.Instance}
9429 @param instance: the instance which owns the device
9430 @type device: L{objects.Disk}
9431 @param device: the device to create
9432 @type force_create: boolean
9433 @param force_create: whether to force creation of this device; this
9434 will be changed to True whenever we find a device which has the
9435 CreateOnSecondary() attribute
9436 @param info: the extra 'metadata' we should attach to the device
9437 (this will be represented as a LVM tag)
9438 @type force_open: boolean
9439 @param force_open: this parameter will be passed to the
9440 L{backend.BlockdevCreate} function where it specifies
9441 whether we run on primary or not, and it affects both
9442 the child assembly and the device's own Open() execution
9443 @type excl_stor: boolean
9444 @param excl_stor: Whether exclusive_storage is active for the node
9447 if device.CreateOnSecondary():
9448 force_create = True
9450 if device.children:
9451 for child in device.children:
9452 _CreateBlockDevInner(lu, node, instance, child, force_create,
9453 info, force_open, excl_stor)
9455 if not force_create:
9456 return
9458 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9459 excl_stor)
9462 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9463 excl_stor):
9464 """Create a single block device on a given node.
9466 This will not recurse over children of the device, so they must be
9467 created in advance.
9469 @param lu: the lu on whose behalf we execute
9470 @param node: the node on which to create the device
9471 @type instance: L{objects.Instance}
9472 @param instance: the instance which owns the device
9473 @type device: L{objects.Disk}
9474 @param device: the device to create
9475 @param info: the extra 'metadata' we should attach to the device
9476 (this will be represented as a LVM tag)
9477 @type force_open: boolean
9478 @param force_open: this parameter will be passed to the
9479 L{backend.BlockdevCreate} function where it specifies
9480 whether we run on primary or not, and it affects both
9481 the child assembly and the device's own Open() execution
9482 @type excl_stor: boolean
9483 @param excl_stor: Whether exclusive_storage is active for the node
9486 lu.cfg.SetDiskID(device, node)
9487 result = lu.rpc.call_blockdev_create(node, device, device.size,
9488 instance.name, force_open, info,
9489 excl_stor)
9490 result.Raise("Can't create block device %s on"
9491 " node %s for instance %s" % (device, node, instance.name))
9492 if device.physical_id is None:
9493 device.physical_id = result.payload
9496 def _GenerateUniqueNames(lu, exts):
9497 """Generate a suitable LV name.
9499 This will generate a logical volume name for the given instance.
9502 results = []
9503 for val in exts:
9504 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9505 results.append("%s%s" % (new_id, val))
9506 return results
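# Illustrative example (added, not part of the original source): with
# exts=[".disk0", ".disk1"] this returns something like
#   ["d2a49c1e-....disk0", "7b11f3a8-....disk1"]
# note that a fresh unique ID is generated for every extension, not shared.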
9509 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9510 iv_name, p_minor, s_minor):
9511 """Generate a drbd8 device complete with its children.
9514 assert len(vgnames) == len(names) == 2
9515 port = lu.cfg.AllocatePort()
9516 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9518 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9519 logical_id=(vgnames[0], names[0]),
9520 params={})
9521 dev_data.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9522 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9523 size=constants.DRBD_META_SIZE,
9524 logical_id=(vgnames[1], names[1]),
9525 params={})
9526 dev_meta.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9527 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9528 logical_id=(primary, secondary, port,
9529 p_minor, s_minor,
9530 shared_secret),
9531 children=[dev_data, dev_meta],
9532 iv_name=iv_name, params={})
9533 drbd_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9534 return drbd_dev
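# Added commentary (not in the original source): the returned object is a
# small device tree; a minimal sketch of the resulting layout:
#
#   LD_DRBD8  (size, iv_name, logical_id=(primary, secondary, port,
#                                         p_minor, s_minor, shared_secret))
#     +-- LD_LV data volume      (vgnames[0]/names[0], size)
#     +-- LD_LV metadata volume  (vgnames[1]/names[1], DRBD_META_SIZE)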
9537 _DISK_TEMPLATE_NAME_PREFIX = {
9538 constants.DT_PLAIN: "",
9539 constants.DT_RBD: ".rbd",
9540 constants.DT_EXT: ".ext",
9541 }
9544 _DISK_TEMPLATE_DEVICE_TYPE = {
9545 constants.DT_PLAIN: constants.LD_LV,
9546 constants.DT_FILE: constants.LD_FILE,
9547 constants.DT_SHARED_FILE: constants.LD_FILE,
9548 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9549 constants.DT_RBD: constants.LD_RBD,
9550 constants.DT_EXT: constants.LD_EXT,
9551 }
9554 def _GenerateDiskTemplate(
9555 lu, template_name, instance_name, primary_node, secondary_nodes,
9556 disk_info, file_storage_dir, file_driver, base_index,
9557 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9558 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9559 """Generate the entire disk layout for a given template type.
9562 vgname = lu.cfg.GetVGName()
9563 disk_count = len(disk_info)
9564 disks = []
9566 if template_name == constants.DT_DISKLESS:
9567 pass
9568 elif template_name == constants.DT_DRBD8:
9569 if len(secondary_nodes) != 1:
9570 raise errors.ProgrammerError("Wrong template configuration")
9571 remote_node = secondary_nodes[0]
9572 minors = lu.cfg.AllocateDRBDMinor(
9573 [primary_node, remote_node] * len(disk_info), instance_name)
9575 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9576 full_disk_params)
9577 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9579 names = []
9580 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9581 for i in range(disk_count)]):
9582 names.append(lv_prefix + "_data")
9583 names.append(lv_prefix + "_meta")
9584 for idx, disk in enumerate(disk_info):
9585 disk_index = idx + base_index
9586 data_vg = disk.get(constants.IDISK_VG, vgname)
9587 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9588 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9589 disk[constants.IDISK_SIZE],
9590 [data_vg, meta_vg],
9591 names[idx * 2:idx * 2 + 2],
9592 "disk/%d" % disk_index,
9593 minors[idx * 2], minors[idx * 2 + 1])
9594 disk_dev.mode = disk[constants.IDISK_MODE]
9595 disk_dev.name = disk.get(constants.IDISK_NAME, None)
9596 disks.append(disk_dev)
9597 else:
9598 if secondary_nodes:
9599 raise errors.ProgrammerError("Wrong template configuration")
9601 if template_name == constants.DT_FILE:
9602 _req_file_storage()
9603 elif template_name == constants.DT_SHARED_FILE:
9604 _req_shr_file_storage()
9606 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9607 if name_prefix is None:
9608 names = []
9609 else:
9610 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9611 (name_prefix, base_index + i)
9612 for i in range(disk_count)])
9614 if template_name == constants.DT_PLAIN:
9616 def logical_id_fn(idx, _, disk):
9617 vg = disk.get(constants.IDISK_VG, vgname)
9618 return (vg, names[idx])
9620 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9621 logical_id_fn = \
9622 lambda _, disk_index, disk: (file_driver,
9623 "%s/disk%d" % (file_storage_dir,
9624 disk_index))
9625 elif template_name == constants.DT_BLOCK:
9626 logical_id_fn = \
9627 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9628 disk[constants.IDISK_ADOPT])
9629 elif template_name == constants.DT_RBD:
9630 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9631 elif template_name == constants.DT_EXT:
9632 def logical_id_fn(idx, _, disk):
9633 provider = disk.get(constants.IDISK_PROVIDER, None)
9634 if provider is None:
9635 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9636 " not found" % (constants.DT_EXT,
9637 constants.IDISK_PROVIDER))
9638 return (provider, names[idx])
9639 else:
9640 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9642 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9644 for idx, disk in enumerate(disk_info):
9645 params = {}
9646 # Only for the Ext template add disk_info to params
9647 if template_name == constants.DT_EXT:
9648 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9649 for key in disk:
9650 if key not in constants.IDISK_PARAMS:
9651 params[key] = disk[key]
9652 disk_index = idx + base_index
9653 size = disk[constants.IDISK_SIZE]
9654 feedback_fn("* disk %s, size %s" %
9655 (disk_index, utils.FormatUnit(size, "h")))
9656 disk_dev = objects.Disk(dev_type=dev_type, size=size,
9657 logical_id=logical_id_fn(idx, disk_index, disk),
9658 iv_name="disk/%d" % disk_index,
9659 mode=disk[constants.IDISK_MODE],
9660 params=params)
9661 disk_dev.name = disk.get(constants.IDISK_NAME, None)
9662 disk_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9663 disks.append(disk_dev)
9665 return disks
9668 def _GetInstanceInfoText(instance):
9669 """Compute the text that should be added to the disk's metadata.
9672 return "originstname+%s" % instance.name
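# Example (added): for an instance named "web1.example.com" this yields
# "originstname+web1.example.com"; per the docstrings below, that string is
# attached to the instance's volumes as an LVM tag.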
9675 def _CalcEta(time_taken, written, total_size):
9676 """Calculates the ETA based on size written and total size.
9678 @param time_taken: The time taken so far
9679 @param written: amount written so far
9680 @param total_size: The total size of data to be written
9681 @return: The remaining time in seconds
9684 avg_time = time_taken / float(written)
9685 return (total_size - written) * avg_time
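# Worked example (added): if 1024 MiB of a 10240 MiB device were written in
# 60 seconds, the average is 60/1024 seconds per MiB, so the remaining
# (10240 - 1024) MiB give _CalcEta(60.0, 1024, 10240) == 540.0 seconds.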
9688 def _WipeDisks(lu, instance, disks=None):
9689 """Wipes instance disks.
9691 @type lu: L{LogicalUnit}
9692 @param lu: the logical unit on whose behalf we execute
9693 @type instance: L{objects.Instance}
9694 @param instance: the instance whose disks we should create
9695 @type disks: None or list of tuple of (number, L{objects.Disk}, number)
9696 @param disks: Disk details; tuple contains disk index, disk object and the
9697 start offset
9700 node = instance.primary_node
9702 if disks is None:
9703 disks = [(idx, disk, 0)
9704 for (idx, disk) in enumerate(instance.disks)]
9706 for (_, device, _) in disks:
9707 lu.cfg.SetDiskID(device, node)
9709 logging.info("Pausing synchronization of disks of instance '%s'",
9710 instance.name)
9711 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9712 (map(compat.snd, disks),
9713 instance),
9714 True)
9715 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9717 for idx, success in enumerate(result.payload):
9718 if not success:
9719 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9720 " failed", idx, instance.name)
9722 try:
9723 for (idx, device, offset) in disks:
9724 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9725 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9726 wipe_chunk_size = \
9727 int(min(constants.MAX_WIPE_CHUNK,
9728 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
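# Worked example (added; assumes the stock constants MAX_WIPE_CHUNK=1024
# and MIN_WIPE_CHUNK_PERCENT=10): a 4096 MiB disk gives
#   min(1024, 4096 / 100.0 * 10) -> 409 MiB chunks,
# while anything of 10 GiB or more is capped at 1024 MiB per wipe call.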
9730 size = device.size
9731 last_output = 0
9732 start_time = time.time()
9734 if offset == 0:
9735 info_text = ""
9736 else:
9737 info_text = (" (from %s to %s)" %
9738 (utils.FormatUnit(offset, "h"),
9739 utils.FormatUnit(size, "h")))
9741 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9743 logging.info("Wiping disk %d for instance %s on node %s using"
9744 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9746 while offset < size:
9747 wipe_size = min(wipe_chunk_size, size - offset)
9749 logging.debug("Wiping disk %d, offset %s, chunk %s",
9750 idx, offset, wipe_size)
9752 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9753 wipe_size)
9754 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9755 (idx, offset, wipe_size))
9757 now = time.time()
9758 offset += wipe_size
9759 if now - last_output >= 60:
9760 eta = _CalcEta(now - start_time, offset, size)
9761 lu.LogInfo(" - done: %.1f%% ETA: %s",
9762 offset / float(size) * 100, utils.FormatSeconds(eta))
9763 last_output = now
9764 finally:
9765 logging.info("Resuming synchronization of disks for instance '%s'",
9766 instance.name)
9768 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9769 (map(compat.snd, disks),
9770 instance),
9771 False)
9773 if result.fail_msg:
9774 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9775 node, result.fail_msg)
9776 else:
9777 for idx, success in enumerate(result.payload):
9778 if not success:
9779 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9780 " failed", idx, instance.name)
9783 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9784 """Create all disks for an instance.
9786 This abstracts away some work from AddInstance.
9788 @type lu: L{LogicalUnit}
9789 @param lu: the logical unit on whose behalf we execute
9790 @type instance: L{objects.Instance}
9791 @param instance: the instance whose disks we should create
9793 @param to_skip: list of indices to skip
9794 @type target_node: string
9795 @param target_node: if passed, overrides the target node for creation
9797 @return: the success of the creation
9800 info = _GetInstanceInfoText(instance)
9801 if target_node is None:
9802 pnode = instance.primary_node
9803 all_nodes = instance.all_nodes
9804 else:
9805 pnode = target_node
9806 all_nodes = [pnode]
9808 if instance.disk_template in constants.DTS_FILEBASED:
9809 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9810 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9812 result.Raise("Failed to create directory '%s' on"
9813 " node %s" % (file_storage_dir, pnode))
9815 disks_created = []
9816 # Note: this needs to be kept in sync with adding of disks in
9817 # LUInstanceSetParams
9818 for idx, device in enumerate(instance.disks):
9819 if to_skip and idx in to_skip:
9820 continue
9821 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9823 for node in all_nodes:
9824 f_create = node == pnode
9825 try:
9826 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9827 disks_created.append((node, device))
9828 except errors.OpExecError:
9829 logging.warning("Creating disk %s for instance '%s' failed",
9830 idx, instance.name)
9831 for (node, disk) in disks_created:
9832 lu.cfg.SetDiskID(disk, node)
9833 result = lu.rpc.call_blockdev_remove(node, disk)
9834 if result.fail_msg:
9835 logging.warning("Failed to remove newly-created disk %s on node %s:"
9836 " %s", device, node, result.fail_msg)
9837 raise
9840 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9841 """Remove all disks for an instance.
9843 This abstracts away some work from `AddInstance()` and
9844 `RemoveInstance()`. Note that in case some of the devices couldn't
9845 be removed, the removal will continue with the other ones.
9847 @type lu: L{LogicalUnit}
9848 @param lu: the logical unit on whose behalf we execute
9849 @type instance: L{objects.Instance}
9850 @param instance: the instance whose disks we should remove
9851 @type target_node: string
9852 @param target_node: used to override the node on which to remove the disks
9854 @return: the success of the removal
9857 logging.info("Removing block devices for instance %s", instance.name)
9859 all_result = True
9860 ports_to_release = set()
9861 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9862 for (idx, device) in enumerate(anno_disks):
9863 if target_node:
9864 edata = [(target_node, device)]
9865 else:
9866 edata = device.ComputeNodeTree(instance.primary_node)
9867 for node, disk in edata:
9868 lu.cfg.SetDiskID(disk, node)
9869 result = lu.rpc.call_blockdev_remove(node, disk)
9870 if result.fail_msg:
9871 lu.LogWarning("Could not remove disk %s on node %s,"
9872 " continuing anyway: %s", idx, node, result.fail_msg)
9873 if not (result.offline and node != instance.primary_node):
9874 all_result = False
9876 # if this is a DRBD disk, return its port to the pool
9877 if device.dev_type in constants.LDS_DRBD:
9878 ports_to_release.add(device.logical_id[2])
9880 if all_result or ignore_failures:
9881 for port in ports_to_release:
9882 lu.cfg.AddTcpUdpPort(port)
9884 if instance.disk_template in constants.DTS_FILEBASED:
9885 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9886 if target_node:
9887 tgt = target_node
9888 else:
9889 tgt = instance.primary_node
9890 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9891 if result.fail_msg:
9892 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9893 file_storage_dir, instance.primary_node, result.fail_msg)
9894 all_result = False
9896 return all_result
9899 def _ComputeDiskSizePerVG(disk_template, disks):
9900 """Compute disk size requirements in the volume group
9903 def _compute(disks, payload):
9904 """Universal algorithm.
9907 vgs = {}
9908 for disk in disks:
9909 vgs[disk[constants.IDISK_VG]] = \
9910 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9912 return vgs
9914 # Required free disk space as a function of disk and swap space
9915 req_size_dict = {
9916 constants.DT_DISKLESS: {},
9917 constants.DT_PLAIN: _compute(disks, 0),
9918 # 128 MB are added for drbd metadata for each disk
9919 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9920 constants.DT_FILE: {},
9921 constants.DT_SHARED_FILE: {},
9922 }
9924 if disk_template not in req_size_dict:
9925 raise errors.ProgrammerError("Disk template '%s' size requirement"
9926 " is unknown" % disk_template)
9928 return req_size_dict[disk_template]
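# Worked example (added): for disk_template=constants.DT_DRBD8 and
# disks=[{IDISK_VG: "xenvg", IDISK_SIZE: 1024},
#        {IDISK_VG: "xenvg", IDISK_SIZE: 2048}]
# the result is {"xenvg": 1024 + 2048 + 2 * 128} == {"xenvg": 3328},
# i.e. 128 MiB of DRBD metadata per disk, while DT_FILE, DT_SHARED_FILE and
# DT_DISKLESS need no volume-group space at all and yield {}.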
9931 def _FilterVmNodes(lu, nodenames):
9932 """Filters out non-vm_capable nodes from a list.
9934 @type lu: L{LogicalUnit}
9935 @param lu: the logical unit for which we check
9936 @type nodenames: list
9937 @param nodenames: the list of nodes on which we should check
9939 @return: the list of vm-capable nodes
9942 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9943 return [name for name in nodenames if name not in vm_nodes]
9946 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9947 """Hypervisor parameter validation.
9949 This function abstracts the hypervisor parameter validation to be
9950 used in both instance create and instance modify.
9952 @type lu: L{LogicalUnit}
9953 @param lu: the logical unit for which we check
9954 @type nodenames: list
9955 @param nodenames: the list of nodes on which we should check
9956 @type hvname: string
9957 @param hvname: the name of the hypervisor we should use
9958 @type hvparams: dict
9959 @param hvparams: the parameters which we need to check
9960 @raise errors.OpPrereqError: if the parameters are not valid
9963 nodenames = _FilterVmNodes(lu, nodenames)
9965 cluster = lu.cfg.GetClusterInfo()
9966 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9968 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9969 for node in nodenames:
9970 info = hvinfo[node]
9971 if info.offline:
9972 continue
9973 info.Raise("Hypervisor parameter validation failed on node %s" % node)
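# Usage sketch (added; mirrors how instance-level LUs typically call this
# helper; the names pnode/self.secondaries are assumptions, not quotes):
#
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)
#
# i.e. validation runs on every vm_capable node that will host the instance,
# with the cluster-level defaults for the chosen hypervisor filled in first.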
9976 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9977 """OS parameters validation.
9979 @type lu: L{LogicalUnit}
9980 @param lu: the logical unit for which we check
9981 @type required: boolean
9982 @param required: whether the validation should fail if the OS is not
9983 found
9984 @type nodenames: list
9985 @param nodenames: the list of nodes on which we should check
9986 @type osname: string
9987 @param osname: the name of the OS we should use
9988 @type osparams: dict
9989 @param osparams: the parameters which we need to check
9990 @raise errors.OpPrereqError: if the parameters are not valid
9993 nodenames = _FilterVmNodes(lu, nodenames)
9994 result = lu.rpc.call_os_validate(nodenames, required, osname,
9995 [constants.OS_VALIDATE_PARAMETERS],
9996 osparams)
9997 for node, nres in result.items():
9998 # we don't check for offline cases since this should be run only
9999 # against the master node and/or an instance's nodes
10000 nres.Raise("OS Parameters validation failed on node %s" % node)
10001 if not nres.payload:
10002 lu.LogInfo("OS %s not found on node %s, validation skipped",
10003 osname, node)
10006 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
10007 """Wrapper around IAReqInstanceAlloc.
10009 @param op: The instance opcode
10010 @param disks: The computed disks
10011 @param nics: The computed nics
10012 @param beparams: The full filled beparams
10013 @param node_whitelist: List of nodes which should appear as online to the
10014 allocator (unless the node is already marked offline)
10016 @returns: A filled L{iallocator.IAReqInstanceAlloc}
10019 spindle_use = beparams[constants.BE_SPINDLE_USE]
10020 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
10021 disk_template=op.disk_template,
10022 tags=op.tags,
10023 os=op.os_type,
10024 vcpus=beparams[constants.BE_VCPUS],
10025 memory=beparams[constants.BE_MAXMEM],
10026 spindle_use=spindle_use,
10027 disks=disks,
10028 nics=[n.ToDict() for n in nics],
10029 hypervisor=op.hypervisor,
10030 node_whitelist=node_whitelist)
10033 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
10034 """Computes the nics.
10036 @param op: The instance opcode
10037 @param cluster: Cluster configuration object
10038 @param default_ip: The default ip to assign
10039 @param cfg: An instance of the configuration object
10040 @param ec_id: Execution context ID
10042 @returns: The built-up NICs
10045 nics = []
10046 for nic in op.nics:
10047 nic_mode_req = nic.get(constants.INIC_MODE, None)
10048 nic_mode = nic_mode_req
10049 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
10050 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
10052 net = nic.get(constants.INIC_NETWORK, None)
10053 link = nic.get(constants.NIC_LINK, None)
10054 ip = nic.get(constants.INIC_IP, None)
10056 if net is None or net.lower() == constants.VALUE_NONE:
10057 net = None
10058 else:
10059 if nic_mode_req is not None or link is not None:
10060 raise errors.OpPrereqError("If network is given, no mode or link"
10061 " is allowed to be passed",
10062 errors.ECODE_INVAL)
10064 # ip validity checks
10065 if ip is None or ip.lower() == constants.VALUE_NONE:
10066 nic_ip = None
10067 elif ip.lower() == constants.VALUE_AUTO:
10068 if not op.name_check:
10069 raise errors.OpPrereqError("IP address set to auto but name checks"
10070 " have been skipped",
10071 errors.ECODE_INVAL)
10072 nic_ip = default_ip
10073 else:
10074 # We defer pool operations until later, so that the iallocator has
10075 # filled in the instance's node(s)
10076 if ip.lower() == constants.NIC_IP_POOL:
10077 if net is None:
10078 raise errors.OpPrereqError("if ip=pool, parameter network"
10079 " must be passed too",
10080 errors.ECODE_INVAL)
10082 elif not netutils.IPAddress.IsValid(ip):
10083 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
10084 errors.ECODE_INVAL)
10086 nic_ip = ip
10088 # TODO: check the ip address for uniqueness
10089 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
10090 raise errors.OpPrereqError("Routed nic mode requires an ip address",
10091 errors.ECODE_INVAL)
10093 # MAC address verification
10094 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
10095 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10096 mac = utils.NormalizeAndValidateMac(mac)
10098 try:
10099 # TODO: We need to factor this out
10100 cfg.ReserveMAC(mac, ec_id)
10101 except errors.ReservationError:
10102 raise errors.OpPrereqError("MAC address %s already in use"
10103 " in cluster" % mac,
10104 errors.ECODE_NOTUNIQUE)
10106 # Build nic parameters
10107 nicparams = {}
10108 if nic_mode_req:
10109 nicparams[constants.NIC_MODE] = nic_mode
10110 if link:
10111 nicparams[constants.NIC_LINK] = link
10113 check_params = cluster.SimpleFillNIC(nicparams)
10114 objects.NIC.CheckParameterSyntax(check_params)
10115 net_uuid = cfg.LookupNetwork(net)
10116 name = nic.get(constants.INIC_NAME, None)
10117 if name is not None and name.lower() == constants.VALUE_NONE:
10118 name = None
10119 nic_obj = objects.NIC(mac=mac, ip=nic_ip, name=name,
10120 network=net_uuid, nicparams=nicparams)
10121 nic_obj.uuid = cfg.GenerateUniqueID(ec_id)
10122 nics.append(nic_obj)
10124 return nics
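# Illustrative example (added): a request NIC like
#   {constants.INIC_IP: "auto", constants.INIC_MAC: constants.VALUE_AUTO}
# on a cluster whose default NIC mode is bridged becomes an objects.NIC with
# ip=default_ip, mac still "auto" (it is generated later, in CheckPrereq),
# a fresh uuid, and nicparams={} so the cluster defaults keep applying.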
10127 def _ComputeDisks(op, default_vg):
10128 """Computes the instance disks.
10130 @param op: The instance opcode
10131 @param default_vg: The default_vg to assume
10133 @return: The computed disks
10136 disks = []
10137 for disk in op.disks:
10138 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
10139 if mode not in constants.DISK_ACCESS_SET:
10140 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
10141 mode, errors.ECODE_INVAL)
10142 size = disk.get(constants.IDISK_SIZE, None)
10143 if size is None:
10144 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
10145 try:
10146 size = int(size)
10147 except (TypeError, ValueError):
10148 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
10149 errors.ECODE_INVAL)
10151 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10152 if ext_provider and op.disk_template != constants.DT_EXT:
10153 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10154 " disk template, not %s" %
10155 (constants.IDISK_PROVIDER, constants.DT_EXT,
10156 op.disk_template), errors.ECODE_INVAL)
10158 data_vg = disk.get(constants.IDISK_VG, default_vg)
10159 name = disk.get(constants.IDISK_NAME, None)
10160 if name is not None and name.lower() == constants.VALUE_NONE:
10161 name = None
10162 new_disk = {
10163 constants.IDISK_SIZE: size,
10164 constants.IDISK_MODE: mode,
10165 constants.IDISK_VG: data_vg,
10166 constants.IDISK_NAME: name,
10167 }
10169 if constants.IDISK_METAVG in disk:
10170 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10171 if constants.IDISK_ADOPT in disk:
10172 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10174 # For extstorage, demand the `provider' option and add any
10175 # additional parameters (ext-params) to the dict
10176 if op.disk_template == constants.DT_EXT:
10177 if ext_provider:
10178 new_disk[constants.IDISK_PROVIDER] = ext_provider
10179 for key in disk:
10180 if key not in constants.IDISK_PARAMS:
10181 new_disk[key] = disk[key]
10182 else:
10183 raise errors.OpPrereqError("Missing provider for template '%s'" %
10184 constants.DT_EXT, errors.ECODE_INVAL)
10186 disks.append(new_disk)
10188 return disks
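# Illustrative example (added): op.disks=[{constants.IDISK_SIZE: 10240}]
# with default_vg="xenvg" expands to
#   [{IDISK_SIZE: 10240, IDISK_MODE: constants.DISK_RDWR,
#     IDISK_VG: "xenvg", IDISK_NAME: None}]
# (keys abbreviated); METAVG and ADOPT are only copied through when present.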
10191 def _ComputeFullBeParams(op, cluster):
10192 """Computes the full beparams.
10194 @param op: The instance opcode
10195 @param cluster: The cluster config object
10197 @return: The fully filled beparams
10200 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10201 for param, value in op.beparams.iteritems():
10202 if value == constants.VALUE_AUTO:
10203 op.beparams[param] = default_beparams[param]
10204 objects.UpgradeBeParams(op.beparams)
10205 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10206 return cluster.SimpleFillBE(op.beparams)
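# Illustrative example (added): with op.beparams={"vcpus": "auto"}, the
# "auto" value is first replaced by the cluster default, any legacy
# "memory" key is upgraded to minmem/maxmem by objects.UpgradeBeParams,
# and SimpleFillBE then layers the result over the cluster defaults so
# every backend parameter ends up with a concrete final value.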
10209 def _CheckOpportunisticLocking(op):
10210 """Generate error if opportunistic locking is not possible.
10213 if op.opportunistic_locking and not op.iallocator:
10214 raise errors.OpPrereqError("Opportunistic locking is only available in"
10215 " combination with an instance allocator",
10216 errors.ECODE_INVAL)
10219 class LUInstanceCreate(LogicalUnit):
10220 """Create an instance.
10223 HPATH = "instance-add"
10224 HTYPE = constants.HTYPE_INSTANCE
10225 REQ_BGL = False
10227 def CheckArguments(self):
10228 """Check arguments.
10231 # do not require name_check to ease forward/backward compatibility
10233 if self.op.no_install and self.op.start:
10234 self.LogInfo("No-installation mode selected, disabling startup")
10235 self.op.start = False
10236 # validate/normalize the instance name
10237 self.op.instance_name = \
10238 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10240 if self.op.ip_check and not self.op.name_check:
10241 # TODO: make the ip check more flexible and not depend on the name check
10242 raise errors.OpPrereqError("Cannot do IP address check without a name"
10243 " check", errors.ECODE_INVAL)
10245 # check nics' parameter names
10246 for nic in self.op.nics:
10247 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10248 # check that NIC's parameters names are unique and valid
10249 utils.ValidateDeviceNames("NIC", self.op.nics)
10251 # check that disk's names are unique and valid
10252 utils.ValidateDeviceNames("disk", self.op.disks)
10254 cluster = self.cfg.GetClusterInfo()
10255 if not self.op.disk_template in cluster.enabled_disk_templates:
10256 raise errors.OpPrereqError("Cannot create an instance with disk template"
10257 " '%s', because it is not enabled in the"
10258 " cluster. Enabled disk templates are: %s." %
10259 (self.op.disk_template,
10260 ",".join(cluster.enabled_disk_templates)))
10262 # check disks. parameter names and consistent adopt/no-adopt strategy
10263 has_adopt = has_no_adopt = False
10264 for disk in self.op.disks:
10265 if self.op.disk_template != constants.DT_EXT:
10266 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10267 if constants.IDISK_ADOPT in disk:
10268 has_adopt = True
10269 else:
10270 has_no_adopt = True
10271 if has_adopt and has_no_adopt:
10272 raise errors.OpPrereqError("Either all disks are adopted or none is",
10273 errors.ECODE_INVAL)
10274 if has_adopt:
10275 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10276 raise errors.OpPrereqError("Disk adoption is not supported for the"
10277 " '%s' disk template" %
10278 self.op.disk_template,
10279 errors.ECODE_INVAL)
10280 if self.op.iallocator is not None:
10281 raise errors.OpPrereqError("Disk adoption not allowed with an"
10282 " iallocator script", errors.ECODE_INVAL)
10283 if self.op.mode == constants.INSTANCE_IMPORT:
10284 raise errors.OpPrereqError("Disk adoption not allowed for"
10285 " instance import", errors.ECODE_INVAL)
10286 else:
10287 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10288 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10289 " but no 'adopt' parameter given" %
10290 self.op.disk_template,
10291 errors.ECODE_INVAL)
10293 self.adopt_disks = has_adopt
10295 # instance name verification
10296 if self.op.name_check:
10297 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10298 self.op.instance_name = self.hostname1.name
10299 # used in CheckPrereq for ip ping check
10300 self.check_ip = self.hostname1.ip
10301 else:
10302 self.check_ip = None
10304 # file storage checks
10305 if (self.op.file_driver and
10306 not self.op.file_driver in constants.FILE_DRIVER):
10307 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10308 self.op.file_driver, errors.ECODE_INVAL)
10310 if self.op.disk_template == constants.DT_FILE:
10311 opcodes.RequireFileStorage()
10312 elif self.op.disk_template == constants.DT_SHARED_FILE:
10313 opcodes.RequireSharedFileStorage()
10315 ### Node/iallocator related checks
10316 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10318 if self.op.pnode is not None:
10319 if self.op.disk_template in constants.DTS_INT_MIRROR:
10320 if self.op.snode is None:
10321 raise errors.OpPrereqError("The networked disk templates need"
10322 " a mirror node", errors.ECODE_INVAL)
10323 elif self.op.snode:
10324 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10325 " template")
10326 self.op.snode = None
10328 _CheckOpportunisticLocking(self.op)
10330 self._cds = _GetClusterDomainSecret()
10332 if self.op.mode == constants.INSTANCE_IMPORT:
10333 # On import force_variant must be True, because if we forced it at
10334 # initial install, our only chance when importing it back is that it
10335 # works again!
10336 self.op.force_variant = True
10338 if self.op.no_install:
10339 self.LogInfo("No-installation mode has no effect during import")
10341 elif self.op.mode == constants.INSTANCE_CREATE:
10342 if self.op.os_type is None:
10343 raise errors.OpPrereqError("No guest OS specified",
10344 errors.ECODE_INVAL)
10345 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10346 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10347 " installation" % self.op.os_type,
10348 errors.ECODE_STATE)
10349 if self.op.disk_template is None:
10350 raise errors.OpPrereqError("No disk template specified",
10351 errors.ECODE_INVAL)
10353 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10354 # Check handshake to ensure both clusters have the same domain secret
10355 src_handshake = self.op.source_handshake
10356 if not src_handshake:
10357 raise errors.OpPrereqError("Missing source handshake",
10358 errors.ECODE_INVAL)
10360 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10361 src_handshake)
10362 if errmsg:
10363 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10364 errors.ECODE_INVAL)
10366 # Load and check source CA
10367 self.source_x509_ca_pem = self.op.source_x509_ca
10368 if not self.source_x509_ca_pem:
10369 raise errors.OpPrereqError("Missing source X509 CA",
10370 errors.ECODE_INVAL)
10372 try:
10373 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10374 self._cds)
10375 except OpenSSL.crypto.Error, err:
10376 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10377 (err, ), errors.ECODE_INVAL)
10379 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10380 if errcode is not None:
10381 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10382 errors.ECODE_INVAL)
10384 self.source_x509_ca = cert
10386 src_instance_name = self.op.source_instance_name
10387 if not src_instance_name:
10388 raise errors.OpPrereqError("Missing source instance name",
10389 errors.ECODE_INVAL)
10391 self.source_instance_name = \
10392 netutils.GetHostname(name=src_instance_name).name
10394 else:
10395 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10396 self.op.mode, errors.ECODE_INVAL)
10398 def ExpandNames(self):
10399 """ExpandNames for CreateInstance.
10401 Figure out the right locks for instance creation.
10404 self.needed_locks = {}
10406 instance_name = self.op.instance_name
10407 # this is just a preventive check, but someone might still add this
10408 # instance in the meantime, and creation will fail at lock-add time
10409 if instance_name in self.cfg.GetInstanceList():
10410 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10411 instance_name, errors.ECODE_EXISTS)
10413 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10415 if self.op.iallocator:
10416 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10417 # specifying a group on instance creation and then selecting nodes from
10418 # that group
10419 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10420 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10422 if self.op.opportunistic_locking:
10423 self.opportunistic_locks[locking.LEVEL_NODE] = True
10424 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10425 else:
10426 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10427 nodelist = [self.op.pnode]
10428 if self.op.snode is not None:
10429 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10430 nodelist.append(self.op.snode)
10431 self.needed_locks[locking.LEVEL_NODE] = nodelist
10433 # in case of import lock the source node too
10434 if self.op.mode == constants.INSTANCE_IMPORT:
10435 src_node = self.op.src_node
10436 src_path = self.op.src_path
10438 if src_path is None:
10439 self.op.src_path = src_path = self.op.instance_name
10441 if src_node is None:
10442 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10443 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10444 self.op.src_node = None
10445 if os.path.isabs(src_path):
10446 raise errors.OpPrereqError("Importing an instance from a path"
10447 " requires a source node option",
10448 errors.ECODE_INVAL)
10449 else:
10450 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10451 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10452 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10453 if not os.path.isabs(src_path):
10454 self.op.src_path = src_path = \
10455 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10457 self.needed_locks[locking.LEVEL_NODE_RES] = \
10458 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10460 def _RunAllocator(self):
10461 """Run the allocator based on input opcode.
10464 if self.op.opportunistic_locking:
10465 # Only consider nodes for which a lock is held
10466 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10467 else:
10468 node_whitelist = None
10470 #TODO Export network to iallocator so that it chooses a pnode
10471 # in a nodegroup that has the desired network connected to
10472 req = _CreateInstanceAllocRequest(self.op, self.disks,
10473 self.nics, self.be_full,
10474 node_whitelist)
10475 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10477 ial.Run(self.op.iallocator)
10479 if not ial.success:
10480 # When opportunistic locks are used only a temporary failure is generated
10481 if self.op.opportunistic_locking:
10482 ecode = errors.ECODE_TEMP_NORES
10483 else:
10484 ecode = errors.ECODE_NORES
10486 raise errors.OpPrereqError("Can't compute nodes using"
10487 " iallocator '%s': %s" %
10488 (self.op.iallocator, ial.info),
10489 ecode)
10491 self.op.pnode = ial.result[0]
10492 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10493 self.op.instance_name, self.op.iallocator,
10494 utils.CommaJoin(ial.result))
10496 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10498 if req.RequiredNodes() == 2:
10499 self.op.snode = ial.result[1]
10501 def BuildHooksEnv(self):
10502 """Build hooks env.
10504 This runs on master, primary and secondary nodes of the instance.
10507 env = {
10508 "ADD_MODE": self.op.mode,
10509 }
10510 if self.op.mode == constants.INSTANCE_IMPORT:
10511 env["SRC_NODE"] = self.op.src_node
10512 env["SRC_PATH"] = self.op.src_path
10513 env["SRC_IMAGES"] = self.src_images
10515 env.update(_BuildInstanceHookEnv(
10516 name=self.op.instance_name,
10517 primary_node=self.op.pnode,
10518 secondary_nodes=self.secondaries,
10519 status=self.op.start,
10520 os_type=self.op.os_type,
10521 minmem=self.be_full[constants.BE_MINMEM],
10522 maxmem=self.be_full[constants.BE_MAXMEM],
10523 vcpus=self.be_full[constants.BE_VCPUS],
10524 nics=_NICListToTuple(self, self.nics),
10525 disk_template=self.op.disk_template,
10526 disks=[(d[constants.IDISK_NAME], d[constants.IDISK_SIZE],
10527 d[constants.IDISK_MODE]) for d in self.disks],
10528 bep=self.be_full,
10529 hvp=self.hv_full,
10530 hypervisor_name=self.op.hypervisor,
10531 tags=self.op.tags,
10532 ))
10534 return env
10536 def BuildHooksNodes(self):
10537 """Build hooks nodes.
10540 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10541 return (nl, nl)
10543 def _ReadExportInfo(self):
10544 """Reads the export information from disk.
10546 It will override the opcode source node and path with the actual
10547 information, if these two were not specified before.
10549 @return: the export information
10552 assert self.op.mode == constants.INSTANCE_IMPORT
10554 src_node = self.op.src_node
10555 src_path = self.op.src_path
10557 if src_node is None:
10558 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10559 exp_list = self.rpc.call_export_list(locked_nodes)
10560 found = False
10561 for node in exp_list:
10562 if exp_list[node].fail_msg:
10563 continue
10564 if src_path in exp_list[node].payload:
10565 found = True
10566 self.op.src_node = src_node = node
10567 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10568 src_path)
10569 break
10570 if not found:
10571 raise errors.OpPrereqError("No export found for relative path %s" %
10572 src_path, errors.ECODE_INVAL)
10574 _CheckNodeOnline(self, src_node)
10575 result = self.rpc.call_export_info(src_node, src_path)
10576 result.Raise("No export or invalid export found in dir %s" % src_path)
10578 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10579 if not export_info.has_section(constants.INISECT_EXP):
10580 raise errors.ProgrammerError("Corrupted export config",
10581 errors.ECODE_ENVIRON)
10583 ei_version = export_info.get(constants.INISECT_EXP, "version")
10584 if (int(ei_version) != constants.EXPORT_VERSION):
10585 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10586 (ei_version, constants.EXPORT_VERSION),
10587 errors.ECODE_ENVIRON)
10589 return export_info
10590 def _ReadExportParams(self, einfo):
10591 """Use export parameters as defaults.
10593 In case the opcode doesn't specify (as in override) some instance
10594 parameters, then try to use them from the export information, if
10595 that declares them.
10598 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10600 if self.op.disk_template is None:
10601 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10602 self.op.disk_template = einfo.get(constants.INISECT_INS,
10603 "disk_template")
10604 if self.op.disk_template not in constants.DISK_TEMPLATES:
10605 raise errors.OpPrereqError("Disk template specified in configuration"
10606 " file is not one of the allowed values:"
10607 " %s" %
10608 " ".join(constants.DISK_TEMPLATES),
10609 errors.ECODE_INVAL)
10610 else:
10611 raise errors.OpPrereqError("No disk template specified and the export"
10612 " is missing the disk_template information",
10613 errors.ECODE_INVAL)
10615 if not self.op.disks:
10616 disks = []
10617 # TODO: import the disk iv_name too
10618 for idx in range(constants.MAX_DISKS):
10619 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10620 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10621 disks.append({constants.IDISK_SIZE: disk_sz})
10622 self.op.disks = disks
10623 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10624 raise errors.OpPrereqError("No disk info specified and the export"
10625 " is missing the disk information",
10626 errors.ECODE_INVAL)
10628 if not self.op.nics:
10629 nics = []
10630 for idx in range(constants.MAX_NICS):
10631 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10632 ndict = {}
10633 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10634 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10635 ndict[name] = v
10636 nics.append(ndict)
10637 else:
10638 break
10639 self.op.nics = nics
10641 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10642 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10644 if (self.op.hypervisor is None and
10645 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10646 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10648 if einfo.has_section(constants.INISECT_HYP):
10649 # use the export parameters but do not override the ones
10650 # specified by the user
10651 for name, value in einfo.items(constants.INISECT_HYP):
10652 if name not in self.op.hvparams:
10653 self.op.hvparams[name] = value
10655 if einfo.has_section(constants.INISECT_BEP):
10656 # use the parameters, without overriding
10657 for name, value in einfo.items(constants.INISECT_BEP):
10658 if name not in self.op.beparams:
10659 self.op.beparams[name] = value
10660 # Compatibility for the old "memory" be param
10661 if name == constants.BE_MEMORY:
10662 if constants.BE_MAXMEM not in self.op.beparams:
10663 self.op.beparams[constants.BE_MAXMEM] = value
10664 if constants.BE_MINMEM not in self.op.beparams:
10665 self.op.beparams[constants.BE_MINMEM] = value
10666 else:
10667 # try to read the parameters old style, from the main section
10668 for name in constants.BES_PARAMETERS:
10669 if (name not in self.op.beparams and
10670 einfo.has_option(constants.INISECT_INS, name)):
10671 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10673 if einfo.has_section(constants.INISECT_OSP):
10674 # use the parameters, without overriding
10675 for name, value in einfo.items(constants.INISECT_OSP):
10676 if name not in self.op.osparams:
10677 self.op.osparams[name] = value
10679 def _RevertToDefaults(self, cluster):
10680 """Revert the instance parameters to the default values.
10684 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10685 for name in self.op.hvparams.keys():
10686 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10687 del self.op.hvparams[name]
10689 be_defs = cluster.SimpleFillBE({})
10690 for name in self.op.beparams.keys():
10691 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10692 del self.op.beparams[name]
10694 nic_defs = cluster.SimpleFillNIC({})
10695 for nic in self.op.nics:
10696 for name in constants.NICS_PARAMETERS:
10697 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10698 del nic[name]
10700 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10701 for name in self.op.osparams.keys():
10702 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10703 del self.op.osparams[name]
10705 def _CalculateFileStorageDir(self):
10706 """Calculate final instance file storage dir.
10709 # file storage dir calculation/check
10710 self.instance_file_storage_dir = None
10711 if self.op.disk_template in constants.DTS_FILEBASED:
10712 # build the full file storage dir path
10713 joinargs = []
10715 if self.op.disk_template == constants.DT_SHARED_FILE:
10716 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10717 else:
10718 get_fsd_fn = self.cfg.GetFileStorageDir
10720 cfg_storagedir = get_fsd_fn()
10721 if not cfg_storagedir:
10722 raise errors.OpPrereqError("Cluster file storage dir not defined",
10723 errors.ECODE_STATE)
10724 joinargs.append(cfg_storagedir)
10726 if self.op.file_storage_dir is not None:
10727 joinargs.append(self.op.file_storage_dir)
10729 joinargs.append(self.op.instance_name)
10731 # pylint: disable=W0142
10732 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10734 def CheckPrereq(self): # pylint: disable=R0914
10735 """Check prerequisites.
10738 self._CalculateFileStorageDir()
10740 if self.op.mode == constants.INSTANCE_IMPORT:
10741 export_info = self._ReadExportInfo()
10742 self._ReadExportParams(export_info)
10743 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10744 else:
10745 self._old_instance_name = None
10747 if (not self.cfg.GetVGName() and
10748 self.op.disk_template not in constants.DTS_NOT_LVM):
10749 raise errors.OpPrereqError("Cluster does not support lvm-based"
10750 " instances", errors.ECODE_STATE)
10752 if (self.op.hypervisor is None or
10753 self.op.hypervisor == constants.VALUE_AUTO):
10754 self.op.hypervisor = self.cfg.GetHypervisorType()
10756 cluster = self.cfg.GetClusterInfo()
10757 enabled_hvs = cluster.enabled_hypervisors
10758 if self.op.hypervisor not in enabled_hvs:
10759 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10761 (self.op.hypervisor, ",".join(enabled_hvs)),
10762 errors.ECODE_STATE)
10764 # Check tag validity
10765 for tag in self.op.tags:
10766 objects.TaggableObject.ValidateTag(tag)
10768 # check hypervisor parameter syntax (locally)
10769 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10770 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10771 self.op.hvparams)
10772 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10773 hv_type.CheckParameterSyntax(filled_hvp)
10774 self.hv_full = filled_hvp
10775 # check that we don't specify global parameters on an instance
10776 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10777 "instance", "cluster")
10779 # fill and remember the beparams dict
10780 self.be_full = _ComputeFullBeParams(self.op, cluster)
10782 # build os parameters
10783 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10785 # now that hvp/bep are in final format, let's reset to defaults,
10786 # if told to do so
10787 if self.op.identify_defaults:
10788 self._RevertToDefaults(cluster)
10790 # NIC buildup
10791 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10792 self.proc.GetECId())
10794 # disk checks/pre-build
10795 default_vg = self.cfg.GetVGName()
10796 self.disks = _ComputeDisks(self.op, default_vg)
10798 if self.op.mode == constants.INSTANCE_IMPORT:
10799 disk_images = []
10800 for idx in range(len(self.disks)):
10801 option = "disk%d_dump" % idx
10802 if export_info.has_option(constants.INISECT_INS, option):
10803 # FIXME: are the old os-es, disk sizes, etc. useful?
10804 export_name = export_info.get(constants.INISECT_INS, option)
10805 image = utils.PathJoin(self.op.src_path, export_name)
10806 disk_images.append(image)
10808 disk_images.append(False)
10810 self.src_images = disk_images
10812 if self.op.instance_name == self._old_instance_name:
10813 for idx, nic in enumerate(self.nics):
10814 if nic.mac == constants.VALUE_AUTO:
10815 nic_mac_ini = "nic%d_mac" % idx
10816 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10818 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10820 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10821 if self.op.ip_check:
10822 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10823 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10824 (self.check_ip, self.op.instance_name),
10825 errors.ECODE_NOTUNIQUE)
10827 #### mac address generation
10828 # By generating here the mac address both the allocator and the hooks get
10829 # the real final mac address rather than the 'auto' or 'generate' value.
10830 # There is a race condition between the generation and the instance object
10831 # creation, which means that we know the mac is valid now, but we're not
10832 # sure it will be when we actually add the instance. If things go bad
10833 # adding the instance will abort because of a duplicate mac, and the
10834 # creation job will fail.
10835 for nic in self.nics:
10836 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10837 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10841 if self.op.iallocator is not None:
10842 self._RunAllocator()
10844 # Release all unneeded node locks
10845 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10846 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10847 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10848 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10850 assert (self.owned_locks(locking.LEVEL_NODE) ==
10851 self.owned_locks(locking.LEVEL_NODE_RES)), \
10852 "Node locks differ from node resource locks"
10854 #### node related checks
10856 # check primary node
10857 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10858 assert self.pnode is not None, \
10859 "Cannot retrieve locked node %s" % self.op.pnode
10860 if pnode.offline:
10861 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10862 pnode.name, errors.ECODE_STATE)
10863 if pnode.drained:
10864 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10865 pnode.name, errors.ECODE_STATE)
10866 if not pnode.vm_capable:
10867 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10868 " '%s'" % pnode.name, errors.ECODE_STATE)
10870 self.secondaries = []
10872 # Fill in any IPs from IP pools. This must happen here, because we need to
10873 # know the nic's primary node, as specified by the iallocator
10874 for idx, nic in enumerate(self.nics):
10875 net_uuid = nic.network
10876 if net_uuid is not None:
10877 nobj = self.cfg.GetNetwork(net_uuid)
10878 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10879 if netparams is None:
10880 raise errors.OpPrereqError("No netparams found for network"
10881 " %s. Probably not connected to"
10882 " node's %s nodegroup" %
10883 (nobj.name, self.pnode.name),
10884 errors.ECODE_INVAL)
10885 self.LogInfo("NIC/%d inherits netparams %s" %
10886 (idx, netparams.values()))
10887 nic.nicparams = dict(netparams)
10888 if nic.ip is not None:
10889 if nic.ip.lower() == constants.NIC_IP_POOL:
10890 try:
10891 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10892 except errors.ReservationError:
10893 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10894 " from the address pool" % idx,
10895 errors.ECODE_STATE)
10896 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10897 else:
10898 try:
10899 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10900 except errors.ReservationError:
10901 raise errors.OpPrereqError("IP address %s already in use"
10902 " or does not belong to network %s" %
10903 (nic.ip, nobj.name),
10904 errors.ECODE_NOTUNIQUE)
10906 # net is None, ip None or given
10907 elif self.op.conflicts_check:
10908 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10910 # mirror node verification
10911 if self.op.disk_template in constants.DTS_INT_MIRROR:
10912 if self.op.snode == pnode.name:
10913 raise errors.OpPrereqError("The secondary node cannot be the"
10914 " primary node", errors.ECODE_INVAL)
10915 _CheckNodeOnline(self, self.op.snode)
10916 _CheckNodeNotDrained(self, self.op.snode)
10917 _CheckNodeVmCapable(self, self.op.snode)
10918 self.secondaries.append(self.op.snode)
10920 snode = self.cfg.GetNodeInfo(self.op.snode)
10921 if pnode.group != snode.group:
10922 self.LogWarning("The primary and secondary nodes are in two"
10923 " different node groups; the disk parameters"
10924 " from the first disk's node group will be"
10925 " used")
10927 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10928 nodes = [pnode]
10929 if self.op.disk_template in constants.DTS_INT_MIRROR:
10930 nodes.append(snode)
10931 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10932 if compat.any(map(has_es, nodes)):
10933 raise errors.OpPrereqError("Disk template %s not supported with"
10934 " exclusive storage" % self.op.disk_template,
10935 errors.ECODE_STATE)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()
      elif self.op.disk_template == constants.DT_EXT:
        # FIXME: Function that checks prereqs if needed
        pass
      else:
        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))
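
      # The lv_list RPC payload maps "vg/name" to (size, inactive, online)
      # triples, which is why the online flag is tested with index [2] above
      # and the size (in MiB) is taken from index [0] here.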

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (utils.CommaJoin(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
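
      # As in the LVM adoption path above, adopted devices keep their
      # existing size: bdev_sizes is assumed to report each device's size in
      # mebibytes, and the value is stored unchanged as IDISK_SIZE.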

    # Verify instance specs
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
    ispec = {
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
                                  for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,
      }

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
                                               self.op.disk_template)
    if not self.op.ignore_ipolicy and res:
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
             (pnode.group, group_info.name, utils.CommaJoin(res)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
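
    # _ComputeIPolicyInstanceSpecViolation returns a (possibly empty) list of
    # human-readable violation descriptions, which is why its result can be
    # passed to utils.CommaJoin directly above.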

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    #TODO: _CheckExtParams (remotely)
    # Check parameters for extstorage

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"
    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    # This is ugly, but we have a chicken-and-egg problem here:
    # we can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    node = self.cfg.GetNodeInfo(pnode_name)
    nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.cfg.GetGroupDiskParams(nodegroup))

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
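
        # rename_to collected the newly generated names while the 'fake'
        # disks were re-pointed at the user-supplied LVs, so this single
        # blockdev_rename call moves every adopted LV to its final name;
        # afterwards the adopted volumes look like freshly created ones.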
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed")
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might not have done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              (iobj.disks,
                                                               iobj), False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        rename_from = self._old_instance_name

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make
        # a connection. In some cases stopping an instance can take a long
        # time, hence the shutdown timeout is added to the connection
        # timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        rename_from = self.source_instance_name

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

      assert iobj.name == instance

      # Run rename script on newly imported instance; for freshly created
      # instances there is nothing to rename from
      if self.op.mode != constants.INSTANCE_CREATE:
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   rename_from,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("An iallocator is not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    _CheckOpportunisticLocking(self.op)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      # iallocator will select nodes and even if no iallocator is used,
      # collisions with LUInstanceCreate should be avoided
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    cluster = self.cfg.GetClusterInfo()
    default_vg = self.cfg.GetVGName()
    ec_id = self.proc.GetECId()

    if self.op.opportunistic_locking:
      # Only consider nodes for which a lock is held
      node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
    else:
      node_whitelist = None

    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, ec_id),
                                         _ComputeFullBeParams(op, cluster),
                                         node_whitelist)
             for op in self.op.instances]

    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.ia_result = ial.result

    if self.op.dry_run:
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Constructs the partial result.

    """
    (allocatable, failed) = self.ia_result
    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    """
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    jobs = []
    for (name, nodes) in allocatable:
      op = op2inst.pop(name)

      if len(nodes) > 1:
        (op.pnode, op.snode) = nodes
      else:
        (op.pnode,) = nodes

      jobs.append([op])

    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())


def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    remote_node = self.op.remote_node
    ialloc = self.op.iallocator
    if self.op.mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and ialloc is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)
      else:
        _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

    elif remote_node is not None or ialloc is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
          [node_name
           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

        self._LockInstancesNodes()

    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, early_release, ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    req = iallocator.IAReqRelocate(name=instance_name,
                                   relocate_from=list(relocate_from))
    ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    remote_node_name = ial.result[0]
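
    # ial.result for an IAReqRelocate request is a list containing exactly
    # the one chosen node name, hence the [0] above.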

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, instance)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

    # If not specified, all disks should be replaced
    if not self.disks:
      self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between an
    # internally submitted opcode and an external one. We should fix that.
    if self.remote_node_info:
      # We change the node, so let's verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              new_group_info)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              self.cfg, ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    # Release any owned node group
    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
        ("Incorrect node locks, owning %s, expected %s" %
         (owned_nodes, self.node_secondary_ip.keys()))
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
        "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
        "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement for instance '%s'" %
                  self.instance.name)
      return

    feedback_fn("Replacing disk(s) %s for instance '%s'" %
                (utils.CommaJoin(self.disks), self.instance.name))
    feedback_fn("Current primary node: %s" % self.instance.primary_node)
    feedback_fn("Current secondary node: %s" %
                utils.CommaJoin(self.instance.secondary_nodes))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, self.instance)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
                                   on_primary, ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)
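
      # lv_names only provides the ".disk<N>_data" / ".disk<N>_meta"
      # suffixes; _GenerateUniqueNames prepends a freshly generated unique
      # ID to build the final LV names.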

      (data_disk, meta_disk) = dev.children
      vg_data = data_disk.logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]),
                             params=data_disk.params)
      vg_meta = meta_disk.logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV,
                             size=constants.DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]),
                             params=meta_disk.params)

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
      excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
                             _GetInstanceInfoText(self.instance), False,
                             excl_stor)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = _BlockdevFind(self, node_name, dev, self.instance)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s", name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s", msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)
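
      # e.g. ren_fn would turn an LV "<uuid>.disk0_data" into
      # "<uuid>.disk0_data_replaced-1416925805" (the suffix being the
      # current time_t); the renamed LVs stay around until the removal step.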

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node,
                                                  (dev, self.instance), new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("manually clean up the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
    for idx, dev in enumerate(disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
                             True, _GetInstanceInfoText(self.instance), False,
                             excl_stor)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
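
      # A DRBD8 logical_id is the 6-tuple (nodeA, nodeB, port, minorA,
      # minorB, secret); new_alone_id omits the port so the new device first
      # comes up without networking, while new_net_id carries the full
      # network configuration for the attach performed later in this step.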

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size,
                              params={})
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
                                             self.cfg)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
                              anno_new_drbd,
                              _GetInstanceInfoText(self.instance), False,
                              excl_stor)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node,
                                            (dev, self.instance)).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)
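
  # The two assertions above keep the mapping total in both directions, so
  # adding a new evacuation mode without extending _MODE2IALLOCATOR fails
  # at import time instead of at runtime.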
12496 def CheckArguments(self):
12497 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12499 def ExpandNames(self):
12500 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12502 if self.op.remote_node is not None:
12503 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12504 assert self.op.remote_node
12506 if self.op.remote_node == self.op.node_name:
12507 raise errors.OpPrereqError("Can not use evacuated node as a new"
12508 " secondary node", errors.ECODE_INVAL)
12510 if self.op.mode != constants.NODE_EVAC_SEC:
12511 raise errors.OpPrereqError("Without the use of an iallocator only"
12512 " secondary instances can be evacuated",
12513 errors.ECODE_INVAL)
12516 self.share_locks = _ShareAll()
12517 self.needed_locks = {
12518 locking.LEVEL_INSTANCE: [],
12519 locking.LEVEL_NODEGROUP: [],
12520 locking.LEVEL_NODE: [],
12523 # Determine nodes (via group) optimistically, needs verification once locks
12524 # have been acquired
12525 self.lock_nodes = self._DetermineNodes()
12527 def _DetermineNodes(self):
12528 """Gets the list of nodes to operate on.
12531 if self.op.remote_node is None:
12532 # Iallocator will choose any node(s) in the same group
12533 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12535 group_nodes = frozenset([self.op.remote_node])
12537 # Determine nodes to be locked
12538 return set([self.op.node_name]) | group_nodes
12540 def _DetermineInstances(self):
12541 """Builds list of instances to operate on.
12544 assert self.op.mode in constants.NODE_EVAC_MODES
12546 if self.op.mode == constants.NODE_EVAC_PRI:
12547 # Primary instances only
12548 inst_fn = _GetNodePrimaryInstances
12549 assert self.op.remote_node is None, \
12550 "Evacuating primary instances requires iallocator"
12551 elif self.op.mode == constants.NODE_EVAC_SEC:
12552 # Secondary instances only
12553 inst_fn = _GetNodeSecondaryInstances
12556 assert self.op.mode == constants.NODE_EVAC_ALL
12557 inst_fn = _GetNodeInstances
12558 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12560 raise errors.OpPrereqError("Due to an issue with the iallocator"
12561 " interface it is not possible to evacuate"
12562 " all instances at once; specify explicitly"
12563 " whether to evacuate primary or secondary"
12565 errors.ECODE_INVAL)
12567 return inst_fn(self.cfg, self.op.node_name)
12569 def DeclareLocks(self, level):
12570 if level == locking.LEVEL_INSTANCE:
12571 # Lock instances optimistically, needs verification once node and group
12572 # locks have been acquired
12573 self.needed_locks[locking.LEVEL_INSTANCE] = \
12574 set(i.name for i in self._DetermineInstances())
12576 elif level == locking.LEVEL_NODEGROUP:
12577 # Lock node groups for all potential target nodes optimistically, needs
12578 # verification once nodes have been acquired
12579 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12580 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12582 elif level == locking.LEVEL_NODE:
12583 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary node" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
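
  # Example (sketch, values invented): with a remote node given, Exec above
  # builds one single-opcode job per instance, e.g. for two instances:
  #   jobs = [
  #     [opcodes.OpInstanceReplaceDisks(instance_name="inst1", ...)],
  #     [opcodes.OpInstanceReplaceDisks(instance_name="inst2", ...)],
  #     ]
  # mcpu._ProcessResult then submits each inner list as a separate job and
  # includes the resulting job IDs in the opcode result (see ResultWithJobs).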


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
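
# Sketch of the iallocator result consumed above (shape per the unpacking,
# concrete values invented):
#   alloc_result = (
#     [("inst1", "group1", ["node2", "node3"])],  # moved
#     [("inst2", "disk missing")],                # failed
#     [[op1_dict, op2_dict]],                     # jobs, serialized opcodes
#     )
# Each serialized opcode is revived with opcodes.OpCode.LoadOpCode and then
# gets the early_release flag applied via _SetOpEarlyRelease.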


def _DiskSizeInBytesToMebibytes(lu, size):
  """Converts a disk size in bytes to mebibytes.

  Warns and rounds up if the size isn't an even multiple of 1 MiB.

  """
  (mib, remainder) = divmod(size, 1024 * 1024)

  if remainder != 0:
    lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
                  " to not overwrite existing data (%s bytes will not be"
                  " wiped)", (1024 * 1024) - remainder)
    mib += 1

  return mib


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if self.op.absolute:
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      if self.delta < 0:
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
    else:
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      if self.delta < 0:
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)

    self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
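
  # Illustrative arithmetic (not executed): for a 10240 MiB disk, "absolute"
  # mode with amount=20480 gives delta = 20480 - 10240 = 10240 MiB, while
  # relative mode with amount=1024 gives target = 10240 + 1024 = 11264 MiB.
  # In both modes a negative delta is rejected by the checks above.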

  def _CheckDiskSpace(self, nodenames, req_vgspace):
    template = self.instance.disk_template
    if template not in constants.DTS_NO_FREE_SPACE_CHECK:
      # TODO: check the free disk space for file, when that feature will be
      # implemented
      nodes = map(self.cfg.GetNodeInfo, nodenames)
      es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
                        nodes)
      if es_nodes:
        # With exclusive storage we need to do something smarter than just
        # looking at free space; for now, let's simply abort the operation.
        raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
                                   " is enabled", errors.ECODE_STATE)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           True, True)
      result.Raise("Dry-run grow request failed to node %s" % node)

    if wipe_disks:
      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
    else:
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                           False, True)
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
                                         False, False)
    result.Raise("Grow request failed to node %s" % node)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    assert wipe_disks ^ (old_disk_size is None)

    if wipe_disks:
      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.LogWarning("Disk syncing has not returned a good status; check"
                        " the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.needed_locks[locking.LEVEL_NETWORK] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      if level == locking.LEVEL_NODEGROUP:
        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    for group_uuid in
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

      elif level == locking.LEVEL_NETWORK:
        self.needed_locks[locking.LEVEL_NETWORK] = \
          frozenset(net_uuid
                    for instance_name in owned_instances
                    for net_uuid in
                      self.cfg.GetInstanceNetworks(instance_name))

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
                                None)
    else:
      assert not (owned_instances or owned_groups or
                  owned_nodes or owned_networks)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      "name": dev.name,
      "uuid": dev.uuid,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    group2name_fn = lambda uuid: groups[uuid].name
    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification
  @rtype: list

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  return [(op, idx, params, fn()) for (op, idx, params) in mods]
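
# Usage sketch (hypothetical values): preparing NIC modifications with one
# private data object per entry:
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"mac": "auto"})],
#                               _InstNicModPrivate)
#   # -> [(constants.DDM_ADD, -1, {"mac": "auto"}, <_InstNicModPrivate>)]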


def GetItemFromContainer(identifier, kind, container):
  """Return the item referred to by the identifier.

  @type identifier: string
  @param identifier: Item index or name or UUID
  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to get the item from

  """
  # Index
  try:
    idx = int(identifier)
    if idx == -1:
      # Append
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx
    return (absidx, container[idx])
  except ValueError:
    pass

  # Name or UUID
  for idx, item in enumerate(container):
    if item.uuid == identifier or item.name == identifier:
      return (idx, item)

  raise errors.OpPrereqError("Cannot find %s with identifier %s" %
                             (kind, identifier), errors.ECODE_NOENT)
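
# Resolution examples (illustrative): for a container of three items,
#   GetItemFromContainer("1", "disk", container)  -> (1, container[1])
#   GetItemFromContainer("-1", "disk", container) -> (2, container[-1])
# and a non-numeric identifier falls through to the name/UUID scan, raising
# OpPrereqError if nothing matches.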


#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))


def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, identifier, params, private) in mods:
    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      # When adding an item, identifier can only be an index
      try:
        idx = int(identifier)
      except ValueError:
        raise errors.OpPrereqError("Only positive integer or -1 is accepted as"
                                   " identifier for %s" % constants.DDM_ADD,
                                   errors.ECODE_INVAL)
      if idx == -1:
        addidx = len(container)
      else:
        if idx < 0:
          raise IndexError("Not accepting negative indices other than -1")
        elif idx > len(container):
          raise IndexError("Got %s index %s, but there are only %s" %
                           (kind, idx, len(container)))
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      (absidx, item) = GetItemFromContainer(identifier, kind, container)

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
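
# End-to-end sketch (invented parameters): removing the second disk while
# recording the change descriptions:
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_REMOVE, "1", {})], None)
#   ApplyContainerMods("disk", disks, chgdesc, mods, None, None, remove_fn)
#   # chgdesc == [("disk/1", "remove")] and disks has one item fewer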


def _UpdateIvNames(base_index, disks):
  """Updates the C{iv_name} attribute of disks.

  @type disks: list of L{objects.Disk}

  """
  for (idx, disk) in enumerate(disks):
    disk.iv_name = "disk/%s" % (base_index + idx, )


class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    self.params = None
    self.filled = None


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
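
  # Format upgrade example (illustrative): the old two-element format
  #   [(constants.DDM_ADD, {"size": 1024})]
  # becomes
  #   [(constants.DDM_ADD, -1, {"size": 1024})]
  # while a bare index entry such as [("0", {"mode": "ro"})] is upgraded to
  #   [(constants.DDM_MODIFY, "0", {"mode": "ro"})]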

  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      if key_types:
        utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size
      name = params.get(constants.IDISK_NAME, None)
      if name is not None and name.lower() == constants.VALUE_NONE:
        params[constants.IDISK_NAME] = None

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if len(params) > 2:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)
      name = params.get(constants.IDISK_NAME, None)
      if name is not None and name.lower() == constants.VALUE_NONE:
        params[constants.IDISK_NAME] = None
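
  # Example (hypothetical input): for DDM_ADD, {"size": "1024"} passes and is
  # normalized in place to {"size": 1024, "mode": constants.DISK_RDWR}; a
  # DDM_MODIFY entry carrying "size" is rejected and the caller is pointed
  # at grow-disk instead.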

  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      name = params.get(constants.INIC_NAME, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if name is not None and name.lower() == constants.VALUE_NONE:
        params[constants.INIC_NAME] = None
      if req_net is not None:
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
          req_net = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If a network is given, mode or link"
                                     " must not be specified",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if ip is not None:
        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
        else:
          if ip.lower() == constants.NIC_IP_POOL:
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         " cannot be none",
                                         errors.ECODE_INVAL)
          else:
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
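
  # Example (hypothetical input): for DDM_ADD with no MAC given, params is
  # normalized to include {"mac": constants.VALUE_AUTO}; "ip": "pool" is only
  # accepted when a network is supplied, and any literal MAC is validated
  # with utils.NormalizeAndValidateMac.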

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem or
            self.op.pnode):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
                            "hypervisor", "instance", "cluster")

    self.op.disks = self._UpgradeDiskNicMods(
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = self._UpgradeDiskNicMods(
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)

    if self.op.pnode:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    # Lock the node group to look up the ipolicy
    self.share_locks[locking.LEVEL_NODEGROUP] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      # Acquire locks for the instance's nodegroups optimistically. Needs
      # to be verified in CheckPrereq
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
    elif level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        n = copy.deepcopy(nic)
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
        n.nicparams = nicparams
        nics.append(_NICToTuple(self, n))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
                              old_params, cluster, pnode):

    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    req_link = update_params_dict.get(constants.NIC_LINK, None)
    req_mode = update_params_dict.get(constants.NIC_MODE, None)

    new_net_uuid = None
    new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
    if new_net_uuid_or_name:
      new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
      new_net_obj = self.cfg.GetNetwork(new_net_uuid)

    if old_net_uuid:
      old_net_obj = self.cfg.GetNetwork(old_net_uuid)

    if new_net_uuid:
      netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
      if not netparams:
        raise errors.OpPrereqError("No netparams found for the network"
                                   " %s, probably not connected" %
                                   new_net_obj.name, errors.ECODE_INVAL)
      new_params = dict(netparams)
    else:
      new_params = _GetUpdatedParams(old_params, update_params_dict)

    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    elif new_mode == constants.NIC_MODE_OVS:
      # TODO: check OVS link
      self.LogInfo("OVS links are currently not checked for correctness")

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)
    elif new_net_uuid != old_net_uuid:

      def get_net_prefix(net_uuid):
        mac_prefix = None
        if net_uuid:
          nobj = self.cfg.GetNetwork(net_uuid)
          mac_prefix = nobj.mac_prefix

        return mac_prefix

      new_prefix = get_net_prefix(new_net_uuid)
      old_prefix = get_net_prefix(old_net_uuid)
      if old_prefix != new_prefix:
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())

    # if there is a change in (ip, network) tuple
    new_ip = params.get(constants.INIC_IP, old_ip)
    if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
      if new_ip:
        # if IP is pool then require a network and generate one IP
        if new_ip.lower() == constants.NIC_IP_POOL:
          if new_net_uuid:
            try:
              new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
            except errors.ReservationError:
              raise errors.OpPrereqError("Unable to get a free IP"
                                         " from the address pool",
                                         errors.ECODE_STATE)
            self.LogInfo("Chose IP %s from network %s",
                         new_ip,
                         new_net_obj.name)
            params[constants.INIC_IP] = new_ip
          else:
            raise errors.OpPrereqError("ip=pool, but no network found",
                                       errors.ECODE_INVAL)
        # Reserve new IP if in the new network if any
        elif new_net_uuid:
          try:
            self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
            self.LogInfo("Reserving IP %s in network %s",
                         new_ip, new_net_obj.name)
          except errors.ReservationError:
            raise errors.OpPrereqError("IP %s not available in network %s" %
                                       (new_ip, new_net_obj.name),
                                       errors.ECODE_NOTUNIQUE)
        # new network is None so check if new IP is a conflicting IP
        elif self.op.conflicts_check:
          _CheckForConflictingIp(self, new_ip, pnode)

      # release old IP if old network is not None
      if old_ip and old_net_uuid:
        try:
          self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
        except errors.AddressPoolError:
          logging.warning("Released IP %s was not contained in network %s",
                          old_ip, old_net_obj.name)

    # there are no changes in (ip, network) tuple and old network is not None
    elif (old_net_uuid is not None and
          (req_link is not None or req_mode is not None)):
      raise errors.OpPrereqError("Not allowed to change link or mode of"
                                 " a NIC that is connected to a network",
                                 errors.ECODE_INVAL)

    private.params = new_params
    private.filled = new_filled_params

  def _PreCheckDiskTemplate(self, pnode_info):
    """CheckPrereq checks related to a new disk template."""
    # Arguments are passed to avoid configuration lookups
    instance = self.instance
    pnode = instance.primary_node
    cluster = self.cluster
    if instance.disk_template == self.op.disk_template:
      raise errors.OpPrereqError("Instance already has disk template %s" %
                                 instance.disk_template, errors.ECODE_INVAL)

    if (instance.disk_template,
        self.op.disk_template) not in self._DISK_CONVERSIONS:
      raise errors.OpPrereqError("Unsupported disk template conversion from"
                                 " %s to %s" % (instance.disk_template,
                                                self.op.disk_template),
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
                        msg="cannot change disk template")
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.remote_node == pnode:
        raise errors.OpPrereqError("Given new secondary node %s is the same"
                                   " as the primary node of the instance" %
                                   self.op.remote_node, errors.ECODE_STATE)
      _CheckNodeOnline(self, self.op.remote_node)
      _CheckNodeNotDrained(self, self.op.remote_node)
      # FIXME: here we assume that the old instance type is DT_PLAIN
      assert instance.disk_template == constants.DT_PLAIN
      disks = [{constants.IDISK_SIZE: d.size,
                constants.IDISK_VG: d.logical_id[0]}
               for d in instance.disks]
      required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
      _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

      snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              snode_group)
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
                              ignore=self.op.ignore_ipolicy)
      if pnode_info.group != snode_info.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
      # Make sure none of the nodes require exclusive storage
      nodes = [pnode_info]
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        assert snode_info
        nodes.append(snode_info)
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
      if compat.any(map(has_es, nodes)):
        errmsg = ("Cannot convert disk template from %s to %s when exclusive"
                  " storage is enabled" % (instance.disk_template,
                                           self.op.disk_template))
        raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node

    self.warn = []

    if (self.op.pnode is not None and self.op.pnode != pnode and
        not self.op.force):
      # verify that the instance is not up
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      if instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      elif instance_info.payload:
        raise errors.OpPrereqError("Instance is still running on %s" % pnode,
                                   errors.ECODE_STATE)

    assert pnode in self.owned_locks(locking.LEVEL_NODE)
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)

    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
    group_info = self.cfg.GetNodeGroup(pnode_info.group)

    # dictionary with instance information after the modification
    ispec = {}

    # Check disk modifications. This is done here and not in CheckArguments
    # (as with NICs), because we need to know the instance's disk template
    if instance.disk_template == constants.DT_EXT:
      self._CheckMods("disk", self.op.disks, {},
                      self._VerifyDiskModification)
    else:
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                      self._VerifyDiskModification)

    # Prepare disk/NIC modifications
    self.diskmod = PrepareContainerMods(self.op.disks, None)
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)

    # Check the validity of the `provider' parameter
    if instance.disk_template == constants.DT_EXT:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if mod[0] == constants.DDM_ADD:
          if ext_provider is None:
            raise errors.OpPrereqError("Instance template is '%s' and"
                                       " parameter '%s' missing, during"
                                       " disk add" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER),
                                       errors.ECODE_NOENT)
        elif mod[0] == constants.DDM_MODIFY:
          if ext_provider:
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
                                       " modification" %
                                       constants.IDISK_PROVIDER,
                                       errors.ECODE_INVAL)
    else:
      for mod in self.diskmod:
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
        if ext_provider is not None:
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
                                     " instances of type '%s'" %
                                     (constants.IDISK_PROVIDER,
                                      constants.DT_EXT),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    assert not (self.op.disk_template and self.op.disks), \
      "Can't modify disk template and apply disk changes at the same time"

    if self.op.disk_template:
      self._PreCheckDiskTemplate(pnode_info)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # been changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor], False)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem, errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    if self.op.runtime_mem:
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node)
      if not remote_info.payload: # not running already
        raise errors.OpPrereqError("Instance %s is not running" %
                                   instance.name, errors.ECODE_STATE)

      current_memory = remote_info.payload["memory"]
      if (not self.op.force and
          (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
           self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
        raise errors.OpPrereqError("Instance %s must have between %d and %d MB"
                                   " of memory unless --force is given" %
                                   (instance.name,
                                    self.be_proposed[constants.BE_MINMEM],
                                    self.be_proposed[constants.BE_MAXMEM]),
                                   errors.ECODE_INVAL)

      delta = self.op.runtime_mem - current_memory
      if delta > 0:
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "ballooning memory for instance %s" %
                             instance.name, delta, instance.hypervisor)

    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances", errors.ECODE_INVAL)

    def _PrepareNicCreate(_, params, private):
      self._PrepareNicModification(params, private, None, None,
                                   {}, cluster, pnode)
      return (None, None)

    def _PrepareNicMod(_, nic, params, private):
      self._PrepareNicModification(params, private, nic.ip, nic.network,
                                   nic.nicparams, cluster, pnode)
      return None

    def _PrepareNicRemove(_, params, __):
      ip = params.ip
      net = params.network
      if net is not None and ip is not None:
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())

    # Verify NIC changes (operating on copy)
    nics = instance.nics[:]
    ApplyContainerMods("NIC", nics, None, self.nicmod,
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
    if len(nics) > constants.MAX_NICS:
      raise errors.OpPrereqError("Instance has too many network interfaces"
                                 " (%d), cannot add more" % constants.MAX_NICS,
                                 errors.ECODE_STATE)

    def _PrepareDiskMod(_, disk, params, __):
      disk.name = params.get(constants.IDISK_NAME, None)

    # Verify disk changes (operating on a copy)
    disks = copy.deepcopy(instance.disks)
    ApplyContainerMods("disk", disks, None, self.diskmod, None, _PrepareDiskMod,
                       None)
    utils.ValidateDeviceNames("disk", disks)
    if len(disks) > constants.MAX_DISKS:
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
                                 " more" % constants.MAX_DISKS,
                                 errors.ECODE_STATE)
    disk_sizes = [disk.size for disk in instance.disks]
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
                      self.diskmod if op == constants.DDM_ADD)
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes

    if self.op.offline is not None and self.op.offline:
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
                          msg="can't change to offline")

    # Pre-compute NIC changes (necessary to use result in hooks)
    self._nic_chgdesc = []
    if self.nicmod:
      # Operate on copies as this is still in prereq
      nics = [nic.Copy() for nic in instance.nics]
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
                         self._CreateNewNic, self._ApplyNicMods, None)
      # Verify that NIC names are unique and valid
      utils.ValidateDeviceNames("NIC", nics)
      self._new_nics = nics
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
    else:
      self._new_nics = None
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)

    if not self.op.ignore_ipolicy:
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)

      # Fill ispec with backend parameters
      ispec[constants.ISPEC_SPINDLE_USE] = \
        self.be_new.get(constants.BE_SPINDLE_USE, None)
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
                                                         None)

      # Copy ispec to verify parameters with min/max values separately
      if self.op.disk_template:
        new_disk_template = self.op.disk_template
      else:
        new_disk_template = instance.disk_template
      ispec_max = ispec.copy()
      ispec_max[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MAXMEM, None)
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
                                                     new_disk_template)
      ispec_min = ispec.copy()
      ispec_min[constants.ISPEC_MEM_SIZE] = \
        self.be_new.get(constants.BE_MINMEM, None)
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
                                                     new_disk_template)

      if (res_max or res_min):
        # FIXME: Improve error message by including information about whether
        # the upper or lower limit of the parameter fails the ipolicy.
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
               (group_info, group_info.name,
                utils.CommaJoin(set(res_max + res_min))))
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0],
                  constants.IDISK_NAME: d.name}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
                                        self.diskparams)
    p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
    s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in anno_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True, p_excl_stor)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
                              s_excl_stor)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in anno_disks:
      for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
                              excl_stor)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
    new_disks = [d.children[0] for d in instance.disks]

    # copy over size, mode and name
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode
      child.name = parent.name

    # this is a DRBD disk, return its port to the pool
    # NOTE: this must be done right before the call to cfg.Update!
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    _UpdateIvNames(0, instance.disks)
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def _CreateNewDisk(self, idx, params, _):
    """Creates a new disk.

    """
    instance = self.instance

    # add a new disk
    if instance.disk_template in constants.DTS_FILEBASED:
      (file_driver, file_path) = instance.disks[0].logical_id
      file_path = os.path.dirname(file_path)
    else:
      file_driver = file_path = None

    disk = \
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
                            instance.primary_node, instance.secondary_nodes,
                            [params], file_path, file_driver, idx,
                            self.Log, self.diskparams)[0]

    info = _GetInstanceInfoText(instance)

    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)
    # Note: this needs to be kept in sync with _CreateDisks
    for node in instance.all_nodes:
      f_create = (node == instance.primary_node)
      try:
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
      except errors.OpExecError, err:
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
                        disk.iv_name, disk, node, err)

    if self.cluster.prealloc_wipe_disks:
      # Wipe new disk
      _WipeDisks(self, instance,
                 disks=[(idx, disk, 0)])

    return (disk, [
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
      ])

  @staticmethod
  def _ModifyDisk(idx, disk, params, _):
    """Modifies a disk.

    """
    changes = []
    mode = params.get(constants.IDISK_MODE, None)
    if mode:
      disk.mode = mode
      changes.append(("disk.mode/%d" % idx, disk.mode))

    name = params.get(constants.IDISK_NAME, None)
    disk.name = name
    changes.append(("disk.name/%d" % idx, disk.name))

    return changes
14329 def _RemoveDisk(self, idx, root, _):
14330 """Removes a disk.
14332 """
14333 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14334 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14335 self.cfg.SetDiskID(disk, node)
14336 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14337 if msg:
14338 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14339 " continuing anyway", idx, node, msg)
14341 # if this is a DRBD disk, return its port to the pool
14342 if root.dev_type in constants.LDS_DRBD:
14343 self.cfg.AddTcpUdpPort(root.logical_id[2])
14345 def _CreateNewNic(self, idx, params, private):
14346 """Creates data structure for a new network interface.
14349 mac = params[constants.INIC_MAC]
14350 ip = params.get(constants.INIC_IP, None)
14351 net = params.get(constants.INIC_NETWORK, None)
14352 name = params.get(constants.INIC_NAME, None)
14353 net_uuid = self.cfg.LookupNetwork(net)
14354 # TODO: what if not private.filled? Can a NIC have no nicparams?
14355 nicparams = private.filled
14356 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, name=name,
14357 nicparams=nicparams)
14358 nobj.uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14362 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14363 (mac, ip, private.filled[constants.NIC_MODE],
14364 private.filled[constants.NIC_LINK],
14368 def _ApplyNicMods(self, idx, nic, params, private):
14369 """Modifies a network interface.
14374 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NAME]:
14375 if key in params:
14376 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14377 setattr(nic, key, params[key])
14379 new_net = params.get(constants.INIC_NETWORK, nic.network)
14380 new_net_uuid = self.cfg.LookupNetwork(new_net)
14381 if new_net_uuid != nic.network:
14382 changes.append(("nic.network/%d" % idx, new_net))
14383 nic.network = new_net_uuid
14385 if private.filled:
14386 nic.nicparams = private.filled
14388 for (key, val) in nic.nicparams.items():
14389 changes.append(("nic.%s/%d" % (key, idx), val))
14391 return changes
14393 def Exec(self, feedback_fn):
14394 """Modifies an instance.
14396 All parameters take effect only at the next restart of the instance.
14398 """
14399 # Process here the warnings from CheckPrereq, as we don't have a
14400 # feedback_fn there.
14401 # TODO: Replace with self.LogWarning
14402 for warn in self.warn:
14403 feedback_fn("WARNING: %s" % warn)
14405 assert ((self.op.disk_template is None) ^
14406 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14407 "Not owning any node resource locks"
14409 result = []
14410 instance = self.instance
14413 if self.op.pnode:
14414 instance.primary_node = self.op.pnode
14417 if self.op.runtime_mem:
14418 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14419 instance,
14420 self.op.runtime_mem)
14421 rpcres.Raise("Cannot modify instance runtime memory")
14422 result.append(("runtime_memory", self.op.runtime_mem))
14424 # Apply disk changes
14425 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14426 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14427 _UpdateIvNames(0, instance.disks)
14429 if self.op.disk_template:
14431 check_nodes = set(instance.all_nodes)
14432 if self.op.remote_node:
14433 check_nodes.add(self.op.remote_node)
14434 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14435 owned = self.owned_locks(level)
14436 assert not (check_nodes - owned), \
14437 ("Not owning the correct locks, owning %r, expected at least %r" %
14438 (owned, check_nodes))
14440 r_shut = _ShutdownInstanceDisks(self, instance)
14441 if not r_shut:
14442 raise errors.OpExecError("Cannot shut down instance disks, unable to"
14443 " proceed with disk template conversion")
14444 mode = (instance.disk_template, self.op.disk_template)
14445 try:
14446 self._DISK_CONVERSIONS[mode](self, feedback_fn)
14447 finally:
14448 self.cfg.ReleaseDRBDMinors(instance.name)
14450 result.append(("disk_template", self.op.disk_template))
14452 assert instance.disk_template == self.op.disk_template, \
14453 ("Expected disk template '%s', found '%s'" %
14454 (self.op.disk_template, instance.disk_template))
14456 # Release node and resource locks if there are any (they might already have
14457 # been released during disk conversion)
14458 _ReleaseLocks(self, locking.LEVEL_NODE)
14459 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14461 # Apply NIC changes
14462 if self._new_nics is not None:
14463 instance.nics = self._new_nics
14464 result.extend(self._nic_chgdesc)
14467 if self.op.hvparams:
14468 instance.hvparams = self.hv_inst
14469 for key, val in self.op.hvparams.iteritems():
14470 result.append(("hv/%s" % key, val))
14473 if self.op.beparams:
14474 instance.beparams = self.be_inst
14475 for key, val in self.op.beparams.iteritems():
14476 result.append(("be/%s" % key, val))
14479 if self.op.os_name:
14480 instance.os = self.op.os_name
14483 if self.op.osparams:
14484 instance.osparams = self.os_inst
14485 for key, val in self.op.osparams.iteritems():
14486 result.append(("os/%s" % key, val))
14488 if self.op.offline is None:
14489 # Ignore
14490 pass
14491 elif self.op.offline:
14492 # Mark instance as offline
14493 self.cfg.MarkInstanceOffline(instance.name)
14494 result.append(("admin_state", constants.ADMINST_OFFLINE))
14495 else:
14496 # Mark instance as online, but stopped
14497 self.cfg.MarkInstanceDown(instance.name)
14498 result.append(("admin_state", constants.ADMINST_DOWN))
14500 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14502 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14503 self.owned_locks(locking.LEVEL_NODE)), \
14504 "All node locks should have been released by now"
14508 _DISK_CONVERSIONS = {
14509 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14510 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14511 }
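# Illustrative sketch (hypothetical helper, not used by the LU above): the
# _DISK_CONVERSIONS table is a plain dispatch dict keyed by
# (old_template, new_template) tuples. The same pattern in miniature:
def _DemoDispatchConversion(old_template, new_template, conversions):
  """Looks up and runs a conversion function, mimicking Exec's dispatch."""
  fn = conversions.get((old_template, new_template))
  if fn is None:
    raise ValueError("Conversion %s -> %s is not supported" %
                     (old_template, new_template))
  return fn()

# Usage: _DemoDispatchConversion("drbd", "plain", {("drbd", "plain"): list})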
14514 class LUInstanceChangeGroup(LogicalUnit):
14515 HPATH = "instance-change-group"
14516 HTYPE = constants.HTYPE_INSTANCE
14517 REQ_BGL = False
14519 def ExpandNames(self):
14520 self.share_locks = _ShareAll()
14522 self.needed_locks = {
14523 locking.LEVEL_NODEGROUP: [],
14524 locking.LEVEL_NODE: [],
14525 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14526 }
14528 self._ExpandAndLockInstance()
14530 if self.op.target_groups:
14531 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14532 self.op.target_groups)
14533 else:
14534 self.req_target_uuids = None
14536 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14538 def DeclareLocks(self, level):
14539 if level == locking.LEVEL_NODEGROUP:
14540 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14542 if self.req_target_uuids:
14543 lock_groups = set(self.req_target_uuids)
14545 # Lock all groups used by instance optimistically; this requires going
14546 # via the node before it's locked, requiring verification later on
14547 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14548 lock_groups.update(instance_groups)
14549 else:
14550 # No target groups, need to lock all of them
14551 lock_groups = locking.ALL_SET
14553 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14555 elif level == locking.LEVEL_NODE:
14556 if self.req_target_uuids:
14557 # Lock all nodes used by instances
14558 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14559 self._LockInstancesNodes()
14561 # Lock all nodes in all potential target groups
14562 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14563 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14564 member_nodes = [node_name
14565 for group in lock_groups
14566 for node_name in self.cfg.GetNodeGroup(group).members]
14567 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14568 else:
14569 # Lock all nodes as all groups are potential targets
14570 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14572 def CheckPrereq(self):
14573 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14574 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14575 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14577 assert (self.req_target_uuids is None or
14578 owned_groups.issuperset(self.req_target_uuids))
14579 assert owned_instances == set([self.op.instance_name])
14581 # Get instance information
14582 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14584 # Check if node groups for locked instance are still correct
14585 assert owned_nodes.issuperset(self.instance.all_nodes), \
14586 ("Instance %s's nodes changed while we kept the lock" %
14587 self.op.instance_name)
14589 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14590 owned_groups)
14592 if self.req_target_uuids:
14593 # User requested specific target groups
14594 self.target_uuids = frozenset(self.req_target_uuids)
14595 else:
14596 # All groups except those used by the instance are potential targets
14597 self.target_uuids = owned_groups - inst_groups
14599 conflicting_groups = self.target_uuids & inst_groups
14600 if conflicting_groups:
14601 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14602 " used by the instance '%s'" %
14603 (utils.CommaJoin(conflicting_groups),
14604 self.op.instance_name),
14605 errors.ECODE_INVAL)
14607 if not self.target_uuids:
14608 raise errors.OpPrereqError("There are no possible target groups",
14609 errors.ECODE_INVAL)
14611 def BuildHooksEnv(self):
14612 """Build hooks env.
14615 assert self.target_uuids
14618 "TARGET_GROUPS": " ".join(self.target_uuids),
14621 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14625 def BuildHooksNodes(self):
14626 """Build hooks nodes.
14629 mn = self.cfg.GetMasterNode()
14630 return ([mn], [mn])
14632 def Exec(self, feedback_fn):
14633 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14635 assert instances == [self.op.instance_name], "Instance not locked"
14637 req = iallocator.IAReqGroupChange(instances=instances,
14638 target_groups=list(self.target_uuids))
14639 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14641 ial.Run(self.op.iallocator)
14643 if not ial.success:
14644 raise errors.OpPrereqError("Can't compute solution for changing group of"
14645 " instance '%s' using iallocator '%s': %s" %
14646 (self.op.instance_name, self.op.iallocator,
14647 ial.info), errors.ECODE_NORES)
14649 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14651 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14652 " instance '%s'", len(jobs), self.op.instance_name)
14654 return ResultWithJobs(jobs)
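# Illustrative sketch (assumed shape only, hypothetical helper): the "jobs"
# argument given to ResultWithJobs is a list of jobs, each job itself a list
# of opcodes. With strings standing in for opcode objects, the payload built
# from an iallocator result looks like this:
def _DemoJobsPayload(instance_names):
  """Builds one single-opcode job per instance, as a list of lists."""
  return [["change-group-opcode-for-%s" % name] for name in instance_names]

# _DemoJobsPayload(["inst1", "inst2"]) ->
#   [["change-group-opcode-for-inst1"], ["change-group-opcode-for-inst2"]]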
14657 class LUBackupQuery(NoHooksLU):
14658 """Query the exports list
14663 def CheckArguments(self):
14664 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14665 ["node", "export"], self.op.use_locking)
14667 def ExpandNames(self):
14668 self.expq.ExpandNames(self)
14670 def DeclareLocks(self, level):
14671 self.expq.DeclareLocks(self, level)
14673 def Exec(self, feedback_fn):
14675 result = {}
14676 for (node, expname) in self.expq.OldStyleQuery(self):
14677 if expname is None:
14678 result[node] = False
14679 else:
14680 result.setdefault(node, []).append(expname)
14682 return result
14685 class _ExportQuery(_QueryBase):
14686 FIELDS = query.EXPORT_FIELDS
14688 #: The node name is not a unique key for this query
14689 SORT_FIELD = "node"
14691 def ExpandNames(self, lu):
14692 lu.needed_locks = {}
14694 # The following variables interact with _QueryBase._GetNames
14695 if self.names:
14696 self.wanted = _GetWantedNodes(lu, self.names)
14697 else:
14698 self.wanted = locking.ALL_SET
14700 self.do_locking = self.use_locking
14702 if self.do_locking:
14703 lu.share_locks = _ShareAll()
14704 lu.needed_locks = {
14705 locking.LEVEL_NODE: self.wanted,
14706 }
14708 if not self.names:
14709 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14711 def DeclareLocks(self, lu, level):
14712 pass
14714 def _GetQueryData(self, lu):
14715 """Computes the list of nodes and their attributes.
14718 # Locking is not used
14720 assert not (compat.any(lu.glm.is_owned(level)
14721 for level in locking.LEVELS
14722 if level != locking.LEVEL_CLUSTER) or
14723 self.do_locking or self.use_locking)
14725 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14727 result = []
14729 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14730 if nres.fail_msg:
14731 result.append((node, None))
14732 else:
14733 result.extend((node, expname) for expname in nres.payload)
14735 return result
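# Illustrative sketch (standalone, hypothetical helper): _GetQueryData yields
# (node, None) when the export query failed on a node and one
# (node, export_name) pair per export otherwise; LUBackupQuery.Exec collapses
# such pairs back into the old-style {node: False | [export, ...]} dict:
def _DemoCollapseExportPairs(pairs):
  """Rebuilds the old-style export dict from (node, export) pairs."""
  result = {}
  for (node, expname) in pairs:
    if expname is None:
      result[node] = False
    else:
      result.setdefault(node, []).append(expname)
  return result

# _DemoCollapseExportPairs([("n1", "exp1"), ("n2", None)]) ->
#   {"n1": ["exp1"], "n2": False}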
14738 class LUBackupPrepare(NoHooksLU):
14739 """Prepares an instance for an export and returns useful information.
14744 def ExpandNames(self):
14745 self._ExpandAndLockInstance()
14747 def CheckPrereq(self):
14748 """Check prerequisites.
14751 instance_name = self.op.instance_name
14753 self.instance = self.cfg.GetInstanceInfo(instance_name)
14754 assert self.instance is not None, \
14755 "Cannot retrieve locked instance %s" % self.op.instance_name
14756 _CheckNodeOnline(self, self.instance.primary_node)
14758 self._cds = _GetClusterDomainSecret()
14760 def Exec(self, feedback_fn):
14761 """Prepares an instance for an export.
14764 instance = self.instance
14766 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14767 salt = utils.GenerateSecret(8)
14769 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14770 result = self.rpc.call_x509_cert_create(instance.primary_node,
14771 constants.RIE_CERT_VALIDITY)
14772 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14774 (name, cert_pem) = result.payload
14776 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14777 cert_pem)
14779 return {
14780 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14781 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14782 salt),
14783 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14784 }
14786 return None
14789 class LUBackupExport(LogicalUnit):
14790 """Export an instance to an image in the cluster.
14793 HPATH = "instance-export"
14794 HTYPE = constants.HTYPE_INSTANCE
14795 REQ_BGL = False
14797 def CheckArguments(self):
14798 """Check the arguments.
14801 self.x509_key_name = self.op.x509_key_name
14802 self.dest_x509_ca_pem = self.op.destination_x509_ca
14804 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14805 if not self.x509_key_name:
14806 raise errors.OpPrereqError("Missing X509 key name for encryption",
14807 errors.ECODE_INVAL)
14809 if not self.dest_x509_ca_pem:
14810 raise errors.OpPrereqError("Missing destination X509 CA",
14811 errors.ECODE_INVAL)
14813 def ExpandNames(self):
14814 self._ExpandAndLockInstance()
14816 # Lock all nodes for local exports
14817 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14818 # FIXME: lock only instance primary and destination node
14820 # Sad but true: for now we have to lock all nodes, as we don't know where
14821 # the previous export might be, and in this LU we search for it and
14822 # remove it from its current node. In the future we could fix this by:
14823 # - making a tasklet to search (share-lock all), then create the
14824 # new one, then one to remove, after
14825 # - removing the removal operation altogether
14826 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14828 # Allocations should be stopped while this LU runs with node locks, but
14829 # it doesn't have to be exclusive
14830 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14831 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14833 def DeclareLocks(self, level):
14834 """Last minute lock declaration."""
14835 # All nodes are locked anyway, so nothing to do here.
14837 def BuildHooksEnv(self):
14838 """Build hooks env.
14840 This will run on the master, primary node and target node.
14842 """
14843 env = {
14844 "EXPORT_MODE": self.op.mode,
14845 "EXPORT_NODE": self.op.target_node,
14846 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14847 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14848 # TODO: Generic function for boolean env variables
14849 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14850 }
14852 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14854 return env
14856 def BuildHooksNodes(self):
14857 """Build hooks nodes.
14860 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14862 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14863 nl.append(self.op.target_node)
14865 return (nl, nl)
14867 def CheckPrereq(self):
14868 """Check prerequisites.
14870 This checks that the instance and node names are valid.
14872 """
14873 instance_name = self.op.instance_name
14875 self.instance = self.cfg.GetInstanceInfo(instance_name)
14876 assert self.instance is not None, \
14877 "Cannot retrieve locked instance %s" % self.op.instance_name
14878 _CheckNodeOnline(self, self.instance.primary_node)
14880 if (self.op.remove_instance and
14881 self.instance.admin_state == constants.ADMINST_UP and
14882 not self.op.shutdown):
14883 raise errors.OpPrereqError("Can not remove instance without shutting it"
14884 " down before", errors.ECODE_STATE)
14886 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14887 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14888 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14889 assert self.dst_node is not None
14891 _CheckNodeOnline(self, self.dst_node.name)
14892 _CheckNodeNotDrained(self, self.dst_node.name)
14894 self._cds = None
14895 self.dest_disk_info = None
14896 self.dest_x509_ca = None
14898 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14899 self.dst_node = None
14901 if len(self.op.target_node) != len(self.instance.disks):
14902 raise errors.OpPrereqError(("Received destination information for %s"
14903 " disks, but instance %s has %s disks") %
14904 (len(self.op.target_node), instance_name,
14905 len(self.instance.disks)),
14906 errors.ECODE_INVAL)
14908 cds = _GetClusterDomainSecret()
14910 # Check X509 key name
14911 try:
14912 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14913 except (TypeError, ValueError), err:
14914 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14915 errors.ECODE_INVAL)
14917 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14918 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14919 errors.ECODE_INVAL)
14921 # Load and verify CA
14922 try:
14923 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14924 except OpenSSL.crypto.Error, err:
14925 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14926 (err, ), errors.ECODE_INVAL)
14928 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14929 if errcode is not None:
14930 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14931 (msg, ), errors.ECODE_INVAL)
14933 self.dest_x509_ca = cert
14935 # Verify target information
14936 disk_info = []
14937 for idx, disk_data in enumerate(self.op.target_node):
14938 try:
14939 (host, port, magic) = \
14940 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14941 except errors.GenericError, err:
14942 raise errors.OpPrereqError("Target info for disk %s: %s" %
14943 (idx, err), errors.ECODE_INVAL)
14945 disk_info.append((host, port, magic))
14947 assert len(disk_info) == len(self.op.target_node)
14948 self.dest_disk_info = disk_info
14950 else:
14951 raise errors.ProgrammerError("Unhandled export mode %r" %
14952 self.op.mode)
14954 # instance disk type verification
14955 # TODO: Implement export support for file-based disks
14956 for disk in self.instance.disks:
14957 if disk.dev_type == constants.LD_FILE:
14958 raise errors.OpPrereqError("Export not supported for instances with"
14959 " file-based disks", errors.ECODE_INVAL)
14961 def _CleanupExports(self, feedback_fn):
14962 """Removes exports of current instance from all other nodes.
14964 If an instance in a cluster with nodes A..D was exported to node C, its
14965 exports will be removed from the nodes A, B and D.
14967 """
14968 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14970 nodelist = self.cfg.GetNodeList()
14971 nodelist.remove(self.dst_node.name)
14973 # On one-node clusters nodelist will be empty after the removal;
14974 # if we proceeded, the backup would be removed because OpBackupQuery
14975 # substitutes an empty list with the full cluster node list.
14976 iname = self.instance.name
14977 if nodelist:
14978 feedback_fn("Removing old exports for instance %s" % iname)
14979 exportlist = self.rpc.call_export_list(nodelist)
14980 for node in exportlist:
14981 if exportlist[node].fail_msg:
14982 continue
14983 if iname in exportlist[node].payload:
14984 msg = self.rpc.call_export_remove(node, iname).fail_msg
14985 if msg:
14986 self.LogWarning("Could not remove older export for instance %s"
14987 " on node %s: %s", iname, node, msg)
14989 def Exec(self, feedback_fn):
14990 """Export an instance to an image in the cluster.
14993 assert self.op.mode in constants.EXPORT_MODES
14995 instance = self.instance
14996 src_node = instance.primary_node
14998 if self.op.shutdown:
14999 # shutdown the instance, but not the disks
15000 feedback_fn("Shutting down instance %s" % instance.name)
15001 result = self.rpc.call_instance_shutdown(src_node, instance,
15002 self.op.shutdown_timeout)
15003 # TODO: Maybe ignore failures if ignore_remove_failures is set
15004 result.Raise("Could not shut down instance %s on"
15005 " node %s" % (instance.name, src_node))
15007 # set the disks ID correctly since call_instance_start needs the
15008 # correct drbd minor to create the symlinks
15009 for disk in instance.disks:
15010 self.cfg.SetDiskID(disk, src_node)
15012 activate_disks = (instance.admin_state != constants.ADMINST_UP)
15014 if activate_disks:
15015 # Activate the instance disks if we're exporting a stopped instance
15016 feedback_fn("Activating disks for %s" % instance.name)
15017 _StartInstanceDisks(self, instance, None)
15019 try:
15020 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
15021 instance)
15023 helper.CreateSnapshots()
15024 try:
15025 if (self.op.shutdown and
15026 instance.admin_state == constants.ADMINST_UP and
15027 not self.op.remove_instance):
15028 assert not activate_disks
15029 feedback_fn("Starting instance %s" % instance.name)
15030 result = self.rpc.call_instance_start(src_node,
15031 (instance, None, None), False)
15032 msg = result.fail_msg
15033 if msg:
15034 feedback_fn("Failed to start instance: %s" % msg)
15035 _ShutdownInstanceDisks(self, instance)
15036 raise errors.OpExecError("Could not start instance: %s" % msg)
15038 if self.op.mode == constants.EXPORT_MODE_LOCAL:
15039 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
15040 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
15041 connect_timeout = constants.RIE_CONNECT_TIMEOUT
15042 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
15044 (key_name, _, _) = self.x509_key_name
15046 dest_ca_pem = \
15047 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
15048 self.dest_x509_ca)
15050 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
15051 key_name, dest_ca_pem,
15052 timeouts)
15053 finally:
15054 helper.Cleanup()
15056 # Check for backwards compatibility
15057 assert len(dresults) == len(instance.disks)
15058 assert compat.all(isinstance(i, bool) for i in dresults), \
15059 "Not all results are boolean: %r" % dresults
15062 if activate_disks:
15063 feedback_fn("Deactivating disks for %s" % instance.name)
15064 _ShutdownInstanceDisks(self, instance)
15066 if not (compat.all(dresults) and fin_resu):
15067 failures = []
15068 if not fin_resu:
15069 failures.append("export finalization")
15070 if not compat.all(dresults):
15071 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
15072 if not dsk)
15073 failures.append("disk export: disk(s) %s" % fdsk)
15075 raise errors.OpExecError("Export failed, errors in %s" %
15076 utils.CommaJoin(failures))
15078 # At this point, the export was successful, we can cleanup/finish
15080 # Remove instance if requested
15081 if self.op.remove_instance:
15082 feedback_fn("Removing instance %s" % instance.name)
15083 _RemoveInstance(self, feedback_fn, instance,
15084 self.op.ignore_remove_failures)
15086 if self.op.mode == constants.EXPORT_MODE_LOCAL:
15087 self._CleanupExports(feedback_fn)
15089 return fin_resu, dresults
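# Illustrative sketch (standalone, hypothetical helper that mirrors the
# failure aggregation at the end of Exec above): given the finalization
# status and the per-disk boolean results, compute the failure strings:
def _DemoExportFailures(fin_resu, dresults):
  """Returns the list of failure descriptions for an export outcome."""
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  if not all(dresults):
    fdsk = ", ".join(str(idx) for (idx, dsk) in enumerate(dresults)
                     if not dsk)
    failures.append("disk export: disk(s) %s" % fdsk)
  return failures

# _DemoExportFailures(True, [True, False]) -> ["disk export: disk(s) 1"]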
15092 class LUBackupRemove(NoHooksLU):
15093 """Remove exports related to the named instance.
15098 def ExpandNames(self):
15099 self.needed_locks = {
15100 # We need all nodes to be locked in order for RemoveExport to work, but
15101 # we don't need to lock the instance itself, as nothing will happen to it
15102 # (and we can remove exports also for a removed instance)
15103 locking.LEVEL_NODE: locking.ALL_SET,
15105 # Removing backups is quick, so blocking allocations is justified
15106 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
15107 }
15109 # Allocations should be stopped while this LU runs with node locks, but it
15110 # doesn't have to be exclusive
15111 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
15113 def Exec(self, feedback_fn):
15114 """Remove any export.
15117 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
15118 # If the instance was not found we'll try with the name that was passed in.
15119 # This will only work if it was an FQDN, though.
15121 if not instance_name:
15123 instance_name = self.op.instance_name
15125 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
15126 exportlist = self.rpc.call_export_list(locked_nodes)
15127 found = False
15128 for node in exportlist:
15129 msg = exportlist[node].fail_msg
15130 if msg:
15131 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
15132 continue
15133 if instance_name in exportlist[node].payload:
15134 found = True
15135 result = self.rpc.call_export_remove(node, instance_name)
15136 msg = result.fail_msg
15137 if msg:
15138 logging.error("Could not remove export for instance %s"
15139 " on node %s: %s", instance_name, node, msg)
15141 if fqdn_warn and not found:
15142 feedback_fn("Export not found. If trying to remove an export belonging"
15143 " to a deleted instance please use its Fully Qualified"
15147 class LUGroupAdd(LogicalUnit):
15148 """Logical unit for creating node groups.
15151 HPATH = "group-add"
15152 HTYPE = constants.HTYPE_GROUP
15153 REQ_BGL = False
15155 def ExpandNames(self):
15156 # We need the new group's UUID here so that we can create and acquire the
15157 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
15158 # that it should not check whether the UUID exists in the configuration.
15159 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15160 self.needed_locks = {}
15161 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15163 def CheckPrereq(self):
15164 """Check prerequisites.
15166 This checks that the given group name is not an existing node group
15167 already.
15169 """
15170 try:
15171 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15172 except errors.OpPrereqError:
15173 pass
15174 else:
15175 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
15176 " node group (UUID: %s)" %
15177 (self.op.group_name, existing_uuid),
15178 errors.ECODE_EXISTS)
15180 if self.op.ndparams:
15181 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
15183 if self.op.hv_state:
15184 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
15185 else:
15186 self.new_hv_state = None
15188 if self.op.disk_state:
15189 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
15190 else:
15191 self.new_disk_state = None
15193 if self.op.diskparams:
15194 for templ in constants.DISK_TEMPLATES:
15195 if templ in self.op.diskparams:
15196 utils.ForceDictType(self.op.diskparams[templ],
15197 constants.DISK_DT_TYPES)
15198 self.new_diskparams = self.op.diskparams
15199 try:
15200 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15201 except errors.OpPrereqError, err:
15202 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15203 errors.ECODE_INVAL)
15204 else:
15205 self.new_diskparams = {}
15207 if self.op.ipolicy:
15208 cluster = self.cfg.GetClusterInfo()
15209 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
15210 try:
15211 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
15212 except errors.ConfigurationError, err:
15213 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
15214 errors.ECODE_INVAL)
15216 def BuildHooksEnv(self):
15217 """Build hooks env.
15221 "GROUP_NAME": self.op.group_name,
15224 def BuildHooksNodes(self):
15225 """Build hooks nodes.
15228 mn = self.cfg.GetMasterNode()
15229 return ([mn], [mn])
15231 def Exec(self, feedback_fn):
15232 """Add the node group to the cluster.
15235 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
15236 uuid=self.group_uuid,
15237 alloc_policy=self.op.alloc_policy,
15238 ndparams=self.op.ndparams,
15239 diskparams=self.new_diskparams,
15240 ipolicy=self.op.ipolicy,
15241 hv_state_static=self.new_hv_state,
15242 disk_state_static=self.new_disk_state)
15244 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
15245 del self.remove_locks[locking.LEVEL_NODEGROUP]
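# Illustrative sketch (assumption about utils.VerifyDictOptions: it rejects
# option keys that do not appear in the defaults dict; hypothetical helper).
# A standalone equivalent of the diskparams verification in CheckPrereq:
def _DemoVerifyDictOptions(params, defaults):
  """Raises ValueError for any option key unknown to the defaults."""
  wrong = frozenset(params) - frozenset(defaults)
  if wrong:
    raise ValueError("Unknown options: %s" % ", ".join(sorted(wrong)))

# _DemoVerifyDictOptions({"metavg": "xenvg"}, {"metavg": "default"}) passes;
# an unknown key such as "metavgg" would raise ValueError.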
15248 class LUGroupAssignNodes(NoHooksLU):
15249 """Logical unit for assigning nodes to groups.
15254 def ExpandNames(self):
15255 # These raise errors.OpPrereqError on their own:
15256 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15257 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15259 # We want to lock all the affected nodes and groups. We have readily
15260 # available the list of nodes, and the *destination* group. To gather the
15261 # list of "source" groups, we need to fetch node information later on.
15262 self.needed_locks = {
15263 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
15264 locking.LEVEL_NODE: self.op.nodes,
15265 }
15267 def DeclareLocks(self, level):
15268 if level == locking.LEVEL_NODEGROUP:
15269 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15271 # Try to get all affected nodes' groups without having the group or node
15272 # lock yet. Needs verification later in the code flow.
15273 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15275 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15277 def CheckPrereq(self):
15278 """Check prerequisites.
15281 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15282 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15283 frozenset(self.op.nodes))
15285 expected_locks = (set([self.group_uuid]) |
15286 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15287 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15288 if actual_locks != expected_locks:
15289 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15290 " current groups are '%s', used to be '%s'" %
15291 (utils.CommaJoin(expected_locks),
15292 utils.CommaJoin(actual_locks)))
15294 self.node_data = self.cfg.GetAllNodesInfo()
15295 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15296 instance_data = self.cfg.GetAllInstancesInfo()
15298 if self.group is None:
15299 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15300 (self.op.group_name, self.group_uuid))
15302 (new_splits, previous_splits) = \
15303 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15304 for node in self.op.nodes],
15305 self.node_data, instance_data)
15307 if new_splits:
15308 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15310 if not self.op.force:
15311 raise errors.OpExecError("The following instances get split by this"
15312 " change and --force was not given: %s" %
15313 fmt_new_splits)
15314 else:
15315 self.LogWarning("This operation will split the following instances: %s",
15316 fmt_new_splits)
15318 if previous_splits:
15319 self.LogWarning("In addition, these already-split instances continue"
15320 " to be split across groups: %s",
15321 utils.CommaJoin(utils.NiceSort(previous_splits)))
15323 def Exec(self, feedback_fn):
15324 """Assign nodes to a new group.
15327 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15329 self.cfg.AssignGroupNodes(mods)
15331 @staticmethod
15332 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15333 """Check for split instances after a node assignment.
15335 This method considers a series of node assignments as an atomic operation,
15336 and returns information about split instances after applying the set of
15337 changes.
15339 In particular, it returns information about newly split instances, and
15340 instances that were already split, and remain so after the change.
15342 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15343 considered.
15345 @type changes: list of (node_name, new_group_uuid) pairs.
15346 @param changes: list of node assignments to consider.
15347 @param node_data: a dict with data for all nodes
15348 @param instance_data: a dict with all instances to consider
15349 @rtype: a two-tuple
15350 @return: a list of instances that were previously okay and result split as a
15351 consequence of this change, and a list of instances that were previously
15352 split and this change does not fix.
15354 """
15355 changed_nodes = dict((node, group) for node, group in changes
15356 if node_data[node].group != group)
15358 all_split_instances = set()
15359 previously_split_instances = set()
15361 def InstanceNodes(instance):
15362 return [instance.primary_node] + list(instance.secondary_nodes)
15364 for inst in instance_data.values():
15365 if inst.disk_template not in constants.DTS_INT_MIRROR:
15366 continue
15368 instance_nodes = InstanceNodes(inst)
15370 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15371 previously_split_instances.add(inst.name)
15373 if len(set(changed_nodes.get(node, node_data[node].group)
15374 for node in instance_nodes)) > 1:
15375 all_split_instances.add(inst.name)
15377 return (list(all_split_instances - previously_split_instances),
15378 list(previously_split_instances & all_split_instances))
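# Illustrative worked example (stub objects; only the attributes the method
# reads are modelled, all names hypothetical). An instance mirrored across
# groups g1/g2 is healed by moving its secondary node "n2" into "g1", so it
# appears in neither returned list: not newly split, and no longer split.
class _DemoNode(object):
  def __init__(self, group):
    self.group = group

class _DemoInstance(object):
  def __init__(self, name, disk_template, primary_node, secondary_nodes):
    self.name = name
    self.disk_template = disk_template
    self.primary_node = primary_node
    self.secondary_nodes = secondary_nodes

def _DemoSplitCheck():
  """Runs CheckAssignmentForSplitInstances on a two-node example."""
  node_data = {"n1": _DemoNode("g1"), "n2": _DemoNode("g2")}
  inst = _DemoInstance("inst1", constants.DT_DRBD8, "n1", ["n2"])
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("n2", "g1")], node_data, {"inst1": inst})  # -> ([], [])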
15381 class _GroupQuery(_QueryBase):
15382 FIELDS = query.GROUP_FIELDS
15384 def ExpandNames(self, lu):
15385 lu.needed_locks = {}
15387 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15388 self._cluster = lu.cfg.GetClusterInfo()
15389 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15391 if not self.names:
15392 self.wanted = [name_to_uuid[name]
15393 for name in utils.NiceSort(name_to_uuid.keys())]
15394 else:
15395 # Accept names to be either names or UUIDs.
15396 missing = []
15397 self.wanted = []
15398 all_uuid = frozenset(self._all_groups.keys())
15400 for name in self.names:
15401 if name in all_uuid:
15402 self.wanted.append(name)
15403 elif name in name_to_uuid:
15404 self.wanted.append(name_to_uuid[name])
15405 else:
15406 missing.append(name)
15408 if missing:
15409 raise errors.OpPrereqError("Some groups do not exist: %s" %
15410 utils.CommaJoin(missing),
15411 errors.ECODE_NOENT)
15413 def DeclareLocks(self, lu, level):
15414 pass
15416 def _GetQueryData(self, lu):
15417 """Computes the list of node groups and their attributes.
15420 do_nodes = query.GQ_NODE in self.requested_data
15421 do_instances = query.GQ_INST in self.requested_data
15423 group_to_nodes = None
15424 group_to_instances = None
15426 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15427 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15428 # latter GetAllInstancesInfo() is not enough, for we have to go through
15429 # instance->node. Hence, we will need to process nodes even if we only need
15430 # instance information.
15431 if do_nodes or do_instances:
15432 all_nodes = lu.cfg.GetAllNodesInfo()
15433 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15434 node_to_group = {}
15436 for node in all_nodes.values():
15437 if node.group in group_to_nodes:
15438 group_to_nodes[node.group].append(node.name)
15439 node_to_group[node.name] = node.group
15441 if do_instances:
15442 all_instances = lu.cfg.GetAllInstancesInfo()
15443 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15445 for instance in all_instances.values():
15446 node = instance.primary_node
15447 if node in node_to_group:
15448 group_to_instances[node_to_group[node]].append(instance.name)
15450 if not do_nodes:
15451 # Do not pass on node information if it was not requested.
15452 group_to_nodes = None
15454 return query.GroupQueryData(self._cluster,
15455 [self._all_groups[uuid]
15456 for uuid in self.wanted],
15457 group_to_nodes, group_to_instances,
15458 query.GQ_DISKPARAMS in self.requested_data)
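# Illustrative sketch (standalone, hypothetical helper): the two maps
# computed in _GetQueryData above, reduced to plain dictionaries. Only
# groups that were asked for get a bucket; nodes in other groups are left
# out of both maps:
def _DemoGroupMaps(wanted, node_groups):
  """node_groups maps node name -> group UUID."""
  group_to_nodes = dict((uuid, []) for uuid in wanted)
  node_to_group = {}
  for (node, group) in node_groups.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)
      node_to_group[node] = group
  return (group_to_nodes, node_to_group)

# _DemoGroupMaps(["g1"], {"n1": "g1", "n2": "g2"}) ->
#   ({"g1": ["n1"]}, {"n1": "g1"})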
15461 class LUGroupQuery(NoHooksLU):
15462 """Logical unit for querying node groups.
15467 def CheckArguments(self):
15468 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15469 self.op.output_fields, False)
15471 def ExpandNames(self):
15472 self.gq.ExpandNames(self)
15474 def DeclareLocks(self, level):
15475 self.gq.DeclareLocks(self, level)
15477 def Exec(self, feedback_fn):
15478 return self.gq.OldStyleQuery(self)
15481 class LUGroupSetParams(LogicalUnit):
15482 """Modifies the parameters of a node group.
15485 HPATH = "group-modify"
15486 HTYPE = constants.HTYPE_GROUP
15487 REQ_BGL = False
15489 def CheckArguments(self):
15490 all_changes = [
15491 self.op.ndparams,
15492 self.op.diskparams,
15493 self.op.alloc_policy,
15494 self.op.hv_state,
15495 self.op.disk_state,
15496 self.op.ipolicy,
15497 ]
15499 if all_changes.count(None) == len(all_changes):
15500 raise errors.OpPrereqError("Please pass at least one modification",
15501 errors.ECODE_INVAL)
15503 def ExpandNames(self):
15504 # This raises errors.OpPrereqError on its own:
15505 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15507 self.needed_locks = {
15508 locking.LEVEL_INSTANCE: [],
15509 locking.LEVEL_NODEGROUP: [self.group_uuid],
15510 }
15512 self.share_locks[locking.LEVEL_INSTANCE] = 1
15514 def DeclareLocks(self, level):
15515 if level == locking.LEVEL_INSTANCE:
15516 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15518 # Lock instances optimistically, needs verification once group lock has
15519 # been acquired
15520 self.needed_locks[locking.LEVEL_INSTANCE] = \
15521 self.cfg.GetNodeGroupInstances(self.group_uuid)
15523 @staticmethod
15524 def _UpdateAndVerifyDiskParams(old, new):
15525 """Updates and verifies disk parameters.
15528 new_params = _GetUpdatedParams(old, new)
15529 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15530 return new_params
15532 def CheckPrereq(self):
15533 """Check prerequisites.
15536 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15538 # Check if locked instances are still correct
15539 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15541 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15542 cluster = self.cfg.GetClusterInfo()
15544 if self.group is None:
15545 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15546 (self.op.group_name, self.group_uuid))
15548 if self.op.ndparams:
15549 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15550 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15551 self.new_ndparams = new_ndparams
15553 if self.op.diskparams:
15554 diskparams = self.group.diskparams
15555 uavdp = self._UpdateAndVerifyDiskParams
15556 # For each disktemplate subdict update and verify the values
15557 new_diskparams = dict((dt,
15558 uavdp(diskparams.get(dt, {}),
15559 self.op.diskparams[dt]))
15560 for dt in constants.DISK_TEMPLATES
15561 if dt in self.op.diskparams)
15562 # As we've all subdicts of diskparams ready, lets merge the actual
15563 # dict with all updated subdicts
15564 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15565 try:
15566 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15567 except errors.OpPrereqError, err:
15568 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15569 errors.ECODE_INVAL)
15571 if self.op.hv_state:
15572 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15573 self.group.hv_state_static)
15575 if self.op.disk_state:
15576 self.new_disk_state = \
15577 _MergeAndVerifyDiskState(self.op.disk_state,
15578 self.group.disk_state_static)
15580 if self.op.ipolicy:
15581 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15582 self.op.ipolicy,
15583 group_policy=True)
15585 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15586 inst_filter = lambda inst: inst.name in owned_instances
15587 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15588 gmi = ganeti.masterd.instance
15589 violations = \
15590 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15591 self.group),
15592 new_ipolicy, instances, self.cfg)
15594 if violations:
15595 self.LogWarning("After the ipolicy change the following instances"
15596 " violate them: %s",
15597 utils.CommaJoin(violations))
15599 def BuildHooksEnv(self):
15600 """Build hooks env.
15604 "GROUP_NAME": self.op.group_name,
15605 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15608 def BuildHooksNodes(self):
15609 """Build hooks nodes.
15612 mn = self.cfg.GetMasterNode()
15613 return ([mn], [mn])
15615 def Exec(self, feedback_fn):
15616 """Modifies the node group.
15621 if self.op.ndparams:
15622 self.group.ndparams = self.new_ndparams
15623 result.append(("ndparams", str(self.group.ndparams)))
15625 if self.op.diskparams:
15626 self.group.diskparams = self.new_diskparams
15627 result.append(("diskparams", str(self.group.diskparams)))
15629 if self.op.alloc_policy:
15630 self.group.alloc_policy = self.op.alloc_policy
15632 if self.op.hv_state:
15633 self.group.hv_state_static = self.new_hv_state
15635 if self.op.disk_state:
15636 self.group.disk_state_static = self.new_disk_state
15638 if self.op.ipolicy:
15639 self.group.ipolicy = self.new_ipolicy
15641 self.cfg.Update(self.group, feedback_fn)
15643 return result
15645 class LUGroupRemove(LogicalUnit):
15646 HPATH = "group-remove"
15647 HTYPE = constants.HTYPE_GROUP
15648 REQ_BGL = False
15650 def ExpandNames(self):
15651 # This will raise errors.OpPrereqError on its own:
15652 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15653 self.needed_locks = {
15654 locking.LEVEL_NODEGROUP: [self.group_uuid],
15655 }
15657 def CheckPrereq(self):
15658 """Check prerequisites.
15660 This checks that the given group name exists as a node group, that it is
15661 empty (i.e., contains no nodes), and that it is not the last group of the
15662 cluster.
15664 """
15665 # Verify that the group is empty.
15666 group_nodes = [node.name
15667 for node in self.cfg.GetAllNodesInfo().values()
15668 if node.group == self.group_uuid]
15670 if group_nodes:
15671 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15672 " nodes: %s" %
15673 (self.op.group_name,
15674 utils.CommaJoin(utils.NiceSort(group_nodes))),
15675 errors.ECODE_STATE)
15677 # Verify the cluster would not be left group-less.
15678 if len(self.cfg.GetNodeGroupList()) == 1:
15679 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15680 " removed" % self.op.group_name,
15681 errors.ECODE_STATE)
15683 def BuildHooksEnv(self):
15684 """Build hooks env.
15688 "GROUP_NAME": self.op.group_name,
15691 def BuildHooksNodes(self):
15692 """Build hooks nodes.
15695 mn = self.cfg.GetMasterNode()
15696 return ([mn], [mn])
15698 def Exec(self, feedback_fn):
15699 """Remove the node group.
15703 self.cfg.RemoveNodeGroup(self.group_uuid)
15704 except errors.ConfigurationError:
15705 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15706 (self.op.group_name, self.group_uuid))
15708 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15711 class LUGroupRename(LogicalUnit):
15712 HPATH = "group-rename"
15713 HTYPE = constants.HTYPE_GROUP
15714 REQ_BGL = False
15716 def ExpandNames(self):
15717 # This raises errors.OpPrereqError on its own:
15718 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15720 self.needed_locks = {
15721 locking.LEVEL_NODEGROUP: [self.group_uuid],
15722 }
15724 def CheckPrereq(self):
15725 """Check prerequisites.
15727 Ensures requested new name is not yet used.
15729 """
15730 try:
15731 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15732 except errors.OpPrereqError:
15733 pass
15734 else:
15735 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15736 " node group (UUID: %s)" %
15737 (self.op.new_name, new_name_uuid),
15738 errors.ECODE_EXISTS)
15740 def BuildHooksEnv(self):
15741 """Build hooks env.
15745 "OLD_NAME": self.op.group_name,
15746 "NEW_NAME": self.op.new_name,
15749 def BuildHooksNodes(self):
15750 """Build hooks nodes.
15753 mn = self.cfg.GetMasterNode()
15755 all_nodes = self.cfg.GetAllNodesInfo()
15756 all_nodes.pop(mn, None)
15758 run_nodes = [mn]
15759 run_nodes.extend(node.name for node in all_nodes.values()
15760 if node.group == self.group_uuid)
15762 return (run_nodes, run_nodes)
15764 def Exec(self, feedback_fn):
15765 """Rename the node group.
15768 group = self.cfg.GetNodeGroup(self.group_uuid)
15771 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15772 (self.op.group_name, self.group_uuid))
15774 group.name = self.op.new_name
15775 self.cfg.Update(group, feedback_fn)
15777 return self.op.new_name
15780 class LUGroupEvacuate(LogicalUnit):
15781 HPATH = "group-evacuate"
15782 HTYPE = constants.HTYPE_GROUP
15783 REQ_BGL = False
15785 def ExpandNames(self):
15786 # This raises errors.OpPrereqError on its own:
15787 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15789 if self.op.target_groups:
15790 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15791 self.op.target_groups)
15792 else:
15793 self.req_target_uuids = []
15795 if self.group_uuid in self.req_target_uuids:
15796 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15797 " as a target group (targets are %s)" %
15798 (self.group_uuid,
15799 utils.CommaJoin(self.req_target_uuids)),
15800 errors.ECODE_INVAL)
15802 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15804 self.share_locks = _ShareAll()
15805 self.needed_locks = {
15806 locking.LEVEL_INSTANCE: [],
15807 locking.LEVEL_NODEGROUP: [],
15808 locking.LEVEL_NODE: [],
15809 }
15811 def DeclareLocks(self, level):
15812 if level == locking.LEVEL_INSTANCE:
15813 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15815 # Lock instances optimistically, needs verification once node and group
15816 # locks have been acquired
15817 self.needed_locks[locking.LEVEL_INSTANCE] = \
15818 self.cfg.GetNodeGroupInstances(self.group_uuid)
15820 elif level == locking.LEVEL_NODEGROUP:
15821 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15823 if self.req_target_uuids:
15824 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15826 # Lock all groups used by instances optimistically; this requires going
15827 # via the node before it's locked, requiring verification later on
15828 lock_groups.update(group_uuid
15829 for instance_name in
15830 self.owned_locks(locking.LEVEL_INSTANCE)
15831 for group_uuid in
15832 self.cfg.GetInstanceNodeGroups(instance_name))
15833 else:
15834 # No target groups, need to lock all of them
15835 lock_groups = locking.ALL_SET
15837 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15839 elif level == locking.LEVEL_NODE:
15840 # This will only lock the nodes in the group to be evacuated which
15841 # contain actual instances
15842 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15843 self._LockInstancesNodes()
15845 # Lock all nodes in group to be evacuated and target groups
15846 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15847 assert self.group_uuid in owned_groups
15848 member_nodes = [node_name
15849 for group in owned_groups
15850 for node_name in self.cfg.GetNodeGroup(group).members]
15851 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15853 def CheckPrereq(self):
15854 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15855 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15856 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15858 assert owned_groups.issuperset(self.req_target_uuids)
15859 assert self.group_uuid in owned_groups
15861 # Check if locked instances are still correct
15862 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15864 # Get instance information
15865 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15867 # Check if node groups for locked instances are still correct
15868 _CheckInstancesNodeGroups(self.cfg, self.instances,
15869 owned_groups, owned_nodes, self.group_uuid)
15871 if self.req_target_uuids:
15872 # User requested specific target groups
15873 self.target_uuids = self.req_target_uuids
15875 # All groups except the one to be evacuated are potential targets
15876 self.target_uuids = [group_uuid for group_uuid in owned_groups
15877 if group_uuid != self.group_uuid]
15879 if not self.target_uuids:
15880 raise errors.OpPrereqError("There are no possible target groups",
15881 errors.ECODE_INVAL)
15883 def BuildHooksEnv(self):
15884 """Build hooks env.
15888 "GROUP_NAME": self.op.group_name,
15889 "TARGET_GROUPS": " ".join(self.target_uuids),
15892 def BuildHooksNodes(self):
15893 """Build hooks nodes.
15896 mn = self.cfg.GetMasterNode()
15898 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15900 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15902 return (run_nodes, run_nodes)
15904 def Exec(self, feedback_fn):
15905 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15907 assert self.group_uuid not in self.target_uuids
15909 req = iallocator.IAReqGroupChange(instances=instances,
15910 target_groups=self.target_uuids)
15911 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15913 ial.Run(self.op.iallocator)
15915 if not ial.success:
15916 raise errors.OpPrereqError("Can't compute group evacuation using"
15917 " iallocator '%s': %s" %
15918 (self.op.iallocator, ial.info),
15919 errors.ECODE_NORES)
15921 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15923 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15924 len(jobs), self.op.group_name)
15926 return ResultWithJobs(jobs)
15929 class TagsLU(NoHooksLU): # pylint: disable=W0223
15930 """Generic tags LU.
15932 This is an abstract class which is the parent of all the other tags LUs.
15934 """
15935 def ExpandNames(self):
15936 self.group_uuid = None
15937 self.needed_locks = {}
15939 if self.op.kind == constants.TAG_NODE:
15940 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15941 lock_level = locking.LEVEL_NODE
15942 lock_name = self.op.name
15943 elif self.op.kind == constants.TAG_INSTANCE:
15944 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15945 lock_level = locking.LEVEL_INSTANCE
15946 lock_name = self.op.name
15947 elif self.op.kind == constants.TAG_NODEGROUP:
15948 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15949 lock_level = locking.LEVEL_NODEGROUP
15950 lock_name = self.group_uuid
15951 elif self.op.kind == constants.TAG_NETWORK:
15952 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15953 lock_level = locking.LEVEL_NETWORK
15954 lock_name = self.network_uuid
15955 else:
15956 lock_level = None
15957 lock_name = None
15959 if lock_level and getattr(self.op, "use_locking", True):
15960 self.needed_locks[lock_level] = lock_name
15962 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15963 # not possible to acquire the BGL based on opcode parameters)
15965 def CheckPrereq(self):
15966 """Check prerequisites.
15969 if self.op.kind == constants.TAG_CLUSTER:
15970 self.target = self.cfg.GetClusterInfo()
15971 elif self.op.kind == constants.TAG_NODE:
15972 self.target = self.cfg.GetNodeInfo(self.op.name)
15973 elif self.op.kind == constants.TAG_INSTANCE:
15974 self.target = self.cfg.GetInstanceInfo(self.op.name)
15975 elif self.op.kind == constants.TAG_NODEGROUP:
15976 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15977 elif self.op.kind == constants.TAG_NETWORK:
15978 self.target = self.cfg.GetNetwork(self.network_uuid)
15979 else:
15980 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15981 str(self.op.kind), errors.ECODE_INVAL)
15984 class LUTagsGet(TagsLU):
15985 """Returns the tags of a given object.
15990 def ExpandNames(self):
15991 TagsLU.ExpandNames(self)
15993 # Share locks as this is only a read operation
15994 self.share_locks = _ShareAll()
15996 def Exec(self, feedback_fn):
15997 """Returns the tag list.
16000 return list(self.target.GetTags())
16003 class LUTagsSearch(NoHooksLU):
16004 """Searches the tags for a given pattern.
16009 def ExpandNames(self):
16010 self.needed_locks = {}
16012 def CheckPrereq(self):
16013 """Check prerequisites.
16015 This checks the pattern passed for validity by compiling it.
16017 """
16018 try:
16019 self.re = re.compile(self.op.pattern)
16020 except re.error, err:
16021 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
16022 (self.op.pattern, err), errors.ECODE_INVAL)
16024 def Exec(self, feedback_fn):
16025 """Returns the tag list.
16029 tgts = [("/cluster", cfg.GetClusterInfo())]
16030 ilist = cfg.GetAllInstancesInfo().values()
16031 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
16032 nlist = cfg.GetAllNodesInfo().values()
16033 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
16034 tgts.extend(("/nodegroup/%s" % n.name, n)
16035 for n in cfg.GetAllNodeGroupsInfo().values())
16036 results = []
16037 for path, target in tgts:
16038 for tag in target.GetTags():
16039 if self.re.search(tag):
16040 results.append((path, tag))
16041 return results
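# Illustrative sketch (standalone, hypothetical helper): the search above is
# a regex scan over (path, tags) pairs collected from every taggable object:
def _DemoSearchTags(pattern, tagged):
  """tagged: iterable of (path, tags); returns matching (path, tag) pairs."""
  rx = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tagged
          for tag in tags
          if rx.search(tag)]

# _DemoSearchTags("^web", [("/instances/i1", ["web", "db"])]) ->
#   [("/instances/i1", "web")]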
16044 class LUTagsSet(TagsLU):
16045 """Sets a tag on a given object.
16050 def CheckPrereq(self):
16051 """Check prerequisites.
16053 This checks the type and length of the tag name and value.
16055 """
16056 TagsLU.CheckPrereq(self)
16057 for tag in self.op.tags:
16058 objects.TaggableObject.ValidateTag(tag)
16060 def Exec(self, feedback_fn):
16061 """Sets the tag.
16063 """
16064 try:
16065 for tag in self.op.tags:
16066 self.target.AddTag(tag)
16067 except errors.TagError, err:
16068 raise errors.OpExecError("Error while setting tag: %s" % str(err))
16069 self.cfg.Update(self.target, feedback_fn)
16072 class LUTagsDel(TagsLU):
16073 """Delete a list of tags from a given object.
16078 def CheckPrereq(self):
16079 """Check prerequisites.
16081 This checks that we have the given tag.
16083 """
16084 TagsLU.CheckPrereq(self)
16085 for tag in self.op.tags:
16086 objects.TaggableObject.ValidateTag(tag)
16087 del_tags = frozenset(self.op.tags)
16088 cur_tags = self.target.GetTags()
16090 diff_tags = del_tags - cur_tags
16091 if diff_tags:
16092 diff_names = ("'%s'" % i for i in sorted(diff_tags))
16093 raise errors.OpPrereqError("Tag(s) %s not found" %
16094 (utils.CommaJoin(diff_names), ),
16095 errors.ECODE_NOENT)
16097 def Exec(self, feedback_fn):
16098 """Remove the tag from the object.
16101 for tag in self.op.tags:
16102 self.target.RemoveTag(tag)
16103 self.cfg.Update(self.target, feedback_fn)
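# Illustrative sketch (standalone, hypothetical helper): the LUTagsDel
# prerequisite boils down to a set difference; if any requested tag is
# absent the whole operation fails before a single tag is removed:
def _DemoMissingTags(requested, current):
  """Returns the requested tags that the object does not carry."""
  return sorted(frozenset(requested) - frozenset(current))

# _DemoMissingTags(["a", "b"], ["b", "c"]) -> ["a"]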
16106 class LUTestDelay(NoHooksLU):
16107 """Sleep for a specified amount of time.
16109 This LU sleeps on the master and/or nodes for a specified amount of
16110 time.
16112 """
16113 REQ_BGL = False
16115 def ExpandNames(self):
16116 """Expand names and set required locks.
16118 This expands the node list, if any.
16120 """
16121 self.needed_locks = {}
16122 if self.op.on_nodes:
16123 # _GetWantedNodes can be used here, but is not always appropriate to use
16124 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
16125 # more information.
16126 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
16127 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
16129 def _TestDelay(self):
16130 """Do the actual sleep.
16133 if self.op.on_master:
16134 if not utils.TestDelay(self.op.duration):
16135 raise errors.OpExecError("Error during master delay test")
16136 if self.op.on_nodes:
16137 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
16138 for node, node_result in result.items():
16139 node_result.Raise("Failure during rpc call to node %s" % node)
16141 def Exec(self, feedback_fn):
16142 """Execute the test delay opcode, with the wanted repetitions.
16145 if self.op.repeat == 0:
16148 top_value = self.op.repeat - 1
16149 for i in range(self.op.repeat):
16150 self.LogInfo("Test delay iteration %d/%d", i, top_value)
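# Illustrative sketch (standalone, hypothetical helper): the repeat
# semantics of LUTestDelay's Exec, with the sleep replaced by a log of
# calls. repeat == 0 still runs the delay exactly once:
def _DemoDelayCalls(repeat):
  """Returns the sequence of delay invocations Exec would perform."""
  if repeat == 0:
    return ["delay"]
  return ["delay %d/%d" % (i, repeat - 1) for i in range(repeat)]

# _DemoDelayCalls(0) -> ["delay"]
# _DemoDelayCalls(2) -> ["delay 0/1", "delay 1/1"]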
16154 class LURestrictedCommand(NoHooksLU):
16155 """Logical unit for executing restricted commands.
16160 def ExpandNames(self):
16161 if self.op.nodes:
16162 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
16164 self.needed_locks = {
16165 locking.LEVEL_NODE: self.op.nodes,
16166 }
16167 self.share_locks = {
16168 locking.LEVEL_NODE: not self.op.use_locking,
16169 }
16171 def CheckPrereq(self):
16172 """Check prerequisites.
16176 def Exec(self, feedback_fn):
16177 """Execute restricted command and return output.
16180 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
16182 # Check if correct locks are held
16183 assert set(self.op.nodes).issubset(owned_nodes)
16185 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
16187 result = []
16189 for node_name in self.op.nodes:
16190 nres = rpcres[node_name]
16191 if nres.fail_msg:
16192 msg = ("Command '%s' on node '%s' failed: %s" %
16193 (self.op.command, node_name, nres.fail_msg))
16194 result.append((False, msg))
16195 else:
16196 result.append((True, nres.payload))
16198 return result
16201 class LUTestJqueue(NoHooksLU):
16202 """Utility LU to test some aspects of the job queue.
16207 # Must be lower than default timeout for WaitForJobChange to see whether it
16208 # notices changed jobs
16209 _CLIENT_CONNECT_TIMEOUT = 20.0
16210 _CLIENT_CONFIRM_TIMEOUT = 60.0
16212 @classmethod
16213 def _NotifyUsingSocket(cls, cb, errcls):
16214 """Opens a Unix socket and waits for another program to connect.
16216 @type cb: callable
16217 @param cb: Callback to send socket name to client
16218 @type errcls: class
16219 @param errcls: Exception class to use for errors
16221 """
16222 # Using a temporary directory as there's no easy way to create temporary
16223 # sockets without writing a custom loop around tempfile.mktemp and
16224 # socket.bind
16225 tmpdir = tempfile.mkdtemp()
16226 try:
16227 tmpsock = utils.PathJoin(tmpdir, "sock")
16229 logging.debug("Creating temporary socket at %s", tmpsock)
16230 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
16231 try:
16232 sock.bind(tmpsock)
16233 sock.listen(1)
16235 # Send details to client
16236 cb(tmpsock)
16238 # Wait for client to connect before continuing
16239 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
16240 try:
16241 (conn, _) = sock.accept()
16242 except socket.error, err:
16243 raise errcls("Client didn't connect in time (%s)" % err)
16244 finally:
16245 sock.close()
16246 finally:
16247 # Remove as soon as client is connected
16248 shutil.rmtree(tmpdir)
16250 # Wait for client to close
16251 try:
16252 try:
16253 # pylint: disable=E1101
16254 # Instance of '_socketobject' has no ... member
16255 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
16256 conn.recv(1)
16257 except socket.error, err:
16258 raise errcls("Client failed to confirm notification (%s)" % err)
16259 finally:
16260 conn.close()
16262 def _SendNotification(self, test, arg, sockname):
16263 """Sends a notification to the client.
16266 @param test: Test name
16267 @param arg: Test argument (depends on test)
16268 @type sockname: string
16269 @param sockname: Socket path
16272 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16274 def _Notify(self, prereq, test, arg):
16275 """Notifies the client of a test.
16278 @param prereq: Whether this is a prereq-phase test
16280 @param test: Test name
16281 @param arg: Test argument (depends on test)
16285 errcls = errors.OpPrereqError
16287 errcls = errors.OpExecError
16289 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16293 def CheckArguments(self):
16294 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16295 self.expandnames_calls = 0
16297 def ExpandNames(self):
16298 checkargs_calls = getattr(self, "checkargs_calls", 0)
16299 if checkargs_calls < 1:
16300 raise errors.ProgrammerError("CheckArguments was not called")
16302 self.expandnames_calls += 1
16304 if self.op.notify_waitlock:
16305 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16307 self.LogInfo("Expanding names")
16309 # Get lock on master node (just to get a lock, not for a particular reason)
16310 self.needed_locks = {
16311 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16314 def Exec(self, feedback_fn):
16315 if self.expandnames_calls < 1:
16316 raise errors.ProgrammerError("ExpandNames was not called")
16318 if self.op.notify_exec:
16319 self._Notify(False, constants.JQT_EXEC, None)
16321 self.LogInfo("Executing")
16323 if self.op.log_messages:
16324 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16325 for idx, msg in enumerate(self.op.log_messages):
16326 self.LogInfo("Sending log message %s", idx + 1)
16327 feedback_fn(constants.JQT_MSGPREFIX + msg)
16328 # Report how many test messages have been sent
16329 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16332 raise errors.OpExecError("Opcode failure was requested")
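

# For illustration only: the notification protocol above expects the test
# client to (1) learn the socket path from the job's ELOG_JQUEUE_TEST log
# entries, (2) connect to it, and (3) close the connection to confirm.
# A minimal client sketch (hypothetical, not part of this module):
#
#   import socket
#
#   def _ConfirmNotification(sockname):
#     sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#     try:
#       sock.connect(sockname)  # unblocks the LU's sock.accept() above
#     finally:
#       sock.close()            # unblocks conn.recv(1) above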


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.iallocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor,
                                          node_whitelist=None)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.iallocator, validate=False)
      result = ial.out_text
    return result
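

# For illustration only: the "disks" parameter validated in CheckPrereq above
# is a list of dictionaries, each with at least an integer size (in MiB) and
# an access mode from constants.DISK_ACCESS_SET, e.g. (sketch):
#
#   disks = [
#     {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: constants.DISK_RDWR},
#     ]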


class LUNetworkAdd(LogicalUnit):
  """Logical unit for creating networks.

  """
  HPATH = "network-add"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckArguments(self):
    if self.op.mac_prefix:
      self.op.mac_prefix = \
        utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

  def ExpandNames(self):
    self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())

    if self.op.conflicts_check:
      self.share_locks[locking.LEVEL_NODE] = 1
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {}

    self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid

  def CheckPrereq(self):
    if self.op.network is None:
      raise errors.OpPrereqError("Network must be given",
                                 errors.ECODE_INVAL)

    try:
      existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired network name '%s' already exists as a"
                                 " network (UUID: %s)" %
                                 (self.op.network_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.op.network,
      "gateway": self.op.gateway,
      "network6": self.op.network6,
      "gateway6": self.op.gateway6,
      "mac_prefix": self.op.mac_prefix,
      "tags": self.op.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def Exec(self, feedback_fn):
    """Add the ip pool to the cluster.

    """
    nobj = objects.Network(name=self.op.network_name,
                           network=self.op.network,
                           gateway=self.op.gateway,
                           network6=self.op.network6,
                           gateway6=self.op.gateway6,
                           mac_prefix=self.op.mac_prefix,
                           uuid=self.network_uuid)
    # Initialize the associated address pool
    try:
      pool = network.AddressPool.InitializeNetwork(nobj)
    except errors.AddressPoolError, err:
      raise errors.OpExecError("Cannot create IP address pool for network"
                               " '%s': %s" % (self.op.network_name, err))

    # Check if we need to reserve the nodes and the cluster master IP
    # These may not be allocated to any instances in routed mode, as
    # they wouldn't function anyway.
    if self.op.conflicts_check:
      for node in self.cfg.GetAllNodesInfo().values():
        for ip in [node.primary_ip, node.secondary_ip]:
          try:
            if pool.Contains(ip):
              pool.Reserve(ip)
              self.LogInfo("Reserved IP address of node '%s' (%s)",
                           node.name, ip)
          except errors.AddressPoolError, err:
            self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
                            ip, node.name, err)

      master_ip = self.cfg.GetClusterInfo().master_ip
      try:
        if pool.Contains(master_ip):
          pool.Reserve(master_ip)
          self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
      except errors.AddressPoolError, err:
        self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
                        master_ip, err)

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
                                   (ip, err))

    if self.op.tags:
      for tag in self.op.tags:
        nobj.AddTag(tag)

    self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NETWORK]
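

# For illustration only: the address pool semantics used above, in rough
# terms (sketch assuming a small IPv4 subnet; the exact reservation map
# format is defined by network.AddressPool):
#
#   nobj = objects.Network(name="demo", network="192.0.2.0/28",
#                          gateway="192.0.2.1", uuid="...")
#   pool = network.AddressPool.InitializeNetwork(nobj)
#   pool.Reserve("192.0.2.1", external=True)  # e.g. the gateway
#   pool.Contains("192.0.2.5")                # True, inside the subnet
#   pool.IsReserved("192.0.2.1")              # True after the Reserve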


class LUNetworkRemove(LogicalUnit):
  HPATH = "network-remove"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.share_locks[locking.LEVEL_NODEGROUP] = 1
    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given network exists and is not connected to
    (i.e. in use by) any node group.

    """
    # Verify that the network is not connected.
    node_groups = [group.name
                   for group in self.cfg.GetAllNodeGroupsInfo().values()
                   if self.network_uuid in group.networks]

    if node_groups:
      self.LogWarning("Network '%s' is connected to the following"
                      " node groups: %s" %
                      (self.op.network_name,
                       utils.CommaJoin(utils.NiceSort(node_groups))))
      raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "NETWORK_NAME": self.op.network_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the network.

    """
    try:
      self.cfg.RemoveNetwork(self.network_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, self.network_uuid))


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    #      extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
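

# For illustration only: the parameters handled above use the magic string
# constants.VALUE_NONE to clear a value rather than change it, e.g.
# (sketch of an opcode, field names as consumed by this LU via self.op):
#
#   opcodes.OpNetworkSetParams(network_name="demo",
#                              gateway=constants.VALUE_NONE)
#   # -> CheckPrereq sets self.gateway = None, Exec clears network.gateway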


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network in network_uuids:
            network_to_instances[nic.network].append(instance.name)
            break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
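

# For illustration only: _GetStats returns a plain dictionary, roughly of
# this shape (values invented for the sketch; "map" uses the AddressPool
# reservation-map format, one character per address):
#
#   {
#     "free_count": 11,
#     "reserved_count": 5,
#     "map": "XX...X.........X",
#     "external_reservations": "192.0.2.0, 192.0.2.1, 192.0.2.15",
#   }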


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup.

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    if self.op.conflicts_check:
      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True

    # check only if not already connected
    elif self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to", owned_instances)

  def Exec(self, feedback_fn):
    # Connect the network and update the group only if not already connected
    if not self.connected:
      self.group.networks[self.network_uuid] = self.netparams
      self.cfg.Update(self.group, feedback_fn)


def _NetworkConflictCheck(lu, check_fn, action, instances):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
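

# For illustration only: given details = [(0, "192.0.2.10"), (2, "192.0.2.12")],
# _FmtNetworkConflict returns "nic0/192.0.2.10, nic2/192.0.2.12", which
# _NetworkConflictCheck then prefixes with the instance name in its warning.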


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup.

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False

    # The conflict check is only needed if the network is actually connected,
    # i.e. if instances in the group may still have NICs using it
    else:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
                            "disconnect from", owned_instances)

  def Exec(self, feedback_fn):
    # Disconnect the network and update the group only if network is connected
    if self.connected:
      del self.group.networks[self.network_uuid]
      self.cfg.Update(self.group, feedback_fn)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
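

# For illustration only: a query handler might dispatch roughly like this
# (sketch, not an actual call site in this module):
#
#   impl_cls = _GetQueryImplementation(constants.QR_NETWORK)
#   # impl_cls is _NetworkQuery; instantiating and running it, e.g.
#   # impl_cls(filter_, fields, use_locking), is left to the generic
#   # query machinery.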


def _CheckForConflictingIp(lu, ip, node):
  """In case of conflicting IP address raise error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)