4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring the
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left as purely a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
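# Illustrative sketch only (not part of the module's API): what a minimal
# ExpandNames of a hypothetical LU working on a single instance could look
# like. The class name "LUExampleNoop" is invented for documentation purposes.
#
#   class LUExampleNoop(NoHooksLU):
#     def ExpandNames(self):
#       # expands self.op.instance_name and declares the instance-level lock
#       self._ExpandAndLockInstance()
#       # node locks will be computed later, in DeclareLocks
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE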
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. No nodes should be returned as an
323 empty list (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused-argument and the
350 # could-be-a-function warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called from DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
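# Illustrative sketch only, continuing the hypothetical "LUExampleNoop" from
# above: the matching DeclareLocks implementation that turns the empty node
# lock list declared in ExpandNames into the locked instance's actual nodes.
#
#   class LUExampleNoop(NoHooksLU):
#     ...
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes(primary_only=True)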
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLU.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklet.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
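# Illustrative sketch only: a minimal, hypothetical tasklet. The name and
# behaviour below are invented; real tasklets in this module follow the same
# shape (CheckPrereq plus Exec, no locking of their own).
#
#   class _ExampleNoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass  # nothing to verify
#
#     def Exec(self, feedback_fn):
#       feedback_fn("doing nothing")
#
# An LU would then set, in its ExpandNames:
#   self.tasklets = [_ExampleNoopTasklet(self)]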
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper around the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @return: The annotated disk copies
601 @see L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @type use_default: boolean
794 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @type use_none: boolean
797 @param use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
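# Worked example (made-up parameter names and values) of the semantics
# implemented above:
#
#   old = {"vcpus": 2, "memory": 512}
#   upd = {"memory": constants.VALUE_DEFAULT, "auto_balance": True}
#   _GetUpdatedParams(old, upd)
#   -> {"vcpus": 2, "auto_balance": True}
#   # "memory" is removed (reset to its default), "auto_balance" is added,
#   # untouched keys are kept; the input dicts are never modified.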
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of an instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
831 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
833 use_default=use_default)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
841 " on the cluster" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @return: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with that of the object.
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with that of the object.
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
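# Usage sketch (hypothetical attribute names): after narrowing down the nodes
# an LU really needs, surplus node locks can be dropped early, e.g.:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.op.target_node, instance.primary_node])
#
# i.e. keep only the two interesting node locks and release everything else
# owned at that level.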
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckGlobalHvParams(params):
1024 """Validates that given hypervisor params are not global ones.
1026 This will ensure that instances don't get customised versions of
1030 used_globals = constants.HVC_GLOBALS.intersection(params)
1032 msg = ("The following hypervisor parameters are global and cannot"
1033 " be customized at instance level, please modify them at"
1034 " cluster level: %s" % utils.CommaJoin(used_globals))
1035 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1038 def _CheckNodeOnline(lu, node, msg=None):
1039 """Ensure that a given node is online.
1041 @param lu: the LU on behalf of which we make the check
1042 @param node: the node to check
1043 @param msg: if passed, should be a message to replace the default one
1044 @raise errors.OpPrereqError: if the node is offline
1048 msg = "Can't use offline node"
1049 if lu.cfg.GetNodeInfo(node).offline:
1050 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1053 def _CheckNodeNotDrained(lu, node):
1054 """Ensure that a given node is not drained.
1056 @param lu: the LU on behalf of which we make the check
1057 @param node: the node to check
1058 @raise errors.OpPrereqError: if the node is drained
1061 if lu.cfg.GetNodeInfo(node).drained:
1062 raise errors.OpPrereqError("Can't use drained node %s" % node,
1066 def _CheckNodeVmCapable(lu, node):
1067 """Ensure that a given node is vm capable.
1069 @param lu: the LU on behalf of which we make the check
1070 @param node: the node to check
1071 @raise errors.OpPrereqError: if the node is not vm capable
1074 if not lu.cfg.GetNodeInfo(node).vm_capable:
1075 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1079 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1080 """Ensure that a node supports a given OS.
1082 @param lu: the LU on behalf of which we make the check
1083 @param node: the node to check
1084 @param os_name: the OS to query about
1085 @param force_variant: whether to ignore variant errors
1086 @raise errors.OpPrereqError: if the node is not supporting the OS
1089 result = lu.rpc.call_os_get(node, os_name)
1090 result.Raise("OS '%s' not in supported OS list for node %s" %
1092 prereq=True, ecode=errors.ECODE_INVAL)
1093 if not force_variant:
1094 _CheckOSVariant(result.payload, os_name)
1097 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1098 """Ensure that a node has the given secondary ip.
1100 @type lu: L{LogicalUnit}
1101 @param lu: the LU on behalf of which we make the check
1103 @param node: the node to check
1104 @type secondary_ip: string
1105 @param secondary_ip: the ip to check
1106 @type prereq: boolean
1107 @param prereq: whether to throw a prerequisite or an execute error
1108 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1109 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1112 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1113 result.Raise("Failure checking secondary ip on node %s" % node,
1114 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1115 if not result.payload:
1116 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1117 " please fix and re-run this command" % secondary_ip)
1119 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1121 raise errors.OpExecError(msg)
1124 def _CheckNodePVs(nresult, exclusive_storage):
1128 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1129 if pvlist_dict is None:
1130 return (["Can't get PV list from node"], None)
1131 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1133 # check that ':' is not present in PV names, since it's a
1134 # special character for lvcreate (denotes the range of PEs to
1138 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1139 (pv.name, pv.vg_name))
1141 if exclusive_storage:
1142 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1143 errlist.extend(errmsgs)
1144 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1146 for (pvname, lvlist) in shared_pvs:
1147 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1148 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1149 (pvname, utils.CommaJoin(lvlist)))
1150 return (errlist, es_pvinfo)
1153 def _GetClusterDomainSecret():
1154 """Reads the cluster domain secret.
1157 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1161 def _CheckInstanceState(lu, instance, req_states, msg=None):
1162 """Ensure that an instance is in one of the required states.
1164 @param lu: the LU on behalf of which we make the check
1165 @param instance: the instance to check
1166 @param msg: if passed, should be a message to replace the default one
1167 @raise errors.OpPrereqError: if the instance is not in the required state
1171 msg = ("can't use instance from outside %s states" %
1172 utils.CommaJoin(req_states))
1173 if instance.admin_state not in req_states:
1174 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1175 (instance.name, instance.admin_state, msg),
1178 if constants.ADMINST_UP not in req_states:
1179 pnode = instance.primary_node
1180 if not lu.cfg.GetNodeInfo(pnode).offline:
1181 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1182 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1183 prereq=True, ecode=errors.ECODE_ENVIRON)
1184 if instance.name in ins_l.payload:
1185 raise errors.OpPrereqError("Instance %s is running, %s" %
1186 (instance.name, msg), errors.ECODE_STATE)
1188 lu.LogWarning("Primary node offline, ignoring check that instance"
1192 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1193 """Computes if value is in the desired range.
1195 @param name: name of the parameter for which we perform the check
1196 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1198 @param ipolicy: dictionary containing min, max and std values
1199 @param value: actual value that we want to use
1200 @return: None or element not meeting the criteria
1204 if value in [None, constants.VALUE_AUTO]:
1206 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1207 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1208 if value > max_v or min_v > value:
1210 fqn = "%s/%s" % (name, qualifier)
1213 return ("%s value %s is not in range [%s, %s]" %
1214 (fqn, value, min_v, max_v))
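# Worked example (made-up ipolicy fragment):
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192)
#   -> an error string of the form "... value 8192 is not in range [128, 4096]"
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 1024)
#   -> None (the value is within the allowed range)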
1218 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1219 nic_count, disk_sizes, spindle_use,
1220 _compute_fn=_ComputeMinMaxSpec):
1221 """Verifies ipolicy against provided specs.
1224 @param ipolicy: The ipolicy
1226 @param mem_size: The memory size
1227 @type cpu_count: int
1228 @param cpu_count: Used cpu cores
1229 @type disk_count: int
1230 @param disk_count: Number of disks used
1231 @type nic_count: int
1232 @param nic_count: Number of nics used
1233 @type disk_sizes: list of ints
1234 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1235 @type spindle_use: int
1236 @param spindle_use: The number of spindles this instance uses
1237 @param _compute_fn: The compute function (unittest only)
1238 @return: A list of violations, or an empty list if no violations are found
1241 assert disk_count == len(disk_sizes)
1244 (constants.ISPEC_MEM_SIZE, "", mem_size),
1245 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1246 (constants.ISPEC_DISK_COUNT, "", disk_count),
1247 (constants.ISPEC_NIC_COUNT, "", nic_count),
1248 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1249 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1250 for idx, d in enumerate(disk_sizes)]
1253 (_compute_fn(name, qualifier, ipolicy, value)
1254 for (name, qualifier, value) in test_settings))
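# Call sketch (made-up values): checking a 1-vCPU, 512 MB instance with two
# disks against an ipolicy; the result is a (possibly empty) list of
# human-readable violation messages produced by _ComputeMinMaxSpec above.
#
#   _ComputeIPolicySpecViolation(ipolicy, 512, 1, 2, 1, [1024, 2048], 1)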
1257 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1258 _compute_fn=_ComputeIPolicySpecViolation):
1259 """Compute if instance meets the specs of ipolicy.
1262 @param ipolicy: The ipolicy to verify against
1263 @type instance: L{objects.Instance}
1264 @param instance: The instance to verify
1265 @param _compute_fn: The function to verify ipolicy (unittest only)
1266 @see: L{_ComputeIPolicySpecViolation}
1269 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1270 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1271 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1272 disk_count = len(instance.disks)
1273 disk_sizes = [disk.size for disk in instance.disks]
1274 nic_count = len(instance.nics)
1276 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1277 disk_sizes, spindle_use)
1280 def _ComputeIPolicyInstanceSpecViolation(
1281 ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
1282 """Compute if instance specs meet the specs of ipolicy.
1285 @param ipolicy: The ipolicy to verify against
1286 @type instance_spec: dict
1287 @param instance_spec: The instance spec to verify
1288 @param _compute_fn: The function to verify ipolicy (unittest only)
1289 @see: L{_ComputeIPolicySpecViolation}
1292 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1293 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1294 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1295 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1296 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1297 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1299 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1300 disk_sizes, spindle_use)
1303 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1305 _compute_fn=_ComputeIPolicyInstanceViolation):
1306 """Compute if instance meets the specs of the new target group.
1308 @param ipolicy: The ipolicy to verify
1309 @param instance: The instance object to verify
1310 @param current_group: The current group of the instance
1311 @param target_group: The new group of the instance
1312 @param _compute_fn: The function to verify ipolicy (unittest only)
1313 @see: L{_ComputeIPolicySpecViolation}
1316 if current_group == target_group:
1319 return _compute_fn(ipolicy, instance)
1322 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1323 _compute_fn=_ComputeIPolicyNodeViolation):
1324 """Checks that the target node is correct in terms of instance policy.
1326 @param ipolicy: The ipolicy to verify
1327 @param instance: The instance object to verify
1328 @param node: The new node to relocate
1329 @param ignore: Ignore violations of the ipolicy
1330 @param _compute_fn: The function to verify ipolicy (unittest only)
1331 @see: L{_ComputeIPolicySpecViolation}
1334 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1335 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1338 msg = ("Instance does not meet target node group's (%s) instance"
1339 " policy: %s") % (node.group, utils.CommaJoin(res))
1343 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1346 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1347 """Computes a set of any instances that would violate the new ipolicy.
1349 @param old_ipolicy: The current (still in-place) ipolicy
1350 @param new_ipolicy: The new (to become) ipolicy
1351 @param instances: List of instances to verify
1352 @return: A list of instances which violate the new ipolicy but
1356 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1357 _ComputeViolatingInstances(old_ipolicy, instances))
1360 def _ExpandItemName(fn, name, kind):
1361 """Expand an item name.
1363 @param fn: the function to use for expansion
1364 @param name: requested item name
1365 @param kind: text description ('Node' or 'Instance')
1366 @return: the resolved (full) name
1367 @raise errors.OpPrereqError: if the item is not found
1370 full_name = fn(name)
1371 if full_name is None:
1372 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1377 def _ExpandNodeName(cfg, name):
1378 """Wrapper over L{_ExpandItemName} for nodes."""
1379 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1382 def _ExpandInstanceName(cfg, name):
1383 """Wrapper over L{_ExpandItemName} for instance."""
1384 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
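# Usage sketch: both wrappers turn a possibly shortened name into the full
# configuration name, or raise OpPrereqError if the item is unknown, e.g.
# (hypothetical cluster names):
#
#   _ExpandNodeName(self.cfg, "node1")      -> "node1.example.com"
#   _ExpandInstanceName(self.cfg, "unknown") -> raises errors.OpPrereqError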
1387 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1388 network_type, mac_prefix, tags):
1389 """Builds network related env variables for hooks
1391 This builds the hook environment from individual variables.
1394 @param name: the name of the network
1395 @type subnet: string
1396 @param subnet: the ipv4 subnet
1397 @type gateway: string
1398 @param gateway: the ipv4 gateway
1399 @type network6: string
1400 @param network6: the ipv6 subnet
1401 @type gateway6: string
1402 @param gateway6: the ipv6 gateway
1403 @type network_type: string
1404 @param network_type: the type of the network
1405 @type mac_prefix: string
1406 @param mac_prefix: the mac_prefix
1408 @param tags: the tags of the network
1413 env["NETWORK_NAME"] = name
1415 env["NETWORK_SUBNET"] = subnet
1417 env["NETWORK_GATEWAY"] = gateway
1419 env["NETWORK_SUBNET6"] = network6
1421 env["NETWORK_GATEWAY6"] = gateway6
1423 env["NETWORK_MAC_PREFIX"] = mac_prefix
1425 env["NETWORK_TYPE"] = network_type
1427 env["NETWORK_TAGS"] = " ".join(tags)
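# Example of the resulting environment (made-up values, only set keys shown;
# unset parameters simply produce no variable):
#
#   _BuildNetworkHookEnv("net1", "10.0.0.0/24", "10.0.0.1", None, None,
#                        None, "aa:00:00", ["tag1"])
#   -> {"NETWORK_NAME": "net1", "NETWORK_SUBNET": "10.0.0.0/24",
#       "NETWORK_GATEWAY": "10.0.0.1", "NETWORK_MAC_PREFIX": "aa:00:00",
#       "NETWORK_TAGS": "tag1"}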
1432 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1433 minmem, maxmem, vcpus, nics, disk_template, disks,
1434 bep, hvp, hypervisor_name, tags):
1435 """Builds instance related env variables for hooks
1437 This builds the hook environment from individual variables.
1440 @param name: the name of the instance
1441 @type primary_node: string
1442 @param primary_node: the name of the instance's primary node
1443 @type secondary_nodes: list
1444 @param secondary_nodes: list of secondary nodes as strings
1445 @type os_type: string
1446 @param os_type: the name of the instance's OS
1447 @type status: string
1448 @param status: the desired status of the instance
1449 @type minmem: string
1450 @param minmem: the minimum memory size of the instance
1451 @type maxmem: string
1452 @param maxmem: the maximum memory size of the instance
1454 @param vcpus: the count of VCPUs the instance has
1456 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1457 the NICs the instance has
1458 @type disk_template: string
1459 @param disk_template: the disk template of the instance
1461 @param disks: the list of (size, mode) pairs
1463 @param bep: the backend parameters for the instance
1465 @param hvp: the hypervisor parameters for the instance
1466 @type hypervisor_name: string
1467 @param hypervisor_name: the hypervisor for the instance
1469 @param tags: list of instance tags as strings
1471 @return: the hook environment for this instance
1476 "INSTANCE_NAME": name,
1477 "INSTANCE_PRIMARY": primary_node,
1478 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1479 "INSTANCE_OS_TYPE": os_type,
1480 "INSTANCE_STATUS": status,
1481 "INSTANCE_MINMEM": minmem,
1482 "INSTANCE_MAXMEM": maxmem,
1483 # TODO(2.7) remove deprecated "memory" value
1484 "INSTANCE_MEMORY": maxmem,
1485 "INSTANCE_VCPUS": vcpus,
1486 "INSTANCE_DISK_TEMPLATE": disk_template,
1487 "INSTANCE_HYPERVISOR": hypervisor_name,
1490 nic_count = len(nics)
1491 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1494 env["INSTANCE_NIC%d_IP" % idx] = ip
1495 env["INSTANCE_NIC%d_MAC" % idx] = mac
1496 env["INSTANCE_NIC%d_MODE" % idx] = mode
1497 env["INSTANCE_NIC%d_LINK" % idx] = link
1499 nobj = objects.Network.FromDict(netinfo)
1500 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1502 # FIXME: broken network reference: the instance NIC specifies a
1503 # network, but the relevant network entry was not in the config. This
1504 # should be made impossible.
1505 env["INSTANCE_NIC%d_NETWORK" % idx] = net
1506 if mode == constants.NIC_MODE_BRIDGED:
1507 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1511 env["INSTANCE_NIC_COUNT"] = nic_count
1514 disk_count = len(disks)
1515 for idx, (size, mode) in enumerate(disks):
1516 env["INSTANCE_DISK%d_SIZE" % idx] = size
1517 env["INSTANCE_DISK%d_MODE" % idx] = mode
1521 env["INSTANCE_DISK_COUNT"] = disk_count
1526 env["INSTANCE_TAGS"] = " ".join(tags)
1528 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1529 for key, value in source.items():
1530 env["INSTANCE_%s_%s" % (kind, key)] = value
1535 def _NICToTuple(lu, nic):
1536 """Build a tuple of nic information.
1538 @type lu: L{LogicalUnit}
1539 @param lu: the logical unit on whose behalf we execute
1540 @type nic: L{objects.NIC}
1541 @param nic: nic to convert to hooks tuple
1546 cluster = lu.cfg.GetClusterInfo()
1547 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1548 mode = filled_params[constants.NIC_MODE]
1549 link = filled_params[constants.NIC_LINK]
1553 net_uuid = lu.cfg.LookupNetwork(net)
1555 nobj = lu.cfg.GetNetwork(net_uuid)
1556 netinfo = objects.Network.ToDict(nobj)
1557 return (ip, mac, mode, link, net, netinfo)
1560 def _NICListToTuple(lu, nics):
1561 """Build a list of nic information tuples.
1563 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1564 value in LUInstanceQueryData.
1566 @type lu: L{LogicalUnit}
1567 @param lu: the logical unit on whose behalf we execute
1568 @type nics: list of L{objects.NIC}
1569 @param nics: list of nics to convert to hooks tuples
1574 hooks_nics.append(_NICToTuple(lu, nic))
1578 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1579 """Builds instance related env variables for hooks from an object.
1581 @type lu: L{LogicalUnit}
1582 @param lu: the logical unit on whose behalf we execute
1583 @type instance: L{objects.Instance}
1584 @param instance: the instance for which we should build the
1586 @type override: dict
1587 @param override: dictionary with key/values that will override
1590 @return: the hook environment dictionary
1593 cluster = lu.cfg.GetClusterInfo()
1594 bep = cluster.FillBE(instance)
1595 hvp = cluster.FillHV(instance)
1597 "name": instance.name,
1598 "primary_node": instance.primary_node,
1599 "secondary_nodes": instance.secondary_nodes,
1600 "os_type": instance.os,
1601 "status": instance.admin_state,
1602 "maxmem": bep[constants.BE_MAXMEM],
1603 "minmem": bep[constants.BE_MINMEM],
1604 "vcpus": bep[constants.BE_VCPUS],
1605 "nics": _NICListToTuple(lu, instance.nics),
1606 "disk_template": instance.disk_template,
1607 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1610 "hypervisor_name": instance.hypervisor,
1611 "tags": instance.tags,
1614 args.update(override)
1615 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1618 def _AdjustCandidatePool(lu, exceptions):
1619 """Adjust the candidate pool after node operations.
1622 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1624 lu.LogInfo("Promoted nodes to master candidate role: %s",
1625 utils.CommaJoin(node.name for node in mod_list))
1626 for name in mod_list:
1627 lu.context.ReaddNode(name)
1628 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1630 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1634 def _DecideSelfPromotion(lu, exceptions=None):
1635 """Decide whether I should promote myself as a master candidate.
1638 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1639 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1640 # the new node will increase mc_max with one, so:
1641 mc_should = min(mc_should + 1, cp_size)
1642 return mc_now < mc_should
1645 def _ComputeViolatingInstances(ipolicy, instances):
1646 """Computes the set of instances that violate the given ipolicy.
1648 @param ipolicy: The ipolicy to verify
1649 @type instances: list of L{objects.Instance}
1650 @param instances: List of instances to verify
1651 @return: A frozenset of instance names violating the ipolicy
1654 return frozenset([inst.name for inst in instances
1655 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1658 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1659 """Check that the bridges needed by a list of nics exist.
1662 cluster = lu.cfg.GetClusterInfo()
1663 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1664 brlist = [params[constants.NIC_LINK] for params in paramslist
1665 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1667 result = lu.rpc.call_bridges_exist(target_node, brlist)
1668 result.Raise("Error checking bridges on destination node '%s'" %
1669 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1672 def _CheckInstanceBridgesExist(lu, instance, node=None):
1673 """Check that the bridges needed by an instance exist.
1677 node = instance.primary_node
1678 _CheckNicsBridgesExist(lu, instance.nics, node)
1681 def _CheckOSVariant(os_obj, name):
1682 """Check whether an OS name conforms to the os variants specification.
1684 @type os_obj: L{objects.OS}
1685 @param os_obj: OS object to check
1687 @param name: OS name passed by the user, to check for validity
1690 variant = objects.OS.GetVariant(name)
1691 if not os_obj.supported_variants:
1693 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1694 " passed)" % (os_obj.name, variant),
1698 raise errors.OpPrereqError("OS name must include a variant",
1701 if variant not in os_obj.supported_variants:
1702 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1705 def _GetNodeInstancesInner(cfg, fn):
1706 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1709 def _GetNodeInstances(cfg, node_name):
1710 """Returns a list of all primary and secondary instances on a node.
1714 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1717 def _GetNodePrimaryInstances(cfg, node_name):
1718 """Returns primary instances on a node.
1721 return _GetNodeInstancesInner(cfg,
1722 lambda inst: node_name == inst.primary_node)
1725 def _GetNodeSecondaryInstances(cfg, node_name):
1726 """Returns secondary instances on a node.
1729 return _GetNodeInstancesInner(cfg,
1730 lambda inst: node_name in inst.secondary_nodes)
1733 def _GetStorageTypeArgs(cfg, storage_type):
1734 """Returns the arguments for a storage type.
1737 # Special case for file storage
1738 if storage_type == constants.ST_FILE:
1739 # storage.FileStorage wants a list of storage directories
1740 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1745 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1748 for dev in instance.disks:
1749 cfg.SetDiskID(dev, node_name)
1751 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1753 result.Raise("Failed to get disk status from node %s" % node_name,
1754 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1756 for idx, bdev_status in enumerate(result.payload):
1757 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1763 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1764 """Check the sanity of iallocator and node arguments and use the
1765 cluster-wide iallocator if appropriate.
1767 Check that at most one of (iallocator, node) is specified. If none is
1768 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1769 then the LU's opcode's iallocator slot is filled with the cluster-wide
1772 @type iallocator_slot: string
1773 @param iallocator_slot: the name of the opcode iallocator slot
1774 @type node_slot: string
1775 @param node_slot: the name of the opcode target node slot
1778 node = getattr(lu.op, node_slot, None)
1779 ialloc = getattr(lu.op, iallocator_slot, None)
1783 if node is not None and ialloc is not None:
1784 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1786 elif ((node is None and ialloc is None) or
1787 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1788 default_iallocator = lu.cfg.GetDefaultIAllocator()
1789 if default_iallocator:
1790 setattr(lu.op, iallocator_slot, default_iallocator)
1792 raise errors.OpPrereqError("No iallocator or node given and no"
1793 " cluster-wide default iallocator found;"
1794 " please specify either an iallocator or a"
1795 " node, or set a cluster-wide default"
1796 " iallocator", errors.ECODE_INVAL)
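# Behaviour sketch (the slot names below are hypothetical): for an opcode
# with an "iallocator" and a "pnode" field, a typical call is
#
#   _CheckIAllocatorOrNode(self, "iallocator", "pnode")
#
# which fails if both are given, and fills self.op.iallocator with the
# cluster-wide default when neither is specified.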
1799 def _GetDefaultIAllocator(cfg, ialloc):
1800 """Decides on which iallocator to use.
1802 @type cfg: L{config.ConfigWriter}
1803 @param cfg: Cluster configuration object
1804 @type ialloc: string or None
1805 @param ialloc: Iallocator specified in opcode
1807 @return: Iallocator name
1811 # Use default iallocator
1812 ialloc = cfg.GetDefaultIAllocator()
1815 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1816 " opcode nor as a cluster-wide default",
1822 def _CheckHostnameSane(lu, name):
1823 """Ensures that a given hostname resolves to a 'sane' name.
1825 The given name is required to be a prefix of the resolved hostname,
1826 to prevent accidental mismatches.
1828 @param lu: the logical unit on behalf of which we're checking
1829 @param name: the name we should resolve and check
1830 @return: the resolved hostname object
1833 hostname = netutils.GetHostname(name=name)
1834 if hostname.name != name:
1835 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1836 if not utils.MatchNameComponent(name, [hostname.name]):
1837 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1838 " same as given hostname '%s'") %
1839 (hostname.name, name), errors.ECODE_INVAL)
1843 class LUClusterPostInit(LogicalUnit):
1844 """Logical unit for running hooks after cluster initialization.
1847 HPATH = "cluster-init"
1848 HTYPE = constants.HTYPE_CLUSTER
1850 def BuildHooksEnv(self):
1855 "OP_TARGET": self.cfg.GetClusterName(),
1858 def BuildHooksNodes(self):
1859 """Build hooks nodes.
1862 return ([], [self.cfg.GetMasterNode()])
1864 def Exec(self, feedback_fn):
1871 class LUClusterDestroy(LogicalUnit):
1872 """Logical unit for destroying the cluster.
1875 HPATH = "cluster-destroy"
1876 HTYPE = constants.HTYPE_CLUSTER
1878 def BuildHooksEnv(self):
1883 "OP_TARGET": self.cfg.GetClusterName(),
1886 def BuildHooksNodes(self):
1887 """Build hooks nodes.
1892 def CheckPrereq(self):
1893 """Check prerequisites.
1895 This checks whether the cluster is empty.
1897 Any errors are signaled by raising errors.OpPrereqError.
1900 master = self.cfg.GetMasterNode()
1902 nodelist = self.cfg.GetNodeList()
1903 if len(nodelist) != 1 or nodelist[0] != master:
1904 raise errors.OpPrereqError("There are still %d node(s) in"
1905 " this cluster." % (len(nodelist) - 1),
1907 instancelist = self.cfg.GetInstanceList()
1909 raise errors.OpPrereqError("There are still %d instance(s) in"
1910 " this cluster." % len(instancelist),
1913 def Exec(self, feedback_fn):
1914 """Destroys the cluster.
1917 master_params = self.cfg.GetMasterNetworkParameters()
1919 # Run post hooks on master node before it's removed
1920 _RunPostHook(self, master_params.name)
1922 ems = self.cfg.GetUseExternalMipScript()
1923 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1926 self.LogWarning("Error disabling the master IP address: %s",
1929 return master_params.name
1932 def _VerifyCertificate(filename):
1933 """Verifies a certificate for L{LUClusterVerifyConfig}.
1935 @type filename: string
1936 @param filename: Path to PEM file
1940 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1941 utils.ReadFile(filename))
1942 except Exception, err: # pylint: disable=W0703
1943 return (LUClusterVerifyConfig.ETYPE_ERROR,
1944 "Failed to load X509 certificate %s: %s" % (filename, err))
1947 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1948 constants.SSL_CERT_EXPIRATION_ERROR)
1951 fnamemsg = "While verifying %s: %s" % (filename, msg)
1956 return (None, fnamemsg)
1957 elif errcode == utils.CERT_WARNING:
1958 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1959 elif errcode == utils.CERT_ERROR:
1960 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1962 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
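# Illustrative sketch (editorial addition): callers treat the return value of
# _VerifyCertificate as an (error-type, message) pair, with None as the error
# type for a healthy certificate, mirroring its use in LUClusterVerifyConfig:
#   for cert_filename in pathutils.ALL_CERT_FILES:
#     (errcode, msg) = _VerifyCertificate(cert_filename)
#     # errcode is None, ETYPE_WARNING or ETYPE_ERROR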
1965 def _GetAllHypervisorParameters(cluster, instances):
1966 """Compute the set of all hypervisor parameters.
1968 @type cluster: L{objects.Cluster}
1969 @param cluster: the cluster object
1970 @type instances: list of L{objects.Instance}
1971 @param instances: additional instances from which to obtain parameters
1972 @rtype: list of (origin, hypervisor, parameters)
1973 @return: a list with all parameters found, indicating the hypervisor they
1974 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1979 for hv_name in cluster.enabled_hypervisors:
1980 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1982 for os_name, os_hvp in cluster.os_hvp.items():
1983 for hv_name, hv_params in os_hvp.items():
1985 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1986 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1988 # TODO: collapse identical parameter values in a single one
1989 for instance in instances:
1990 if instance.hvparams:
1991 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1992 cluster.FillHV(instance)))
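# Illustrative sketch (editorial addition): the (origin, hypervisor,
# parameters) tuples returned above are typically consumed like this, which is
# essentially what LUClusterVerifyConfig._VerifyHVP does further down:
#   for (origin, hv_name, hv_params) in _GetAllHypervisorParameters(cluster,
#                                                                   instances):
#     hypervisor.GetHypervisorClass(hv_name).CheckParameterSyntax(hv_params)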
1997 class _VerifyErrors(object):
1998 """Mix-in for cluster/group verify LUs.
2000 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2001 self.op and self._feedback_fn to be available.)
2005 ETYPE_FIELD = "code"
2006 ETYPE_ERROR = "ERROR"
2007 ETYPE_WARNING = "WARNING"
2009 def _Error(self, ecode, item, msg, *args, **kwargs):
2010 """Format an error message.
2012 Based on the opcode's error_codes parameter, either format a
2013 parseable error code, or a simpler error string.
2015 This must be called only from Exec and functions called from Exec.
2018 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2019 itype, etxt, _ = ecode
2020 # If the error code is in the list of ignored errors, demote the error to a warning
2022 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2023 ltype = self.ETYPE_WARNING
2024 # first complete the msg
2027 # then format the whole message
2028 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2029 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2035 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2036 # and finally report it via the feedback_fn
2037 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2038 # do not mark the operation as failed for warnings, only for errors
2039 if ltype == self.ETYPE_ERROR:
2042 def _ErrorIf(self, cond, *args, **kwargs):
2043 """Log an error message if the passed condition is True.
2047 or self.op.debug_simulate_errors): # pylint: disable=E1101
2048 self._Error(*args, **kwargs)
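# Illustrative sketch (editorial addition): a verify LU mixing in _VerifyErrors
# reports problems roughly as
#   self._ErrorIf(test, constants.CV_ENODERPC, node, "no data returned")
# which, when the condition holds, formats the message, demotes it to a
# warning if its code is listed in self.op.ignore_errors, reports it via
# self._feedback_fn and sets self.bad for real errors.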
2051 class LUClusterVerify(NoHooksLU):
2052 """Submits all jobs necessary to verify the cluster.
2057 def ExpandNames(self):
2058 self.needed_locks = {}
2060 def Exec(self, feedback_fn):
2063 if self.op.group_name:
2064 groups = [self.op.group_name]
2065 depends_fn = lambda: None
2067 groups = self.cfg.GetNodeGroupList()
2069 # Verify global configuration
2071 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2074 # Always depend on global verification
2075 depends_fn = lambda: [(-len(jobs), [])]
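# Note (editorial interpretation of the line above, not original commentary):
# the negative job ID in the dependency tuple is a relative reference to a job
# submitted earlier in the same submission, so each per-group verification job
# built below waits for the global configuration-verify job.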
2078 [opcodes.OpClusterVerifyGroup(group_name=group,
2079 ignore_errors=self.op.ignore_errors,
2080 depends=depends_fn())]
2081 for group in groups)
2083 # Fix up all parameters
2084 for op in itertools.chain(*jobs): # pylint: disable=W0142
2085 op.debug_simulate_errors = self.op.debug_simulate_errors
2086 op.verbose = self.op.verbose
2087 op.error_codes = self.op.error_codes
2089 op.skip_checks = self.op.skip_checks
2090 except AttributeError:
2091 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2093 return ResultWithJobs(jobs)
2096 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2097 """Verifies the cluster config.
2102 def _VerifyHVP(self, hvp_data):
2103 """Verifies locally the syntax of the hypervisor parameters.
2106 for item, hv_name, hv_params in hvp_data:
2107 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2110 hv_class = hypervisor.GetHypervisorClass(hv_name)
2111 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2112 hv_class.CheckParameterSyntax(hv_params)
2113 except errors.GenericError, err:
2114 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2116 def ExpandNames(self):
2117 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2118 self.share_locks = _ShareAll()
2120 def CheckPrereq(self):
2121 """Check prerequisites.
2124 # Retrieve all information
2125 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2126 self.all_node_info = self.cfg.GetAllNodesInfo()
2127 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2129 def Exec(self, feedback_fn):
2130 """Verify integrity of cluster, performing various test on nodes.
2134 self._feedback_fn = feedback_fn
2136 feedback_fn("* Verifying cluster config")
2138 for msg in self.cfg.VerifyConfig():
2139 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2141 feedback_fn("* Verifying cluster certificate files")
2143 for cert_filename in pathutils.ALL_CERT_FILES:
2144 (errcode, msg) = _VerifyCertificate(cert_filename)
2145 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2147 feedback_fn("* Verifying hypervisor parameters")
2149 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2150 self.all_inst_info.values()))
2152 feedback_fn("* Verifying all nodes belong to an existing group")
2154 # We do this verification here because, should this bogus circumstance
2155 # occur, it would never be caught by VerifyGroup, which only acts on
2156 # nodes/instances reachable from existing node groups.
2158 dangling_nodes = set(node.name for node in self.all_node_info.values()
2159 if node.group not in self.all_group_info)
2161 dangling_instances = {}
2162 no_node_instances = []
2164 for inst in self.all_inst_info.values():
2165 if inst.primary_node in dangling_nodes:
2166 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2167 elif inst.primary_node not in self.all_node_info:
2168 no_node_instances.append(inst.name)
2173 utils.CommaJoin(dangling_instances.get(node.name,
2175 for node in dangling_nodes]
2177 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2179 "the following nodes (and their instances) belong to a non"
2180 " existing group: %s", utils.CommaJoin(pretty_dangling))
2182 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2184 "the following instances have a non-existing primary-node:"
2185 " %s", utils.CommaJoin(no_node_instances))
2190 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2191 """Verifies the status of a node group.
2194 HPATH = "cluster-verify"
2195 HTYPE = constants.HTYPE_CLUSTER
2198 _HOOKS_INDENT_RE = re.compile("^", re.M)
2200 class NodeImage(object):
2201 """A class representing the logical and physical status of a node.
2204 @ivar name: the node name to which this object refers
2205 @ivar volumes: a structure as returned from
2206 L{ganeti.backend.GetVolumeList} (runtime)
2207 @ivar instances: a list of running instances (runtime)
2208 @ivar pinst: list of configured primary instances (config)
2209 @ivar sinst: list of configured secondary instances (config)
2210 @ivar sbp: dictionary of {primary-node: list of instances} for all
2211 instances for which this node is secondary (config)
2212 @ivar mfree: free memory, as reported by hypervisor (runtime)
2213 @ivar dfree: free disk, as reported by the node (runtime)
2214 @ivar offline: the offline status (config)
2215 @type rpc_fail: boolean
2216 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2217 not whether the individual keys were correct) (runtime)
2218 @type lvm_fail: boolean
2219 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2220 @type hyp_fail: boolean
2221 @ivar hyp_fail: whether the RPC call didn't return the instance list
2222 @type ghost: boolean
2223 @ivar ghost: whether this is a known node or not (config)
2224 @type os_fail: boolean
2225 @ivar os_fail: whether the RPC call didn't return valid OS data
2227 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2228 @type vm_capable: boolean
2229 @ivar vm_capable: whether the node can host instances
2231 @ivar pv_min: size in MiB of the smallest PVs
2233 @ivar pv_max: size in MiB of the biggest PVs
2236 def __init__(self, offline=False, name=None, vm_capable=True):
2245 self.offline = offline
2246 self.vm_capable = vm_capable
2247 self.rpc_fail = False
2248 self.lvm_fail = False
2249 self.hyp_fail = False
2251 self.os_fail = False
2256 def ExpandNames(self):
2257 # This raises errors.OpPrereqError on its own:
2258 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2260 # Get instances in node group; this is unsafe and needs verification later
2262 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2264 self.needed_locks = {
2265 locking.LEVEL_INSTANCE: inst_names,
2266 locking.LEVEL_NODEGROUP: [self.group_uuid],
2267 locking.LEVEL_NODE: [],
2269 # This opcode is run by watcher every five minutes and acquires all nodes
2270 # for a group. It doesn't run for a long time, so it's better to acquire
2271 # the node allocation lock as well.
2272 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2275 self.share_locks = _ShareAll()
2277 def DeclareLocks(self, level):
2278 if level == locking.LEVEL_NODE:
2279 # Get members of node group; this is unsafe and needs verification later
2280 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2282 all_inst_info = self.cfg.GetAllInstancesInfo()
2284 # In Exec(), we warn about mirrored instances that have primary and
2285 # secondary living in separate node groups. To fully verify that
2286 # volumes for these instances are healthy, we will need to do an
2287 # extra call to their secondaries. We ensure here those nodes will be locked.
2289 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2290 # Important: access only the instances whose lock is owned
2291 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2292 nodes.update(all_inst_info[inst].secondary_nodes)
2294 self.needed_locks[locking.LEVEL_NODE] = nodes
2296 def CheckPrereq(self):
2297 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2298 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2300 group_nodes = set(self.group_info.members)
2302 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2305 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2307 unlocked_instances = \
2308 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2311 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2312 utils.CommaJoin(unlocked_nodes),
2315 if unlocked_instances:
2316 raise errors.OpPrereqError("Missing lock for instances: %s" %
2317 utils.CommaJoin(unlocked_instances),
2320 self.all_node_info = self.cfg.GetAllNodesInfo()
2321 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2323 self.my_node_names = utils.NiceSort(group_nodes)
2324 self.my_inst_names = utils.NiceSort(group_instances)
2326 self.my_node_info = dict((name, self.all_node_info[name])
2327 for name in self.my_node_names)
2329 self.my_inst_info = dict((name, self.all_inst_info[name])
2330 for name in self.my_inst_names)
2332 # We detect here the nodes that will need the extra RPC calls for verifying
2333 # split LV volumes; they should be locked.
2334 extra_lv_nodes = set()
2336 for inst in self.my_inst_info.values():
2337 if inst.disk_template in constants.DTS_INT_MIRROR:
2338 for nname in inst.all_nodes:
2339 if self.all_node_info[nname].group != self.group_uuid:
2340 extra_lv_nodes.add(nname)
2342 unlocked_lv_nodes = \
2343 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2345 if unlocked_lv_nodes:
2346 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2347 utils.CommaJoin(unlocked_lv_nodes),
2349 self.extra_lv_nodes = list(extra_lv_nodes)
2351 def _VerifyNode(self, ninfo, nresult):
2352 """Perform some basic validation on data returned from a node.
2354 - check the result data structure is well formed and has all the mandatory fields
2356 - check ganeti version
2358 @type ninfo: L{objects.Node}
2359 @param ninfo: the node to check
2360 @param nresult: the results from the node
2362 @return: whether overall this call was successful (and we can expect
2363 reasonable values in the response)
2367 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2369 # main result, nresult should be a non-empty dict
2370 test = not nresult or not isinstance(nresult, dict)
2371 _ErrorIf(test, constants.CV_ENODERPC, node,
2372 "unable to verify node: no data returned")
2376 # compares ganeti version
2377 local_version = constants.PROTOCOL_VERSION
2378 remote_version = nresult.get("version", None)
2379 test = not (remote_version and
2380 isinstance(remote_version, (list, tuple)) and
2381 len(remote_version) == 2)
2382 _ErrorIf(test, constants.CV_ENODERPC, node,
2383 "connection to node returned invalid data")
2387 test = local_version != remote_version[0]
2388 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2389 "incompatible protocol versions: master %s,"
2390 " node %s", local_version, remote_version[0])
2394 # node seems compatible, we can actually try to look into its results
2396 # full package version
2397 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2398 constants.CV_ENODEVERSION, node,
2399 "software version mismatch: master %s, node %s",
2400 constants.RELEASE_VERSION, remote_version[1],
2401 code=self.ETYPE_WARNING)
2403 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2404 if ninfo.vm_capable and isinstance(hyp_result, dict):
2405 for hv_name, hv_result in hyp_result.iteritems():
2406 test = hv_result is not None
2407 _ErrorIf(test, constants.CV_ENODEHV, node,
2408 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2410 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2411 if ninfo.vm_capable and isinstance(hvp_result, list):
2412 for item, hv_name, hv_result in hvp_result:
2413 _ErrorIf(True, constants.CV_ENODEHV, node,
2414 "hypervisor %s parameter verify failure (source %s): %s",
2415 hv_name, item, hv_result)
2417 test = nresult.get(constants.NV_NODESETUP,
2418 ["Missing NODESETUP results"])
2419 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2424 def _VerifyNodeTime(self, ninfo, nresult,
2425 nvinfo_starttime, nvinfo_endtime):
2426 """Check the node time.
2428 @type ninfo: L{objects.Node}
2429 @param ninfo: the node to check
2430 @param nresult: the remote results for the node
2431 @param nvinfo_starttime: the start time of the RPC call
2432 @param nvinfo_endtime: the end time of the RPC call
2436 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2438 ntime = nresult.get(constants.NV_TIME, None)
2440 ntime_merged = utils.MergeTime(ntime)
2441 except (ValueError, TypeError):
2442 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2445 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2446 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2447 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2448 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2452 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2453 "Node time diverges by at least %s from master node time",
2456 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2457 """Check the node LVM results and update info for cross-node checks.
2459 @type ninfo: L{objects.Node}
2460 @param ninfo: the node to check
2461 @param nresult: the remote results for the node
2462 @param vg_name: the configured VG name
2463 @type nimg: L{NodeImage}
2464 @param nimg: node image
2471 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2473 # checks vg existence and size > 20G
2474 vglist = nresult.get(constants.NV_VGLIST, None)
2476 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2478 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2479 constants.MIN_VG_SIZE)
2480 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2483 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2485 self._Error(constants.CV_ENODELVM, node, em)
2486 if pvminmax is not None:
2487 (nimg.pv_min, nimg.pv_max) = pvminmax
2489 def _VerifyGroupLVM(self, node_image, vg_name):
2490 """Check cross-node consistency in LVM.
2492 @type node_image: dict
2493 @param node_image: info about nodes, mapping from node to names to
2494 L{NodeImage} objects
2495 @param vg_name: the configured VG name
2501 # Only exclusive storage needs this kind of check
2502 if not self._exclusive_storage:
2505 # exclusive_storage wants all PVs to have the same size (approximately);
2506 # if the smallest and the biggest ones are okay, everything is fine.
2507 # pv_min is None iff pv_max is None
2508 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2511 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2512 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2513 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2514 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2515 "PV sizes differ too much in the group; smallest (%s MB) is"
2516 " on %s, biggest (%s MB) is on %s",
2517 pvmin, minnode, pvmax, maxnode)
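# Worked example (editorial addition): with exclusive storage enabled, if node
# A reports pv_min=10240/pv_max=10240 MB and node B reports pv_min=10240/
# pv_max=20480 MB, then pvmin is 10240 (on A) and pvmax is 20480 (on B); if
# utils.LvmExclusiveTestBadPvSizes considers that spread too large, a
# CV_EGROUPDIFFERENTPVSIZE error is reported for the whole group.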
2519 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2520 """Check the node bridges.
2522 @type ninfo: L{objects.Node}
2523 @param ninfo: the node to check
2524 @param nresult: the remote results for the node
2525 @param bridges: the expected list of bridges
2532 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2534 missing = nresult.get(constants.NV_BRIDGES, None)
2535 test = not isinstance(missing, list)
2536 _ErrorIf(test, constants.CV_ENODENET, node,
2537 "did not return valid bridge information")
2539 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2540 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2542 def _VerifyNodeUserScripts(self, ninfo, nresult):
2543 """Check the results of user scripts presence and executability on the node
2545 @type ninfo: L{objects.Node}
2546 @param ninfo: the node to check
2547 @param nresult: the remote results for the node
2552 test = constants.NV_USERSCRIPTS not in nresult
2553 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2554 "did not return user scripts information")
2556 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2558 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2559 "user scripts not present or not executable: %s" %
2560 utils.CommaJoin(sorted(broken_scripts)))
2562 def _VerifyNodeNetwork(self, ninfo, nresult):
2563 """Check the node network connectivity results.
2565 @type ninfo: L{objects.Node}
2566 @param ninfo: the node to check
2567 @param nresult: the remote results for the node
2571 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2573 test = constants.NV_NODELIST not in nresult
2574 _ErrorIf(test, constants.CV_ENODESSH, node,
2575 "node hasn't returned node ssh connectivity data")
2577 if nresult[constants.NV_NODELIST]:
2578 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2579 _ErrorIf(True, constants.CV_ENODESSH, node,
2580 "ssh communication with node '%s': %s", a_node, a_msg)
2582 test = constants.NV_NODENETTEST not in nresult
2583 _ErrorIf(test, constants.CV_ENODENET, node,
2584 "node hasn't returned node tcp connectivity data")
2586 if nresult[constants.NV_NODENETTEST]:
2587 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2589 _ErrorIf(True, constants.CV_ENODENET, node,
2590 "tcp communication with node '%s': %s",
2591 anode, nresult[constants.NV_NODENETTEST][anode])
2593 test = constants.NV_MASTERIP not in nresult
2594 _ErrorIf(test, constants.CV_ENODENET, node,
2595 "node hasn't returned node master IP reachability data")
2597 if not nresult[constants.NV_MASTERIP]:
2598 if node == self.master_node:
2599 msg = "the master node cannot reach the master IP (not configured?)"
2601 msg = "cannot reach the master IP"
2602 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2604 def _VerifyInstance(self, instance, inst_config, node_image,
2606 """Verify an instance.
2608 This function checks to see if the required block devices are
2609 available on the instance's node, and that the nodes are in the correct state.
2613 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2614 pnode = inst_config.primary_node
2615 pnode_img = node_image[pnode]
2616 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2618 node_vol_should = {}
2619 inst_config.MapLVsByNode(node_vol_should)
2621 cluster = self.cfg.GetClusterInfo()
2622 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2624 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2625 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2626 code=self.ETYPE_WARNING)
2628 for node in node_vol_should:
2629 n_img = node_image[node]
2630 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2631 # ignore missing volumes on offline or broken nodes
2633 for volume in node_vol_should[node]:
2634 test = volume not in n_img.volumes
2635 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2636 "volume %s missing on node %s", volume, node)
2638 if inst_config.admin_state == constants.ADMINST_UP:
2639 test = instance not in pnode_img.instances and not pnode_img.offline
2640 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2641 "instance not running on its primary node %s",
2643 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2644 "instance is marked as running and lives on offline node %s",
2647 diskdata = [(nname, success, status, idx)
2648 for (nname, disks) in diskstatus.items()
2649 for idx, (success, status) in enumerate(disks)]
2651 for nname, success, bdev_status, idx in diskdata:
2652 # the 'ghost node' construction in Exec() ensures that we have a node_image entry for every node name
2654 snode = node_image[nname]
2655 bad_snode = snode.ghost or snode.offline
2656 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2657 not success and not bad_snode,
2658 constants.CV_EINSTANCEFAULTYDISK, instance,
2659 "couldn't retrieve status for disk/%s on %s: %s",
2660 idx, nname, bdev_status)
2661 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2662 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2663 constants.CV_EINSTANCEFAULTYDISK, instance,
2664 "disk/%s on %s is faulty", idx, nname)
2666 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2667 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2668 " primary node failed", instance)
2670 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2671 constants.CV_EINSTANCELAYOUT,
2672 instance, "instance has multiple secondary nodes: %s",
2673 utils.CommaJoin(inst_config.secondary_nodes),
2674 code=self.ETYPE_WARNING)
2676 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2677 # Disk template not compatible with exclusive_storage: no instance
2678 # node should have the flag set
2679 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2680 inst_config.all_nodes)
2681 es_nodes = [n for (n, es) in es_flags.items()
2683 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2684 "instance has template %s, which is not supported on nodes"
2685 " that have exclusive storage set: %s",
2686 inst_config.disk_template, utils.CommaJoin(es_nodes))
2688 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2689 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2690 instance_groups = {}
2692 for node in instance_nodes:
2693 instance_groups.setdefault(self.all_node_info[node].group,
2697 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2698 # Sort so that we always list the primary node first.
2699 for group, nodes in sorted(instance_groups.items(),
2700 key=lambda (_, nodes): pnode in nodes,
2703 self._ErrorIf(len(instance_groups) > 1,
2704 constants.CV_EINSTANCESPLITGROUPS,
2705 instance, "instance has primary and secondary nodes in"
2706 " different groups: %s", utils.CommaJoin(pretty_list),
2707 code=self.ETYPE_WARNING)
2709 inst_nodes_offline = []
2710 for snode in inst_config.secondary_nodes:
2711 s_img = node_image[snode]
2712 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2713 snode, "instance %s, connection to secondary node failed",
2717 inst_nodes_offline.append(snode)
2719 # warn that the instance lives on offline nodes
2720 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2721 "instance has offline secondary node(s) %s",
2722 utils.CommaJoin(inst_nodes_offline))
2723 # ... or ghost/non-vm_capable nodes
2724 for node in inst_config.all_nodes:
2725 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2726 instance, "instance lives on ghost node %s", node)
2727 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2728 instance, "instance lives on non-vm_capable node %s", node)
2730 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2731 """Verify if there are any unknown volumes in the cluster.
2733 The .os, .swap and backup volumes are ignored. All other volumes are
2734 reported as unknown.
2736 @type reserved: L{ganeti.utils.FieldSet}
2737 @param reserved: a FieldSet of reserved volume names
2740 for node, n_img in node_image.items():
2741 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2742 self.all_node_info[node].group != self.group_uuid):
2743 # skip non-healthy nodes
2745 for volume in n_img.volumes:
2746 test = ((node not in node_vol_should or
2747 volume not in node_vol_should[node]) and
2748 not reserved.Matches(volume))
2749 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2750 "volume %s is unknown", volume)
2752 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2753 """Verify N+1 Memory Resilience.
2755 Check that if one single node dies we can still start all the
2756 instances it was primary for.
2759 cluster_info = self.cfg.GetClusterInfo()
2760 for node, n_img in node_image.items():
2761 # This code checks that every node which is now listed as
2762 # secondary has enough memory to host all the instances it would have to
2763 # take over, should a single other node in the cluster fail.
2764 # FIXME: not ready for failover to an arbitrary node
2765 # FIXME: does not support file-backed instances
2766 # WARNING: we currently take into account down instances as well
2767 # as up ones, considering that even if they're down someone
2768 # might want to start them even in the event of a node failure.
2769 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2770 # we're skipping nodes marked offline and nodes in other groups from
2771 # the N+1 warning, since most likely we don't have good memory
2772 # information from them; we already list instances living on such
2773 # nodes, and that's enough warning
2775 #TODO(dynmem): also consider ballooning out other instances
2776 for prinode, instances in n_img.sbp.items():
2778 for instance in instances:
2779 bep = cluster_info.FillBE(instance_cfg[instance])
2780 if bep[constants.BE_AUTO_BALANCE]:
2781 needed_mem += bep[constants.BE_MINMEM]
2782 test = n_img.mfree < needed_mem
2783 self._ErrorIf(test, constants.CV_ENODEN1, node,
2784 "not enough memory to accomodate instance failovers"
2785 " should node %s fail (%dMiB needed, %dMiB available)",
2786 prinode, needed_mem, n_img.mfree)
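# Worked example (editorial addition): if this node is secondary for two
# auto-balanced instances whose primary is the same node, with BE_MINMEM of
# 2048 MiB and 4096 MiB, then needed_mem is 6144 MiB and a CV_ENODEN1 error is
# reported whenever the node's reported free memory (mfree) is below that.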
2789 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2790 (files_all, files_opt, files_mc, files_vm)):
2791 """Verifies file checksums collected from all nodes.
2793 @param errorif: Callback for reporting errors
2794 @param nodeinfo: List of L{objects.Node} objects
2795 @param master_node: Name of master node
2796 @param all_nvinfo: RPC results
2799 # Define functions determining which nodes to consider for a file
2802 (files_mc, lambda node: (node.master_candidate or
2803 node.name == master_node)),
2804 (files_vm, lambda node: node.vm_capable),
2807 # Build mapping from filename to list of nodes which should have the file
2809 for (files, fn) in files2nodefn:
2811 filenodes = nodeinfo
2813 filenodes = filter(fn, nodeinfo)
2814 nodefiles.update((filename,
2815 frozenset(map(operator.attrgetter("name"), filenodes)))
2816 for filename in files)
2818 assert set(nodefiles) == (files_all | files_mc | files_vm)
2820 fileinfo = dict((filename, {}) for filename in nodefiles)
2821 ignore_nodes = set()
2823 for node in nodeinfo:
2825 ignore_nodes.add(node.name)
2828 nresult = all_nvinfo[node.name]
2830 if nresult.fail_msg or not nresult.payload:
2833 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2834 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2835 for (key, value) in fingerprints.items())
2838 test = not (node_files and isinstance(node_files, dict))
2839 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2840 "Node did not return file checksum data")
2842 ignore_nodes.add(node.name)
2845 # Build per-checksum mapping from filename to nodes having it
2846 for (filename, checksum) in node_files.items():
2847 assert filename in nodefiles
2848 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2850 for (filename, checksums) in fileinfo.items():
2851 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2853 # Nodes having the file
2854 with_file = frozenset(node_name
2855 for nodes in fileinfo[filename].values()
2856 for node_name in nodes) - ignore_nodes
2858 expected_nodes = nodefiles[filename] - ignore_nodes
2860 # Nodes missing file
2861 missing_file = expected_nodes - with_file
2863 if filename in files_opt:
2865 errorif(missing_file and missing_file != expected_nodes,
2866 constants.CV_ECLUSTERFILECHECK, None,
2867 "File %s is optional, but it must exist on all or no"
2868 " nodes (not found on %s)",
2869 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2871 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2872 "File %s is missing from node(s) %s", filename,
2873 utils.CommaJoin(utils.NiceSort(missing_file)))
2875 # Warn if a node has a file it shouldn't
2876 unexpected = with_file - expected_nodes
2878 constants.CV_ECLUSTERFILECHECK, None,
2879 "File %s should not exist on node(s) %s",
2880 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2882 # See if there are multiple versions of the file
2883 test = len(checksums) > 1
2885 variants = ["variant %s on %s" %
2886 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2887 for (idx, (checksum, nodes)) in
2888 enumerate(sorted(checksums.items()))]
2892 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2893 "File %s found with %s different checksums (%s)",
2894 filename, len(checksums), "; ".join(variants))
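# Illustrative sketch (editorial addition, hypothetical values): for each
# tracked file the per-checksum mapping built above looks roughly like
#   fileinfo["/var/lib/ganeti/config.data"] = {
#     "0123abcd...": set(["node1", "node2"]),
#     "89efcdab...": set(["node3"]),
#   }
# and more than one checksum for the same file triggers the
# CV_ECLUSTERFILECHECK "different checksums" error above.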
2896 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2898 """Verifies and the node DRBD status.
2900 @type ninfo: L{objects.Node}
2901 @param ninfo: the node to check
2902 @param nresult: the remote results for the node
2903 @param instanceinfo: the dict of instances
2904 @param drbd_helper: the configured DRBD usermode helper
2905 @param drbd_map: the DRBD map as returned by
2906 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2910 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2913 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2914 test = (helper_result is None)
2915 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2916 "no drbd usermode helper returned")
2918 status, payload = helper_result
2920 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2921 "drbd usermode helper check unsuccessful: %s", payload)
2922 test = status and (payload != drbd_helper)
2923 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2924 "wrong drbd usermode helper: %s", payload)
2926 # compute the DRBD minors
2928 for minor, instance in drbd_map[node].items():
2929 test = instance not in instanceinfo
2930 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2931 "ghost instance '%s' in temporary DRBD map", instance)
2932 # ghost instance should not be running, but otherwise we
2933 # don't give double warnings (both ghost instance and
2934 # unallocated minor in use)
2936 node_drbd[minor] = (instance, False)
2938 instance = instanceinfo[instance]
2939 node_drbd[minor] = (instance.name,
2940 instance.admin_state == constants.ADMINST_UP)
2942 # and now check them
2943 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2944 test = not isinstance(used_minors, (tuple, list))
2945 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2946 "cannot parse drbd status file: %s", str(used_minors))
2948 # we cannot check drbd status
2951 for minor, (iname, must_exist) in node_drbd.items():
2952 test = minor not in used_minors and must_exist
2953 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2954 "drbd minor %d of instance %s is not active", minor, iname)
2955 for minor in used_minors:
2956 test = minor not in node_drbd
2957 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2958 "unallocated drbd minor %d is in use", minor)
2960 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2961 """Builds the node OS structures.
2963 @type ninfo: L{objects.Node}
2964 @param ninfo: the node to check
2965 @param nresult: the remote results for the node
2966 @param nimg: the node image object
2970 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2972 remote_os = nresult.get(constants.NV_OSLIST, None)
2973 test = (not isinstance(remote_os, list) or
2974 not compat.all(isinstance(v, list) and len(v) == 7
2975 for v in remote_os))
2977 _ErrorIf(test, constants.CV_ENODEOS, node,
2978 "node hasn't returned valid OS data")
2987 for (name, os_path, status, diagnose,
2988 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2990 if name not in os_dict:
2993 # parameters is a list of lists instead of list of tuples due to
2994 # JSON lacking a real tuple type, fix it:
2995 parameters = [tuple(v) for v in parameters]
2996 os_dict[name].append((os_path, status, diagnose,
2997 set(variants), set(parameters), set(api_ver)))
2999 nimg.oslist = os_dict
3001 def _VerifyNodeOS(self, ninfo, nimg, base):
3002 """Verifies the node OS list.
3004 @type ninfo: L{objects.Node}
3005 @param ninfo: the node to check
3006 @param nimg: the node image object
3007 @param base: the 'template' node we match against (e.g. from the master)
3011 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3013 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3015 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3016 for os_name, os_data in nimg.oslist.items():
3017 assert os_data, "Empty OS status for OS %s?!" % os_name
3018 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3019 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3020 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3021 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3022 "OS '%s' has multiple entries (first one shadows the rest): %s",
3023 os_name, utils.CommaJoin([v[0] for v in os_data]))
3024 # comparisons with the 'base' image
3025 test = os_name not in base.oslist
3026 _ErrorIf(test, constants.CV_ENODEOS, node,
3027 "Extra OS %s not present on reference node (%s)",
3031 assert base.oslist[os_name], "Base node has empty OS status?"
3032 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3034 # base OS is invalid, skipping
3036 for kind, a, b in [("API version", f_api, b_api),
3037 ("variants list", f_var, b_var),
3038 ("parameters", beautify_params(f_param),
3039 beautify_params(b_param))]:
3040 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3041 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3042 kind, os_name, base.name,
3043 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3045 # check any missing OSes
3046 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3047 _ErrorIf(missing, constants.CV_ENODEOS, node,
3048 "OSes present on reference node %s but missing on this node: %s",
3049 base.name, utils.CommaJoin(missing))
3051 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3052 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3054 @type ninfo: L{objects.Node}
3055 @param ninfo: the node to check
3056 @param nresult: the remote results for the node
3057 @type is_master: bool
3058 @param is_master: Whether node is the master node
3064 (constants.ENABLE_FILE_STORAGE or
3065 constants.ENABLE_SHARED_FILE_STORAGE)):
3067 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3069 # This should never happen
3070 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3071 "Node did not return forbidden file storage paths")
3073 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3074 "Found forbidden file storage paths: %s",
3075 utils.CommaJoin(fspaths))
3077 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3078 constants.CV_ENODEFILESTORAGEPATHS, node,
3079 "Node should not have returned forbidden file storage"
3082 def _VerifyOob(self, ninfo, nresult):
3083 """Verifies out of band functionality of a node.
3085 @type ninfo: L{objects.Node}
3086 @param ninfo: the node to check
3087 @param nresult: the remote results for the node
3091 # We just have to verify the paths on master and/or master candidates
3092 # as the oob helper is invoked on the master
3093 if ((ninfo.master_candidate or ninfo.master_capable) and
3094 constants.NV_OOB_PATHS in nresult):
3095 for path_result in nresult[constants.NV_OOB_PATHS]:
3096 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3098 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3099 """Verifies and updates the node volume data.
3101 This function will update a L{NodeImage}'s internal structures
3102 with data from the remote call.
3104 @type ninfo: L{objects.Node}
3105 @param ninfo: the node to check
3106 @param nresult: the remote results for the node
3107 @param nimg: the node image object
3108 @param vg_name: the configured VG name
3112 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3114 nimg.lvm_fail = True
3115 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3118 elif isinstance(lvdata, basestring):
3119 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3120 utils.SafeEncode(lvdata))
3121 elif not isinstance(lvdata, dict):
3122 _ErrorIf(True, constants.CV_ENODELVM, node,
3123 "rpc call to node failed (lvlist)")
3125 nimg.volumes = lvdata
3126 nimg.lvm_fail = False
3128 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3129 """Verifies and updates the node instance list.
3131 If the listing was successful, then updates this node's instance
3132 list. Otherwise, it marks the RPC call as failed for the instance list.
3135 @type ninfo: L{objects.Node}
3136 @param ninfo: the node to check
3137 @param nresult: the remote results for the node
3138 @param nimg: the node image object
3141 idata = nresult.get(constants.NV_INSTANCELIST, None)
3142 test = not isinstance(idata, list)
3143 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3144 "rpc call to node failed (instancelist): %s",
3145 utils.SafeEncode(str(idata)))
3147 nimg.hyp_fail = True
3149 nimg.instances = idata
3151 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3152 """Verifies and computes a node information map
3154 @type ninfo: L{objects.Node}
3155 @param ninfo: the node to check
3156 @param nresult: the remote results for the node
3157 @param nimg: the node image object
3158 @param vg_name: the configured VG name
3162 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3164 # try to read free memory (from the hypervisor)
3165 hv_info = nresult.get(constants.NV_HVINFO, None)
3166 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3167 _ErrorIf(test, constants.CV_ENODEHV, node,
3168 "rpc call to node failed (hvinfo)")
3171 nimg.mfree = int(hv_info["memory_free"])
3172 except (ValueError, TypeError):
3173 _ErrorIf(True, constants.CV_ENODERPC, node,
3174 "node returned invalid nodeinfo, check hypervisor")
3176 # FIXME: devise a free space model for file based instances as well
3177 if vg_name is not None:
3178 test = (constants.NV_VGLIST not in nresult or
3179 vg_name not in nresult[constants.NV_VGLIST])
3180 _ErrorIf(test, constants.CV_ENODELVM, node,
3181 "node didn't return data for the volume group '%s'"
3182 " - it is either missing or broken", vg_name)
3185 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3186 except (ValueError, TypeError):
3187 _ErrorIf(True, constants.CV_ENODERPC, node,
3188 "node returned invalid LVM info, check LVM status")
3190 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3191 """Gets per-disk status information for all instances.
3193 @type nodelist: list of strings
3194 @param nodelist: Node names
3195 @type node_image: dict of (name, L{objects.Node})
3196 @param node_image: Node objects
3197 @type instanceinfo: dict of (name, L{objects.Instance})
3198 @param instanceinfo: Instance objects
3199 @rtype: {instance: {node: [(success, payload)]}}
3200 @return: a dictionary of per-instance dictionaries with nodes as
3201 keys and disk information as values; the disk information is a
3202 list of tuples (success, payload)
3205 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3208 node_disks_devonly = {}
3209 diskless_instances = set()
3210 diskless = constants.DT_DISKLESS
3212 for nname in nodelist:
3213 node_instances = list(itertools.chain(node_image[nname].pinst,
3214 node_image[nname].sinst))
3215 diskless_instances.update(inst for inst in node_instances
3216 if instanceinfo[inst].disk_template == diskless)
3217 disks = [(inst, disk)
3218 for inst in node_instances
3219 for disk in instanceinfo[inst].disks]
3222 # No need to collect data
3225 node_disks[nname] = disks
3227 # _AnnotateDiskParams already makes copies of the disks
3229 for (inst, dev) in disks:
3230 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3231 self.cfg.SetDiskID(anno_disk, nname)
3232 devonly.append(anno_disk)
3234 node_disks_devonly[nname] = devonly
3236 assert len(node_disks) == len(node_disks_devonly)
3238 # Collect data from all nodes with disks
3239 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3242 assert len(result) == len(node_disks)
3246 for (nname, nres) in result.items():
3247 disks = node_disks[nname]
3250 # No data from this node
3251 data = len(disks) * [(False, "node offline")]
3254 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3255 "while getting disk information: %s", msg)
3257 # No data from this node
3258 data = len(disks) * [(False, msg)]
3261 for idx, i in enumerate(nres.payload):
3262 if isinstance(i, (tuple, list)) and len(i) == 2:
3265 logging.warning("Invalid result from node %s, entry %d: %s",
3267 data.append((False, "Invalid result from the remote node"))
3269 for ((inst, _), status) in zip(disks, data):
3270 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3272 # Add empty entries for diskless instances.
3273 for inst in diskless_instances:
3274 assert inst not in instdisk
3277 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3278 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3279 compat.all(isinstance(s, (tuple, list)) and
3280 len(s) == 2 for s in statuses)
3281 for inst, nnames in instdisk.items()
3282 for nname, statuses in nnames.items())
3284 instdisk_keys = set(instdisk)
3285 instanceinfo_keys = set(instanceinfo)
3286 assert instdisk_keys == instanceinfo_keys, \
3287 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3288 (instdisk_keys, instanceinfo_keys))
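# Illustrative sketch (editorial addition, hypothetical names): the mapping
# returned by _CollectDiskInfo has one entry per instance, e.g.
#   instdisk["inst1"] = {
#     "node1": [(True, status_disk0), (True, status_disk1)],
#     "node2": [(False, "node offline"), (False, "node offline")],
#   }
# with one (success, payload) pair per configured disk and per node.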
3293 def _SshNodeSelector(group_uuid, all_nodes):
3294 """Create endless iterators for all potential SSH check hosts.
3297 nodes = [node for node in all_nodes
3298 if (node.group != group_uuid and
3300 keyfunc = operator.attrgetter("group")
3302 return map(itertools.cycle,
3303 [sorted(map(operator.attrgetter("name"), names))
3304 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3308 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3309 """Choose which nodes should talk to which other nodes.
3311 We will make nodes contact all nodes in their group, and one node from every other group.
3314 @warning: This algorithm has a known issue if one node group is much
3315 smaller than others (e.g. just one node). In such a case all other
3316 nodes will talk to the single node.
3319 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3320 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3322 return (online_nodes,
3323 dict((name, sorted([i.next() for i in sel]))
3324 for name in online_nodes))
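# Illustrative sketch (editorial addition, hypothetical node names): with the
# verified group G1 = {n1, n2} and another group G2 = {n3, n4}, the mapping
# returned above asks every online node of G1 to contact one node from each
# other group, e.g. {"n1": ["n3"], "n2": ["n4"]}, cycling through the other
# groups' members so the checks are spread.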
3326 def BuildHooksEnv(self):
3329 Cluster-Verify hooks are run only in the post phase; their failure is
3330 logged in the verify output and makes the verification fail.
3334 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3337 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3338 for node in self.my_node_info.values())
3342 def BuildHooksNodes(self):
3343 """Build hooks nodes.
3346 return ([], self.my_node_names)
3348 def Exec(self, feedback_fn):
3349 """Verify integrity of the node group, performing various test on nodes.
3352 # This method has too many local variables. pylint: disable=R0914
3353 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3355 if not self.my_node_names:
3357 feedback_fn("* Empty node group, skipping verification")
3361 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3362 verbose = self.op.verbose
3363 self._feedback_fn = feedback_fn
3365 vg_name = self.cfg.GetVGName()
3366 drbd_helper = self.cfg.GetDRBDHelper()
3367 cluster = self.cfg.GetClusterInfo()
3368 hypervisors = cluster.enabled_hypervisors
3369 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3371 i_non_redundant = [] # Non redundant instances
3372 i_non_a_balanced = [] # Non auto-balanced instances
3373 i_offline = 0 # Count of offline instances
3374 n_offline = 0 # Count of offline nodes
3375 n_drained = 0 # Count of nodes being drained
3376 node_vol_should = {}
3378 # FIXME: verify OS list
3381 filemap = _ComputeAncillaryFiles(cluster, False)
3383 # do local checksums
3384 master_node = self.master_node = self.cfg.GetMasterNode()
3385 master_ip = self.cfg.GetMasterIP()
3387 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3390 if self.cfg.GetUseExternalMipScript():
3391 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3393 node_verify_param = {
3394 constants.NV_FILELIST:
3395 map(vcluster.MakeVirtualPath,
3396 utils.UniqueSequence(filename
3397 for files in filemap
3398 for filename in files)),
3399 constants.NV_NODELIST:
3400 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3401 self.all_node_info.values()),
3402 constants.NV_HYPERVISOR: hypervisors,
3403 constants.NV_HVPARAMS:
3404 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3405 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3406 for node in node_data_list
3407 if not node.offline],
3408 constants.NV_INSTANCELIST: hypervisors,
3409 constants.NV_VERSION: None,
3410 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3411 constants.NV_NODESETUP: None,
3412 constants.NV_TIME: None,
3413 constants.NV_MASTERIP: (master_node, master_ip),
3414 constants.NV_OSLIST: None,
3415 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3416 constants.NV_USERSCRIPTS: user_scripts,
3419 if vg_name is not None:
3420 node_verify_param[constants.NV_VGLIST] = None
3421 node_verify_param[constants.NV_LVLIST] = vg_name
3422 node_verify_param[constants.NV_PVLIST] = [vg_name]
3425 node_verify_param[constants.NV_DRBDLIST] = None
3426 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3428 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3429 # Load file storage paths only from master node
3430 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3433 # FIXME: this needs to be changed per node-group, not cluster-wide
3435 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3436 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3437 bridges.add(default_nicpp[constants.NIC_LINK])
3438 for instance in self.my_inst_info.values():
3439 for nic in instance.nics:
3440 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3441 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3442 bridges.add(full_nic[constants.NIC_LINK])
3445 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3447 # Build our expected cluster state
3448 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3450 vm_capable=node.vm_capable))
3451 for node in node_data_list)
3455 for node in self.all_node_info.values():
3456 path = _SupportsOob(self.cfg, node)
3457 if path and path not in oob_paths:
3458 oob_paths.append(path)
3461 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3463 for instance in self.my_inst_names:
3464 inst_config = self.my_inst_info[instance]
3465 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3468 for nname in inst_config.all_nodes:
3469 if nname not in node_image:
3470 gnode = self.NodeImage(name=nname)
3471 gnode.ghost = (nname not in self.all_node_info)
3472 node_image[nname] = gnode
3474 inst_config.MapLVsByNode(node_vol_should)
3476 pnode = inst_config.primary_node
3477 node_image[pnode].pinst.append(instance)
3479 for snode in inst_config.secondary_nodes:
3480 nimg = node_image[snode]
3481 nimg.sinst.append(instance)
3482 if pnode not in nimg.sbp:
3483 nimg.sbp[pnode] = []
3484 nimg.sbp[pnode].append(instance)
3486 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3488 # The value of exclusive_storage should be the same across the group, so if
3489 # it's True for at least one node, we act as if it were set for all the nodes
3490 self._exclusive_storage = compat.any(es_flags.values())
3491 if self._exclusive_storage:
3492 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3493 es_unset_nodes = [n for (n, es) in es_flags.items()
3497 self._Error(constants.CV_EGROUPMIXEDESFLAG, self.group_info.name,
3498 "The exclusive_storage flag should be uniform in a group,"
3499 " but these nodes have it unset: %s",
3500 utils.CommaJoin(utils.NiceSort(es_unset_nodes)))
3501 self.LogWarning("Some checks required by exclusive storage will be"
3502 " performed also on nodes with the flag unset")
3504 # At this point, we have the in-memory data structures complete,
3505 # except for the runtime information, which we'll gather next
3507 # Due to the way our RPC system works, exact response times cannot be
3508 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3509 # time before and after executing the request, we can at least have a time window.
3511 nvinfo_starttime = time.time()
3512 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3514 self.cfg.GetClusterName())
3515 nvinfo_endtime = time.time()
3517 if self.extra_lv_nodes and vg_name is not None:
3519 self.rpc.call_node_verify(self.extra_lv_nodes,
3520 {constants.NV_LVLIST: vg_name},
3521 self.cfg.GetClusterName())
3523 extra_lv_nvinfo = {}
3525 all_drbd_map = self.cfg.ComputeDRBDMap()
3527 feedback_fn("* Gathering disk information (%s nodes)" %
3528 len(self.my_node_names))
3529 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3532 feedback_fn("* Verifying configuration file consistency")
3534 # If not all nodes are being checked, we need to make sure the master node
3535 # and a non-checked vm_capable node are in the list.
3536 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3538 vf_nvinfo = all_nvinfo.copy()
3539 vf_node_info = list(self.my_node_info.values())
3540 additional_nodes = []
3541 if master_node not in self.my_node_info:
3542 additional_nodes.append(master_node)
3543 vf_node_info.append(self.all_node_info[master_node])
3544 # Add the first vm_capable node we find which is not included,
3545 # excluding the master node (which we already have)
3546 for node in absent_nodes:
3547 nodeinfo = self.all_node_info[node]
3548 if (nodeinfo.vm_capable and not nodeinfo.offline and
3549 node != master_node):
3550 additional_nodes.append(node)
3551 vf_node_info.append(self.all_node_info[node])
3553 key = constants.NV_FILELIST
3554 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3555 {key: node_verify_param[key]},
3556 self.cfg.GetClusterName()))
3558 vf_nvinfo = all_nvinfo
3559 vf_node_info = self.my_node_info.values()
3561 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3563 feedback_fn("* Verifying node status")
3567 for node_i in node_data_list:
3569 nimg = node_image[node]
3573 feedback_fn("* Skipping offline node %s" % (node,))
3577 if node == master_node:
3579 elif node_i.master_candidate:
3580 ntype = "master candidate"
3581 elif node_i.drained:
3587 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3589 msg = all_nvinfo[node].fail_msg
3590 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3593 nimg.rpc_fail = True
3596 nresult = all_nvinfo[node].payload
3598 nimg.call_ok = self._VerifyNode(node_i, nresult)
3599 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3600 self._VerifyNodeNetwork(node_i, nresult)
3601 self._VerifyNodeUserScripts(node_i, nresult)
3602 self._VerifyOob(node_i, nresult)
3603 self._VerifyFileStoragePaths(node_i, nresult,
3604 node == master_node)
3607 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3608 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3611 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3612 self._UpdateNodeInstances(node_i, nresult, nimg)
3613 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3614 self._UpdateNodeOS(node_i, nresult, nimg)
3616 if not nimg.os_fail:
3617 if refos_img is None:
3619 self._VerifyNodeOS(node_i, nimg, refos_img)
3620 self._VerifyNodeBridges(node_i, nresult, bridges)
3622 # Check whether all running instances are primary for the node. (This
3623 # can no longer be done from _VerifyInstance below, since some of the
3624 # wrong instances could be from other node groups.)
3625 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3627 for inst in non_primary_inst:
3628 test = inst in self.all_inst_info
3629 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3630 "instance should not run on node %s", node_i.name)
3631 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3632 "node is running unknown instance %s", inst)
3634 self._VerifyGroupLVM(node_image, vg_name)
3636 for node, result in extra_lv_nvinfo.items():
3637 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3638 node_image[node], vg_name)
3640 feedback_fn("* Verifying instance status")
3641 for instance in self.my_inst_names:
3643 feedback_fn("* Verifying instance %s" % instance)
3644 inst_config = self.my_inst_info[instance]
3645 self._VerifyInstance(instance, inst_config, node_image,
3648 # If the instance is non-redundant we cannot survive losing its primary
3649 # node, so we are not N+1 compliant.
3650 if inst_config.disk_template not in constants.DTS_MIRRORED:
3651 i_non_redundant.append(instance)
3653 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3654 i_non_a_balanced.append(instance)
3656 feedback_fn("* Verifying orphan volumes")
3657 reserved = utils.FieldSet(*cluster.reserved_lvs)
3659 # We will get spurious "unknown volume" warnings if any node of this group
3660 # is secondary for an instance whose primary is in another group. To avoid
3661 # them, we find these instances and add their volumes to node_vol_should.
3662 for inst in self.all_inst_info.values():
3663 for secondary in inst.secondary_nodes:
3664 if (secondary in self.my_node_info
3665 and inst.name not in self.my_inst_info):
3666 inst.MapLVsByNode(node_vol_should)
3669 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3671 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3672 feedback_fn("* Verifying N+1 Memory redundancy")
3673 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3675 feedback_fn("* Other Notes")
3677 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3678 % len(i_non_redundant))
3680 if i_non_a_balanced:
3681 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3682 % len(i_non_a_balanced))
3685 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3688 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3691 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3695 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3696 """Analyze the post-hooks' result
3698 This method analyses the hook result, handles it, and sends some
3699 nicely-formatted feedback back to the user.
3701 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3702 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3703 @param hooks_results: the results of the multi-node hooks rpc call
3704 @param feedback_fn: function used to send feedback back to the caller
3705 @param lu_result: previous Exec result
3706 @return: the new Exec result, based on the previous result
3710 # We only really run POST phase hooks, only for non-empty groups,
3711 # and are only interested in their results
3712 if not self.my_node_names:
3715 elif phase == constants.HOOKS_PHASE_POST:
3716 # Used to change hooks' output to proper indentation
3717 feedback_fn("* Hooks Results")
3718 assert hooks_results, "invalid result from hooks"
3720 for node_name in hooks_results:
3721 res = hooks_results[node_name]
3723 test = msg and not res.offline
3724 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3725 "Communication failure in hooks execution: %s", msg)
3726 if res.offline or msg:
3727 # No need to investigate payload if node is offline or gave
3730 for script, hkr, output in res.payload:
3731 test = hkr == constants.HKR_FAIL
3732 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3733 "Script %s failed, output:", script)
3735 output = self._HOOKS_INDENT_RE.sub(" ", output)
3736 feedback_fn("%s" % output)
3742 class LUClusterVerifyDisks(NoHooksLU):
3743 """Verifies the cluster disks status.
3748 def ExpandNames(self):
3749 self.share_locks = _ShareAll()
3750 self.needed_locks = {
3751 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3754 def Exec(self, feedback_fn):
3755 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3757 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3758 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3759 for group in group_names])
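# Illustrative sketch (hypothetical group names): with two node groups named
# "default" and "storage", the Exec method above would return roughly
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])
# i.e. one single-opcode job per node group.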
3762 class LUGroupVerifyDisks(NoHooksLU):
3763 """Verifies the status of all disks in a node group.
3768 def ExpandNames(self):
3769 # Raises errors.OpPrereqError on its own if group can't be found
3770 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3772 self.share_locks = _ShareAll()
3773 self.needed_locks = {
3774 locking.LEVEL_INSTANCE: [],
3775 locking.LEVEL_NODEGROUP: [],
3776 locking.LEVEL_NODE: [],
3778 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3779 # starts one instance of this opcode for every group, which means all
3780 # nodes will be locked for a short amount of time, so it's better to
3781 # acquire the node allocation lock as well.
3782 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3785 def DeclareLocks(self, level):
3786 if level == locking.LEVEL_INSTANCE:
3787 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3789 # Lock instances optimistically, needs verification once node and group
3790 # locks have been acquired
3791 self.needed_locks[locking.LEVEL_INSTANCE] = \
3792 self.cfg.GetNodeGroupInstances(self.group_uuid)
3794 elif level == locking.LEVEL_NODEGROUP:
3795 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3797 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3798 set([self.group_uuid] +
3799 # Lock all groups used by instances optimistically; this requires
3800 # going via the node before it's locked, requiring verification
3803 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3804 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3806 elif level == locking.LEVEL_NODE:
3807 # This will only lock the nodes in the group to be verified which contain
3809 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3810 self._LockInstancesNodes()
3812 # Lock all nodes in group to be verified
3813 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3814 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3815 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3817 def CheckPrereq(self):
3818 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3819 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3820 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3822 assert self.group_uuid in owned_groups
3824 # Check if locked instances are still correct
3825 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3827 # Get instance information
3828 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3830 # Check if node groups for locked instances are still correct
3831 _CheckInstancesNodeGroups(self.cfg, self.instances,
3832 owned_groups, owned_nodes, self.group_uuid)
3834 def Exec(self, feedback_fn):
3835 """Verify integrity of cluster disks.
3837 @rtype: tuple of three items
3838 @return: a tuple of (dict of node-to-node_error, list of instances
3839 which need activate-disks, dict of instance: (node, volume) for
3844 res_instances = set()
3847 nv_dict = _MapInstanceDisksToNodes(
3848 [inst for inst in self.instances.values()
3849 if inst.admin_state == constants.ADMINST_UP])
3852 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3853 set(self.cfg.GetVmCapableNodeList()))
3855 node_lvs = self.rpc.call_lv_list(nodes, [])
3857 for (node, node_res) in node_lvs.items():
3858 if node_res.offline:
3861 msg = node_res.fail_msg
3863 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3864 res_nodes[node] = msg
3867 for lv_name, (_, _, lv_online) in node_res.payload.items():
3868 inst = nv_dict.pop((node, lv_name), None)
3869 if not (lv_online or inst is None):
3870 res_instances.add(inst)
3872 # any leftover items in nv_dict are missing LVs, let's arrange the data
3874 for key, inst in nv_dict.iteritems():
3875 res_missing.setdefault(inst, []).append(list(key))
3877 return (res_nodes, list(res_instances), res_missing)
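# Illustrative return value of LUGroupVerifyDisks.Exec (hypothetical node,
# instance and LV names), matching the tuple described in the docstring above:
#   ({"node2.example.com": "Error enumerating LVs: ..."},    # per-node errors
#    ["instance1"],                                          # need activate-disks
#    {"instance2": [["node3.example.com", "xenvg/disk0"]]})  # missing LVs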
3880 class LUClusterRepairDiskSizes(NoHooksLU):
3881 """Verifies the cluster disks sizes.
3886 def ExpandNames(self):
3887 if self.op.instances:
3888 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3889 # Not getting the node allocation lock as only a specific set of
3890 # instances (and their nodes) is going to be acquired
3891 self.needed_locks = {
3892 locking.LEVEL_NODE_RES: [],
3893 locking.LEVEL_INSTANCE: self.wanted_names,
3895 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3897 self.wanted_names = None
3898 self.needed_locks = {
3899 locking.LEVEL_NODE_RES: locking.ALL_SET,
3900 locking.LEVEL_INSTANCE: locking.ALL_SET,
3902 # This opcode acquires the node locks for all instances
3903 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3906 self.share_locks = {
3907 locking.LEVEL_NODE_RES: 1,
3908 locking.LEVEL_INSTANCE: 0,
3909 locking.LEVEL_NODE_ALLOC: 1,
3912 def DeclareLocks(self, level):
3913 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3914 self._LockInstancesNodes(primary_only=True, level=level)
3916 def CheckPrereq(self):
3917 """Check prerequisites.
3919 This only checks the optional instance list against the existing names.
3922 if self.wanted_names is None:
3923 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3925 self.wanted_instances = \
3926 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3928 def _EnsureChildSizes(self, disk):
3929 """Ensure children of the disk have the needed disk size.
3931 This is valid mainly for DRBD8 and fixes an issue where the
3932 children have a smaller disk size.
3934 @param disk: an L{ganeti.objects.Disk} object
3937 if disk.dev_type == constants.LD_DRBD8:
3938 assert disk.children, "Empty children for DRBD8?"
3939 fchild = disk.children[0]
3940 mismatch = fchild.size < disk.size
3942 self.LogInfo("Child disk has size %d, parent %d, fixing",
3943 fchild.size, disk.size)
3944 fchild.size = disk.size
3946 # and we recurse on this child only, not on the metadev
3947 return self._EnsureChildSizes(fchild) or mismatch
3951 def Exec(self, feedback_fn):
3952 """Verify the size of cluster disks.
3955 # TODO: check child disks too
3956 # TODO: check differences in size between primary/secondary nodes
3958 for instance in self.wanted_instances:
3959 pnode = instance.primary_node
3960 if pnode not in per_node_disks:
3961 per_node_disks[pnode] = []
3962 for idx, disk in enumerate(instance.disks):
3963 per_node_disks[pnode].append((instance, idx, disk))
3965 assert not (frozenset(per_node_disks.keys()) -
3966 self.owned_locks(locking.LEVEL_NODE_RES)), \
3967 "Not owning correct locks"
3968 assert not self.owned_locks(locking.LEVEL_NODE)
3971 for node, dskl in per_node_disks.items():
3972 newl = [v[2].Copy() for v in dskl]
3974 self.cfg.SetDiskID(dsk, node)
3975 result = self.rpc.call_blockdev_getsize(node, newl)
3977 self.LogWarning("Failure in blockdev_getsize call to node"
3978 " %s, ignoring", node)
3980 if len(result.payload) != len(dskl):
3981 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3982 " result.payload=%s", node, len(dskl), result.payload)
3983 self.LogWarning("Invalid result from node %s, ignoring node results",
3986 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3988 self.LogWarning("Disk %d of instance %s did not return size"
3989 " information, ignoring", idx, instance.name)
3991 if not isinstance(size, (int, long)):
3992 self.LogWarning("Disk %d of instance %s did not return valid"
3993 " size information, ignoring", idx, instance.name)
3996 if size != disk.size:
3997 self.LogInfo("Disk %d of instance %s has mismatched size,"
3998 " correcting: recorded %d, actual %d", idx,
3999 instance.name, disk.size, size)
4001 self.cfg.Update(instance, feedback_fn)
4002 changed.append((instance.name, idx, size))
4003 if self._EnsureChildSizes(disk):
4004 self.cfg.Update(instance, feedback_fn)
4005 changed.append((instance.name, idx, disk.size))
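# Illustrative shape of the "changed" list returned by Exec (hypothetical
# values): [("instance1", 0, 20480)], i.e. one (instance name, disk index,
# updated size) tuple per corrected disk.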
4009 class LUClusterRename(LogicalUnit):
4010 """Rename the cluster.
4013 HPATH = "cluster-rename"
4014 HTYPE = constants.HTYPE_CLUSTER
4016 def BuildHooksEnv(self):
4021 "OP_TARGET": self.cfg.GetClusterName(),
4022 "NEW_NAME": self.op.name,
4025 def BuildHooksNodes(self):
4026 """Build hooks nodes.
4029 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4031 def CheckPrereq(self):
4032 """Verify that the passed name is a valid one.
4035 hostname = netutils.GetHostname(name=self.op.name,
4036 family=self.cfg.GetPrimaryIPFamily())
4038 new_name = hostname.name
4039 self.ip = new_ip = hostname.ip
4040 old_name = self.cfg.GetClusterName()
4041 old_ip = self.cfg.GetMasterIP()
4042 if new_name == old_name and new_ip == old_ip:
4043 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4044 " cluster has changed",
4046 if new_ip != old_ip:
4047 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4048 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4049 " reachable on the network" %
4050 new_ip, errors.ECODE_NOTUNIQUE)
4052 self.op.name = new_name
4054 def Exec(self, feedback_fn):
4055 """Rename the cluster.
4058 clustername = self.op.name
4061 # shut down the master IP
4062 master_params = self.cfg.GetMasterNetworkParameters()
4063 ems = self.cfg.GetUseExternalMipScript()
4064 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4066 result.Raise("Could not disable the master role")
4069 cluster = self.cfg.GetClusterInfo()
4070 cluster.cluster_name = clustername
4071 cluster.master_ip = new_ip
4072 self.cfg.Update(cluster, feedback_fn)
4074 # update the known hosts file
4075 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4076 node_list = self.cfg.GetOnlineNodeList()
4078 node_list.remove(master_params.name)
4081 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4083 master_params.ip = new_ip
4084 result = self.rpc.call_node_activate_master_ip(master_params.name,
4086 msg = result.fail_msg
4088 self.LogWarning("Could not re-enable the master role on"
4089 " the master, please restart manually: %s", msg)
4094 def _ValidateNetmask(cfg, netmask):
4095 """Checks if a netmask is valid.
4097 @type cfg: L{config.ConfigWriter}
4098 @param cfg: The cluster configuration
4100 @param netmask: the netmask to be verified
4101 @raise errors.OpPrereqError: if the validation fails
4104 ip_family = cfg.GetPrimaryIPFamily()
4106 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4107 except errors.ProgrammerError:
4108 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4109 ip_family, errors.ECODE_INVAL)
4110 if not ipcls.ValidateNetmask(netmask):
4111 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4112 (netmask), errors.ECODE_INVAL)
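# Illustrative use of _ValidateNetmask (hypothetical values): on a cluster
# whose primary IP family is IPv4, a CIDR prefix length such as 24 is
# accepted, while a value outside the valid range (e.g. 99) raises
# errors.OpPrereqError.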
4115 class LUClusterSetParams(LogicalUnit):
4116 """Change the parameters of the cluster.
4119 HPATH = "cluster-modify"
4120 HTYPE = constants.HTYPE_CLUSTER
4123 def CheckArguments(self):
4127 if self.op.uid_pool:
4128 uidpool.CheckUidPool(self.op.uid_pool)
4130 if self.op.add_uids:
4131 uidpool.CheckUidPool(self.op.add_uids)
4133 if self.op.remove_uids:
4134 uidpool.CheckUidPool(self.op.remove_uids)
4136 if self.op.master_netmask is not None:
4137 _ValidateNetmask(self.cfg, self.op.master_netmask)
4139 if self.op.diskparams:
4140 for dt_params in self.op.diskparams.values():
4141 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4143 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4144 except errors.OpPrereqError, err:
4145 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
4148 def ExpandNames(self):
4149 # FIXME: in the future maybe other cluster params won't require checking on
4150 # all nodes to be modified.
4151 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4152 # resource locks the right thing, shouldn't it be the BGL instead?
4153 self.needed_locks = {
4154 locking.LEVEL_NODE: locking.ALL_SET,
4155 locking.LEVEL_INSTANCE: locking.ALL_SET,
4156 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4157 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4159 self.share_locks = _ShareAll()
4161 def BuildHooksEnv(self):
4166 "OP_TARGET": self.cfg.GetClusterName(),
4167 "NEW_VG_NAME": self.op.vg_name,
4170 def BuildHooksNodes(self):
4171 """Build hooks nodes.
4174 mn = self.cfg.GetMasterNode()
4177 def CheckPrereq(self):
4178 """Check prerequisites.
4180 This checks that the given parameters don't conflict and that
4181 the given volume group is valid.
4184 if self.op.vg_name is not None and not self.op.vg_name:
4185 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4186 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4187 " instances exist", errors.ECODE_INVAL)
4189 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4190 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4191 raise errors.OpPrereqError("Cannot disable drbd helper while"
4192 " drbd-based instances exist",
4195 node_list = self.owned_locks(locking.LEVEL_NODE)
4197 # if vg_name is not None, check the given volume group on all nodes
4199 vglist = self.rpc.call_vg_list(node_list)
4200 for node in node_list:
4201 msg = vglist[node].fail_msg
4203 # ignoring down node
4204 self.LogWarning("Error while gathering data on node %s"
4205 " (ignoring node): %s", node, msg)
4207 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4209 constants.MIN_VG_SIZE)
4211 raise errors.OpPrereqError("Error on node '%s': %s" %
4212 (node, vgstatus), errors.ECODE_ENVIRON)
4214 if self.op.drbd_helper:
4215 # check the given drbd helper on all nodes
4216 helpers = self.rpc.call_drbd_helper(node_list)
4217 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4219 self.LogInfo("Not checking drbd helper on offline node %s", node)
4221 msg = helpers[node].fail_msg
4223 raise errors.OpPrereqError("Error checking drbd helper on node"
4224 " '%s': %s" % (node, msg),
4225 errors.ECODE_ENVIRON)
4226 node_helper = helpers[node].payload
4227 if node_helper != self.op.drbd_helper:
4228 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4229 (node, node_helper), errors.ECODE_ENVIRON)
4231 self.cluster = cluster = self.cfg.GetClusterInfo()
4232 # validate params changes
4233 if self.op.beparams:
4234 objects.UpgradeBeParams(self.op.beparams)
4235 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4236 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4238 if self.op.ndparams:
4239 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4240 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4242 # TODO: we need a more general way to handle resetting
4243 # cluster-level parameters to default values
4244 if self.new_ndparams["oob_program"] == "":
4245 self.new_ndparams["oob_program"] = \
4246 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4248 if self.op.hv_state:
4249 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4250 self.cluster.hv_state_static)
4251 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4252 for hv, values in new_hv_state.items())
4254 if self.op.disk_state:
4255 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4256 self.cluster.disk_state_static)
4257 self.new_disk_state = \
4258 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4259 for name, values in svalues.items()))
4260 for storage, svalues in new_disk_state.items())
4263 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4266 all_instances = self.cfg.GetAllInstancesInfo().values()
4268 for group in self.cfg.GetAllNodeGroupsInfo().values():
4269 instances = frozenset([inst for inst in all_instances
4270 if compat.any(node in group.members
4271 for node in inst.all_nodes)])
4272 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4273 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4274 new = _ComputeNewInstanceViolations(ipol,
4275 new_ipolicy, instances)
4277 violations.update(new)
4280 self.LogWarning("After the ipolicy change the following instances"
4281 " violate them: %s",
4282 utils.CommaJoin(utils.NiceSort(violations)))
4284 if self.op.nicparams:
4285 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4286 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4287 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4290 # check all instances for consistency
4291 for instance in self.cfg.GetAllInstancesInfo().values():
4292 for nic_idx, nic in enumerate(instance.nics):
4293 params_copy = copy.deepcopy(nic.nicparams)
4294 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4296 # check parameter syntax
4298 objects.NIC.CheckParameterSyntax(params_filled)
4299 except errors.ConfigurationError, err:
4300 nic_errors.append("Instance %s, nic/%d: %s" %
4301 (instance.name, nic_idx, err))
4303 # if we're moving instances to routed, check that they have an ip
4304 target_mode = params_filled[constants.NIC_MODE]
4305 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4306 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4307 " address" % (instance.name, nic_idx))
4309 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4310 "\n".join(nic_errors), errors.ECODE_INVAL)
4312 # hypervisor list/parameters
4313 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4314 if self.op.hvparams:
4315 for hv_name, hv_dict in self.op.hvparams.items():
4316 if hv_name not in self.new_hvparams:
4317 self.new_hvparams[hv_name] = hv_dict
4319 self.new_hvparams[hv_name].update(hv_dict)
4321 # disk template parameters
4322 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4323 if self.op.diskparams:
4324 for dt_name, dt_params in self.op.diskparams.items():
4325 if dt_name not in self.new_diskparams:
4326 self.new_diskparams[dt_name] = dt_params
4328 self.new_diskparams[dt_name].update(dt_params)
4330 # os hypervisor parameters
4331 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4333 for os_name, hvs in self.op.os_hvp.items():
4334 if os_name not in self.new_os_hvp:
4335 self.new_os_hvp[os_name] = hvs
4337 for hv_name, hv_dict in hvs.items():
4339 # Delete if it exists
4340 self.new_os_hvp[os_name].pop(hv_name, None)
4341 elif hv_name not in self.new_os_hvp[os_name]:
4342 self.new_os_hvp[os_name][hv_name] = hv_dict
4344 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4347 self.new_osp = objects.FillDict(cluster.osparams, {})
4348 if self.op.osparams:
4349 for os_name, osp in self.op.osparams.items():
4350 if os_name not in self.new_osp:
4351 self.new_osp[os_name] = {}
4353 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4356 if not self.new_osp[os_name]:
4357 # we removed all parameters
4358 del self.new_osp[os_name]
4360 # check the parameter validity (remote check)
4361 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4362 os_name, self.new_osp[os_name])
4364 # changes to the hypervisor list
4365 if self.op.enabled_hypervisors is not None:
4366 self.hv_list = self.op.enabled_hypervisors
4367 for hv in self.hv_list:
4368 # if the hypervisor doesn't already exist in the cluster
4369 # hvparams, we initialize it to empty, and then (in both
4370 # cases) we make sure to fill the defaults, as we might not
4371 # have a complete defaults list if the hypervisor wasn't
4373 if hv not in new_hvp:
4375 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4376 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4378 self.hv_list = cluster.enabled_hypervisors
4380 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4381 # either the enabled list has changed, or the parameters have, validate
4382 for hv_name, hv_params in self.new_hvparams.items():
4383 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4384 (self.op.enabled_hypervisors and
4385 hv_name in self.op.enabled_hypervisors)):
4386 # either this is a new hypervisor, or its parameters have changed
4387 hv_class = hypervisor.GetHypervisorClass(hv_name)
4388 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4389 hv_class.CheckParameterSyntax(hv_params)
4390 _CheckHVParams(self, node_list, hv_name, hv_params)
4393 # no need to check any newly-enabled hypervisors, since the
4394 # defaults have already been checked in the above code-block
4395 for os_name, os_hvp in self.new_os_hvp.items():
4396 for hv_name, hv_params in os_hvp.items():
4397 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4398 # we need to fill in the new os_hvp on top of the actual hv_p
4399 cluster_defaults = self.new_hvparams.get(hv_name, {})
4400 new_osp = objects.FillDict(cluster_defaults, hv_params)
4401 hv_class = hypervisor.GetHypervisorClass(hv_name)
4402 hv_class.CheckParameterSyntax(new_osp)
4403 _CheckHVParams(self, node_list, hv_name, new_osp)
4405 if self.op.default_iallocator:
4406 alloc_script = utils.FindFile(self.op.default_iallocator,
4407 constants.IALLOCATOR_SEARCH_PATH,
4409 if alloc_script is None:
4410 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4411 " specified" % self.op.default_iallocator,
4414 def Exec(self, feedback_fn):
4415 """Change the parameters of the cluster.
4418 if self.op.vg_name is not None:
4419 new_volume = self.op.vg_name
4422 if new_volume != self.cfg.GetVGName():
4423 self.cfg.SetVGName(new_volume)
4425 feedback_fn("Cluster LVM configuration already in desired"
4426 " state, not changing")
4427 if self.op.drbd_helper is not None:
4428 new_helper = self.op.drbd_helper
4431 if new_helper != self.cfg.GetDRBDHelper():
4432 self.cfg.SetDRBDHelper(new_helper)
4434 feedback_fn("Cluster DRBD helper already in desired state,"
4436 if self.op.hvparams:
4437 self.cluster.hvparams = self.new_hvparams
4439 self.cluster.os_hvp = self.new_os_hvp
4440 if self.op.enabled_hypervisors is not None:
4441 self.cluster.hvparams = self.new_hvparams
4442 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4443 if self.op.beparams:
4444 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4445 if self.op.nicparams:
4446 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4448 self.cluster.ipolicy = self.new_ipolicy
4449 if self.op.osparams:
4450 self.cluster.osparams = self.new_osp
4451 if self.op.ndparams:
4452 self.cluster.ndparams = self.new_ndparams
4453 if self.op.diskparams:
4454 self.cluster.diskparams = self.new_diskparams
4455 if self.op.hv_state:
4456 self.cluster.hv_state_static = self.new_hv_state
4457 if self.op.disk_state:
4458 self.cluster.disk_state_static = self.new_disk_state
4460 if self.op.candidate_pool_size is not None:
4461 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4462 # we need to update the pool size here, otherwise the save will fail
4463 _AdjustCandidatePool(self, [])
4465 if self.op.maintain_node_health is not None:
4466 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4467 feedback_fn("Note: CONFD was disabled at build time, node health"
4468 " maintenance is not useful (still enabling it)")
4469 self.cluster.maintain_node_health = self.op.maintain_node_health
4471 if self.op.prealloc_wipe_disks is not None:
4472 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4474 if self.op.add_uids is not None:
4475 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4477 if self.op.remove_uids is not None:
4478 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4480 if self.op.uid_pool is not None:
4481 self.cluster.uid_pool = self.op.uid_pool
4483 if self.op.default_iallocator is not None:
4484 self.cluster.default_iallocator = self.op.default_iallocator
4486 if self.op.reserved_lvs is not None:
4487 self.cluster.reserved_lvs = self.op.reserved_lvs
4489 if self.op.use_external_mip_script is not None:
4490 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4492 def helper_os(aname, mods, desc):
4494 lst = getattr(self.cluster, aname)
4495 for key, val in mods:
4496 if key == constants.DDM_ADD:
4498 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4501 elif key == constants.DDM_REMOVE:
4505 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4507 raise errors.ProgrammerError("Invalid modification '%s'" % key)
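# Illustrative modification list as handled by helper_os above (hypothetical
# OS names): [(constants.DDM_ADD, "lenny-image"),
# (constants.DDM_REMOVE, "old-image")] adds one OS to the list and removes
# another.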
4509 if self.op.hidden_os:
4510 helper_os("hidden_os", self.op.hidden_os, "hidden")
4512 if self.op.blacklisted_os:
4513 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4515 if self.op.master_netdev:
4516 master_params = self.cfg.GetMasterNetworkParameters()
4517 ems = self.cfg.GetUseExternalMipScript()
4518 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4519 self.cluster.master_netdev)
4520 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4522 result.Raise("Could not disable the master ip")
4523 feedback_fn("Changing master_netdev from %s to %s" %
4524 (master_params.netdev, self.op.master_netdev))
4525 self.cluster.master_netdev = self.op.master_netdev
4527 if self.op.master_netmask:
4528 master_params = self.cfg.GetMasterNetworkParameters()
4529 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4530 result = self.rpc.call_node_change_master_netmask(master_params.name,
4531 master_params.netmask,
4532 self.op.master_netmask,
4534 master_params.netdev)
4536 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4539 self.cluster.master_netmask = self.op.master_netmask
4541 self.cfg.Update(self.cluster, feedback_fn)
4543 if self.op.master_netdev:
4544 master_params = self.cfg.GetMasterNetworkParameters()
4545 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4546 self.op.master_netdev)
4547 ems = self.cfg.GetUseExternalMipScript()
4548 result = self.rpc.call_node_activate_master_ip(master_params.name,
4551 self.LogWarning("Could not re-enable the master ip on"
4552 " the master, please restart manually: %s",
4556 def _UploadHelper(lu, nodes, fname):
4557 """Helper for uploading a file and showing warnings.
4560 if os.path.exists(fname):
4561 result = lu.rpc.call_upload_file(nodes, fname)
4562 for to_node, to_result in result.items():
4563 msg = to_result.fail_msg
4565 msg = ("Copy of file %s to node %s failed: %s" %
4566 (fname, to_node, msg))
4570 def _ComputeAncillaryFiles(cluster, redist):
4571 """Compute files external to Ganeti which need to be consistent.
4573 @type redist: boolean
4574 @param redist: Whether to include files which need to be redistributed
4577 # Compute files for all nodes
4579 pathutils.SSH_KNOWN_HOSTS_FILE,
4580 pathutils.CONFD_HMAC_KEY,
4581 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4582 pathutils.SPICE_CERT_FILE,
4583 pathutils.SPICE_CACERT_FILE,
4584 pathutils.RAPI_USERS_FILE,
4588 # we need to ship at least the RAPI certificate
4589 files_all.add(pathutils.RAPI_CERT_FILE)
4591 files_all.update(pathutils.ALL_CERT_FILES)
4592 files_all.update(ssconf.SimpleStore().GetFileList())
4594 if cluster.modify_etc_hosts:
4595 files_all.add(pathutils.ETC_HOSTS)
4597 if cluster.use_external_mip_script:
4598 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4600 # Files which are optional; these must:
4601 # - be present in one other category as well
4602 # - either exist or not exist on all nodes of that category (mc, vm all)
4604 pathutils.RAPI_USERS_FILE,
4607 # Files which should only be on master candidates
4611 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4615 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4616 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4617 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4619 # Files which should only be on VM-capable nodes
4622 for hv_name in cluster.enabled_hypervisors
4624 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4628 for hv_name in cluster.enabled_hypervisors
4630 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4632 # Filenames in each category must be unique
4633 all_files_set = files_all | files_mc | files_vm
4634 assert (len(all_files_set) ==
4635 sum(map(len, [files_all, files_mc, files_vm]))), \
4636 "Found file listed in more than one file list"
4638 # Optional files must be present in one other category
4639 assert all_files_set.issuperset(files_opt), \
4640 "Optional file not in a different required list"
4642 # This one file should never ever be re-distributed via RPC
4643 assert not (redist and
4644 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4646 return (files_all, files_opt, files_mc, files_vm)
4649 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4650 """Distribute additional files which are part of the cluster configuration.
4652 ConfigWriter takes care of distributing the config and ssconf files, but
4653 there are more files which should be distributed to all nodes. This function
4654 makes sure those are copied.
4656 @param lu: calling logical unit
4657 @param additional_nodes: list of nodes not in the config to distribute to
4658 @type additional_vm: boolean
4659 @param additional_vm: whether the additional nodes are vm-capable or not
4662 # Gather target nodes
4663 cluster = lu.cfg.GetClusterInfo()
4664 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4666 online_nodes = lu.cfg.GetOnlineNodeList()
4667 online_set = frozenset(online_nodes)
4668 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4670 if additional_nodes is not None:
4671 online_nodes.extend(additional_nodes)
4673 vm_nodes.extend(additional_nodes)
4675 # Never distribute to master node
4676 for nodelist in [online_nodes, vm_nodes]:
4677 if master_info.name in nodelist:
4678 nodelist.remove(master_info.name)
4681 (files_all, _, files_mc, files_vm) = \
4682 _ComputeAncillaryFiles(cluster, True)
4684 # Never re-distribute the configuration file from here
4685 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4686 pathutils.CLUSTER_CONF_FILE in files_vm)
4687 assert not files_mc, "Master candidates not handled in this function"
4690 (online_nodes, files_all),
4691 (vm_nodes, files_vm),
4695 for (node_list, files) in filemap:
4697 _UploadHelper(lu, node_list, fname)
4700 class LUClusterRedistConf(NoHooksLU):
4701 """Force the redistribution of cluster configuration.
4703 This is a very simple LU.
4708 def ExpandNames(self):
4709 self.needed_locks = {
4710 locking.LEVEL_NODE: locking.ALL_SET,
4711 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4713 self.share_locks = _ShareAll()
4715 def Exec(self, feedback_fn):
4716 """Redistribute the configuration.
4719 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4720 _RedistributeAncillaryFiles(self)
4723 class LUClusterActivateMasterIp(NoHooksLU):
4724 """Activate the master IP on the master node.
4727 def Exec(self, feedback_fn):
4728 """Activate the master IP.
4731 master_params = self.cfg.GetMasterNetworkParameters()
4732 ems = self.cfg.GetUseExternalMipScript()
4733 result = self.rpc.call_node_activate_master_ip(master_params.name,
4735 result.Raise("Could not activate the master IP")
4738 class LUClusterDeactivateMasterIp(NoHooksLU):
4739 """Deactivate the master IP on the master node.
4742 def Exec(self, feedback_fn):
4743 """Deactivate the master IP.
4746 master_params = self.cfg.GetMasterNetworkParameters()
4747 ems = self.cfg.GetUseExternalMipScript()
4748 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4750 result.Raise("Could not deactivate the master IP")
4753 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4754 """Sleep and poll for an instance's disk to sync.
4757 if not instance.disks or disks is not None and not disks:
4760 disks = _ExpandCheckDisks(instance, disks)
4763 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4765 node = instance.primary_node
4768 lu.cfg.SetDiskID(dev, node)
4770 # TODO: Convert to utils.Retry
4773 degr_retries = 10 # in seconds, as we sleep 1 second each time
4777 cumul_degraded = False
4778 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4779 msg = rstats.fail_msg
4781 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4784 raise errors.RemoteError("Can't contact node %s for mirror data,"
4785 " aborting." % node)
4788 rstats = rstats.payload
4790 for i, mstat in enumerate(rstats):
4792 lu.LogWarning("Can't compute data for node %s/%s",
4793 node, disks[i].iv_name)
4796 cumul_degraded = (cumul_degraded or
4797 (mstat.is_degraded and mstat.sync_percent is None))
4798 if mstat.sync_percent is not None:
4800 if mstat.estimated_time is not None:
4801 rem_time = ("%s remaining (estimated)" %
4802 utils.FormatSeconds(mstat.estimated_time))
4803 max_time = mstat.estimated_time
4805 rem_time = "no time estimate"
4806 lu.LogInfo("- device %s: %5.2f%% done, %s",
4807 disks[i].iv_name, mstat.sync_percent, rem_time)
4809 # if we're done but degraded, let's do a few small retries, to
4810 # make sure we see a stable and not transient situation; therefore
4811 # we force a restart of the loop
4812 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4813 logging.info("Degraded disks found, %d retries left", degr_retries)
4821 time.sleep(min(60, max_time))
4824 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4826 return not cumul_degraded
4829 def _BlockdevFind(lu, node, dev, instance):
4830 """Wrapper around call_blockdev_find to annotate diskparams.
4832 @param lu: A reference to the lu object
4833 @param node: The node to call out
4834 @param dev: The device to find
4835 @param instance: The instance object the device belongs to
4836 @returns The result of the rpc call
4839 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4840 return lu.rpc.call_blockdev_find(node, disk)
4843 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4844 """Wrapper around L{_CheckDiskConsistencyInner}.
4847 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4848 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4852 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4854 """Check that mirrors are not degraded.
4856 @attention: The device has to be annotated already.
4858 The ldisk parameter, if True, will change the test from the
4859 is_degraded attribute (which represents overall non-ok status for
4860 the device(s)) to the ldisk (representing the local storage status).
4863 lu.cfg.SetDiskID(dev, node)
4867 if on_primary or dev.AssembleOnSecondary():
4868 rstats = lu.rpc.call_blockdev_find(node, dev)
4869 msg = rstats.fail_msg
4871 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4873 elif not rstats.payload:
4874 lu.LogWarning("Can't find disk on node %s", node)
4878 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4880 result = result and not rstats.payload.is_degraded
4883 for child in dev.children:
4884 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4890 class LUOobCommand(NoHooksLU):
4891 """Logical unit for OOB handling.
4895 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4897 def ExpandNames(self):
4898 """Gather locks we need.
4901 if self.op.node_names:
4902 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4903 lock_names = self.op.node_names
4905 lock_names = locking.ALL_SET
4907 self.needed_locks = {
4908 locking.LEVEL_NODE: lock_names,
4911 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4913 if not self.op.node_names:
4914 # Acquire node allocation lock only if all nodes are affected
4915 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4917 def CheckPrereq(self):
4918 """Check prerequisites.
4921 - the node exists in the configuration
4924 Any errors are signaled by raising errors.OpPrereqError.
4928 self.master_node = self.cfg.GetMasterNode()
4930 assert self.op.power_delay >= 0.0
4932 if self.op.node_names:
4933 if (self.op.command in self._SKIP_MASTER and
4934 self.master_node in self.op.node_names):
4935 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4936 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4938 if master_oob_handler:
4939 additional_text = ("run '%s %s %s' if you want to operate on the"
4940 " master regardless") % (master_oob_handler,
4944 additional_text = "it does not support out-of-band operations"
4946 raise errors.OpPrereqError(("Operating on the master node %s is not"
4947 " allowed for %s; %s") %
4948 (self.master_node, self.op.command,
4949 additional_text), errors.ECODE_INVAL)
4951 self.op.node_names = self.cfg.GetNodeList()
4952 if self.op.command in self._SKIP_MASTER:
4953 self.op.node_names.remove(self.master_node)
4955 if self.op.command in self._SKIP_MASTER:
4956 assert self.master_node not in self.op.node_names
4958 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4960 raise errors.OpPrereqError("Node %s not found" % node_name,
4963 self.nodes.append(node)
4965 if (not self.op.ignore_status and
4966 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4967 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4968 " not marked offline") % node_name,
4971 def Exec(self, feedback_fn):
4972 """Execute OOB and return result if we expect any.
4975 master_node = self.master_node
4978 for idx, node in enumerate(utils.NiceSort(self.nodes,
4979 key=lambda node: node.name)):
4980 node_entry = [(constants.RS_NORMAL, node.name)]
4981 ret.append(node_entry)
4983 oob_program = _SupportsOob(self.cfg, node)
4986 node_entry.append((constants.RS_UNAVAIL, None))
4989 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4990 self.op.command, oob_program, node.name)
4991 result = self.rpc.call_run_oob(master_node, oob_program,
4992 self.op.command, node.name,
4996 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4997 node.name, result.fail_msg)
4998 node_entry.append((constants.RS_NODATA, None))
5001 self._CheckPayload(result)
5002 except errors.OpExecError, err:
5003 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5005 node_entry.append((constants.RS_NODATA, None))
5007 if self.op.command == constants.OOB_HEALTH:
5008 # For health we should log important events
5009 for item, status in result.payload:
5010 if status in [constants.OOB_STATUS_WARNING,
5011 constants.OOB_STATUS_CRITICAL]:
5012 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5013 item, node.name, status)
5015 if self.op.command == constants.OOB_POWER_ON:
5017 elif self.op.command == constants.OOB_POWER_OFF:
5018 node.powered = False
5019 elif self.op.command == constants.OOB_POWER_STATUS:
5020 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5021 if powered != node.powered:
5022 logging.warning(("Recorded power state (%s) of node '%s' does not"
5023 " match actual power state (%s)"), node.powered,
5026 # For configuration changing commands we should update the node
5027 if self.op.command in (constants.OOB_POWER_ON,
5028 constants.OOB_POWER_OFF):
5029 self.cfg.Update(node, feedback_fn)
5031 node_entry.append((constants.RS_NORMAL, result.payload))
5033 if (self.op.command == constants.OOB_POWER_ON and
5034 idx < len(self.nodes) - 1):
5035 time.sleep(self.op.power_delay)
5039 def _CheckPayload(self, result):
5040 """Checks if the payload is valid.
5042 @param result: RPC result
5043 @raises errors.OpExecError: If payload is not valid
5047 if self.op.command == constants.OOB_HEALTH:
5048 if not isinstance(result.payload, list):
5049 errs.append("command 'health' is expected to return a list but got %s" %
5050 type(result.payload))
5052 for item, status in result.payload:
5053 if status not in constants.OOB_STATUSES:
5054 errs.append("health item '%s' has invalid status '%s'" %
5057 if self.op.command == constants.OOB_POWER_STATUS:
5058 if not isinstance(result.payload, dict):
5059 errs.append("power-status is expected to return a dict but got %s" %
5060 type(result.payload))
5062 if self.op.command in [
5063 constants.OOB_POWER_ON,
5064 constants.OOB_POWER_OFF,
5065 constants.OOB_POWER_CYCLE,
5067 if result.payload is not None:
5068 errs.append("%s is expected to not return payload but got '%s'" %
5069 (self.op.command, result.payload))
5072 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5073 utils.CommaJoin(errs))
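# Illustrative payloads accepted by _CheckPayload above (hypothetical data):
#   OOB_HEALTH:       a list of (item, status) pairs, e.g.
#                     [("temperature", constants.OOB_STATUS_WARNING)]
#   OOB_POWER_STATUS: a dict, e.g. {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE: no payload at all (None)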
5076 class _OsQuery(_QueryBase):
5077 FIELDS = query.OS_FIELDS
5079 def ExpandNames(self, lu):
5080 # Lock all nodes in shared mode
5081 # Temporary removal of locks, should be reverted later
5082 # TODO: reintroduce locks when they are lighter-weight
5083 lu.needed_locks = {}
5084 #self.share_locks[locking.LEVEL_NODE] = 1
5085 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5087 # The following variables interact with _QueryBase._GetNames
5089 self.wanted = self.names
5091 self.wanted = locking.ALL_SET
5093 self.do_locking = self.use_locking
5095 def DeclareLocks(self, lu, level):
5099 def _DiagnoseByOS(rlist):
5100 """Remaps a per-node return list into an a per-os per-node dictionary
5102 @param rlist: a map with node names as keys and OS objects as values
5105 @return: a dictionary with osnames as keys and as value another
5106 map, with nodes as keys and tuples of (path, status, diagnose,
5107 variants, parameters, api_versions) as values, eg::
5109 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5110 (/srv/..., False, "invalid api")],
5111 "node2": [(/srv/..., True, "", [], [])]}
5116 # we build here the list of nodes that didn't fail the RPC (at RPC
5117 # level), so that nodes with a non-responding node daemon don't
5118 # make all OSes invalid
5119 good_nodes = [node_name for node_name in rlist
5120 if not rlist[node_name].fail_msg]
5121 for node_name, nr in rlist.items():
5122 if nr.fail_msg or not nr.payload:
5124 for (name, path, status, diagnose, variants,
5125 params, api_versions) in nr.payload:
5126 if name not in all_os:
5127 # build a list of nodes for this os containing empty lists
5128 # for each node in node_list
5130 for nname in good_nodes:
5131 all_os[name][nname] = []
5132 # convert params from [name, help] to (name, help)
5133 params = [tuple(v) for v in params]
5134 all_os[name][node_name].append((path, status, diagnose,
5135 variants, params, api_versions))
5138 def _GetQueryData(self, lu):
5139 """Computes the list of nodes and their attributes.
5142 # Locking is not used
5143 assert not (compat.any(lu.glm.is_owned(level)
5144 for level in locking.LEVELS
5145 if level != locking.LEVEL_CLUSTER) or
5146 self.do_locking or self.use_locking)
5148 valid_nodes = [node.name
5149 for node in lu.cfg.GetAllNodesInfo().values()
5150 if not node.offline and node.vm_capable]
5151 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5152 cluster = lu.cfg.GetClusterInfo()
5156 for (os_name, os_data) in pol.items():
5157 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5158 hidden=(os_name in cluster.hidden_os),
5159 blacklisted=(os_name in cluster.blacklisted_os))
5163 api_versions = set()
5165 for idx, osl in enumerate(os_data.values()):
5166 info.valid = bool(info.valid and osl and osl[0][1])
5170 (node_variants, node_params, node_api) = osl[0][3:6]
5173 variants.update(node_variants)
5174 parameters.update(node_params)
5175 api_versions.update(node_api)
5177 # Filter out inconsistent values
5178 variants.intersection_update(node_variants)
5179 parameters.intersection_update(node_params)
5180 api_versions.intersection_update(node_api)
5182 info.variants = list(variants)
5183 info.parameters = list(parameters)
5184 info.api_versions = list(api_versions)
5186 data[os_name] = info
5188 # Prepare data in requested order
5189 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5193 class LUOsDiagnose(NoHooksLU):
5194 """Logical unit for OS diagnose/query.
5200 def _BuildFilter(fields, names):
5201 """Builds a filter for querying OSes.
5204 name_filter = qlang.MakeSimpleFilter("name", names)
5206 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5207 # respective field is not requested
5208 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5209 for fname in ["hidden", "blacklisted"]
5210 if fname not in fields]
5211 if "valid" not in fields:
5212 status_filter.append([qlang.OP_TRUE, "valid"])
5215 status_filter.insert(0, qlang.OP_AND)
5217 status_filter = None
5219 if name_filter and status_filter:
5220 return [qlang.OP_AND, name_filter, status_filter]
5224 return status_filter
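# Illustrative filter built by _BuildFilter (hypothetical input): for
# fields=["name"] and names=["lenny-image"] the result is roughly
#   [qlang.OP_AND,
#    <simple name filter for "lenny-image">,
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]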
5226 def CheckArguments(self):
5227 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5228 self.op.output_fields, False)
5230 def ExpandNames(self):
5231 self.oq.ExpandNames(self)
5233 def Exec(self, feedback_fn):
5234 return self.oq.OldStyleQuery(self)
5237 class _ExtStorageQuery(_QueryBase):
5238 FIELDS = query.EXTSTORAGE_FIELDS
5240 def ExpandNames(self, lu):
5241 # Lock all nodes in shared mode
5242 # Temporary removal of locks, should be reverted later
5243 # TODO: reintroduce locks when they are lighter-weight
5244 lu.needed_locks = {}
5245 #self.share_locks[locking.LEVEL_NODE] = 1
5246 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5248 # The following variables interact with _QueryBase._GetNames
5250 self.wanted = self.names
5252 self.wanted = locking.ALL_SET
5254 self.do_locking = self.use_locking
5256 def DeclareLocks(self, lu, level):
5260 def _DiagnoseByProvider(rlist):
5261 """Remaps a per-node return list into an a per-provider per-node dictionary
5263 @param rlist: a map with node names as keys and ExtStorage objects as values
5266 @return: a dictionary with extstorage providers as keys and as
5267 value another map, with nodes as keys and tuples of
5268 (path, status, diagnose, parameters) as values, eg::
5270 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5271 "node2": [(/srv/..., False, "missing file")]
5272 "node3": [(/srv/..., True, "", [])]
5277 # we build here the list of nodes that didn't fail the RPC (at RPC
5278 # level), so that nodes with a non-responding node daemon don't
5279 # make all providers invalid
5280 good_nodes = [node_name for node_name in rlist
5281 if not rlist[node_name].fail_msg]
5282 for node_name, nr in rlist.items():
5283 if nr.fail_msg or not nr.payload:
5285 for (name, path, status, diagnose, params) in nr.payload:
5286 if name not in all_es:
5287 # build a list of nodes for this provider containing empty lists
5288 # for each node in node_list
5290 for nname in good_nodes:
5291 all_es[name][nname] = []
5292 # convert params from [name, help] to (name, help)
5293 params = [tuple(v) for v in params]
5294 all_es[name][node_name].append((path, status, diagnose, params))
5297 def _GetQueryData(self, lu):
5298 """Computes the list of nodes and their attributes.
5301 # Locking is not used
5302 assert not (compat.any(lu.glm.is_owned(level)
5303 for level in locking.LEVELS
5304 if level != locking.LEVEL_CLUSTER) or
5305 self.do_locking or self.use_locking)
5307 valid_nodes = [node.name
5308 for node in lu.cfg.GetAllNodesInfo().values()
5309 if not node.offline and node.vm_capable]
5310 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5314 nodegroup_list = lu.cfg.GetNodeGroupList()
5316 for (es_name, es_data) in pol.items():
5317 # For every provider compute the nodegroup validity.
5318 # To do this we need to check the validity of each node in es_data
5319 # and then construct the corresponding nodegroup dict:
5320 # { nodegroup1: status
5321 # nodegroup2: status
5324 for nodegroup in nodegroup_list:
5325 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5327 nodegroup_nodes = ndgrp.members
5328 nodegroup_name = ndgrp.name
5331 for node in nodegroup_nodes:
5332 if node in valid_nodes:
5333 if es_data[node] != []:
5334 node_status = es_data[node][0][1]
5335 node_statuses.append(node_status)
5337 node_statuses.append(False)
5339 if False in node_statuses:
5340 ndgrp_data[nodegroup_name] = False
5342 ndgrp_data[nodegroup_name] = True
5344 # Compute the provider's parameters
5346 for idx, esl in enumerate(es_data.values()):
5347 valid = bool(esl and esl[0][1])
5351 node_params = esl[0][3]
5354 parameters.update(node_params)
5356 # Filter out inconsistent values
5357 parameters.intersection_update(node_params)
5359 params = list(parameters)
5361 # Now fill all the info for this provider
5362 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5363 nodegroup_status=ndgrp_data,
5366 data[es_name] = info
5368 # Prepare data in requested order
5369 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5373 class LUExtStorageDiagnose(NoHooksLU):
5374 """Logical unit for ExtStorage diagnose/query.
5379 def CheckArguments(self):
5380 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5381 self.op.output_fields, False)
5383 def ExpandNames(self):
5384 self.eq.ExpandNames(self)
5386 def Exec(self, feedback_fn):
5387 return self.eq.OldStyleQuery(self)
5390 class LUNodeRemove(LogicalUnit):
5391 """Logical unit for removing a node.
5394 HPATH = "node-remove"
5395 HTYPE = constants.HTYPE_NODE
5397 def BuildHooksEnv(self):
5402 "OP_TARGET": self.op.node_name,
5403 "NODE_NAME": self.op.node_name,
5406 def BuildHooksNodes(self):
5407 """Build hooks nodes.
5409 This doesn't run on the target node in the pre phase as a failed
5410 node would then be impossible to remove.
5413 all_nodes = self.cfg.GetNodeList()
5415 all_nodes.remove(self.op.node_name)
5418 return (all_nodes, all_nodes)
5420 def CheckPrereq(self):
5421 """Check prerequisites.
5424 - the node exists in the configuration
5425 - it does not have primary or secondary instances
5426 - it's not the master
5428 Any errors are signaled by raising errors.OpPrereqError.
5431 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5432 node = self.cfg.GetNodeInfo(self.op.node_name)
5433 assert node is not None
5435 masternode = self.cfg.GetMasterNode()
5436 if node.name == masternode:
5437 raise errors.OpPrereqError("Node is the master node, failover to another"
5438 " node is required", errors.ECODE_INVAL)
5440 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5441 if node.name in instance.all_nodes:
5442 raise errors.OpPrereqError("Instance %s is still running on the node,"
5443 " please remove first" % instance_name,
5445 self.op.node_name = node.name
5448 def Exec(self, feedback_fn):
5449 """Removes the node from the cluster.
5453 logging.info("Stopping the node daemon and removing configs from node %s",
5456 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5458 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5461 # Promote nodes to master candidate as needed
5462 _AdjustCandidatePool(self, exceptions=[node.name])
5463 self.context.RemoveNode(node.name)
5465 # Run post hooks on the node before it's removed
5466 _RunPostHook(self, node.name)
5468 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5469 msg = result.fail_msg
5471 self.LogWarning("Errors encountered on the remote node while leaving"
5472 " the cluster: %s", msg)
5474 # Remove node from our /etc/hosts
5475 if self.cfg.GetClusterInfo().modify_etc_hosts:
5476 master_node = self.cfg.GetMasterNode()
5477 result = self.rpc.call_etc_hosts_modify(master_node,
5478 constants.ETC_HOSTS_REMOVE,
5480 result.Raise("Can't update hosts file with new host data")
5481 _RedistributeAncillaryFiles(self)
5484 class _NodeQuery(_QueryBase):
5485 FIELDS = query.NODE_FIELDS
5487 def ExpandNames(self, lu):
5488 lu.needed_locks = {}
5489 lu.share_locks = _ShareAll()
5492 self.wanted = _GetWantedNodes(lu, self.names)
5494 self.wanted = locking.ALL_SET
5496 self.do_locking = (self.use_locking and
5497 query.NQ_LIVE in self.requested_data)
5500 # If any non-static field is requested we need to lock the nodes
5501 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5502 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5504 def DeclareLocks(self, lu, level):
5507 def _GetQueryData(self, lu):
5508 """Computes the list of nodes and their attributes.
5511 all_info = lu.cfg.GetAllNodesInfo()
5513 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5515 # Gather data as requested
5516 if query.NQ_LIVE in self.requested_data:
5517 # filter out non-vm_capable nodes
5518 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5520 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5521 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5522 [lu.cfg.GetHypervisorType()], es_flags)
5523 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5524 for (name, nresult) in node_data.items()
5525 if not nresult.fail_msg and nresult.payload)
5529 if query.NQ_INST in self.requested_data:
5530 node_to_primary = dict([(name, set()) for name in nodenames])
5531 node_to_secondary = dict([(name, set()) for name in nodenames])
5533 inst_data = lu.cfg.GetAllInstancesInfo()
5535 for inst in inst_data.values():
5536 if inst.primary_node in node_to_primary:
5537 node_to_primary[inst.primary_node].add(inst.name)
5538 for secnode in inst.secondary_nodes:
5539 if secnode in node_to_secondary:
5540 node_to_secondary[secnode].add(inst.name)
5542 node_to_primary = None
5543 node_to_secondary = None
5545 if query.NQ_OOB in self.requested_data:
5546 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5547 for name, node in all_info.iteritems())
5551 if query.NQ_GROUP in self.requested_data:
5552 groups = lu.cfg.GetAllNodeGroupsInfo()
5556 return query.NodeQueryData([all_info[name] for name in nodenames],
5557 live_data, lu.cfg.GetMasterNode(),
5558 node_to_primary, node_to_secondary, groups,
5559 oob_support, lu.cfg.GetClusterInfo())
5562 class LUNodeQuery(NoHooksLU):
5563 """Logical unit for querying nodes.
5566 # pylint: disable=W0142
5569 def CheckArguments(self):
5570 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5571 self.op.output_fields, self.op.use_locking)
5573 def ExpandNames(self):
5574 self.nq.ExpandNames(self)
5576 def DeclareLocks(self, level):
5577 self.nq.DeclareLocks(self, level)
5579 def Exec(self, feedback_fn):
5580 return self.nq.OldStyleQuery(self)
5583 class LUNodeQueryvols(NoHooksLU):
5584 """Logical unit for getting volumes on node(s).
5588 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5589 _FIELDS_STATIC = utils.FieldSet("node")
5591 def CheckArguments(self):
5592 _CheckOutputFields(static=self._FIELDS_STATIC,
5593 dynamic=self._FIELDS_DYNAMIC,
5594 selected=self.op.output_fields)
5596 def ExpandNames(self):
5597 self.share_locks = _ShareAll()
5600 self.needed_locks = {
5601 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5604 self.needed_locks = {
5605 locking.LEVEL_NODE: locking.ALL_SET,
5606 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5609 def Exec(self, feedback_fn):
5610 """Computes the list of nodes and their attributes.
5613 nodenames = self.owned_locks(locking.LEVEL_NODE)
5614 volumes = self.rpc.call_node_volumes(nodenames)
5616 ilist = self.cfg.GetAllInstancesInfo()
5617 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5620 for node in nodenames:
5621 nresult = volumes[node]
5624 msg = nresult.fail_msg
5626 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5629 node_vols = sorted(nresult.payload,
5630 key=operator.itemgetter("dev"))
5632 for vol in node_vols:
5634 for field in self.op.output_fields:
5637 elif field == "phys":
5641 elif field == "name":
5643 elif field == "size":
5644 val = int(float(vol["size"]))
5645 elif field == "instance":
5646 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5648 raise errors.ParameterError(field)
5649 node_output.append(str(val))
5651 output.append(node_output)
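# Illustrative sketch (hypothetical names, not part of the original module):
# the vol2inst map used above is keyed by (node_name, "<vg>/<lv_name>"), so a
# lookup resembles
#   vol2inst.get(("node1.example.com", "xenvg/disk0"), "-")
# and falls back to "-" when the logical volume is not owned by any instance.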
5656 class LUNodeQueryStorage(NoHooksLU):
5657 """Logical unit for getting information on storage units on node(s).
5660 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5663 def CheckArguments(self):
5664 _CheckOutputFields(static=self._FIELDS_STATIC,
5665 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5666 selected=self.op.output_fields)
5668 def ExpandNames(self):
5669 self.share_locks = _ShareAll()
5672 self.needed_locks = {
5673 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5676 self.needed_locks = {
5677 locking.LEVEL_NODE: locking.ALL_SET,
5678 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5681 def Exec(self, feedback_fn):
5682 """Computes the list of nodes and their attributes.
5685 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5687 # Always get name to sort by
5688 if constants.SF_NAME in self.op.output_fields:
5689 fields = self.op.output_fields[:]
5691 fields = [constants.SF_NAME] + self.op.output_fields
5693 # Never ask for node or type as it's only known to the LU
5694 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5695 while extra in fields:
5696 fields.remove(extra)
5698 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5699 name_idx = field_idx[constants.SF_NAME]
5701 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5702 data = self.rpc.call_storage_list(self.nodes,
5703 self.op.storage_type, st_args,
5704 self.op.name, fields)
5708 for node in utils.NiceSort(self.nodes):
5709 nresult = data[node]
5713 msg = nresult.fail_msg
5715 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5718 rows = dict([(row[name_idx], row) for row in nresult.payload])
5720 for name in utils.NiceSort(rows.keys()):
5725 for field in self.op.output_fields:
5726 if field == constants.SF_NODE:
5728 elif field == constants.SF_TYPE:
5729 val = self.op.storage_type
5730 elif field in field_idx:
5731 val = row[field_idx[field]]
5733 raise errors.ParameterError(field)
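# Illustrative sketch (hypothetical values, not part of the original module):
# for output_fields = ["node", "name", "size"] the storage RPC above is only
# asked for ["name", "size"]; the "node" column is filled from the node being
# iterated over, "type" would come from the opcode, and field_idx (e.g.
# {"name": 0, "size": 1}) maps the remaining fields back into each row.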
5742 class _InstanceQuery(_QueryBase):
5743 FIELDS = query.INSTANCE_FIELDS
5745 def ExpandNames(self, lu):
5746 lu.needed_locks = {}
5747 lu.share_locks = _ShareAll()
5750 self.wanted = _GetWantedInstances(lu, self.names)
5752 self.wanted = locking.ALL_SET
5754 self.do_locking = (self.use_locking and
5755 query.IQ_LIVE in self.requested_data)
5757 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5758 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5759 lu.needed_locks[locking.LEVEL_NODE] = []
5760 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5762 self.do_grouplocks = (self.do_locking and
5763 query.IQ_NODES in self.requested_data)
5765 def DeclareLocks(self, lu, level):
5767 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5768 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5770      # Lock all groups used by the instances optimistically; this requires going
5771      # through the instances' nodes before those are locked, so the assignment must be verified later on
5772 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5774 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5775 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5776 elif level == locking.LEVEL_NODE:
5777 lu._LockInstancesNodes() # pylint: disable=W0212
5780 def _CheckGroupLocks(lu):
5781 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5782 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5784 # Check if node groups for locked instances are still correct
5785 for instance_name in owned_instances:
5786 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
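# Illustrative note (not part of the original module): the optimistic group
# locking above works in two steps, roughly:
#   1. DeclareLocks reads each locked instance's node groups from the
#      configuration before the group locks are held and requests exactly
#      those locks;
#   2. _CheckGroupLocks re-reads the groups once the locks are owned and lets
#      _CheckInstanceNodeGroups raise if an instance changed groups meanwhile.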
5788 def _GetQueryData(self, lu):
5789 """Computes the list of instances and their attributes.
5792 if self.do_grouplocks:
5793 self._CheckGroupLocks(lu)
5795 cluster = lu.cfg.GetClusterInfo()
5796 all_info = lu.cfg.GetAllInstancesInfo()
5798 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5800 instance_list = [all_info[name] for name in instance_names]
5801 nodes = frozenset(itertools.chain(*(inst.all_nodes
5802 for inst in instance_list)))
5803 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5806 wrongnode_inst = set()
5808 # Gather data as requested
5809 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5811 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5813 result = node_data[name]
5815 # offline nodes will be in both lists
5816 assert result.fail_msg
5817 offline_nodes.append(name)
5819 bad_nodes.append(name)
5820 elif result.payload:
5821 for inst in result.payload:
5822 if inst in all_info:
5823 if all_info[inst].primary_node == name:
5824 live_data.update(result.payload)
5826 wrongnode_inst.add(inst)
5828 # orphan instance; we don't list it here as we don't
5829 # handle this case yet in the output of instance listing
5830            logging.warning("Orphan instance '%s' found on node %s", inst, name)
5832 # else no instance is alive
5836 if query.IQ_DISKUSAGE in self.requested_data:
5837 gmi = ganeti.masterd.instance
5838 disk_usage = dict((inst.name,
5839 gmi.ComputeDiskSize(inst.disk_template,
5840 [{constants.IDISK_SIZE: disk.size}
5841 for disk in inst.disks]))
5842 for inst in instance_list)
5846 if query.IQ_CONSOLE in self.requested_data:
5848 for inst in instance_list:
5849 if inst.name in live_data:
5850 # Instance is running
5851 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5853 consinfo[inst.name] = None
5854 assert set(consinfo.keys()) == set(instance_names)
5858 if query.IQ_NODES in self.requested_data:
5859 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5861 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5862 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5863 for uuid in set(map(operator.attrgetter("group"),
5869 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5870 disk_usage, offline_nodes, bad_nodes,
5871 live_data, wrongnode_inst, consinfo,
5875 class LUQuery(NoHooksLU):
5876 """Query for resources/items of a certain kind.
5879 # pylint: disable=W0142
5882 def CheckArguments(self):
5883 qcls = _GetQueryImplementation(self.op.what)
5885 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5887 def ExpandNames(self):
5888 self.impl.ExpandNames(self)
5890 def DeclareLocks(self, level):
5891 self.impl.DeclareLocks(self, level)
5893 def Exec(self, feedback_fn):
5894 return self.impl.NewStyleQuery(self)
5897 class LUQueryFields(NoHooksLU):
5898 """Query for resources/items of a certain kind.
5901 # pylint: disable=W0142
5904 def CheckArguments(self):
5905 self.qcls = _GetQueryImplementation(self.op.what)
5907 def ExpandNames(self):
5908 self.needed_locks = {}
5910 def Exec(self, feedback_fn):
5911 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5914 class LUNodeModifyStorage(NoHooksLU):
5915 """Logical unit for modifying a storage volume on a node.
5920 def CheckArguments(self):
5921 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5923 storage_type = self.op.storage_type
5926 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5928 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5929 " modified" % storage_type,
5932 diff = set(self.op.changes.keys()) - modifiable
5934 raise errors.OpPrereqError("The following fields can not be modified for"
5935 " storage units of type '%s': %r" %
5936 (storage_type, list(diff)),
5939 def ExpandNames(self):
5940 self.needed_locks = {
5941 locking.LEVEL_NODE: self.op.node_name,
5944 def Exec(self, feedback_fn):
5945 """Computes the list of nodes and their attributes.
5948 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5949 result = self.rpc.call_storage_modify(self.op.node_name,
5950 self.op.storage_type, st_args,
5951 self.op.name, self.op.changes)
5952 result.Raise("Failed to modify storage unit '%s' on %s" %
5953 (self.op.name, self.op.node_name))
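# Illustrative sketch (assumed example values, not part of the original
# module): constants.MODIFIABLE_STORAGE_FIELDS maps each storage type to the
# fields that may be changed, so an opcode with, say,
#   storage_type = constants.ST_LVM_PV
#   changes = {constants.SF_ALLOCATABLE: False}
# passes CheckArguments above, while any field outside the modifiable set is
# rejected with an OpPrereqError.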
5956 class LUNodeAdd(LogicalUnit):
5957 """Logical unit for adding node to the cluster.
5961 HTYPE = constants.HTYPE_NODE
5962 _NFLAGS = ["master_capable", "vm_capable"]
5964 def CheckArguments(self):
5965 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5966 # validate/normalize the node name
5967 self.hostname = netutils.GetHostname(name=self.op.node_name,
5968 family=self.primary_ip_family)
5969 self.op.node_name = self.hostname.name
5971 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5972 raise errors.OpPrereqError("Cannot readd the master node",
5975 if self.op.readd and self.op.group:
5976 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5977 " being readded", errors.ECODE_INVAL)
5979 def BuildHooksEnv(self):
5982 This will run on all nodes before, and on all nodes + the new node after.
5986 "OP_TARGET": self.op.node_name,
5987 "NODE_NAME": self.op.node_name,
5988 "NODE_PIP": self.op.primary_ip,
5989 "NODE_SIP": self.op.secondary_ip,
5990 "MASTER_CAPABLE": str(self.op.master_capable),
5991 "VM_CAPABLE": str(self.op.vm_capable),
5994 def BuildHooksNodes(self):
5995 """Build hooks nodes.
5998 # Exclude added node
5999 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6000 post_nodes = pre_nodes + [self.op.node_name, ]
6002 return (pre_nodes, post_nodes)
6004 def CheckPrereq(self):
6005 """Check prerequisites.
6008 - the new node is not already in the config
6010     - its parameters (single/dual homed) match the cluster
6012 Any errors are signaled by raising errors.OpPrereqError.
6016 hostname = self.hostname
6017 node = hostname.name
6018 primary_ip = self.op.primary_ip = hostname.ip
6019 if self.op.secondary_ip is None:
6020 if self.primary_ip_family == netutils.IP6Address.family:
6021 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6022 " IPv4 address must be given as secondary",
6024 self.op.secondary_ip = primary_ip
6026 secondary_ip = self.op.secondary_ip
6027 if not netutils.IP4Address.IsValid(secondary_ip):
6028 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6029 " address" % secondary_ip, errors.ECODE_INVAL)
6031 node_list = cfg.GetNodeList()
6032 if not self.op.readd and node in node_list:
6033 raise errors.OpPrereqError("Node %s is already in the configuration" %
6034 node, errors.ECODE_EXISTS)
6035 elif self.op.readd and node not in node_list:
6036 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6039 self.changed_primary_ip = False
6041 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6042 if self.op.readd and node == existing_node_name:
6043 if existing_node.secondary_ip != secondary_ip:
6044 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6045 " address configuration as before",
6047 if existing_node.primary_ip != primary_ip:
6048 self.changed_primary_ip = True
6052 if (existing_node.primary_ip == primary_ip or
6053 existing_node.secondary_ip == primary_ip or
6054 existing_node.primary_ip == secondary_ip or
6055 existing_node.secondary_ip == secondary_ip):
6056 raise errors.OpPrereqError("New node ip address(es) conflict with"
6057 " existing node %s" % existing_node.name,
6058 errors.ECODE_NOTUNIQUE)
6060 # After this 'if' block, None is no longer a valid value for the
6061 # _capable op attributes
6063 old_node = self.cfg.GetNodeInfo(node)
6064 assert old_node is not None, "Can't retrieve locked node %s" % node
6065 for attr in self._NFLAGS:
6066 if getattr(self.op, attr) is None:
6067 setattr(self.op, attr, getattr(old_node, attr))
6069 for attr in self._NFLAGS:
6070 if getattr(self.op, attr) is None:
6071 setattr(self.op, attr, True)
6073 if self.op.readd and not self.op.vm_capable:
6074 pri, sec = cfg.GetNodeInstances(node)
6076 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6077 " flag set to false, but it already holds"
6078 " instances" % node,
6081 # check that the type of the node (single versus dual homed) is the
6082 # same as for the master
6083 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6084 master_singlehomed = myself.secondary_ip == myself.primary_ip
6085 newbie_singlehomed = secondary_ip == primary_ip
6086 if master_singlehomed != newbie_singlehomed:
6087 if master_singlehomed:
6088 raise errors.OpPrereqError("The master has no secondary ip but the"
6089 " new node has one",
6092 raise errors.OpPrereqError("The master has a secondary ip but the"
6093 " new node doesn't have one",
6096 # checks reachability
6097 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6098 raise errors.OpPrereqError("Node not reachable by ping",
6099 errors.ECODE_ENVIRON)
6101 if not newbie_singlehomed:
6102 # check reachability from my secondary ip to newbie's secondary ip
6103 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6104 source=myself.secondary_ip):
6105 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6106 " based ping to node daemon port",
6107 errors.ECODE_ENVIRON)
6114 if self.op.master_capable:
6115 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6117 self.master_candidate = False
6120 self.new_node = old_node
6122 node_group = cfg.LookupNodeGroup(self.op.group)
6123 self.new_node = objects.Node(name=node,
6124 primary_ip=primary_ip,
6125 secondary_ip=secondary_ip,
6126 master_candidate=self.master_candidate,
6127 offline=False, drained=False,
6128 group=node_group, ndparams={})
6130 if self.op.ndparams:
6131 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6133 if self.op.hv_state:
6134 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6136 if self.op.disk_state:
6137 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6139 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6140 # it a property on the base class.
6141 rpcrunner = rpc.DnsOnlyRunner()
6142 result = rpcrunner.call_version([node])[node]
6143 result.Raise("Can't get version information from node %s" % node)
6144 if constants.PROTOCOL_VERSION == result.payload:
6145 logging.info("Communication to node %s fine, sw version %s match",
6146 node, result.payload)
6148 raise errors.OpPrereqError("Version mismatch master version %s,"
6149 " node version %s" %
6150 (constants.PROTOCOL_VERSION, result.payload),
6151 errors.ECODE_ENVIRON)
6153 vg_name = cfg.GetVGName()
6154 if vg_name is not None:
6155 vparams = {constants.NV_PVLIST: [vg_name]}
6156 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6157 if self.op.ndparams:
6158 excl_stor = self.op.ndparams.get(constants.ND_EXCLUSIVE_STORAGE,
6160 cname = self.cfg.GetClusterName()
6161 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6162 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6164 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6165 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6167 def Exec(self, feedback_fn):
6168 """Adds the new node to the cluster.
6171 new_node = self.new_node
6172 node = new_node.name
6174 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6177     # We are adding a new node, so we assume it's powered
6178 new_node.powered = True
6180 # for re-adds, reset the offline/drained/master-candidate flags;
6181 # we need to reset here, otherwise offline would prevent RPC calls
6182 # later in the procedure; this also means that if the re-add
6183 # fails, we are left with a non-offlined, broken node
6185 new_node.drained = new_node.offline = False # pylint: disable=W0201
6186 self.LogInfo("Readding a node, the offline/drained flags were reset")
6187 # if we demote the node, we do cleanup later in the procedure
6188 new_node.master_candidate = self.master_candidate
6189 if self.changed_primary_ip:
6190 new_node.primary_ip = self.op.primary_ip
6192 # copy the master/vm_capable flags
6193 for attr in self._NFLAGS:
6194 setattr(new_node, attr, getattr(self.op, attr))
6196 # notify the user about any possible mc promotion
6197 if new_node.master_candidate:
6198 self.LogInfo("Node will be a master candidate")
6200 if self.op.ndparams:
6201 new_node.ndparams = self.op.ndparams
6203 new_node.ndparams = {}
6205 if self.op.hv_state:
6206 new_node.hv_state_static = self.new_hv_state
6208 if self.op.disk_state:
6209 new_node.disk_state_static = self.new_disk_state
6211 # Add node to our /etc/hosts, and add key to known_hosts
6212 if self.cfg.GetClusterInfo().modify_etc_hosts:
6213 master_node = self.cfg.GetMasterNode()
6214 result = self.rpc.call_etc_hosts_modify(master_node,
6215 constants.ETC_HOSTS_ADD,
6218 result.Raise("Can't update hosts file with new host data")
6220 if new_node.secondary_ip != new_node.primary_ip:
6221 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6224 node_verify_list = [self.cfg.GetMasterNode()]
6225 node_verify_param = {
6226 constants.NV_NODELIST: ([node], {}),
6227 # TODO: do a node-net-test as well?
6230 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6231 self.cfg.GetClusterName())
6232 for verifier in node_verify_list:
6233 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6234 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6236 for failed in nl_payload:
6237 feedback_fn("ssh/hostname verification failed"
6238 " (checking from %s): %s" %
6239 (verifier, nl_payload[failed]))
6240 raise errors.OpExecError("ssh/hostname verification failed")
6243 _RedistributeAncillaryFiles(self)
6244 self.context.ReaddNode(new_node)
6245 # make sure we redistribute the config
6246 self.cfg.Update(new_node, feedback_fn)
6247 # and make sure the new node will not have old files around
6248 if not new_node.master_candidate:
6249 result = self.rpc.call_node_demote_from_mc(new_node.name)
6250 msg = result.fail_msg
6252 self.LogWarning("Node failed to demote itself from master"
6253 " candidate status: %s" % msg)
6255 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6256 additional_vm=self.op.vm_capable)
6257 self.context.AddNode(new_node, self.proc.GetECId())
6260 class LUNodeSetParams(LogicalUnit):
6261 """Modifies the parameters of a node.
6263 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6264 to the node role (as _ROLE_*)
6265 @cvar _R2F: a dictionary from node role to tuples of flags
6266 @cvar _FLAGS: a list of attribute names corresponding to the flags
6269 HPATH = "node-modify"
6270 HTYPE = constants.HTYPE_NODE
6272 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6274 (True, False, False): _ROLE_CANDIDATE,
6275 (False, True, False): _ROLE_DRAINED,
6276 (False, False, True): _ROLE_OFFLINE,
6277 (False, False, False): _ROLE_REGULAR,
6279 _R2F = dict((v, k) for k, v in _F2R.items())
6280 _FLAGS = ["master_candidate", "drained", "offline"]
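# Illustrative note (not part of the original module): the tables above mean,
# for example, that a node with (master_candidate, drained, offline) ==
# (True, False, False) has role _ROLE_CANDIDATE, and that
# _R2F[_ROLE_OFFLINE] == (False, False, True), i.e. only the "offline" flag
# is set for an offline node.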
6282 def CheckArguments(self):
6283 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6284 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6285 self.op.master_capable, self.op.vm_capable,
6286 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6288 if all_mods.count(None) == len(all_mods):
6289 raise errors.OpPrereqError("Please pass at least one modification",
6291 if all_mods.count(True) > 1:
6292 raise errors.OpPrereqError("Can't set the node into more than one"
6293 " state at the same time",
6296 # Boolean value that tells us whether we might be demoting from MC
6297 self.might_demote = (self.op.master_candidate is False or
6298 self.op.offline is True or
6299 self.op.drained is True or
6300 self.op.master_capable is False)
6302 if self.op.secondary_ip:
6303 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6304 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6305 " address" % self.op.secondary_ip,
6308 self.lock_all = self.op.auto_promote and self.might_demote
6309 self.lock_instances = self.op.secondary_ip is not None
6311 def _InstanceFilter(self, instance):
6312 """Filter for getting affected instances.
6315 return (instance.disk_template in constants.DTS_INT_MIRROR and
6316 self.op.node_name in instance.all_nodes)
6318 def ExpandNames(self):
6320 self.needed_locks = {
6321 locking.LEVEL_NODE: locking.ALL_SET,
6323 # Block allocations when all nodes are locked
6324 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6327 self.needed_locks = {
6328 locking.LEVEL_NODE: self.op.node_name,
6331 # Since modifying a node can have severe effects on currently running
6332     # operations, the resource lock is at least acquired in shared mode
6333 self.needed_locks[locking.LEVEL_NODE_RES] = \
6334 self.needed_locks[locking.LEVEL_NODE]
6336 # Get all locks except nodes in shared mode; they are not used for anything
6337 # but read-only access
6338 self.share_locks = _ShareAll()
6339 self.share_locks[locking.LEVEL_NODE] = 0
6340 self.share_locks[locking.LEVEL_NODE_RES] = 0
6341 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6343 if self.lock_instances:
6344 self.needed_locks[locking.LEVEL_INSTANCE] = \
6345 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6347 def BuildHooksEnv(self):
6350 This runs on the master node.
6354 "OP_TARGET": self.op.node_name,
6355 "MASTER_CANDIDATE": str(self.op.master_candidate),
6356 "OFFLINE": str(self.op.offline),
6357 "DRAINED": str(self.op.drained),
6358 "MASTER_CAPABLE": str(self.op.master_capable),
6359 "VM_CAPABLE": str(self.op.vm_capable),
6362 def BuildHooksNodes(self):
6363 """Build hooks nodes.
6366 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6369 def CheckPrereq(self):
6370 """Check prerequisites.
6372 This only checks the instance list against the existing names.
6375 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6377 if self.lock_instances:
6378 affected_instances = \
6379 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6381 # Verify instance locks
6382 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6383 wanted_instances = frozenset(affected_instances.keys())
6384 if wanted_instances - owned_instances:
6385 raise errors.OpPrereqError("Instances affected by changing node %s's"
6386 " secondary IP address have changed since"
6387 " locks were acquired, wanted '%s', have"
6388 " '%s'; retry the operation" %
6390 utils.CommaJoin(wanted_instances),
6391 utils.CommaJoin(owned_instances)),
6394 affected_instances = None
6396 if (self.op.master_candidate is not None or
6397 self.op.drained is not None or
6398 self.op.offline is not None):
6399 # we can't change the master's node flags
6400 if self.op.node_name == self.cfg.GetMasterNode():
6401 raise errors.OpPrereqError("The master role can be changed"
6402 " only via master-failover",
6405 if self.op.master_candidate and not node.master_capable:
6406 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6407 " it a master candidate" % node.name,
6410 if self.op.vm_capable is False:
6411 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6413 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6414 " the vm_capable flag" % node.name,
6417 if node.master_candidate and self.might_demote and not self.lock_all:
6418 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6419 # check if after removing the current node, we're missing master
6421 (mc_remaining, mc_should, _) = \
6422 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6423 if mc_remaining < mc_should:
6424 raise errors.OpPrereqError("Not enough master candidates, please"
6425 " pass auto promote option to allow"
6426 " promotion (--auto-promote or RAPI"
6427 " auto_promote=True)", errors.ECODE_STATE)
6429 self.old_flags = old_flags = (node.master_candidate,
6430 node.drained, node.offline)
6431 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6432 self.old_role = old_role = self._F2R[old_flags]
6434 # Check for ineffective changes
6435 for attr in self._FLAGS:
6436 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6437 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6438 setattr(self.op, attr, None)
6440 # Past this point, any flag change to False means a transition
6441 # away from the respective state, as only real changes are kept
6443 # TODO: We might query the real power state if it supports OOB
6444 if _SupportsOob(self.cfg, node):
6445 if self.op.offline is False and not (node.powered or
6446 self.op.powered is True):
6447 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6448 " offline status can be reset") %
6449 self.op.node_name, errors.ECODE_STATE)
6450 elif self.op.powered is not None:
6451 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6452 " as it does not support out-of-band"
6453 " handling") % self.op.node_name,
6456     # If we're being de-offlined or un-drained, we'll promote ourselves to master candidate if needed
6457 if (self.op.drained is False or self.op.offline is False or
6458 (self.op.master_capable and not node.master_capable)):
6459 if _DecideSelfPromotion(self):
6460 self.op.master_candidate = True
6461 self.LogInfo("Auto-promoting node to master candidate")
6463 # If we're no longer master capable, we'll demote ourselves from MC
6464 if self.op.master_capable is False and node.master_candidate:
6465 self.LogInfo("Demoting from master candidate")
6466 self.op.master_candidate = False
6469 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6470 if self.op.master_candidate:
6471 new_role = self._ROLE_CANDIDATE
6472 elif self.op.drained:
6473 new_role = self._ROLE_DRAINED
6474 elif self.op.offline:
6475 new_role = self._ROLE_OFFLINE
6476 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6477       # False is still among the new flags, which means we're un-setting one of them
6479 new_role = self._ROLE_REGULAR
6480 else: # no new flags, nothing, keep old role
6483 self.new_role = new_role
6485 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6486 # Trying to transition out of offline status
6487 result = self.rpc.call_version([node.name])[node.name]
6489 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6490 " to report its version: %s" %
6491 (node.name, result.fail_msg),
6494 self.LogWarning("Transitioning node from offline to online state"
6495 " without using re-add. Please make sure the node"
6498 # When changing the secondary ip, verify if this is a single-homed to
6499 # multi-homed transition or vice versa, and apply the relevant
6501 if self.op.secondary_ip:
6502 # Ok even without locking, because this can't be changed by any LU
6503 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6504 master_singlehomed = master.secondary_ip == master.primary_ip
6505 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6506 if self.op.force and node.name == master.name:
6507 self.LogWarning("Transitioning from single-homed to multi-homed"
6508 " cluster; all nodes will require a secondary IP"
6511 raise errors.OpPrereqError("Changing the secondary ip on a"
6512 " single-homed cluster requires the"
6513 " --force option to be passed, and the"
6514 " target node to be the master",
6516 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6517 if self.op.force and node.name == master.name:
6518 self.LogWarning("Transitioning from multi-homed to single-homed"
6519 " cluster; secondary IP addresses will have to be"
6522 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6523 " same as the primary IP on a multi-homed"
6524 " cluster, unless the --force option is"
6525 " passed, and the target node is the"
6526 " master", errors.ECODE_INVAL)
6528 assert not (frozenset(affected_instances) -
6529 self.owned_locks(locking.LEVEL_INSTANCE))
6532 if affected_instances:
6533 msg = ("Cannot change secondary IP address: offline node has"
6534 " instances (%s) configured to use it" %
6535 utils.CommaJoin(affected_instances.keys()))
6536 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6538 # On online nodes, check that no instances are running, and that
6539 # the node has the new ip and we can reach it.
6540 for instance in affected_instances.values():
6541 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6542 msg="cannot change secondary ip")
6544 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6545 if master.name != node.name:
6546 # check reachability from master secondary ip to new secondary ip
6547 if not netutils.TcpPing(self.op.secondary_ip,
6548 constants.DEFAULT_NODED_PORT,
6549 source=master.secondary_ip):
6550 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6551 " based ping to node daemon port",
6552 errors.ECODE_ENVIRON)
6554 if self.op.ndparams:
6555 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6556 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6557 self.new_ndparams = new_ndparams
6559 if self.op.hv_state:
6560 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6561 self.node.hv_state_static)
6563 if self.op.disk_state:
6564 self.new_disk_state = \
6565 _MergeAndVerifyDiskState(self.op.disk_state,
6566 self.node.disk_state_static)
6568 def Exec(self, feedback_fn):
6573 old_role = self.old_role
6574 new_role = self.new_role
6578 if self.op.ndparams:
6579 node.ndparams = self.new_ndparams
6581 if self.op.powered is not None:
6582 node.powered = self.op.powered
6584 if self.op.hv_state:
6585 node.hv_state_static = self.new_hv_state
6587 if self.op.disk_state:
6588 node.disk_state_static = self.new_disk_state
6590 for attr in ["master_capable", "vm_capable"]:
6591 val = getattr(self.op, attr)
6593 setattr(node, attr, val)
6594 result.append((attr, str(val)))
6596 if new_role != old_role:
6597 # Tell the node to demote itself, if no longer MC and not offline
6598 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6599 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6601 self.LogWarning("Node failed to demote itself: %s", msg)
6603 new_flags = self._R2F[new_role]
6604 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6606 result.append((desc, str(nf)))
6607 (node.master_candidate, node.drained, node.offline) = new_flags
6609       # we locked all nodes, so we adjust the candidate pool before updating this node
6611 _AdjustCandidatePool(self, [node.name])
6613 if self.op.secondary_ip:
6614 node.secondary_ip = self.op.secondary_ip
6615 result.append(("secondary_ip", self.op.secondary_ip))
6617 # this will trigger configuration file update, if needed
6618 self.cfg.Update(node, feedback_fn)
6620     # this will trigger job queue propagation or cleanup if the master candidate flag changed
6622 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6623 self.context.ReaddNode(node)
6628 class LUNodePowercycle(NoHooksLU):
6629 """Powercycles a node.
6634 def CheckArguments(self):
6635 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6636 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6637 raise errors.OpPrereqError("The node is the master and the force"
6638 " parameter was not set",
6641 def ExpandNames(self):
6642 """Locking for PowercycleNode.
6644 This is a last-resort option and shouldn't block on other
6645 jobs. Therefore, we grab no locks.
6648 self.needed_locks = {}
6650 def Exec(self, feedback_fn):
6654 result = self.rpc.call_node_powercycle(self.op.node_name,
6655 self.cfg.GetHypervisorType())
6656 result.Raise("Failed to schedule the reboot")
6657 return result.payload
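# Illustrative note (not part of the original module): because ExpandNames
# grabs no locks, a powercycle opcode can be submitted even while other jobs
# hold the node; the only guard is CheckArguments above, which refuses to
# powercycle the master node unless the opcode's force parameter is set,
# e.g. (assuming the matching opcodes.OpNodePowercycle definition)
#   op = opcodes.OpNodePowercycle(node_name="node1.example.com", force=True)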
6660 class LUClusterQuery(NoHooksLU):
6661 """Query cluster configuration.
6666 def ExpandNames(self):
6667 self.needed_locks = {}
6669 def Exec(self, feedback_fn):
6670 """Return cluster config.
6673 cluster = self.cfg.GetClusterInfo()
6676 # Filter just for enabled hypervisors
6677 for os_name, hv_dict in cluster.os_hvp.items():
6678 os_hvp[os_name] = {}
6679 for hv_name, hv_params in hv_dict.items():
6680 if hv_name in cluster.enabled_hypervisors:
6681 os_hvp[os_name][hv_name] = hv_params
6683 # Convert ip_family to ip_version
6684 primary_ip_version = constants.IP4_VERSION
6685 if cluster.primary_ip_family == netutils.IP6Address.family:
6686 primary_ip_version = constants.IP6_VERSION
6689 "software_version": constants.RELEASE_VERSION,
6690 "protocol_version": constants.PROTOCOL_VERSION,
6691 "config_version": constants.CONFIG_VERSION,
6692 "os_api_version": max(constants.OS_API_VERSIONS),
6693 "export_version": constants.EXPORT_VERSION,
6694 "architecture": runtime.GetArchInfo(),
6695 "name": cluster.cluster_name,
6696 "master": cluster.master_node,
6697 "default_hypervisor": cluster.primary_hypervisor,
6698 "enabled_hypervisors": cluster.enabled_hypervisors,
6699 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6700 for hypervisor_name in cluster.enabled_hypervisors]),
6702 "beparams": cluster.beparams,
6703 "osparams": cluster.osparams,
6704 "ipolicy": cluster.ipolicy,
6705 "nicparams": cluster.nicparams,
6706 "ndparams": cluster.ndparams,
6707 "diskparams": cluster.diskparams,
6708 "candidate_pool_size": cluster.candidate_pool_size,
6709 "master_netdev": cluster.master_netdev,
6710 "master_netmask": cluster.master_netmask,
6711 "use_external_mip_script": cluster.use_external_mip_script,
6712 "volume_group_name": cluster.volume_group_name,
6713 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6714 "file_storage_dir": cluster.file_storage_dir,
6715 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6716 "maintain_node_health": cluster.maintain_node_health,
6717 "ctime": cluster.ctime,
6718 "mtime": cluster.mtime,
6719 "uuid": cluster.uuid,
6720 "tags": list(cluster.GetTags()),
6721 "uid_pool": cluster.uid_pool,
6722 "default_iallocator": cluster.default_iallocator,
6723 "reserved_lvs": cluster.reserved_lvs,
6724 "primary_ip_version": primary_ip_version,
6725 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6726 "hidden_os": cluster.hidden_os,
6727 "blacklisted_os": cluster.blacklisted_os,
6733 class LUClusterConfigQuery(NoHooksLU):
6734 """Return configuration values.
6739 def CheckArguments(self):
6740 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6742 def ExpandNames(self):
6743 self.cq.ExpandNames(self)
6745 def DeclareLocks(self, level):
6746 self.cq.DeclareLocks(self, level)
6748 def Exec(self, feedback_fn):
6749 result = self.cq.OldStyleQuery(self)
6751 assert len(result) == 1
6756 class _ClusterQuery(_QueryBase):
6757 FIELDS = query.CLUSTER_FIELDS
6759 #: Do not sort (there is only one item)
6762 def ExpandNames(self, lu):
6763 lu.needed_locks = {}
6765 # The following variables interact with _QueryBase._GetNames
6766 self.wanted = locking.ALL_SET
6767 self.do_locking = self.use_locking
6770 raise errors.OpPrereqError("Can not use locking for cluster queries",
6773 def DeclareLocks(self, lu, level):
6776 def _GetQueryData(self, lu):
6777 """Computes the list of nodes and their attributes.
6780 # Locking is not used
6781 assert not (compat.any(lu.glm.is_owned(level)
6782 for level in locking.LEVELS
6783 if level != locking.LEVEL_CLUSTER) or
6784 self.do_locking or self.use_locking)
6786 if query.CQ_CONFIG in self.requested_data:
6787 cluster = lu.cfg.GetClusterInfo()
6789 cluster = NotImplemented
6791 if query.CQ_QUEUE_DRAINED in self.requested_data:
6792 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6794 drain_flag = NotImplemented
6796 if query.CQ_WATCHER_PAUSE in self.requested_data:
6797 master_name = lu.cfg.GetMasterNode()
6799 result = lu.rpc.call_get_watcher_pause(master_name)
6800 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6803 watcher_pause = result.payload
6805 watcher_pause = NotImplemented
6807 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
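# Illustrative sketch (not part of the original module): a query asking only
# for job-queue related fields would produce roughly
#   query.ClusterQueryData(NotImplemented, drain_flag, NotImplemented)
# i.e. NotImplemented marks data that was never requested, not data that
# failed to load.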
6810 class LUInstanceActivateDisks(NoHooksLU):
6811 """Bring up an instance's disks.
6816 def ExpandNames(self):
6817 self._ExpandAndLockInstance()
6818 self.needed_locks[locking.LEVEL_NODE] = []
6819 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6821 def DeclareLocks(self, level):
6822 if level == locking.LEVEL_NODE:
6823 self._LockInstancesNodes()
6825 def CheckPrereq(self):
6826 """Check prerequisites.
6828 This checks that the instance is in the cluster.
6831 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6832 assert self.instance is not None, \
6833 "Cannot retrieve locked instance %s" % self.op.instance_name
6834 _CheckNodeOnline(self, self.instance.primary_node)
6836 def Exec(self, feedback_fn):
6837 """Activate the disks.
6840 disks_ok, disks_info = \
6841 _AssembleInstanceDisks(self, self.instance,
6842 ignore_size=self.op.ignore_size)
6844 raise errors.OpExecError("Cannot activate block devices")
6846 if self.op.wait_for_sync:
6847 if not _WaitForSync(self, self.instance):
6848 raise errors.OpExecError("Some disks of the instance are degraded!")
6853 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6855 """Prepare the block devices for an instance.
6857 This sets up the block devices on all nodes.
6859 @type lu: L{LogicalUnit}
6860 @param lu: the logical unit on whose behalf we execute
6861 @type instance: L{objects.Instance}
6862 @param instance: the instance for whose disks we assemble
6863 @type disks: list of L{objects.Disk} or None
6864 @param disks: which disks to assemble (or all, if None)
6865 @type ignore_secondaries: boolean
6866 @param ignore_secondaries: if true, errors on secondary nodes
6867 won't result in an error return from the function
6868 @type ignore_size: boolean
6869 @param ignore_size: if true, the current known size of the disk
6870 will not be used during the disk activation, useful for cases
6871 when the size is wrong
6872 @return: False if the operation failed, otherwise a list of
6873 (host, instance_visible_name, node_visible_name)
6874 with the mapping from node devices to instance devices
6879 iname = instance.name
6880 disks = _ExpandCheckDisks(instance, disks)
6882   # With the two-pass mechanism we try to reduce the window of
6883   # opportunity for the race condition of switching DRBD to primary
6884   # before handshaking occurred, but we do not eliminate it
6886 # The proper fix would be to wait (with some limits) until the
6887 # connection has been made and drbd transitions from WFConnection
6888   # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
6891 # 1st pass, assemble on all nodes in secondary mode
6892 for idx, inst_disk in enumerate(disks):
6893 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6895 node_disk = node_disk.Copy()
6896 node_disk.UnsetSize()
6897 lu.cfg.SetDiskID(node_disk, node)
6898 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6900 msg = result.fail_msg
6902 is_offline_secondary = (node in instance.secondary_nodes and
6904 lu.LogWarning("Could not prepare block device %s on node %s"
6905 " (is_primary=False, pass=1): %s",
6906 inst_disk.iv_name, node, msg)
6907 if not (ignore_secondaries or is_offline_secondary):
6910 # FIXME: race condition on drbd migration to primary
6912 # 2nd pass, do only the primary node
6913 for idx, inst_disk in enumerate(disks):
6916 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6917 if node != instance.primary_node:
6920 node_disk = node_disk.Copy()
6921 node_disk.UnsetSize()
6922 lu.cfg.SetDiskID(node_disk, node)
6923 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6925 msg = result.fail_msg
6927 lu.LogWarning("Could not prepare block device %s on node %s"
6928 " (is_primary=True, pass=2): %s",
6929 inst_disk.iv_name, node, msg)
6932 dev_path = result.payload
6934 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6936 # leave the disks configured for the primary node
6937 # this is a workaround that would be fixed better by
6938 # improving the logical/physical id handling
6940 lu.cfg.SetDiskID(disk, instance.primary_node)
6942 return disks_ok, device_info
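# Illustrative sketch (hypothetical names, not part of the original module):
# on success the function returns something like
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# i.e. the disks_ok flag plus (primary node, instance-visible name, node
# device path) tuples; a failed assemble on the primary sets disks_ok to
# False instead.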
6945 def _StartInstanceDisks(lu, instance, force):
6946 """Start the disks of an instance.
6949 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6950 ignore_secondaries=force)
6952 _ShutdownInstanceDisks(lu, instance)
6953 if force is not None and not force:
6955 hint=("If the message above refers to a secondary node,"
6956 " you can retry the operation using '--force'"))
6957 raise errors.OpExecError("Disk consistency error")
6960 class LUInstanceDeactivateDisks(NoHooksLU):
6961 """Shutdown an instance's disks.
6966 def ExpandNames(self):
6967 self._ExpandAndLockInstance()
6968 self.needed_locks[locking.LEVEL_NODE] = []
6969 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6971 def DeclareLocks(self, level):
6972 if level == locking.LEVEL_NODE:
6973 self._LockInstancesNodes()
6975 def CheckPrereq(self):
6976 """Check prerequisites.
6978 This checks that the instance is in the cluster.
6981 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6982 assert self.instance is not None, \
6983 "Cannot retrieve locked instance %s" % self.op.instance_name
6985 def Exec(self, feedback_fn):
6986 """Deactivate the disks
6989 instance = self.instance
6991 _ShutdownInstanceDisks(self, instance)
6993 _SafeShutdownInstanceDisks(self, instance)
6996 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6997 """Shutdown block devices of an instance.
6999 This function checks if an instance is running, before calling
7000 _ShutdownInstanceDisks.
7003 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7004 _ShutdownInstanceDisks(lu, instance, disks=disks)
7007 def _ExpandCheckDisks(instance, disks):
7008 """Return the instance disks selected by the disks list
7010 @type disks: list of L{objects.Disk} or None
7011 @param disks: selected disks
7012 @rtype: list of L{objects.Disk}
7013 @return: selected instance disks to act on
7017 return instance.disks
7019 if not set(disks).issubset(instance.disks):
7020     raise errors.ProgrammerError("Can only act on disks belonging to the" " target instance")
7025 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7026 """Shutdown block devices of an instance.
7028 This does the shutdown on all nodes of the instance.
7030 If the ignore_primary is false, errors on the primary node are
7035 disks = _ExpandCheckDisks(instance, disks)
7038 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7039 lu.cfg.SetDiskID(top_disk, node)
7040 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7041 msg = result.fail_msg
7043 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7044 disk.iv_name, node, msg)
7045 if ((node == instance.primary_node and not ignore_primary) or
7046 (node != instance.primary_node and not result.offline)):
7051 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7052 """Checks if a node has enough free memory.
7054 This function checks if a given node has the needed amount of free
7055 memory. In case the node has less memory or we cannot get the
7056 information from the node, this function raises an OpPrereqError
7059 @type lu: C{LogicalUnit}
7060 @param lu: a logical unit from which we get configuration data
7062 @param node: the node to check
7063 @type reason: C{str}
7064 @param reason: string to use in the error message
7065 @type requested: C{int}
7066 @param requested: the amount of memory in MiB to check for
7067 @type hypervisor_name: C{str}
7068 @param hypervisor_name: the hypervisor to ask for memory stats
7070 @return: node current free memory
7071 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7072 we cannot check the node
7075 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7076 nodeinfo[node].Raise("Can't get data from node %s" % node,
7077 prereq=True, ecode=errors.ECODE_ENVIRON)
7078 (_, _, (hv_info, )) = nodeinfo[node].payload
7080 free_mem = hv_info.get("memory_free", None)
7081 if not isinstance(free_mem, int):
7082 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7083 " was '%s'" % (node, free_mem),
7084 errors.ECODE_ENVIRON)
7085 if requested > free_mem:
7086 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7087 " needed %s MiB, available %s MiB" %
7088 (node, reason, requested, free_mem),
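# Illustrative usage sketch (hypothetical values, not part of the original
# module): an instance-start LU calls roughly
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)
# and only proceeds if the node reports at least that many MiB of free memory
# for the given hypervisor.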
7093 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7094 """Checks if nodes have enough free disk space in all the VGs.
7096 This function checks if all given nodes have the needed amount of
7097 free disk. In case any node has less disk or we cannot get the
7098 information from the node, this function raises an OpPrereqError
7101 @type lu: C{LogicalUnit}
7102 @param lu: a logical unit from which we get configuration data
7103 @type nodenames: C{list}
7104 @param nodenames: the list of node names to check
7105 @type req_sizes: C{dict}
7106 @param req_sizes: the hash of vg and corresponding amount of disk in
7108 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7109 or we cannot check the node
7112 for vg, req_size in req_sizes.items():
7113 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
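# Illustrative sketch (hypothetical values, not part of the original module):
# req_sizes maps a volume group name to the disk space required in MiB, e.g.
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "metavg": 128})
# simply runs _CheckNodesFreeDiskOnVG once per volume group.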
7116 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7117 """Checks if nodes have enough free disk space in the specified VG.
7119 This function checks if all given nodes have the needed amount of
7120 free disk. In case any node has less disk or we cannot get the
7121 information from the node, this function raises an OpPrereqError
7124 @type lu: C{LogicalUnit}
7125 @param lu: a logical unit from which we get configuration data
7126 @type nodenames: C{list}
7127 @param nodenames: the list of node names to check
7129 @param vg: the volume group to check
7130 @type requested: C{int}
7131 @param requested: the amount of disk in MiB to check for
7132 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7133 or we cannot check the node
7136 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7137 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7138 for node in nodenames:
7139 info = nodeinfo[node]
7140 info.Raise("Cannot get current information from node %s" % node,
7141 prereq=True, ecode=errors.ECODE_ENVIRON)
7142 (_, (vg_info, ), _) = info.payload
7143 vg_free = vg_info.get("vg_free", None)
7144 if not isinstance(vg_free, int):
7145 raise errors.OpPrereqError("Can't compute free disk space on node"
7146 " %s for vg %s, result was '%s'" %
7147 (node, vg, vg_free), errors.ECODE_ENVIRON)
7148 if requested > vg_free:
7149 raise errors.OpPrereqError("Not enough disk space on target node %s"
7150 " vg %s: required %d MiB, available %d MiB" %
7151 (node, vg, requested, vg_free),
7155 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7156 """Checks if nodes have enough physical CPUs
7158 This function checks if all given nodes have the needed number of
7159 physical CPUs. In case any node has less CPUs or we cannot get the
7160 information from the node, this function raises an OpPrereqError
7163 @type lu: C{LogicalUnit}
7164 @param lu: a logical unit from which we get configuration data
7165 @type nodenames: C{list}
7166 @param nodenames: the list of node names to check
7167 @type requested: C{int}
7168 @param requested: the minimum acceptable number of physical CPUs
7169 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7170 or we cannot check the node
7173 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7174 for node in nodenames:
7175 info = nodeinfo[node]
7176 info.Raise("Cannot get current information from node %s" % node,
7177 prereq=True, ecode=errors.ECODE_ENVIRON)
7178 (_, _, (hv_info, )) = info.payload
7179 num_cpus = hv_info.get("cpu_total", None)
7180 if not isinstance(num_cpus, int):
7181 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7182 " on node %s, result was '%s'" %
7183 (node, num_cpus), errors.ECODE_ENVIRON)
7184 if requested > num_cpus:
7185 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7186 "required" % (node, num_cpus, requested),
7190 class LUInstanceStartup(LogicalUnit):
7191 """Starts an instance.
7194 HPATH = "instance-start"
7195 HTYPE = constants.HTYPE_INSTANCE
7198 def CheckArguments(self):
7200 if self.op.beparams:
7201 # fill the beparams dict
7202 objects.UpgradeBeParams(self.op.beparams)
7203 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7205 def ExpandNames(self):
7206 self._ExpandAndLockInstance()
7207 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7209 def DeclareLocks(self, level):
7210 if level == locking.LEVEL_NODE_RES:
7211 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7213 def BuildHooksEnv(self):
7216 This runs on master, primary and secondary nodes of the instance.
7220 "FORCE": self.op.force,
7223 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7227 def BuildHooksNodes(self):
7228 """Build hooks nodes.
7231 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7234 def CheckPrereq(self):
7235 """Check prerequisites.
7237 This checks that the instance is in the cluster.
7240 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7241 assert self.instance is not None, \
7242 "Cannot retrieve locked instance %s" % self.op.instance_name
7245 if self.op.hvparams:
7246 # check hypervisor parameter syntax (locally)
7247 cluster = self.cfg.GetClusterInfo()
7248 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7249 filled_hvp = cluster.FillHV(instance)
7250 filled_hvp.update(self.op.hvparams)
7251 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7252 hv_type.CheckParameterSyntax(filled_hvp)
7253 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7255 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7257 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7259 if self.primary_offline and self.op.ignore_offline_nodes:
7260 self.LogWarning("Ignoring offline primary node")
7262 if self.op.hvparams or self.op.beparams:
7263 self.LogWarning("Overridden parameters are ignored")
7265 _CheckNodeOnline(self, instance.primary_node)
7267 bep = self.cfg.GetClusterInfo().FillBE(instance)
7268 bep.update(self.op.beparams)
7270 # check bridges existence
7271 _CheckInstanceBridgesExist(self, instance)
7273 remote_info = self.rpc.call_instance_info(instance.primary_node,
7275 instance.hypervisor)
7276 remote_info.Raise("Error checking node %s" % instance.primary_node,
7277 prereq=True, ecode=errors.ECODE_ENVIRON)
7278 if not remote_info.payload: # not running already
7279 _CheckNodeFreeMemory(self, instance.primary_node,
7280 "starting instance %s" % instance.name,
7281 bep[constants.BE_MINMEM], instance.hypervisor)
7283 def Exec(self, feedback_fn):
7284 """Start the instance.
7287 instance = self.instance
7288 force = self.op.force
7290 if not self.op.no_remember:
7291 self.cfg.MarkInstanceUp(instance.name)
7293 if self.primary_offline:
7294 assert self.op.ignore_offline_nodes
7295 self.LogInfo("Primary node offline, marked instance as started")
7297 node_current = instance.primary_node
7299 _StartInstanceDisks(self, instance, force)
7302       result = self.rpc.call_instance_start(node_current,
7303 (instance, self.op.hvparams,
7305 self.op.startup_paused)
7306 msg = result.fail_msg
7308 _ShutdownInstanceDisks(self, instance)
7309 raise errors.OpExecError("Could not start instance: %s" % msg)
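# Illustrative note (not part of the original module): the startup flow above
# is roughly
#   1. record the new admin state unless no_remember was given,
#   2. if the primary node is offline (and ignore_offline_nodes allowed it),
#      stop after marking the instance as started,
#   3. otherwise assemble the disks and start the instance, shutting the disks
#      down again if the hypervisor call fails.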
7312 class LUInstanceReboot(LogicalUnit):
7313 """Reboot an instance.
7316 HPATH = "instance-reboot"
7317 HTYPE = constants.HTYPE_INSTANCE
7320 def ExpandNames(self):
7321 self._ExpandAndLockInstance()
7323 def BuildHooksEnv(self):
7326 This runs on master, primary and secondary nodes of the instance.
7330 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7331 "REBOOT_TYPE": self.op.reboot_type,
7332 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7335 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7339 def BuildHooksNodes(self):
7340 """Build hooks nodes.
7343 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7346 def CheckPrereq(self):
7347 """Check prerequisites.
7349 This checks that the instance is in the cluster.
7352 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7353 assert self.instance is not None, \
7354 "Cannot retrieve locked instance %s" % self.op.instance_name
7355 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7356 _CheckNodeOnline(self, instance.primary_node)
7358 # check bridges existence
7359 _CheckInstanceBridgesExist(self, instance)
7361 def Exec(self, feedback_fn):
7362 """Reboot the instance.
7365 instance = self.instance
7366 ignore_secondaries = self.op.ignore_secondaries
7367 reboot_type = self.op.reboot_type
7369 remote_info = self.rpc.call_instance_info(instance.primary_node,
7371 instance.hypervisor)
7372 remote_info.Raise("Error checking node %s" % instance.primary_node)
7373 instance_running = bool(remote_info.payload)
7375 node_current = instance.primary_node
7377 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7378 constants.INSTANCE_REBOOT_HARD]:
7379 for disk in instance.disks:
7380 self.cfg.SetDiskID(disk, node_current)
7381 result = self.rpc.call_instance_reboot(node_current, instance,
7383 self.op.shutdown_timeout)
7384 result.Raise("Could not reboot instance")
7386 if instance_running:
7387 result = self.rpc.call_instance_shutdown(node_current, instance,
7388 self.op.shutdown_timeout)
7389 result.Raise("Could not shutdown instance for full reboot")
7390 _ShutdownInstanceDisks(self, instance)
7392       self.LogInfo("Instance %s was already stopped, starting now", instance.name)
7394 _StartInstanceDisks(self, instance, ignore_secondaries)
7395 result = self.rpc.call_instance_start(node_current,
7396 (instance, None, None), False)
7397 msg = result.fail_msg
7399 _ShutdownInstanceDisks(self, instance)
7400 raise errors.OpExecError("Could not start instance for"
7401 " full reboot: %s" % msg)
7403 self.cfg.MarkInstanceUp(instance.name)
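# Illustrative note (not part of the original module): soft and hard reboots of
# a running instance are delegated to a single instance_reboot RPC call, while
# a full reboot (or a "reboot" of a stopped instance) is implemented above as
# shutdown + disk deactivation, then disk activation + start.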
7406 class LUInstanceShutdown(LogicalUnit):
7407 """Shutdown an instance.
7410 HPATH = "instance-stop"
7411 HTYPE = constants.HTYPE_INSTANCE
7414 def ExpandNames(self):
7415 self._ExpandAndLockInstance()
7417 def BuildHooksEnv(self):
7420 This runs on master, primary and secondary nodes of the instance.
7423 env = _BuildInstanceHookEnvByObject(self, self.instance)
7424 env["TIMEOUT"] = self.op.timeout
7427 def BuildHooksNodes(self):
7428 """Build hooks nodes.
7431 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7434 def CheckPrereq(self):
7435 """Check prerequisites.
7437 This checks that the instance is in the cluster.
7440 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7441 assert self.instance is not None, \
7442 "Cannot retrieve locked instance %s" % self.op.instance_name
7444 if not self.op.force:
7445 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7447 self.LogWarning("Ignoring offline instance check")
7449 self.primary_offline = \
7450 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7452 if self.primary_offline and self.op.ignore_offline_nodes:
7453 self.LogWarning("Ignoring offline primary node")
7455 _CheckNodeOnline(self, self.instance.primary_node)
7457 def Exec(self, feedback_fn):
7458 """Shutdown the instance.
7461 instance = self.instance
7462 node_current = instance.primary_node
7463 timeout = self.op.timeout
7465 # If the instance is offline we shouldn't mark it as down, as that
7466 # resets the offline flag.
7467 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7468 self.cfg.MarkInstanceDown(instance.name)
7470 if self.primary_offline:
7471 assert self.op.ignore_offline_nodes
7472 self.LogInfo("Primary node offline, marked instance as stopped")
7474 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7475 msg = result.fail_msg
7477 self.LogWarning("Could not shutdown instance: %s", msg)
7479 _ShutdownInstanceDisks(self, instance)
7482 class LUInstanceReinstall(LogicalUnit):
7483 """Reinstall an instance.
7486 HPATH = "instance-reinstall"
7487 HTYPE = constants.HTYPE_INSTANCE
7490 def ExpandNames(self):
7491 self._ExpandAndLockInstance()
7493 def BuildHooksEnv(self):
7496 This runs on master, primary and secondary nodes of the instance.
7499 return _BuildInstanceHookEnvByObject(self, self.instance)
7501 def BuildHooksNodes(self):
7502 """Build hooks nodes.
7505 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7506 return (nl, nl)
7508 def CheckPrereq(self):
7509 """Check prerequisites.
7511 This checks that the instance is in the cluster and is not running.
7514 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7515 assert instance is not None, \
7516 "Cannot retrieve locked instance %s" % self.op.instance_name
7517 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7518 " offline, cannot reinstall")
7520 if instance.disk_template == constants.DT_DISKLESS:
7521 raise errors.OpPrereqError("Instance '%s' has no disks" %
7522 self.op.instance_name,
7524 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7526 if self.op.os_type is not None:
7528 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7529 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7530 instance_os = self.op.os_type
7531 else:
7532 instance_os = instance.os
7534 nodelist = list(instance.all_nodes)
7536 if self.op.osparams:
7537 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7538 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7539 self.os_inst = i_osdict # the new dict (without defaults)
7540 else:
7541 self.os_inst = {}
7543 self.instance = instance
7545 def Exec(self, feedback_fn):
7546 """Reinstall the instance.
7549 inst = self.instance
7551 if self.op.os_type is not None:
7552 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7553 inst.os = self.op.os_type
7554 # Write to configuration
7555 self.cfg.Update(inst, feedback_fn)
7557 _StartInstanceDisks(self, inst, None)
7558 try:
7559 feedback_fn("Running the instance OS create scripts...")
7560 # FIXME: pass debug option from opcode to backend
7561 result = self.rpc.call_instance_os_add(inst.primary_node,
7562 (inst, self.os_inst), True,
7563 self.op.debug_level)
7564 result.Raise("Could not install OS for instance %s on node %s" %
7565 (inst.name, inst.primary_node))
7566 finally:
7567 _ShutdownInstanceDisks(self, inst)
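# The disks are only activated for the duration of the OS create scripts and
# are shut down again in the finally clause, whether or not the reinstall
# succeeded. A typical invocation of this LU from the command line (not shown
# in this module) would be something like "gnt-instance reinstall -o <os> <instance>".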
7570 class LUInstanceRecreateDisks(LogicalUnit):
7571 """Recreate an instance's missing disks.
7574 HPATH = "instance-recreate-disks"
7575 HTYPE = constants.HTYPE_INSTANCE
7578 _MODIFYABLE = compat.UniqueFrozenset([
7579 constants.IDISK_SIZE,
7580 constants.IDISK_MODE,
7583 # New or changed disk parameters may have different semantics
7584 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7585 constants.IDISK_ADOPT,
7587 # TODO: Implement support changing VG while recreating
7589 constants.IDISK_METAVG,
7590 constants.IDISK_PROVIDER,
7593 def _RunAllocator(self):
7594 """Run the allocator based on input opcode.
7597 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7600 # The allocator should actually run in "relocate" mode, but current
7601 # allocators don't support relocating all the nodes of an instance at
7602 # the same time. As a workaround we use "allocate" mode, but this is
7603 # suboptimal for two reasons:
7604 # - The instance name passed to the allocator is present in the list of
7605 # existing instances, so there could be a conflict within the
7606 # internal structures of the allocator. This doesn't happen with the
7607 # current allocators, but it's a liability.
7608 # - The allocator counts the resources used by the instance twice: once
7609 # because the instance exists already, and once because it tries to
7610 # allocate a new instance.
7611 # The allocator could choose some of the nodes on which the instance is
7612 # running, but that's not a problem. If the instance nodes are broken,
7613 # they should already be marked as drained or offline, and hence
7614 # skipped by the allocator. If instance disks have been lost for other
7615 # reasons, then recreating the disks on the same nodes should be fine.
7616 disk_template = self.instance.disk_template
7617 spindle_use = be_full[constants.BE_SPINDLE_USE]
7618 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7619 disk_template=disk_template,
7620 tags=list(self.instance.GetTags()),
7621 os=self.instance.os,
7623 vcpus=be_full[constants.BE_VCPUS],
7624 memory=be_full[constants.BE_MAXMEM],
7625 spindle_use=spindle_use,
7626 disks=[{constants.IDISK_SIZE: d.size,
7627 constants.IDISK_MODE: d.mode}
7628 for d in self.instance.disks],
7629 hypervisor=self.instance.hypervisor,
7630 node_whitelist=None)
7631 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7633 ial.Run(self.op.iallocator)
7635 assert req.RequiredNodes() == len(self.instance.all_nodes)
7637 if not ial.success:
7638 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7639 " %s" % (self.op.iallocator, ial.info),
7640 errors.ECODE_NORES)
7642 self.op.nodes = ial.result
7643 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7644 self.op.instance_name, self.op.iallocator,
7645 utils.CommaJoin(ial.result))
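# After a successful allocator run self.op.nodes simply contains the node
# names chosen by the allocator (e.g. two names for a DRBD instance), and the
# rest of CheckPrereq treats them exactly like explicitly requested nodes.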
7647 def CheckArguments(self):
7648 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7649 # Normalize and convert deprecated list of disk indices
7650 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
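# For example, a deprecated "disks" value of [2, 0, 2] is normalized above to
# [(0, {}), (2, {})]: indices are de-duplicated, sorted and paired with an
# empty override dict.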
7652 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7653 if duplicates:
7654 raise errors.OpPrereqError("Some disks have been specified more than"
7655 " once: %s" % utils.CommaJoin(duplicates),
7656 errors.ECODE_INVAL)
7658 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7659 # when neither iallocator nor nodes are specified
7660 if self.op.iallocator or self.op.nodes:
7661 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7663 for (idx, params) in self.op.disks:
7664 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7665 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7666 if unsupported:
7667 raise errors.OpPrereqError("Parameters for disk %s try to change"
7668 " unmodifiable parameter(s): %s" %
7669 (idx, utils.CommaJoin(unsupported)),
7672 def ExpandNames(self):
7673 self._ExpandAndLockInstance()
7674 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7676 if self.op.nodes:
7677 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7678 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7679 else:
7680 self.needed_locks[locking.LEVEL_NODE] = []
7681 if self.op.iallocator:
7682 # iallocator will select a new node in the same group
7683 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7684 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7686 self.needed_locks[locking.LEVEL_NODE_RES] = []
7688 def DeclareLocks(self, level):
7689 if level == locking.LEVEL_NODEGROUP:
7690 assert self.op.iallocator is not None
7691 assert not self.op.nodes
7692 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7693 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7694 # Lock the primary group used by the instance optimistically; this
7695 # requires going via the node before it's locked, requiring
7696 # verification later on
7697 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7698 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7700 elif level == locking.LEVEL_NODE:
7701 # If an allocator is used, then we lock all the nodes in the current
7702 # instance group, as we don't know yet which ones will be selected;
7703 # if we replace the nodes without using an allocator, locks are
7704 # already declared in ExpandNames; otherwise, we need to lock all the
7705 # instance nodes for disk re-creation
7706 if self.op.iallocator:
7707 assert not self.op.nodes
7708 assert not self.needed_locks[locking.LEVEL_NODE]
7709 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7711 # Lock member nodes of the group of the primary node
7712 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7713 self.needed_locks[locking.LEVEL_NODE].extend(
7714 self.cfg.GetNodeGroup(group_uuid).members)
7716 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7717 elif not self.op.nodes:
7718 self._LockInstancesNodes(primary_only=False)
7719 elif level == locking.LEVEL_NODE_RES:
7721 self.needed_locks[locking.LEVEL_NODE_RES] = \
7722 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7724 def BuildHooksEnv(self):
7727 This runs on master, primary and secondary nodes of the instance.
7730 return _BuildInstanceHookEnvByObject(self, self.instance)
7732 def BuildHooksNodes(self):
7733 """Build hooks nodes.
7736 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7737 return (nl, nl)
7739 def CheckPrereq(self):
7740 """Check prerequisites.
7742 This checks that the instance is in the cluster and is not running.
7745 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7746 assert instance is not None, \
7747 "Cannot retrieve locked instance %s" % self.op.instance_name
7748 if self.op.nodes:
7749 if len(self.op.nodes) != len(instance.all_nodes):
7750 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7751 " %d replacement nodes were specified" %
7752 (instance.name, len(instance.all_nodes),
7753 len(self.op.nodes)),
7755 assert instance.disk_template != constants.DT_DRBD8 or \
7756 len(self.op.nodes) == 2
7757 assert instance.disk_template != constants.DT_PLAIN or \
7758 len(self.op.nodes) == 1
7759 primary_node = self.op.nodes[0]
7760 else:
7761 primary_node = instance.primary_node
7762 if not self.op.iallocator:
7763 _CheckNodeOnline(self, primary_node)
7765 if instance.disk_template == constants.DT_DISKLESS:
7766 raise errors.OpPrereqError("Instance '%s' has no disks" %
7767 self.op.instance_name, errors.ECODE_INVAL)
7769 # Verify if node group locks are still correct
7770 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7771 if owned_groups:
7772 # Node group locks are acquired only for the primary node (and only
7773 # when the allocator is used)
7774 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7775 primary_only=True)
7777 # if we replace nodes *and* the old primary is offline, we don't
7778 # check the instance state
7779 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7780 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7781 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7782 msg="cannot recreate disks")
7784 if self.op.disks:
7785 self.disks = dict(self.op.disks)
7786 else:
7787 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7789 maxidx = max(self.disks.keys())
7790 if maxidx >= len(instance.disks):
7791 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7794 if ((self.op.nodes or self.op.iallocator) and
7795 sorted(self.disks.keys()) != range(len(instance.disks))):
7796 raise errors.OpPrereqError("Can't recreate disks partially and"
7797 " change the nodes at the same time",
7800 self.instance = instance
7802 if self.op.iallocator:
7803 self._RunAllocator()
7804 # Release unneeded node and node resource locks
7805 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7806 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7807 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7809 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7811 def Exec(self, feedback_fn):
7812 """Recreate the disks.
7815 instance = self.instance
7817 assert (self.owned_locks(locking.LEVEL_NODE) ==
7818 self.owned_locks(locking.LEVEL_NODE_RES))
7820 to_skip = []
7821 mods = [] # keeps track of needed changes
7823 for idx, disk in enumerate(instance.disks):
7824 try:
7825 changes = self.disks[idx]
7826 except KeyError:
7827 # Disk should not be recreated
7828 to_skip.append(idx)
7829 continue
7831 # update secondaries for disks, if needed
7832 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7833 # need to update the nodes and minors
7834 assert len(self.op.nodes) == 2
7835 assert len(disk.logical_id) == 6 # otherwise disk internals
7837 (_, _, old_port, _, _, old_secret) = disk.logical_id
7838 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7839 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7840 new_minors[0], new_minors[1], old_secret)
7841 assert len(disk.logical_id) == len(new_id)
7842 else:
7843 new_id = None
7845 mods.append((idx, new_id, changes))
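# A DRBD8 logical_id is the 6-tuple (node_a, node_b, port, minor_a, minor_b,
# shared_secret); only the node names and minors are replaced here, the port
# and secret of the existing disk are reused.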
7847 # now that we have passed all asserts above, we can apply the mods
7848 # in a single run (to avoid partial changes)
7849 for idx, new_id, changes in mods:
7850 disk = instance.disks[idx]
7851 if new_id is not None:
7852 assert disk.dev_type == constants.LD_DRBD8
7853 disk.logical_id = new_id
7854 if changes:
7855 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7856 mode=changes.get(constants.IDISK_MODE, None))
7858 # change primary node, if needed
7859 if self.op.nodes:
7860 instance.primary_node = self.op.nodes[0]
7861 self.LogWarning("Changing the instance's nodes, you will have to"
7862 " remove any disks left on the older nodes manually")
7865 self.cfg.Update(instance, feedback_fn)
7867 # All touched nodes must be locked
7868 mylocks = self.owned_locks(locking.LEVEL_NODE)
7869 assert mylocks.issuperset(frozenset(instance.all_nodes))
7870 _CreateDisks(self, instance, to_skip=to_skip)
7873 class LUInstanceRename(LogicalUnit):
7874 """Rename an instance.
7877 HPATH = "instance-rename"
7878 HTYPE = constants.HTYPE_INSTANCE
7880 def CheckArguments(self):
7884 if self.op.ip_check and not self.op.name_check:
7885 # TODO: make the ip check more flexible and not depend on the name check
7886 raise errors.OpPrereqError("IP address check requires a name check",
7889 def BuildHooksEnv(self):
7892 This runs on master, primary and secondary nodes of the instance.
7895 env = _BuildInstanceHookEnvByObject(self, self.instance)
7896 env["INSTANCE_NEW_NAME"] = self.op.new_name
7897 return env
7899 def BuildHooksNodes(self):
7900 """Build hooks nodes.
7903 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7904 return (nl, nl)
7906 def CheckPrereq(self):
7907 """Check prerequisites.
7909 This checks that the instance is in the cluster and is not running.
7912 self.op.instance_name = _ExpandInstanceName(self.cfg,
7913 self.op.instance_name)
7914 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7915 assert instance is not None
7916 _CheckNodeOnline(self, instance.primary_node)
7917 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7918 msg="cannot rename")
7919 self.instance = instance
7921 new_name = self.op.new_name
7922 if self.op.name_check:
7923 hostname = _CheckHostnameSane(self, new_name)
7924 new_name = self.op.new_name = hostname.name
7925 if (self.op.ip_check and
7926 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7927 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7928 (hostname.ip, new_name),
7929 errors.ECODE_NOTUNIQUE)
7931 instance_list = self.cfg.GetInstanceList()
7932 if new_name in instance_list and new_name != instance.name:
7933 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7934 new_name, errors.ECODE_EXISTS)
7936 def Exec(self, feedback_fn):
7937 """Rename the instance.
7940 inst = self.instance
7941 old_name = inst.name
7943 rename_file_storage = False
7944 if (inst.disk_template in constants.DTS_FILEBASED and
7945 self.op.new_name != inst.name):
7946 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7947 rename_file_storage = True
7949 self.cfg.RenameInstance(inst.name, self.op.new_name)
7950 # Change the instance lock. This is definitely safe while we hold the BGL.
7951 # Otherwise the new lock would have to be added in acquired mode.
7953 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7954 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7955 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7957 # re-read the instance from the configuration after rename
7958 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7960 if rename_file_storage:
7961 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7962 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7963 old_file_storage_dir,
7964 new_file_storage_dir)
7965 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7966 " (but the instance has been renamed in Ganeti)" %
7967 (inst.primary_node, old_file_storage_dir,
7968 new_file_storage_dir))
7970 _StartInstanceDisks(self, inst, None)
7971 # update info on disks
7972 info = _GetInstanceInfoText(inst)
7973 for (idx, disk) in enumerate(inst.disks):
7974 for node in inst.all_nodes:
7975 self.cfg.SetDiskID(disk, node)
7976 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7977 if result.fail_msg:
7978 self.LogWarning("Error setting info on node %s for disk %s: %s",
7979 node, idx, result.fail_msg)
7980 try:
7981 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7982 old_name, self.op.debug_level)
7983 msg = result.fail_msg
7984 if msg:
7985 msg = ("Could not run OS rename script for instance %s on node %s"
7986 " (but the instance has been renamed in Ganeti): %s" %
7987 (inst.name, inst.primary_node, msg))
7988 self.LogWarning(msg)
7989 finally:
7990 _ShutdownInstanceDisks(self, inst)
7992 return inst.name
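# The configuration rename happens before the file storage directory and the
# OS rename script are touched, which is why later failures are reported as
# warnings that explicitly mention that the instance has already been renamed
# in Ganeti.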
7995 class LUInstanceRemove(LogicalUnit):
7996 """Remove an instance.
7999 HPATH = "instance-remove"
8000 HTYPE = constants.HTYPE_INSTANCE
8003 def ExpandNames(self):
8004 self._ExpandAndLockInstance()
8005 self.needed_locks[locking.LEVEL_NODE] = []
8006 self.needed_locks[locking.LEVEL_NODE_RES] = []
8007 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8009 def DeclareLocks(self, level):
8010 if level == locking.LEVEL_NODE:
8011 self._LockInstancesNodes()
8012 elif level == locking.LEVEL_NODE_RES:
8014 self.needed_locks[locking.LEVEL_NODE_RES] = \
8015 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8017 def BuildHooksEnv(self):
8020 This runs on master, primary and secondary nodes of the instance.
8023 env = _BuildInstanceHookEnvByObject(self, self.instance)
8024 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8025 return env
8027 def BuildHooksNodes(self):
8028 """Build hooks nodes.
8031 nl = [self.cfg.GetMasterNode()]
8032 nl_post = list(self.instance.all_nodes) + nl
8033 return (nl, nl_post)
8035 def CheckPrereq(self):
8036 """Check prerequisites.
8038 This checks that the instance is in the cluster.
8041 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8042 assert self.instance is not None, \
8043 "Cannot retrieve locked instance %s" % self.op.instance_name
8045 def Exec(self, feedback_fn):
8046 """Remove the instance.
8049 instance = self.instance
8050 logging.info("Shutting down instance %s on node %s",
8051 instance.name, instance.primary_node)
8053 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8054 self.op.shutdown_timeout)
8055 msg = result.fail_msg
8056 if msg:
8057 if self.op.ignore_failures:
8058 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8059 else:
8060 raise errors.OpExecError("Could not shutdown instance %s on"
8061 " node %s: %s" %
8062 (instance.name, instance.primary_node, msg))
8064 assert (self.owned_locks(locking.LEVEL_NODE) ==
8065 self.owned_locks(locking.LEVEL_NODE_RES))
8066 assert not (set(instance.all_nodes) -
8067 self.owned_locks(locking.LEVEL_NODE)), \
8068 "Not owning correct locks"
8070 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8073 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8074 """Utility function to remove an instance.
8077 logging.info("Removing block devices for instance %s", instance.name)
8079 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8080 if not ignore_failures:
8081 raise errors.OpExecError("Can't remove instance's disks")
8082 feedback_fn("Warning: can't remove instance's disks")
8084 logging.info("Removing instance %s out of cluster config", instance.name)
8086 lu.cfg.RemoveInstance(instance.name)
8088 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8089 "Instance lock removal conflict"
8091 # Remove lock for the instance
8092 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
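# Scheduling the instance lock for removal via lu.remove_locks (instead of
# just releasing it) ensures the lock object itself disappears together with
# the instance once the LU finishes.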
8095 class LUInstanceQuery(NoHooksLU):
8096 """Logical unit for querying instances.
8099 # pylint: disable=W0142
8102 def CheckArguments(self):
8103 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8104 self.op.output_fields, self.op.use_locking)
8106 def ExpandNames(self):
8107 self.iq.ExpandNames(self)
8109 def DeclareLocks(self, level):
8110 self.iq.DeclareLocks(self, level)
8112 def Exec(self, feedback_fn):
8113 return self.iq.OldStyleQuery(self)
8116 def _ExpandNamesForMigration(lu):
8117 """Expands names for use with L{TLMigrateInstance}.
8119 @type lu: L{LogicalUnit}
8122 if lu.op.target_node is not None:
8123 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8125 lu.needed_locks[locking.LEVEL_NODE] = []
8126 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8128 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8129 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8131 # The node allocation lock is actually only needed for replicated instances
8132 # (e.g. DRBD8) and if an iallocator is used.
8133 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8136 def _DeclareLocksForMigration(lu, level):
8137 """Declares locks for L{TLMigrateInstance}.
8139 @type lu: L{LogicalUnit}
8140 @param level: Lock level
8143 if level == locking.LEVEL_NODE_ALLOC:
8144 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8146 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8148 # Node locks are already declared here rather than at LEVEL_NODE as we need
8149 # the instance object anyway to declare the node allocation lock.
8150 if instance.disk_template in constants.DTS_EXT_MIRROR:
8151 if lu.op.target_node is None:
8152 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8153 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8154 else:
8155 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8156 lu.op.target_node]
8157 del lu.recalculate_locks[locking.LEVEL_NODE]
8158 else:
8159 lu._LockInstancesNodes() # pylint: disable=W0212
8161 elif level == locking.LEVEL_NODE:
8162 # Node locks are declared together with the node allocation lock
8163 assert (lu.needed_locks[locking.LEVEL_NODE] or
8164 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8166 elif level == locking.LEVEL_NODE_RES:
8168 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8169 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8172 class LUInstanceFailover(LogicalUnit):
8173 """Failover an instance.
8176 HPATH = "instance-failover"
8177 HTYPE = constants.HTYPE_INSTANCE
8180 def CheckArguments(self):
8181 """Check the arguments.
8184 self.iallocator = getattr(self.op, "iallocator", None)
8185 self.target_node = getattr(self.op, "target_node", None)
8187 def ExpandNames(self):
8188 self._ExpandAndLockInstance()
8189 _ExpandNamesForMigration(self)
8191 self._migrater = \
8192 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8193 self.op.ignore_consistency, True,
8194 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8196 self.tasklets = [self._migrater]
8198 def DeclareLocks(self, level):
8199 _DeclareLocksForMigration(self, level)
8201 def BuildHooksEnv(self):
8204 This runs on master, primary and secondary nodes of the instance.
8207 instance = self._migrater.instance
8208 source_node = instance.primary_node
8209 target_node = self.op.target_node
8210 env = {
8211 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8212 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8213 "OLD_PRIMARY": source_node,
8214 "NEW_PRIMARY": target_node,
8215 }
8217 if instance.disk_template in constants.DTS_INT_MIRROR:
8218 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8219 env["NEW_SECONDARY"] = source_node
8220 else:
8221 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8223 env.update(_BuildInstanceHookEnvByObject(self, instance))
8225 return env
8227 def BuildHooksNodes(self):
8228 """Build hooks nodes.
8231 instance = self._migrater.instance
8232 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8233 return (nl, nl + [instance.primary_node])
8236 class LUInstanceMigrate(LogicalUnit):
8237 """Migrate an instance.
8239 This is migration without shutting down, compared to the failover,
8240 which is done with shutdown.
8243 HPATH = "instance-migrate"
8244 HTYPE = constants.HTYPE_INSTANCE
8247 def ExpandNames(self):
8248 self._ExpandAndLockInstance()
8249 _ExpandNamesForMigration(self)
8251 self._migrater = \
8252 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8253 False, self.op.allow_failover, False,
8254 self.op.allow_runtime_changes,
8255 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8256 self.op.ignore_ipolicy)
8258 self.tasklets = [self._migrater]
8260 def DeclareLocks(self, level):
8261 _DeclareLocksForMigration(self, level)
8263 def BuildHooksEnv(self):
8266 This runs on master, primary and secondary nodes of the instance.
8269 instance = self._migrater.instance
8270 source_node = instance.primary_node
8271 target_node = self.op.target_node
8272 env = _BuildInstanceHookEnvByObject(self, instance)
8273 env.update({
8274 "MIGRATE_LIVE": self._migrater.live,
8275 "MIGRATE_CLEANUP": self.op.cleanup,
8276 "OLD_PRIMARY": source_node,
8277 "NEW_PRIMARY": target_node,
8278 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8279 })
8281 if instance.disk_template in constants.DTS_INT_MIRROR:
8282 env["OLD_SECONDARY"] = target_node
8283 env["NEW_SECONDARY"] = source_node
8284 else:
8285 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8287 return env
8289 def BuildHooksNodes(self):
8290 """Build hooks nodes.
8293 instance = self._migrater.instance
8294 snodes = list(instance.secondary_nodes)
8295 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8296 return (nl, nl)
8299 class LUInstanceMove(LogicalUnit):
8300 """Move an instance by data-copying.
8303 HPATH = "instance-move"
8304 HTYPE = constants.HTYPE_INSTANCE
8307 def ExpandNames(self):
8308 self._ExpandAndLockInstance()
8309 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8310 self.op.target_node = target_node
8311 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8312 self.needed_locks[locking.LEVEL_NODE_RES] = []
8313 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8315 def DeclareLocks(self, level):
8316 if level == locking.LEVEL_NODE:
8317 self._LockInstancesNodes(primary_only=True)
8318 elif level == locking.LEVEL_NODE_RES:
8320 self.needed_locks[locking.LEVEL_NODE_RES] = \
8321 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8323 def BuildHooksEnv(self):
8326 This runs on master, primary and secondary nodes of the instance.
8330 "TARGET_NODE": self.op.target_node,
8331 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8333 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8336 def BuildHooksNodes(self):
8337 """Build hooks nodes.
8340 nl = [
8341 self.cfg.GetMasterNode(),
8342 self.instance.primary_node,
8343 self.op.target_node,
8344 ]
8345 return (nl, nl)
8347 def CheckPrereq(self):
8348 """Check prerequisites.
8350 This checks that the instance is in the cluster.
8353 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8354 assert self.instance is not None, \
8355 "Cannot retrieve locked instance %s" % self.op.instance_name
8357 node = self.cfg.GetNodeInfo(self.op.target_node)
8358 assert node is not None, \
8359 "Cannot retrieve locked node %s" % self.op.target_node
8361 self.target_node = target_node = node.name
8363 if target_node == instance.primary_node:
8364 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8365 (instance.name, target_node),
8368 bep = self.cfg.GetClusterInfo().FillBE(instance)
8370 for idx, dsk in enumerate(instance.disks):
8371 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8372 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8373 " cannot copy" % idx, errors.ECODE_STATE)
8375 _CheckNodeOnline(self, target_node)
8376 _CheckNodeNotDrained(self, target_node)
8377 _CheckNodeVmCapable(self, target_node)
8378 cluster = self.cfg.GetClusterInfo()
8379 group_info = self.cfg.GetNodeGroup(node.group)
8380 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8381 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8382 ignore=self.op.ignore_ipolicy)
8384 if instance.admin_state == constants.ADMINST_UP:
8385 # check memory requirements on the secondary node
8386 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8387 instance.name, bep[constants.BE_MAXMEM],
8388 instance.hypervisor)
8389 else:
8390 self.LogInfo("Not checking memory on the secondary node as"
8391 " instance will not be started")
8393 # check bridge existence
8394 _CheckInstanceBridgesExist(self, instance, node=target_node)
8396 def Exec(self, feedback_fn):
8397 """Move an instance.
8399 The move is done by shutting it down on its present node, copying
8400 the data over (slow) and starting it on the new node.
8403 instance = self.instance
8405 source_node = instance.primary_node
8406 target_node = self.target_node
8408 self.LogInfo("Shutting down instance %s on source node %s",
8409 instance.name, source_node)
8411 assert (self.owned_locks(locking.LEVEL_NODE) ==
8412 self.owned_locks(locking.LEVEL_NODE_RES))
8414 result = self.rpc.call_instance_shutdown(source_node, instance,
8415 self.op.shutdown_timeout)
8416 msg = result.fail_msg
8417 if msg:
8418 if self.op.ignore_consistency:
8419 self.LogWarning("Could not shutdown instance %s on node %s."
8420 " Proceeding anyway. Please make sure node"
8421 " %s is down. Error details: %s",
8422 instance.name, source_node, source_node, msg)
8423 else:
8424 raise errors.OpExecError("Could not shutdown instance %s on"
8425 " node %s: %s" %
8426 (instance.name, source_node, msg))
8428 # create the target disks
8429 try:
8430 _CreateDisks(self, instance, target_node=target_node)
8431 except errors.OpExecError:
8432 self.LogWarning("Device creation failed, reverting...")
8433 try:
8434 _RemoveDisks(self, instance, target_node=target_node)
8435 finally:
8436 self.cfg.ReleaseDRBDMinors(instance.name)
8437 raise
8439 cluster_name = self.cfg.GetClusterInfo().cluster_name
8441 errs = []
8442 # activate, get path, copy the data over
8443 for idx, disk in enumerate(instance.disks):
8444 self.LogInfo("Copying data for disk %d", idx)
8445 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8446 instance.name, True, idx)
8447 if result.fail_msg:
8448 self.LogWarning("Can't assemble newly created disk %d: %s",
8449 idx, result.fail_msg)
8450 errs.append(result.fail_msg)
8451 break
8452 dev_path = result.payload
8453 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8454 target_node, dev_path,
8455 cluster_name)
8456 if result.fail_msg:
8457 self.LogWarning("Can't copy data over for disk %d: %s",
8458 idx, result.fail_msg)
8459 errs.append(result.fail_msg)
8460 break
8462 if errs:
8463 self.LogWarning("Some disks failed to copy, aborting")
8464 try:
8465 _RemoveDisks(self, instance, target_node=target_node)
8466 finally:
8467 self.cfg.ReleaseDRBDMinors(instance.name)
8468 raise errors.OpExecError("Errors during disk copy: %s" %
8471 instance.primary_node = target_node
8472 self.cfg.Update(instance, feedback_fn)
8474 self.LogInfo("Removing the disks on the original node")
8475 _RemoveDisks(self, instance, target_node=source_node)
8477 # Only start the instance if it's marked as up
8478 if instance.admin_state == constants.ADMINST_UP:
8479 self.LogInfo("Starting instance %s on node %s",
8480 instance.name, target_node)
8482 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8483 ignore_secondaries=True)
8484 if not disks_ok:
8485 _ShutdownInstanceDisks(self, instance)
8486 raise errors.OpExecError("Can't activate the instance's disks")
8488 result = self.rpc.call_instance_start(target_node,
8489 (instance, None, None), False)
8490 msg = result.fail_msg
8491 if msg:
8492 _ShutdownInstanceDisks(self, instance)
8493 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8494 (instance.name, target_node, msg))
8497 class LUNodeMigrate(LogicalUnit):
8498 """Migrate all instances from a node.
8501 HPATH = "node-migrate"
8502 HTYPE = constants.HTYPE_NODE
8505 def CheckArguments(self):
8508 def ExpandNames(self):
8509 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8511 self.share_locks = _ShareAll()
8512 self.needed_locks = {
8513 locking.LEVEL_NODE: [self.op.node_name],
8514 }
8516 def BuildHooksEnv(self):
8519 This runs on the master, the primary and all the secondaries.
8522 return {
8523 "NODE_NAME": self.op.node_name,
8524 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8525 }
8527 def BuildHooksNodes(self):
8528 """Build hooks nodes.
8531 nl = [self.cfg.GetMasterNode()]
8532 return (nl, nl)
8534 def CheckPrereq(self):
8537 def Exec(self, feedback_fn):
8538 # Prepare jobs for migration instances
8539 allow_runtime_changes = self.op.allow_runtime_changes
8541 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8544 iallocator=self.op.iallocator,
8545 target_node=self.op.target_node,
8546 allow_runtime_changes=allow_runtime_changes,
8547 ignore_ipolicy=self.op.ignore_ipolicy)]
8548 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
8550 # TODO: Run iallocator in this opcode and pass correct placement options to
8551 # OpInstanceMigrate. Since other jobs can modify the cluster between
8552 # running the iallocator and the actual migration, a good consistency model
8553 # will have to be found.
8555 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8556 frozenset([self.op.node_name]))
8558 return ResultWithJobs(jobs)
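# The result is one single-opcode job per primary instance of the node, e.g.
# [[OpInstanceMigrate(instance_name="inst1", ...)],
#  [OpInstanceMigrate(instance_name="inst2", ...)]]; the submitted job IDs are
# then reported back to the caller through ResultWithJobs.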
8561 class TLMigrateInstance(Tasklet):
8562 """Tasklet class for instance migration.
8565 @ivar live: whether the migration will be done live or non-live;
8566 this variable is initialized only after CheckPrereq has run
8567 @type cleanup: boolean
8568 @ivar cleanup: Whether we are cleaning up from a failed migration
8569 @type iallocator: string
8570 @ivar iallocator: The iallocator used to determine target_node
8571 @type target_node: string
8572 @ivar target_node: If given, the target_node to reallocate the instance to
8573 @type failover: boolean
8574 @ivar failover: Whether operation results in failover or migration
8575 @type fallback: boolean
8576 @ivar fallback: Whether fallback to failover is allowed if migration not
8578 @type ignore_consistency: boolean
8579 @ivar ignore_consistency: Whether we should ignore consistency between source
8581 @type shutdown_timeout: int
8582 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8583 @type ignore_ipolicy: bool
8584 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8589 _MIGRATION_POLL_INTERVAL = 1 # seconds
8590 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8592 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8593 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8594 ignore_ipolicy):
8595 """Initializes this class.
8598 Tasklet.__init__(self, lu)
8601 self.instance_name = instance_name
8602 self.cleanup = cleanup
8603 self.live = False # will be overridden later
8604 self.failover = failover
8605 self.fallback = fallback
8606 self.ignore_consistency = ignore_consistency
8607 self.shutdown_timeout = shutdown_timeout
8608 self.ignore_ipolicy = ignore_ipolicy
8609 self.allow_runtime_changes = allow_runtime_changes
8611 def CheckPrereq(self):
8612 """Check prerequisites.
8614 This checks that the instance is in the cluster.
8617 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8618 instance = self.cfg.GetInstanceInfo(instance_name)
8619 assert instance is not None
8620 self.instance = instance
8621 cluster = self.cfg.GetClusterInfo()
8623 if (not self.cleanup and
8624 not instance.admin_state == constants.ADMINST_UP and
8625 not self.failover and self.fallback):
8626 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8627 " switching to failover")
8628 self.failover = True
8630 if instance.disk_template not in constants.DTS_MIRRORED:
8635 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8636 " %s" % (instance.disk_template, text),
8639 if instance.disk_template in constants.DTS_EXT_MIRROR:
8640 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8642 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8644 if self.lu.op.iallocator:
8645 self._RunAllocator()
8646 else:
8647 # We set self.target_node as it is required by
8648 # _ShutdownInstanceDisks
8649 self.target_node = self.lu.op.target_node
8651 # Check that the target node is correct in terms of instance policy
8652 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8653 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8654 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8656 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8657 ignore=self.ignore_ipolicy)
8659 # self.target_node is already populated, either directly or by the
8661 target_node = self.target_node
8662 if self.target_node == instance.primary_node:
8663 raise errors.OpPrereqError("Cannot migrate instance %s"
8664 " to its primary (%s)" %
8665 (instance.name, instance.primary_node),
8668 if len(self.lu.tasklets) == 1:
8669 # It is safe to release locks only when we're the only tasklet
8671 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8672 keep=[instance.primary_node, self.target_node])
8673 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8675 else:
8676 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8678 secondary_nodes = instance.secondary_nodes
8679 if not secondary_nodes:
8680 raise errors.ConfigurationError("No secondary node but using"
8681 " %s disk template" %
8682 instance.disk_template)
8683 target_node = secondary_nodes[0]
8684 if self.lu.op.iallocator or (self.lu.op.target_node and
8685 self.lu.op.target_node != target_node):
8687 text = "failed over"
8690 raise errors.OpPrereqError("Instances with disk template %s cannot"
8691 " be %s to arbitrary nodes"
8692 " (neither an iallocator nor a target"
8693 " node can be passed)" %
8694 (instance.disk_template, text),
8696 nodeinfo = self.cfg.GetNodeInfo(target_node)
8697 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8698 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8700 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8701 ignore=self.ignore_ipolicy)
8703 i_be = cluster.FillBE(instance)
8705 # check memory requirements on the secondary node
8706 if (not self.cleanup and
8707 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8708 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8709 "migrating instance %s" %
8711 i_be[constants.BE_MINMEM],
8712 instance.hypervisor)
8713 else:
8714 self.lu.LogInfo("Not checking memory on the secondary node as"
8715 " instance will not be started")
8717 # check if failover must be forced instead of migration
8718 if (not self.cleanup and not self.failover and
8719 i_be[constants.BE_ALWAYS_FAILOVER]):
8720 self.lu.LogInfo("Instance configured to always failover; fallback"
8722 self.failover = True
8724 # check bridge existence
8725 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8727 if not self.cleanup:
8728 _CheckNodeNotDrained(self.lu, target_node)
8729 if not self.failover:
8730 result = self.rpc.call_instance_migratable(instance.primary_node,
8731 instance)
8732 if result.fail_msg and self.fallback:
8733 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8735 self.failover = True
8736 else:
8737 result.Raise("Can't migrate, please use failover",
8738 prereq=True, ecode=errors.ECODE_STATE)
8740 assert not (self.failover and self.cleanup)
8742 if not self.failover:
8743 if self.lu.op.live is not None and self.lu.op.mode is not None:
8744 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8745 " parameters are accepted",
8747 if self.lu.op.live is not None:
8748 if self.lu.op.live:
8749 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8750 else:
8751 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8752 # reset the 'live' parameter to None so that repeated
8753 # invocations of CheckPrereq do not raise an exception
8754 self.lu.op.live = None
8755 elif self.lu.op.mode is None:
8756 # read the default value from the hypervisor
8757 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8758 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8760 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8761 else:
8762 # Failover is never live
8763 self.live = False
8765 if not (self.failover or self.cleanup):
8766 remote_info = self.rpc.call_instance_info(instance.primary_node,
8767 instance.name,
8768 instance.hypervisor)
8769 remote_info.Raise("Error checking instance on node %s" %
8770 instance.primary_node)
8771 instance_running = bool(remote_info.payload)
8772 if instance_running:
8773 self.current_mem = int(remote_info.payload["memory"])
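# At the end of CheckPrereq the failover/live decision is settled: the "live"
# and "mode" opcode parameters are mutually exclusive, an explicit "live"
# value is translated into a migration mode, and when neither is given the
# hypervisor's HV_MIGRATION_MODE default decides.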
8775 def _RunAllocator(self):
8776 """Run the allocator based on input opcode.
8779 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8781 # FIXME: add a self.ignore_ipolicy option
8782 req = iallocator.IAReqRelocate(name=self.instance_name,
8783 relocate_from=[self.instance.primary_node])
8784 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8786 ial.Run(self.lu.op.iallocator)
8788 if not ial.success:
8789 raise errors.OpPrereqError("Can't compute nodes using"
8790 " iallocator '%s': %s" %
8791 (self.lu.op.iallocator, ial.info),
8793 self.target_node = ial.result[0]
8794 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8795 self.instance_name, self.lu.op.iallocator,
8796 utils.CommaJoin(ial.result))
8798 def _WaitUntilSync(self):
8799 """Poll with custom rpc for disk sync.
8801 This uses our own step-based rpc call.
8804 self.feedback_fn("* wait until resync is done")
8808 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8810 (self.instance.disks,
8813 for node, nres in result.items():
8814 nres.Raise("Cannot resync disks on node %s" % node)
8815 node_done, node_percent = nres.payload
8816 all_done = all_done and node_done
8817 if node_percent is not None:
8818 min_percent = min(min_percent, node_percent)
8820 if min_percent < 100:
8821 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8824 def _EnsureSecondary(self, node):
8825 """Demote a node to secondary.
8828 self.feedback_fn("* switching node %s to secondary mode" % node)
8830 for dev in self.instance.disks:
8831 self.cfg.SetDiskID(dev, node)
8833 result = self.rpc.call_blockdev_close(node, self.instance.name,
8834 self.instance.disks)
8835 result.Raise("Cannot change disk to secondary on node %s" % node)
8837 def _GoStandalone(self):
8838 """Disconnect from the network.
8841 self.feedback_fn("* changing into standalone mode")
8842 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8843 self.instance.disks)
8844 for node, nres in result.items():
8845 nres.Raise("Cannot disconnect disks node %s" % node)
8847 def _GoReconnect(self, multimaster):
8848 """Reconnect to the network.
8854 msg = "single-master"
8855 self.feedback_fn("* changing disks into %s mode" % msg)
8856 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8857 (self.instance.disks, self.instance),
8858 self.instance.name, multimaster)
8859 for node, nres in result.items():
8860 nres.Raise("Cannot change disks config on node %s" % node)
8862 def _ExecCleanup(self):
8863 """Try to cleanup after a failed migration.
8865 The cleanup is done by:
8866 - check that the instance is running only on one node
8867 (and update the config if needed)
8868 - change disks on its secondary node to secondary
8869 - wait until disks are fully synchronized
8870 - disconnect from the network
8871 - change disks into single-master mode
8872 - wait again until disks are fully synchronized
8875 instance = self.instance
8876 target_node = self.target_node
8877 source_node = self.source_node
8879 # check running on only one node
8880 self.feedback_fn("* checking where the instance actually runs"
8881 " (if this hangs, the hypervisor might be in"
8883 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8884 for node, result in ins_l.items():
8885 result.Raise("Can't contact node %s" % node)
8887 runningon_source = instance.name in ins_l[source_node].payload
8888 runningon_target = instance.name in ins_l[target_node].payload
8890 if runningon_source and runningon_target:
8891 raise errors.OpExecError("Instance seems to be running on two nodes,"
8892 " or the hypervisor is confused; you will have"
8893 " to ensure manually that it runs only on one"
8894 " and restart this operation")
8896 if not (runningon_source or runningon_target):
8897 raise errors.OpExecError("Instance does not seem to be running at all;"
8898 " in this case it's safer to repair by"
8899 " running 'gnt-instance stop' to ensure disk"
8900 " shutdown, and then restarting it")
8902 if runningon_target:
8903 # the migration has actually succeeded, we need to update the config
8904 self.feedback_fn("* instance running on secondary node (%s),"
8905 " updating config" % target_node)
8906 instance.primary_node = target_node
8907 self.cfg.Update(instance, self.feedback_fn)
8908 demoted_node = source_node
8910 self.feedback_fn("* instance confirmed to be running on its"
8911 " primary node (%s)" % source_node)
8912 demoted_node = target_node
8914 if instance.disk_template in constants.DTS_INT_MIRROR:
8915 self._EnsureSecondary(demoted_node)
8916 try:
8917 self._WaitUntilSync()
8918 except errors.OpExecError:
8919 # we ignore here errors, since if the device is standalone, it
8920 # won't be able to sync
8921 pass
8922 self._GoStandalone()
8923 self._GoReconnect(False)
8924 self._WaitUntilSync()
8926 self.feedback_fn("* done")
8928 def _RevertDiskStatus(self):
8929 """Try to revert the disk status after a failed migration.
8932 target_node = self.target_node
8933 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8937 self._EnsureSecondary(target_node)
8938 self._GoStandalone()
8939 self._GoReconnect(False)
8940 self._WaitUntilSync()
8941 except errors.OpExecError, err:
8942 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8943 " please try to recover the instance manually;"
8944 " error '%s'" % str(err))
8946 def _AbortMigration(self):
8947 """Call the hypervisor code to abort a started migration.
8950 instance = self.instance
8951 target_node = self.target_node
8952 source_node = self.source_node
8953 migration_info = self.migration_info
8955 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8959 abort_msg = abort_result.fail_msg
8961 logging.error("Aborting migration failed on target node %s: %s",
8962 target_node, abort_msg)
8963 # Don't raise an exception here, as we still have to try to revert the
8964 # disk status, even if this step failed.
8966 abort_result = self.rpc.call_instance_finalize_migration_src(
8967 source_node, instance, False, self.live)
8968 abort_msg = abort_result.fail_msg
8970 logging.error("Aborting migration failed on source node %s: %s",
8971 source_node, abort_msg)
8973 def _ExecMigration(self):
8974 """Migrate an instance.
8976 The migration is done by:
8977 - change the disks into dual-master mode
8978 - wait until disks are fully synchronized again
8979 - migrate the instance
8980 - change disks on the new secondary node (the old primary) to secondary
8981 - wait until disks are fully synchronized
8982 - change disks into single-master mode
8985 instance = self.instance
8986 target_node = self.target_node
8987 source_node = self.source_node
8989 # Check for hypervisor version mismatch and warn the user.
8990 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8991 None, [self.instance.hypervisor], False)
8992 for ninfo in nodeinfo.values():
8993 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8995 (_, _, (src_info, )) = nodeinfo[source_node].payload
8996 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8998 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8999 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9000 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9001 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9002 if src_version != dst_version:
9003 self.feedback_fn("* warning: hypervisor version mismatch between"
9004 " source (%s) and target (%s) node" %
9005 (src_version, dst_version))
9007 self.feedback_fn("* checking disk consistency between source and target")
9008 for (idx, dev) in enumerate(instance.disks):
9009 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9010 raise errors.OpExecError("Disk %s is degraded or not fully"
9011 " synchronized on target node,"
9012 " aborting migration" % idx)
9014 if self.current_mem > self.tgt_free_mem:
9015 if not self.allow_runtime_changes:
9016 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9017 " free memory to fit instance %s on target"
9018 " node %s (have %dMB, need %dMB)" %
9019 (instance.name, target_node,
9020 self.tgt_free_mem, self.current_mem))
9021 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9022 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9025 rpcres.Raise("Cannot modify instance runtime memory")
9027 # First get the migration information from the remote node
9028 result = self.rpc.call_migration_info(source_node, instance)
9029 msg = result.fail_msg
9031 log_err = ("Failed fetching source migration information from %s: %s" %
9033 logging.error(log_err)
9034 raise errors.OpExecError(log_err)
9036 self.migration_info = migration_info = result.payload
9038 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9039 # Then switch the disks to master/master mode
9040 self._EnsureSecondary(target_node)
9041 self._GoStandalone()
9042 self._GoReconnect(True)
9043 self._WaitUntilSync()
9045 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9046 result = self.rpc.call_accept_instance(target_node,
9049 self.nodes_ip[target_node])
9051 msg = result.fail_msg
9053 logging.error("Instance pre-migration failed, trying to revert"
9054 " disk status: %s", msg)
9055 self.feedback_fn("Pre-migration failed, aborting")
9056 self._AbortMigration()
9057 self._RevertDiskStatus()
9058 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9059 (instance.name, msg))
9061 self.feedback_fn("* migrating instance to %s" % target_node)
9062 result = self.rpc.call_instance_migrate(source_node, instance,
9063 self.nodes_ip[target_node],
9065 msg = result.fail_msg
9066 if msg:
9067 logging.error("Instance migration failed, trying to revert"
9068 " disk status: %s", msg)
9069 self.feedback_fn("Migration failed, aborting")
9070 self._AbortMigration()
9071 self._RevertDiskStatus()
9072 raise errors.OpExecError("Could not migrate instance %s: %s" %
9073 (instance.name, msg))
9075 self.feedback_fn("* starting memory transfer")
9076 last_feedback = time.time()
9078 result = self.rpc.call_instance_get_migration_status(source_node,
9080 msg = result.fail_msg
9081 ms = result.payload # MigrationStatus instance
9082 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9083 logging.error("Instance migration failed, trying to revert"
9084 " disk status: %s", msg)
9085 self.feedback_fn("Migration failed, aborting")
9086 self._AbortMigration()
9087 self._RevertDiskStatus()
9088 if not msg:
9089 msg = "hypervisor returned failure"
9090 raise errors.OpExecError("Could not migrate instance %s: %s" %
9091 (instance.name, msg))
9093 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9094 self.feedback_fn("* memory transfer complete")
9095 break
9097 if (utils.TimeoutExpired(last_feedback,
9098 self._MIGRATION_FEEDBACK_INTERVAL) and
9099 ms.transferred_ram is not None):
9100 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9101 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9102 last_feedback = time.time()
9104 time.sleep(self._MIGRATION_POLL_INTERVAL)
9106 result = self.rpc.call_instance_finalize_migration_src(source_node,
9110 msg = result.fail_msg
9112 logging.error("Instance migration succeeded, but finalization failed"
9113 " on the source node: %s", msg)
9114 raise errors.OpExecError("Could not finalize instance migration: %s" %
9117 instance.primary_node = target_node
9119 # distribute new instance config to the other nodes
9120 self.cfg.Update(instance, self.feedback_fn)
9122 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9126 msg = result.fail_msg
9128 logging.error("Instance migration succeeded, but finalization failed"
9129 " on the target node: %s", msg)
9130 raise errors.OpExecError("Could not finalize instance migration: %s" %
9133 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9134 self._EnsureSecondary(source_node)
9135 self._WaitUntilSync()
9136 self._GoStandalone()
9137 self._GoReconnect(False)
9138 self._WaitUntilSync()
9140 # If the instance's disk template is `rbd' or `ext' and there was a
9141 # successful migration, unmap the device from the source node.
9142 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9143 disks = _ExpandCheckDisks(instance, instance.disks)
9144 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9146 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9147 msg = result.fail_msg
9149 logging.error("Migration was successful, but couldn't unmap the"
9150 " block device %s on source node %s: %s",
9151 disk.iv_name, source_node, msg)
9152 logging.error("You need to unmap the device %s manually on %s",
9153 disk.iv_name, source_node)
9155 self.feedback_fn("* done")
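# For internally mirrored (DRBD) templates the sequence above is: switch the
# disks to dual-master mode, migrate the instance, then demote the old primary
# and reconnect in single-master mode; externally mirrored templates skip the
# DRBD reconfiguration, and for the rbd/ext templates the disks are unmapped
# from the source node afterwards.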
9157 def _ExecFailover(self):
9158 """Failover an instance.
9160 The failover is done by shutting it down on its present node and
9161 starting it on the secondary.
9164 instance = self.instance
9165 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9167 source_node = instance.primary_node
9168 target_node = self.target_node
9170 if instance.admin_state == constants.ADMINST_UP:
9171 self.feedback_fn("* checking disk consistency between source and target")
9172 for (idx, dev) in enumerate(instance.disks):
9173 # for drbd, these are drbd over lvm
9174 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9176 if primary_node.offline:
9177 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9179 (primary_node.name, idx, target_node))
9180 elif not self.ignore_consistency:
9181 raise errors.OpExecError("Disk %s is degraded on target node,"
9182 " aborting failover" % idx)
9184 self.feedback_fn("* not checking disk consistency as instance is not"
9187 self.feedback_fn("* shutting down instance on source node")
9188 logging.info("Shutting down instance %s on node %s",
9189 instance.name, source_node)
9191 result = self.rpc.call_instance_shutdown(source_node, instance,
9192 self.shutdown_timeout)
9193 msg = result.fail_msg
9194 if msg:
9195 if self.ignore_consistency or primary_node.offline:
9196 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9197 " proceeding anyway; please make sure node"
9198 " %s is down; error details: %s",
9199 instance.name, source_node, source_node, msg)
9200 else:
9201 raise errors.OpExecError("Could not shutdown instance %s on"
9202 " node %s: %s" %
9203 (instance.name, source_node, msg))
9205 self.feedback_fn("* deactivating the instance's disks on source node")
9206 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9207 raise errors.OpExecError("Can't shut down the instance's disks")
9209 instance.primary_node = target_node
9210 # distribute new instance config to the other nodes
9211 self.cfg.Update(instance, self.feedback_fn)
9213 # Only start the instance if it's marked as up
9214 if instance.admin_state == constants.ADMINST_UP:
9215 self.feedback_fn("* activating the instance's disks on target node %s" %
9217 logging.info("Starting instance %s on node %s",
9218 instance.name, target_node)
9220 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9221 ignore_secondaries=True)
9222 if not disks_ok:
9223 _ShutdownInstanceDisks(self.lu, instance)
9224 raise errors.OpExecError("Can't activate the instance's disks")
9226 self.feedback_fn("* starting the instance on the target node %s" %
9228 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9230 msg = result.fail_msg
9231 if msg:
9232 _ShutdownInstanceDisks(self.lu, instance)
9233 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9234 (instance.name, target_node, msg))
9236 def Exec(self, feedback_fn):
9237 """Perform the migration.
9240 self.feedback_fn = feedback_fn
9241 self.source_node = self.instance.primary_node
9243 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9244 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9245 self.target_node = self.instance.secondary_nodes[0]
9246 # Otherwise self.target_node has been populated either
9247 # directly, or through an iallocator.
9249 self.all_nodes = [self.source_node, self.target_node]
9250 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9251 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9253 if self.failover:
9254 feedback_fn("Failover instance %s" % self.instance.name)
9255 self._ExecFailover()
9256 else:
9257 feedback_fn("Migrating instance %s" % self.instance.name)
9259 if self.cleanup:
9260 return self._ExecCleanup()
9261 else:
9262 return self._ExecMigration()
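# Dispatch summary: failover requests go through _ExecFailover (shutdown on
# the source node, start on the target), a cleanup request (self.cleanup) runs
# _ExecCleanup after a previously failed migration, and everything else is a
# normal _ExecMigration.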
9265 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9266 force_open):
9267 """Wrapper around L{_CreateBlockDevInner}.
9269 This method annotates the root device first.
9272 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9273 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9274 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9275 force_open, excl_stor)
9278 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9279 info, force_open, excl_stor):
9280 """Create a tree of block devices on a given node.
9282 If this device type has to be created on secondaries, create it and
9285 If not, just recurse to children keeping the same 'force' value.
9287 @attention: The device has to be annotated already.
9289 @param lu: the lu on whose behalf we execute
9290 @param node: the node on which to create the device
9291 @type instance: L{objects.Instance}
9292 @param instance: the instance which owns the device
9293 @type device: L{objects.Disk}
9294 @param device: the device to create
9295 @type force_create: boolean
9296 @param force_create: whether to force creation of this device; this
9297 will be changed to True whenever we find a device which has
9298 CreateOnSecondary() attribute
9299 @param info: the extra 'metadata' we should attach to the device
9300 (this will be represented as a LVM tag)
9301 @type force_open: boolean
9302 @param force_open: this parameter will be passed to the
9303 L{backend.BlockdevCreate} function where it specifies
9304 whether we run on primary or not, and it affects both
9305 the child assembly and the device's own Open() execution
9306 @type excl_stor: boolean
9307 @param excl_stor: Whether exclusive_storage is active for the node
9310 if device.CreateOnSecondary():
9314 for child in device.children:
9315 _CreateBlockDevInner(lu, node, instance, child, force_create,
9316 info, force_open, excl_stor)
9318 if not force_create:
9321 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9325 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9327 """Create a single block device on a given node.
9329 This will not recurse over children of the device, so they must be created in advance.
9332 @param lu: the lu on whose behalf we execute
9333 @param node: the node on which to create the device
9334 @type instance: L{objects.Instance}
9335 @param instance: the instance which owns the device
9336 @type device: L{objects.Disk}
9337 @param device: the device to create
9338 @param info: the extra 'metadata' we should attach to the device
9339 (this will be represented as a LVM tag)
9340 @type force_open: boolean
9341 @param force_open: this parameter will be passed to the
9342 L{backend.BlockdevCreate} function where it specifies
9343 whether we run on primary or not, and it affects both
9344 the child assembly and the device's own Open() execution
9345 @type excl_stor: boolean
9346 @param excl_stor: Whether exclusive_storage is active for the node
9349 lu.cfg.SetDiskID(device, node)
9350 result = lu.rpc.call_blockdev_create(node, device, device.size,
9351 instance.name, force_open, info,
9353 result.Raise("Can't create block device %s on"
9354 " node %s for instance %s" % (device, node, instance.name))
9355 if device.physical_id is None:
9356 device.physical_id = result.payload
9359 def _GenerateUniqueNames(lu, exts):
9360 """Generate a suitable LV name.
9362 This will generate a logical volume name for the given instance.
9367 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9368 results.append("%s%s" % (new_id, val))
9372 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9373 iv_name, p_minor, s_minor):
9374 """Generate a drbd8 device complete with its children.
9377 assert len(vgnames) == len(names) == 2
9378 port = lu.cfg.AllocatePort()
9379 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9381 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9382 logical_id=(vgnames[0], names[0]),
9384 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9385 size=constants.DRBD_META_SIZE,
9386 logical_id=(vgnames[1], names[1]),
9388 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9389 logical_id=(primary, secondary, port,
9392 children=[dev_data, dev_meta],
9393 iv_name=iv_name, params={})
9397 _DISK_TEMPLATE_NAME_PREFIX = {
9398 constants.DT_PLAIN: "",
9399 constants.DT_RBD: ".rbd",
9400 constants.DT_EXT: ".ext",
9404 _DISK_TEMPLATE_DEVICE_TYPE = {
9405 constants.DT_PLAIN: constants.LD_LV,
9406 constants.DT_FILE: constants.LD_FILE,
9407 constants.DT_SHARED_FILE: constants.LD_FILE,
9408 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9409 constants.DT_RBD: constants.LD_RBD,
9410 constants.DT_EXT: constants.LD_EXT,
9414 def _GenerateDiskTemplate(
9415 lu, template_name, instance_name, primary_node, secondary_nodes,
9416 disk_info, file_storage_dir, file_driver, base_index,
9417 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9418 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9419 """Generate the entire disk layout for a given template type.
9422 vgname = lu.cfg.GetVGName()
9423 disk_count = len(disk_info)
9426 if template_name == constants.DT_DISKLESS:
9428 elif template_name == constants.DT_DRBD8:
9429 if len(secondary_nodes) != 1:
9430 raise errors.ProgrammerError("Wrong template configuration")
9431 remote_node = secondary_nodes[0]
9432 minors = lu.cfg.AllocateDRBDMinor(
9433 [primary_node, remote_node] * len(disk_info), instance_name)
9435 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9437 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9440 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9441 for i in range(disk_count)]):
9442 names.append(lv_prefix + "_data")
9443 names.append(lv_prefix + "_meta")
9444 for idx, disk in enumerate(disk_info):
9445 disk_index = idx + base_index
9446 data_vg = disk.get(constants.IDISK_VG, vgname)
9447 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9448 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9449 disk[constants.IDISK_SIZE],
9451 names[idx * 2:idx * 2 + 2],
9452 "disk/%d" % disk_index,
9453 minors[idx * 2], minors[idx * 2 + 1])
9454 disk_dev.mode = disk[constants.IDISK_MODE]
9455 disks.append(disk_dev)
9458 raise errors.ProgrammerError("Wrong template configuration")
9460 if template_name == constants.DT_FILE:
9462 elif template_name == constants.DT_SHARED_FILE:
9463 _req_shr_file_storage()
9465 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9466 if name_prefix is None:
9469 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9470 (name_prefix, base_index + i)
9471 for i in range(disk_count)])
9473 if template_name == constants.DT_PLAIN:
9475 def logical_id_fn(idx, _, disk):
9476 vg = disk.get(constants.IDISK_VG, vgname)
9477 return (vg, names[idx])
9479 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9481 lambda _, disk_index, disk: (file_driver,
9482 "%s/disk%d" % (file_storage_dir,
9484 elif template_name == constants.DT_BLOCK:
9486 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9487 disk[constants.IDISK_ADOPT])
9488 elif template_name == constants.DT_RBD:
9489 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9490 elif template_name == constants.DT_EXT:
9491 def logical_id_fn(idx, _, disk):
9492 provider = disk.get(constants.IDISK_PROVIDER, None)
9493 if provider is None:
9494 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9495 " not found" % (constants.DT_EXT,
9496 constants.IDISK_PROVIDER))
9497 return (provider, names[idx])
9499 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9501 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9503 for idx, disk in enumerate(disk_info):
9505 # Only for the Ext template add disk_info to params
9506 if template_name == constants.DT_EXT:
9507 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9509 if key not in constants.IDISK_PARAMS:
9510 params[key] = disk[key]
9511 disk_index = idx + base_index
9512 size = disk[constants.IDISK_SIZE]
9513 feedback_fn("* disk %s, size %s" %
9514 (disk_index, utils.FormatUnit(size, "h")))
9515 disks.append(objects.Disk(dev_type=dev_type, size=size,
9516 logical_id=logical_id_fn(idx, disk_index, disk),
9517 iv_name="disk/%d" % disk_index,
9518 mode=disk[constants.IDISK_MODE],
9524 def _GetInstanceInfoText(instance):
9525 Compute the text that should be added to the disk's metadata.
9528 return "originstname+%s" % instance.name
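# For example, an instance named "inst1.example.com" (a hypothetical name)
# gets the tag "originstname+inst1.example.com" attached to its disks.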
9531 def _CalcEta(time_taken, written, total_size):
9532 """Calculates the ETA based on size written and total size.
9534 @param time_taken: The time taken so far
9535 @param written: amount written so far
9536 @param total_size: The total size of data to be written
9537 @return: The remaining time in seconds
9540 avg_time = time_taken / float(written)
9541 return (total_size - written) * avg_time
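# Worked example (hypothetical numbers): if 512 MiB out of 2048 MiB were
# written in 30 seconds, _CalcEta(30, 512, 2048) returns
# (2048 - 512) * (30 / 512.0), i.e. 90.0 seconds remaining.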
9544 def _WipeDisks(lu, instance, disks=None):
9545 """Wipes instance disks.
9547 @type lu: L{LogicalUnit}
9548 @param lu: the logical unit on whose behalf we execute
9549 @type instance: L{objects.Instance}
9550 @param instance: the instance whose disks we should wipe
9551 @return: the success of the wipe
9554 node = instance.primary_node
9557 disks = [(idx, disk, 0)
9558 for (idx, disk) in enumerate(instance.disks)]
9560 for (_, device, _) in disks:
9561 lu.cfg.SetDiskID(device, node)
9563 logging.info("Pausing synchronization of disks of instance '%s'",
9565 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9566 (map(compat.snd, disks),
9569 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9571 for idx, success in enumerate(result.payload):
9573 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9574 " failed", idx, instance.name)
9577 for (idx, device, offset) in disks:
9578 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9579 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9581 int(min(constants.MAX_WIPE_CHUNK,
9582 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
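# Worked example (assuming MAX_WIPE_CHUNK is 1024 MiB and
# MIN_WIPE_CHUNK_PERCENT is 10; these values are only an assumption about
# constants.py): a 5120 MiB disk is wiped in 512 MiB chunks, while a
# 51200 MiB disk is capped at 1024 MiB chunks.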
9586 start_time = time.time()
9591 info_text = (" (from %s to %s)" %
9592 (utils.FormatUnit(offset, "h"),
9593 utils.FormatUnit(size, "h")))
9595 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9597 logging.info("Wiping disk %d for instance %s on node %s using"
9598 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9600 while offset < size:
9601 wipe_size = min(wipe_chunk_size, size - offset)
9603 logging.debug("Wiping disk %d, offset %s, chunk %s",
9604 idx, offset, wipe_size)
9606 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9608 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9609 (idx, offset, wipe_size))
9613 if now - last_output >= 60:
9614 eta = _CalcEta(now - start_time, offset, size)
9615 lu.LogInfo(" - done: %.1f%% ETA: %s",
9616 offset / float(size) * 100, utils.FormatSeconds(eta))
9619 logging.info("Resuming synchronization of disks for instance '%s'",
9622 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9623 (map(compat.snd, disks),
9628 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9629 node, result.fail_msg)
9631 for idx, success in enumerate(result.payload):
9633 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9634 " failed", idx, instance.name)
9637 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9638 """Create all disks for an instance.
9640 This abstracts away some work from AddInstance.
9642 @type lu: L{LogicalUnit}
9643 @param lu: the logical unit on whose behalf we execute
9644 @type instance: L{objects.Instance}
9645 @param instance: the instance whose disks we should create
9647 @param to_skip: list of indices to skip
9648 @type target_node: string
9649 @param target_node: if passed, overrides the target node for creation
9651 @return: the success of the creation
9654 info = _GetInstanceInfoText(instance)
9655 if target_node is None:
9656 pnode = instance.primary_node
9657 all_nodes = instance.all_nodes
9662 if instance.disk_template in constants.DTS_FILEBASED:
9663 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9664 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9666 result.Raise("Failed to create directory '%s' on"
9667 " node %s" % (file_storage_dir, pnode))
9669 # Note: this needs to be kept in sync with adding of disks in
9670 # LUInstanceSetParams
9671 for idx, device in enumerate(instance.disks):
9672 if to_skip and idx in to_skip:
9674 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9676 for node in all_nodes:
9677 f_create = node == pnode
9678 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9681 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9682 """Remove all disks for an instance.
9684 This abstracts away some work from `AddInstance()` and
9685 `RemoveInstance()`. Note that in case some of the devices couldn't
9686 be removed, the removal will continue with the other ones (compare
9687 with `_CreateDisks()`).
9689 @type lu: L{LogicalUnit}
9690 @param lu: the logical unit on whose behalf we execute
9691 @type instance: L{objects.Instance}
9692 @param instance: the instance whose disks we should remove
9693 @type target_node: string
9694 @param target_node: used to override the node on which to remove the disks
9696 @return: the success of the removal
9699 logging.info("Removing block devices for instance %s", instance.name)
9702 ports_to_release = set()
9703 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9704 for (idx, device) in enumerate(anno_disks):
9706 edata = [(target_node, device)]
9708 edata = device.ComputeNodeTree(instance.primary_node)
9709 for node, disk in edata:
9710 lu.cfg.SetDiskID(disk, node)
9711 result = lu.rpc.call_blockdev_remove(node, disk)
9713 lu.LogWarning("Could not remove disk %s on node %s,"
9714 " continuing anyway: %s", idx, node, result.fail_msg)
9715 if not (result.offline and node != instance.primary_node):
9718 # if this is a DRBD disk, return its port to the pool
9719 if device.dev_type in constants.LDS_DRBD:
9720 ports_to_release.add(device.logical_id[2])
9722 if all_result or ignore_failures:
9723 for port in ports_to_release:
9724 lu.cfg.AddTcpUdpPort(port)
9726 if instance.disk_template in constants.DTS_FILEBASED:
9727 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9731 tgt = instance.primary_node
9732 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9734 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9735 file_storage_dir, instance.primary_node, result.fail_msg)
9741 def _ComputeDiskSizePerVG(disk_template, disks):
9742 """Compute disk size requirements in the volume group
9745 def _compute(disks, payload):
9746 """Universal algorithm.
9751 vgs[disk[constants.IDISK_VG]] = \
9752 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9756 # Required free disk space as a function of disk and swap space
9758 constants.DT_DISKLESS: {},
9759 constants.DT_PLAIN: _compute(disks, 0),
9760 # 128 MB are added for drbd metadata for each disk
9761 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9762 constants.DT_FILE: {},
9763 constants.DT_SHARED_FILE: {},
9766 if disk_template not in req_size_dict:
9767 raise errors.ProgrammerError("Disk template '%s' size requirement"
9768 " is unknown" % disk_template)
9770 return req_size_dict[disk_template]
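# Illustrative result (hypothetical input, assuming DRBD_META_SIZE is 128 MiB):
# for disks [{vg: "xenvg", size: 1024}, {vg: "ssdvg", size: 2048}] this returns
#   DT_PLAIN -> {"xenvg": 1024, "ssdvg": 2048}
#   DT_DRBD8 -> {"xenvg": 1152, "ssdvg": 2176}   (metadata added per disk)
# and an empty dict for diskless and file-based templates.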
9773 def _FilterVmNodes(lu, nodenames):
9774 """Filters out non-vm_capable nodes from a list.
9776 @type lu: L{LogicalUnit}
9777 @param lu: the logical unit for which we check
9778 @type nodenames: list
9779 @param nodenames: the list of nodes on which we should check
9781 @return: the list of vm-capable nodes
9784 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9785 return [name for name in nodenames if name not in vm_nodes]
9788 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9789 """Hypervisor parameter validation.
9791 This function abstracts the hypervisor parameter validation to be
9792 used in both instance create and instance modify.
9794 @type lu: L{LogicalUnit}
9795 @param lu: the logical unit for which we check
9796 @type nodenames: list
9797 @param nodenames: the list of nodes on which we should check
9798 @type hvname: string
9799 @param hvname: the name of the hypervisor we should use
9800 @type hvparams: dict
9801 @param hvparams: the parameters which we need to check
9802 @raise errors.OpPrereqError: if the parameters are not valid
9805 nodenames = _FilterVmNodes(lu, nodenames)
9807 cluster = lu.cfg.GetClusterInfo()
9808 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9810 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9811 for node in nodenames:
9815 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9818 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9819 """OS parameters validation.
9821 @type lu: L{LogicalUnit}
9822 @param lu: the logical unit for which we check
9823 @type required: boolean
9824 @param required: whether the validation should fail if the OS is not found
9826 @type nodenames: list
9827 @param nodenames: the list of nodes on which we should check
9828 @type osname: string
9829 @param osname: the name of the OS we should use
9830 @type osparams: dict
9831 @param osparams: the parameters which we need to check
9832 @raise errors.OpPrereqError: if the parameters are not valid
9835 nodenames = _FilterVmNodes(lu, nodenames)
9836 result = lu.rpc.call_os_validate(nodenames, required, osname,
9837 [constants.OS_VALIDATE_PARAMETERS],
9839 for node, nres in result.items():
9840 # we don't check for offline cases since this should be run only
9841 # against the master node and/or an instance's nodes
9842 nres.Raise("OS Parameters validation failed on node %s" % node)
9843 if not nres.payload:
9844 lu.LogInfo("OS %s not found on node %s, validation skipped",
9848 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9849 """Wrapper around IAReqInstanceAlloc.
9851 @param op: The instance opcode
9852 @param disks: The computed disks
9853 @param nics: The computed nics
9854 @param beparams: The fully filled beparams
9855 @param node_whitelist: List of nodes which should appear as online to the
9856 allocator (unless the node is already marked offline)
9858 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9861 spindle_use = beparams[constants.BE_SPINDLE_USE]
9862 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9863 disk_template=op.disk_template,
9866 vcpus=beparams[constants.BE_VCPUS],
9867 memory=beparams[constants.BE_MAXMEM],
9868 spindle_use=spindle_use,
9870 nics=[n.ToDict() for n in nics],
9871 hypervisor=op.hypervisor,
9872 node_whitelist=node_whitelist)
9875 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9876 """Computes the nics.
9878 @param op: The instance opcode
9879 @param cluster: Cluster configuration object
9880 @param default_ip: The default ip to assign
9881 @param cfg: An instance of the configuration object
9882 @param ec_id: Execution context ID
9884 @returns: The built up NIC objects
9889 nic_mode_req = nic.get(constants.INIC_MODE, None)
9890 nic_mode = nic_mode_req
9891 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9892 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9894 net = nic.get(constants.INIC_NETWORK, None)
9895 link = nic.get(constants.NIC_LINK, None)
9896 ip = nic.get(constants.INIC_IP, None)
9898 if net is None or net.lower() == constants.VALUE_NONE:
9901 if nic_mode_req is not None or link is not None:
9902 raise errors.OpPrereqError("If network is given, no mode or link"
9903 " is allowed to be passed",
9906 # ip validity checks
9907 if ip is None or ip.lower() == constants.VALUE_NONE:
9909 elif ip.lower() == constants.VALUE_AUTO:
9910 if not op.name_check:
9911 raise errors.OpPrereqError("IP address set to auto but name checks"
9912 " have been skipped",
9916 # We defer pool operations until later, so that the iallocator has
9917 # filled in the instance's node(s)
9918 if ip.lower() == constants.NIC_IP_POOL:
9920 raise errors.OpPrereqError("if ip=pool, parameter network"
9921 " must be passed too",
9924 elif not netutils.IPAddress.IsValid(ip):
9925 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9930 # TODO: check the ip address for uniqueness
9931 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9932 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9935 # MAC address verification
9936 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9937 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9938 mac = utils.NormalizeAndValidateMac(mac)
9941 # TODO: We need to factor this out
9942 cfg.ReserveMAC(mac, ec_id)
9943 except errors.ReservationError:
9944 raise errors.OpPrereqError("MAC address %s already in use"
9945 " in cluster" % mac,
9946 errors.ECODE_NOTUNIQUE)
9948 # Build nic parameters
9951 nicparams[constants.NIC_MODE] = nic_mode
9953 nicparams[constants.NIC_LINK] = link
9955 check_params = cluster.SimpleFillNIC(nicparams)
9956 objects.NIC.CheckParameterSyntax(check_params)
9957 nics.append(objects.NIC(mac=mac, ip=nic_ip,
9958 network=net, nicparams=nicparams))
9963 def _ComputeDisks(op, default_vg):
9964 """Computes the instance disks.
9966 @param op: The instance opcode
9967 @param default_vg: The default_vg to assume
9969 @return: The computed disks
9973 for disk in op.disks:
9974 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9975 if mode not in constants.DISK_ACCESS_SET:
9976 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9977 mode, errors.ECODE_INVAL)
9978 size = disk.get(constants.IDISK_SIZE, None)
9980 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9983 except (TypeError, ValueError):
9984 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9987 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9988 if ext_provider and op.disk_template != constants.DT_EXT:
9989 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
9990 " disk template, not %s" %
9991 (constants.IDISK_PROVIDER, constants.DT_EXT,
9992 op.disk_template), errors.ECODE_INVAL)
9994 data_vg = disk.get(constants.IDISK_VG, default_vg)
9996 constants.IDISK_SIZE: size,
9997 constants.IDISK_MODE: mode,
9998 constants.IDISK_VG: data_vg,
10001 if constants.IDISK_METAVG in disk:
10002 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10003 if constants.IDISK_ADOPT in disk:
10004 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10006 # For extstorage, demand the `provider' option and add any
10007 # additional parameters (ext-params) to the dict
10008 if op.disk_template == constants.DT_EXT:
10010 new_disk[constants.IDISK_PROVIDER] = ext_provider
10012 if key not in constants.IDISK_PARAMS:
10013 new_disk[key] = disk[key]
10015 raise errors.OpPrereqError("Missing provider for template '%s'" %
10016 constants.DT_EXT, errors.ECODE_INVAL)
10018 disks.append(new_disk)
10023 def _ComputeFullBeParams(op, cluster):
10024 """Computes the full beparams.
10026 @param op: The instance opcode
10027 @param cluster: The cluster config object
10029 @return: The fully filled beparams
10032 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10033 for param, value in op.beparams.iteritems():
10034 if value == constants.VALUE_AUTO:
10035 op.beparams[param] = default_beparams[param]
10036 objects.UpgradeBeParams(op.beparams)
10037 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10038 return cluster.SimpleFillBE(op.beparams)
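# Sketch of the behaviour (hypothetical values): if op.beparams contains
# {constants.BE_VCPUS: constants.VALUE_AUTO} and the cluster default is 1 VCPU,
# the "auto" entry is replaced by 1 before the dict is upgraded, type-checked
# and merged with the remaining cluster defaults via SimpleFillBE.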
10041 def _CheckOpportunisticLocking(op):
10042 """Generate error if opportunistic locking is not possible.
10045 if op.opportunistic_locking and not op.iallocator:
10046 raise errors.OpPrereqError("Opportunistic locking is only available in"
10047 " combination with an instance allocator",
10048 errors.ECODE_INVAL)
10051 class LUInstanceCreate(LogicalUnit):
10052 """Create an instance.
10055 HPATH = "instance-add"
10056 HTYPE = constants.HTYPE_INSTANCE
10059 def CheckArguments(self):
10060 """Check arguments.
10063 # do not require name_check to ease forward/backward compatibility
10065 if self.op.no_install and self.op.start:
10066 self.LogInfo("No-installation mode selected, disabling startup")
10067 self.op.start = False
10068 # validate/normalize the instance name
10069 self.op.instance_name = \
10070 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10072 if self.op.ip_check and not self.op.name_check:
10073 # TODO: make the ip check more flexible and not depend on the name check
10074 raise errors.OpPrereqError("Cannot do IP address check without a name"
10075 " check", errors.ECODE_INVAL)
10077 # check nics' parameter names
10078 for nic in self.op.nics:
10079 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10081 # check disks. parameter names and consistent adopt/no-adopt strategy
10082 has_adopt = has_no_adopt = False
10083 for disk in self.op.disks:
10084 if self.op.disk_template != constants.DT_EXT:
10085 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10086 if constants.IDISK_ADOPT in disk:
10089 has_no_adopt = True
10090 if has_adopt and has_no_adopt:
10091 raise errors.OpPrereqError("Either all disks are adopted or none is",
10092 errors.ECODE_INVAL)
10094 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10095 raise errors.OpPrereqError("Disk adoption is not supported for the"
10096 " '%s' disk template" %
10097 self.op.disk_template,
10098 errors.ECODE_INVAL)
10099 if self.op.iallocator is not None:
10100 raise errors.OpPrereqError("Disk adoption not allowed with an"
10101 " iallocator script", errors.ECODE_INVAL)
10102 if self.op.mode == constants.INSTANCE_IMPORT:
10103 raise errors.OpPrereqError("Disk adoption not allowed for"
10104 " instance import", errors.ECODE_INVAL)
10106 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10107 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10108 " but no 'adopt' parameter given" %
10109 self.op.disk_template,
10110 errors.ECODE_INVAL)
10112 self.adopt_disks = has_adopt
10114 # instance name verification
10115 if self.op.name_check:
10116 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10117 self.op.instance_name = self.hostname1.name
10118 # used in CheckPrereq for ip ping check
10119 self.check_ip = self.hostname1.ip
10121 self.check_ip = None
10123 # file storage checks
10124 if (self.op.file_driver and
10125 not self.op.file_driver in constants.FILE_DRIVER):
10126 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10127 self.op.file_driver, errors.ECODE_INVAL)
10129 if self.op.disk_template == constants.DT_FILE:
10130 opcodes.RequireFileStorage()
10131 elif self.op.disk_template == constants.DT_SHARED_FILE:
10132 opcodes.RequireSharedFileStorage()
10134 ### Node/iallocator related checks
10135 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10137 if self.op.pnode is not None:
10138 if self.op.disk_template in constants.DTS_INT_MIRROR:
10139 if self.op.snode is None:
10140 raise errors.OpPrereqError("The networked disk templates need"
10141 " a mirror node", errors.ECODE_INVAL)
10142 elif self.op.snode:
10143 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10145 self.op.snode = None
10147 _CheckOpportunisticLocking(self.op)
10149 self._cds = _GetClusterDomainSecret()
10151 if self.op.mode == constants.INSTANCE_IMPORT:
10152 # On import force_variant must be True, because if we forced it at
10153 # initial install, our only chance when importing it back is that it works again
10155 self.op.force_variant = True
10157 if self.op.no_install:
10158 self.LogInfo("No-installation mode has no effect during import")
10160 elif self.op.mode == constants.INSTANCE_CREATE:
10161 if self.op.os_type is None:
10162 raise errors.OpPrereqError("No guest OS specified",
10163 errors.ECODE_INVAL)
10164 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10165 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10166 " installation" % self.op.os_type,
10167 errors.ECODE_STATE)
10168 if self.op.disk_template is None:
10169 raise errors.OpPrereqError("No disk template specified",
10170 errors.ECODE_INVAL)
10172 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10173 # Check handshake to ensure both clusters have the same domain secret
10174 src_handshake = self.op.source_handshake
10175 if not src_handshake:
10176 raise errors.OpPrereqError("Missing source handshake",
10177 errors.ECODE_INVAL)
10179 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10182 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10183 errors.ECODE_INVAL)
10185 # Load and check source CA
10186 self.source_x509_ca_pem = self.op.source_x509_ca
10187 if not self.source_x509_ca_pem:
10188 raise errors.OpPrereqError("Missing source X509 CA",
10189 errors.ECODE_INVAL)
10192 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10194 except OpenSSL.crypto.Error, err:
10195 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10196 (err, ), errors.ECODE_INVAL)
10198 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10199 if errcode is not None:
10200 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10201 errors.ECODE_INVAL)
10203 self.source_x509_ca = cert
10205 src_instance_name = self.op.source_instance_name
10206 if not src_instance_name:
10207 raise errors.OpPrereqError("Missing source instance name",
10208 errors.ECODE_INVAL)
10210 self.source_instance_name = \
10211 netutils.GetHostname(name=src_instance_name).name
10214 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10215 self.op.mode, errors.ECODE_INVAL)
10217 def ExpandNames(self):
10218 """ExpandNames for CreateInstance.
10220 Figure out the right locks for instance creation.
10223 self.needed_locks = {}
10225 instance_name = self.op.instance_name
10226 # this is just a preventive check, but someone might still add this
10227 # instance in the meantime, and creation will fail at lock-add time
10228 if instance_name in self.cfg.GetInstanceList():
10229 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10230 instance_name, errors.ECODE_EXISTS)
10232 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10234 if self.op.iallocator:
10235 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10236 # specifying a group on instance creation and then selecting nodes from
10238 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10239 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10241 if self.op.opportunistic_locking:
10242 self.opportunistic_locks[locking.LEVEL_NODE] = True
10243 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10245 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10246 nodelist = [self.op.pnode]
10247 if self.op.snode is not None:
10248 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10249 nodelist.append(self.op.snode)
10250 self.needed_locks[locking.LEVEL_NODE] = nodelist
10252 # in case of import lock the source node too
10253 if self.op.mode == constants.INSTANCE_IMPORT:
10254 src_node = self.op.src_node
10255 src_path = self.op.src_path
10257 if src_path is None:
10258 self.op.src_path = src_path = self.op.instance_name
10260 if src_node is None:
10261 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10262 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10263 self.op.src_node = None
10264 if os.path.isabs(src_path):
10265 raise errors.OpPrereqError("Importing an instance from a path"
10266 " requires a source node option",
10267 errors.ECODE_INVAL)
10269 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10270 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10271 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10272 if not os.path.isabs(src_path):
10273 self.op.src_path = src_path = \
10274 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10276 self.needed_locks[locking.LEVEL_NODE_RES] = \
10277 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10279 def _RunAllocator(self):
10280 """Run the allocator based on input opcode.
10283 if self.op.opportunistic_locking:
10284 # Only consider nodes for which a lock is held
10285 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10287 node_whitelist = None
10289 #TODO Export network to iallocator so that it chooses a pnode
10290 # in a nodegroup that has the desired network connected to
10291 req = _CreateInstanceAllocRequest(self.op, self.disks,
10292 self.nics, self.be_full,
10294 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10296 ial.Run(self.op.iallocator)
10298 if not ial.success:
10299 # When opportunistic locks are used only a temporary failure is generated
10300 if self.op.opportunistic_locking:
10301 ecode = errors.ECODE_TEMP_NORES
10303 ecode = errors.ECODE_NORES
10305 raise errors.OpPrereqError("Can't compute nodes using"
10306 " iallocator '%s': %s" %
10307 (self.op.iallocator, ial.info),
10310 self.op.pnode = ial.result[0]
10311 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10312 self.op.instance_name, self.op.iallocator,
10313 utils.CommaJoin(ial.result))
10315 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10317 if req.RequiredNodes() == 2:
10318 self.op.snode = ial.result[1]
10320 def BuildHooksEnv(self):
10321 """Build hooks env.
10323 This runs on master, primary and secondary nodes of the instance.
10327 "ADD_MODE": self.op.mode,
10329 if self.op.mode == constants.INSTANCE_IMPORT:
10330 env["SRC_NODE"] = self.op.src_node
10331 env["SRC_PATH"] = self.op.src_path
10332 env["SRC_IMAGES"] = self.src_images
10334 env.update(_BuildInstanceHookEnv(
10335 name=self.op.instance_name,
10336 primary_node=self.op.pnode,
10337 secondary_nodes=self.secondaries,
10338 status=self.op.start,
10339 os_type=self.op.os_type,
10340 minmem=self.be_full[constants.BE_MINMEM],
10341 maxmem=self.be_full[constants.BE_MAXMEM],
10342 vcpus=self.be_full[constants.BE_VCPUS],
10343 nics=_NICListToTuple(self, self.nics),
10344 disk_template=self.op.disk_template,
10345 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10346 for d in self.disks],
10349 hypervisor_name=self.op.hypervisor,
10355 def BuildHooksNodes(self):
10356 """Build hooks nodes.
10359 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10362 def _ReadExportInfo(self):
10363 """Reads the export information from disk.
10365 It will override the opcode source node and path with the actual
10366 information, if these two were not specified before.
10368 @return: the export information
10371 assert self.op.mode == constants.INSTANCE_IMPORT
10373 src_node = self.op.src_node
10374 src_path = self.op.src_path
10376 if src_node is None:
10377 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10378 exp_list = self.rpc.call_export_list(locked_nodes)
10380 for node in exp_list:
10381 if exp_list[node].fail_msg:
10383 if src_path in exp_list[node].payload:
10385 self.op.src_node = src_node = node
10386 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10390 raise errors.OpPrereqError("No export found for relative path %s" %
10391 src_path, errors.ECODE_INVAL)
10393 _CheckNodeOnline(self, src_node)
10394 result = self.rpc.call_export_info(src_node, src_path)
10395 result.Raise("No export or invalid export found in dir %s" % src_path)
10397 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10398 if not export_info.has_section(constants.INISECT_EXP):
10399 raise errors.ProgrammerError("Corrupted export config",
10400 errors.ECODE_ENVIRON)
10402 ei_version = export_info.get(constants.INISECT_EXP, "version")
10403 if (int(ei_version) != constants.EXPORT_VERSION):
10404 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10405 (ei_version, constants.EXPORT_VERSION),
10406 errors.ECODE_ENVIRON)
10409 def _ReadExportParams(self, einfo):
10410 """Use export parameters as defaults.
10412 In case the opcode doesn't specify (i.e. override) some instance
10413 parameters, try to take them from the export information, if
10414 it declares them.
10417 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10419 if self.op.disk_template is None:
10420 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10421 self.op.disk_template = einfo.get(constants.INISECT_INS,
10423 if self.op.disk_template not in constants.DISK_TEMPLATES:
10424 raise errors.OpPrereqError("Disk template specified in configuration"
10425 " file is not one of the allowed values:"
10427 " ".join(constants.DISK_TEMPLATES),
10428 errors.ECODE_INVAL)
10430 raise errors.OpPrereqError("No disk template specified and the export"
10431 " is missing the disk_template information",
10432 errors.ECODE_INVAL)
10434 if not self.op.disks:
10436 # TODO: import the disk iv_name too
10437 for idx in range(constants.MAX_DISKS):
10438 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10439 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10440 disks.append({constants.IDISK_SIZE: disk_sz})
10441 self.op.disks = disks
10442 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10443 raise errors.OpPrereqError("No disk info specified and the export"
10444 " is missing the disk information",
10445 errors.ECODE_INVAL)
10447 if not self.op.nics:
10449 for idx in range(constants.MAX_NICS):
10450 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10452 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10453 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10458 self.op.nics = nics
10460 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10461 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10463 if (self.op.hypervisor is None and
10464 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10465 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10467 if einfo.has_section(constants.INISECT_HYP):
10468 # use the export parameters but do not override the ones
10469 # specified by the user
10470 for name, value in einfo.items(constants.INISECT_HYP):
10471 if name not in self.op.hvparams:
10472 self.op.hvparams[name] = value
10474 if einfo.has_section(constants.INISECT_BEP):
10475 # use the parameters, without overriding
10476 for name, value in einfo.items(constants.INISECT_BEP):
10477 if name not in self.op.beparams:
10478 self.op.beparams[name] = value
10479 # Compatibility for the old "memory" be param
10480 if name == constants.BE_MEMORY:
10481 if constants.BE_MAXMEM not in self.op.beparams:
10482 self.op.beparams[constants.BE_MAXMEM] = value
10483 if constants.BE_MINMEM not in self.op.beparams:
10484 self.op.beparams[constants.BE_MINMEM] = value
10486 # try to read the parameters old style, from the main section
10487 for name in constants.BES_PARAMETERS:
10488 if (name not in self.op.beparams and
10489 einfo.has_option(constants.INISECT_INS, name)):
10490 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10492 if einfo.has_section(constants.INISECT_OSP):
10493 # use the parameters, without overriding
10494 for name, value in einfo.items(constants.INISECT_OSP):
10495 if name not in self.op.osparams:
10496 self.op.osparams[name] = value
10498 def _RevertToDefaults(self, cluster):
10499 """Revert the instance parameters to the default values.
10503 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10504 for name in self.op.hvparams.keys():
10505 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10506 del self.op.hvparams[name]
10508 be_defs = cluster.SimpleFillBE({})
10509 for name in self.op.beparams.keys():
10510 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10511 del self.op.beparams[name]
10513 nic_defs = cluster.SimpleFillNIC({})
10514 for nic in self.op.nics:
10515 for name in constants.NICS_PARAMETERS:
10516 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10519 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10520 for name in self.op.osparams.keys():
10521 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10522 del self.op.osparams[name]
10524 def _CalculateFileStorageDir(self):
10525 """Calculate final instance file storage dir.
10528 # file storage dir calculation/check
10529 self.instance_file_storage_dir = None
10530 if self.op.disk_template in constants.DTS_FILEBASED:
10531 # build the full file storage dir path
10534 if self.op.disk_template == constants.DT_SHARED_FILE:
10535 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10537 get_fsd_fn = self.cfg.GetFileStorageDir
10539 cfg_storagedir = get_fsd_fn()
10540 if not cfg_storagedir:
10541 raise errors.OpPrereqError("Cluster file storage dir not defined",
10542 errors.ECODE_STATE)
10543 joinargs.append(cfg_storagedir)
10545 if self.op.file_storage_dir is not None:
10546 joinargs.append(self.op.file_storage_dir)
10548 joinargs.append(self.op.instance_name)
10550 # pylint: disable=W0142
10551 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
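# Illustrative result (hypothetical paths): with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir set to "web" and an instance
# named "inst1.example.com", the final directory becomes
# "/srv/ganeti/file-storage/web/inst1.example.com".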
10553 def CheckPrereq(self): # pylint: disable=R0914
10554 """Check prerequisites.
10557 self._CalculateFileStorageDir()
10559 if self.op.mode == constants.INSTANCE_IMPORT:
10560 export_info = self._ReadExportInfo()
10561 self._ReadExportParams(export_info)
10562 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10564 self._old_instance_name = None
10566 if (not self.cfg.GetVGName() and
10567 self.op.disk_template not in constants.DTS_NOT_LVM):
10568 raise errors.OpPrereqError("Cluster does not support lvm-based"
10569 " instances", errors.ECODE_STATE)
10571 if (self.op.hypervisor is None or
10572 self.op.hypervisor == constants.VALUE_AUTO):
10573 self.op.hypervisor = self.cfg.GetHypervisorType()
10575 cluster = self.cfg.GetClusterInfo()
10576 enabled_hvs = cluster.enabled_hypervisors
10577 if self.op.hypervisor not in enabled_hvs:
10578 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10580 (self.op.hypervisor, ",".join(enabled_hvs)),
10581 errors.ECODE_STATE)
10583 # Check tag validity
10584 for tag in self.op.tags:
10585 objects.TaggableObject.ValidateTag(tag)
10587 # check hypervisor parameter syntax (locally)
10588 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10589 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10591 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10592 hv_type.CheckParameterSyntax(filled_hvp)
10593 self.hv_full = filled_hvp
10594 # check that we don't specify global parameters on an instance
10595 _CheckGlobalHvParams(self.op.hvparams)
10597 # fill and remember the beparams dict
10598 self.be_full = _ComputeFullBeParams(self.op, cluster)
10600 # build os parameters
10601 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10603 # now that hvp/bep are in final format, let's reset to defaults,
10605 if self.op.identify_defaults:
10606 self._RevertToDefaults(cluster)
10609 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10610 self.proc.GetECId())
10612 # disk checks/pre-build
10613 default_vg = self.cfg.GetVGName()
10614 self.disks = _ComputeDisks(self.op, default_vg)
10616 if self.op.mode == constants.INSTANCE_IMPORT:
10618 for idx in range(len(self.disks)):
10619 option = "disk%d_dump" % idx
10620 if export_info.has_option(constants.INISECT_INS, option):
10621 # FIXME: are the old os-es, disk sizes, etc. useful?
10622 export_name = export_info.get(constants.INISECT_INS, option)
10623 image = utils.PathJoin(self.op.src_path, export_name)
10624 disk_images.append(image)
10626 disk_images.append(False)
10628 self.src_images = disk_images
10630 if self.op.instance_name == self._old_instance_name:
10631 for idx, nic in enumerate(self.nics):
10632 if nic.mac == constants.VALUE_AUTO:
10633 nic_mac_ini = "nic%d_mac" % idx
10634 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10636 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10638 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10639 if self.op.ip_check:
10640 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10641 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10642 (self.check_ip, self.op.instance_name),
10643 errors.ECODE_NOTUNIQUE)
10645 #### mac address generation
10646 # By generating the MAC address here, both the allocator and the hooks get
10647 # the real final mac address rather than the 'auto' or 'generate' value.
10648 # There is a race condition between the generation and the instance object
10649 # creation, which means that we know the mac is valid now, but we're not
10650 # sure it will be when we actually add the instance. If things go bad
10651 # adding the instance will abort because of a duplicate mac, and the
10652 # creation job will fail.
10653 for nic in self.nics:
10654 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10655 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10659 if self.op.iallocator is not None:
10660 self._RunAllocator()
10662 # Release all unneeded node locks
10663 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10664 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10665 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10666 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10668 assert (self.owned_locks(locking.LEVEL_NODE) ==
10669 self.owned_locks(locking.LEVEL_NODE_RES)), \
10670 "Node locks differ from node resource locks"
10672 #### node related checks
10674 # check primary node
10675 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10676 assert self.pnode is not None, \
10677 "Cannot retrieve locked node %s" % self.op.pnode
10679 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10680 pnode.name, errors.ECODE_STATE)
10682 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10683 pnode.name, errors.ECODE_STATE)
10684 if not pnode.vm_capable:
10685 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10686 " '%s'" % pnode.name, errors.ECODE_STATE)
10688 self.secondaries = []
10690 # Fill in any IPs from IP pools. This must happen here, because we need to
10691 # know the nic's primary node, as specified by the iallocator
10692 for idx, nic in enumerate(self.nics):
10694 if net is not None:
10695 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10696 if netparams is None:
10697 raise errors.OpPrereqError("No netparams found for network"
10698 " %s. Probably not connected to"
10699 " node's %s nodegroup" %
10700 (net, self.pnode.name),
10701 errors.ECODE_INVAL)
10702 self.LogInfo("NIC/%d inherits netparams %s" %
10703 (idx, netparams.values()))
10704 nic.nicparams = dict(netparams)
10705 if nic.ip is not None:
10706 if nic.ip.lower() == constants.NIC_IP_POOL:
10708 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10709 except errors.ReservationError:
10710 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10711 " from the address pool" % idx,
10712 errors.ECODE_STATE)
10713 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10716 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10717 except errors.ReservationError:
10718 raise errors.OpPrereqError("IP address %s already in use"
10719 " or does not belong to network %s" %
10721 errors.ECODE_NOTUNIQUE)
10723 # net is None, ip None or given
10724 elif self.op.conflicts_check:
10725 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10727 # mirror node verification
10728 if self.op.disk_template in constants.DTS_INT_MIRROR:
10729 if self.op.snode == pnode.name:
10730 raise errors.OpPrereqError("The secondary node cannot be the"
10731 " primary node", errors.ECODE_INVAL)
10732 _CheckNodeOnline(self, self.op.snode)
10733 _CheckNodeNotDrained(self, self.op.snode)
10734 _CheckNodeVmCapable(self, self.op.snode)
10735 self.secondaries.append(self.op.snode)
10737 snode = self.cfg.GetNodeInfo(self.op.snode)
10738 if pnode.group != snode.group:
10739 self.LogWarning("The primary and secondary nodes are in two"
10740 " different node groups; the disk parameters"
10741 " from the first disk's node group will be"
10744 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10746 if self.op.disk_template in constants.DTS_INT_MIRROR:
10747 nodes.append(snode)
10748 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10749 if compat.any(map(has_es, nodes)):
10750 raise errors.OpPrereqError("Disk template %s not supported with"
10751 " exclusive storage" % self.op.disk_template,
10752 errors.ECODE_STATE)
10754 nodenames = [pnode.name] + self.secondaries
10756 # Verify instance specs
10757 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10759 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10760 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10761 constants.ISPEC_DISK_COUNT: len(self.disks),
10762 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10763 constants.ISPEC_NIC_COUNT: len(self.nics),
10764 constants.ISPEC_SPINDLE_USE: spindle_use,
10767 group_info = self.cfg.GetNodeGroup(pnode.group)
10768 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10769 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10770 if not self.op.ignore_ipolicy and res:
10771 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10772 (pnode.group, group_info.name, utils.CommaJoin(res)))
10773 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10775 if not self.adopt_disks:
10776 if self.op.disk_template == constants.DT_RBD:
10777 # _CheckRADOSFreeSpace() is just a placeholder.
10778 # Any function that checks prerequisites can be placed here.
10779 # Check if there is enough space on the RADOS cluster.
10780 _CheckRADOSFreeSpace()
10781 elif self.op.disk_template == constants.DT_EXT:
10782 # FIXME: Function that checks prereqs if needed
10785 # Check lv size requirements, if not adopting
10786 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10787 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10789 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10790 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10791 disk[constants.IDISK_ADOPT])
10792 for disk in self.disks])
10793 if len(all_lvs) != len(self.disks):
10794 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10795 errors.ECODE_INVAL)
10796 for lv_name in all_lvs:
10798 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10799 # to ReserveLV use the same syntax
10800 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10801 except errors.ReservationError:
10802 raise errors.OpPrereqError("LV named %s used by another instance" %
10803 lv_name, errors.ECODE_NOTUNIQUE)
10805 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10806 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10808 node_lvs = self.rpc.call_lv_list([pnode.name],
10809 vg_names.payload.keys())[pnode.name]
10810 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10811 node_lvs = node_lvs.payload
10813 delta = all_lvs.difference(node_lvs.keys())
10815 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10816 utils.CommaJoin(delta),
10817 errors.ECODE_INVAL)
10818 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10820 raise errors.OpPrereqError("Online logical volumes found, cannot"
10821 " adopt: %s" % utils.CommaJoin(online_lvs),
10822 errors.ECODE_STATE)
10823 # update the size of each disk based on what is found
10824 for dsk in self.disks:
10825 dsk[constants.IDISK_SIZE] = \
10826 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10827 dsk[constants.IDISK_ADOPT])][0]))
10829 elif self.op.disk_template == constants.DT_BLOCK:
10830 # Normalize and de-duplicate device paths
10831 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10832 for disk in self.disks])
10833 if len(all_disks) != len(self.disks):
10834 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10835 errors.ECODE_INVAL)
10836 baddisks = [d for d in all_disks
10837 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10839 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10840 " cannot be adopted" %
10841 (utils.CommaJoin(baddisks),
10842 constants.ADOPTABLE_BLOCKDEV_ROOT),
10843 errors.ECODE_INVAL)
10845 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10846 list(all_disks))[pnode.name]
10847 node_disks.Raise("Cannot get block device information from node %s" %
10849 node_disks = node_disks.payload
10850 delta = all_disks.difference(node_disks.keys())
10852 raise errors.OpPrereqError("Missing block device(s): %s" %
10853 utils.CommaJoin(delta),
10854 errors.ECODE_INVAL)
10855 for dsk in self.disks:
10856 dsk[constants.IDISK_SIZE] = \
10857 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10859 # Verify instance specs
10860 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10862 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10863 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10864 constants.ISPEC_DISK_COUNT: len(self.disks),
10865 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10866 for disk in self.disks],
10867 constants.ISPEC_NIC_COUNT: len(self.nics),
10868 constants.ISPEC_SPINDLE_USE: spindle_use,
10871 group_info = self.cfg.GetNodeGroup(pnode.group)
10872 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10873 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10874 if not self.op.ignore_ipolicy and res:
10875 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10876 " policy: %s") % (pnode.group,
10877 utils.CommaJoin(res)),
10878 errors.ECODE_INVAL)
10880 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10882 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10883 # check OS parameters (remotely)
10884 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10886 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10888 #TODO: _CheckExtParams (remotely)
10889 # Check parameters for extstorage
10891 # memory check on primary node
10892 #TODO(dynmem): use MINMEM for checking
10894 _CheckNodeFreeMemory(self, self.pnode.name,
10895 "creating instance %s" % self.op.instance_name,
10896 self.be_full[constants.BE_MAXMEM],
10897 self.op.hypervisor)
10899 self.dry_run_result = list(nodenames)
10901 def Exec(self, feedback_fn):
10902 """Create and add the instance to the cluster.
10905 instance = self.op.instance_name
10906 pnode_name = self.pnode.name
10908 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10909 self.owned_locks(locking.LEVEL_NODE)), \
10910 "Node locks differ from node resource locks"
10911 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10913 ht_kind = self.op.hypervisor
10914 if ht_kind in constants.HTS_REQ_PORT:
10915 network_port = self.cfg.AllocatePort()
10917 network_port = None
10919 # This is ugly, but we have a chicken-and-egg problem here
10920 # We can only take the group disk parameters, as the instance
10921 # has no disks yet (we are generating them right here).
10922 node = self.cfg.GetNodeInfo(pnode_name)
10923 nodegroup = self.cfg.GetNodeGroup(node.group)
10924 disks = _GenerateDiskTemplate(self,
10925 self.op.disk_template,
10926 instance, pnode_name,
10929 self.instance_file_storage_dir,
10930 self.op.file_driver,
10933 self.cfg.GetGroupDiskParams(nodegroup))
10935 iobj = objects.Instance(name=instance, os=self.op.os_type,
10936 primary_node=pnode_name,
10937 nics=self.nics, disks=disks,
10938 disk_template=self.op.disk_template,
10939 admin_state=constants.ADMINST_DOWN,
10940 network_port=network_port,
10941 beparams=self.op.beparams,
10942 hvparams=self.op.hvparams,
10943 hypervisor=self.op.hypervisor,
10944 osparams=self.op.osparams,
10948 for tag in self.op.tags:
10951 if self.adopt_disks:
10952 if self.op.disk_template == constants.DT_PLAIN:
10953 # rename LVs to the newly-generated names; we need to construct
10954 # 'fake' LV disks with the old data, plus the new unique_id
10955 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10957 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10958 rename_to.append(t_dsk.logical_id)
10959 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10960 self.cfg.SetDiskID(t_dsk, pnode_name)
10961 result = self.rpc.call_blockdev_rename(pnode_name,
10962 zip(tmp_disks, rename_to))
10963 result.Raise("Failed to rename adopted LVs")
10965 feedback_fn("* creating instance disks...")
10967 _CreateDisks(self, iobj)
10968 except errors.OpExecError:
10969 self.LogWarning("Device creation failed, reverting...")
10971 _RemoveDisks(self, iobj)
10973 self.cfg.ReleaseDRBDMinors(instance)
10976 feedback_fn("adding instance %s to cluster config" % instance)
10978 self.cfg.AddInstance(iobj, self.proc.GetECId())
10980 # Declare that we don't want to remove the instance lock anymore, as we've
10981 # added the instance to the config
10982 del self.remove_locks[locking.LEVEL_INSTANCE]
10984 if self.op.mode == constants.INSTANCE_IMPORT:
10985 # Release unused nodes
10986 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10988 # Release all nodes
10989 _ReleaseLocks(self, locking.LEVEL_NODE)
10992 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10993 feedback_fn("* wiping instance disks...")
10995 _WipeDisks(self, iobj)
10996 except errors.OpExecError, err:
10997 logging.exception("Wiping disks failed")
10998 self.LogWarning("Wiping instance disks failed (%s)", err)
11002 # Something is already wrong with the disks, don't do anything else
11004 elif self.op.wait_for_sync:
11005 disk_abort = not _WaitForSync(self, iobj)
11006 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11007 # make sure the disks are not degraded (still sync-ing is ok)
11008 feedback_fn("* checking mirrors status")
11009 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11014 _RemoveDisks(self, iobj)
11015 self.cfg.RemoveInstance(iobj.name)
11016 # Make sure the instance lock gets removed
11017 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11018 raise errors.OpExecError("There are some degraded disks for"
11021 # Release all node resource locks
11022 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11024 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11025 # we need to set the disk IDs to the primary node, since the
11026 # preceding code might or might not have done it, depending on
11027 # disk template and other options
11028 for disk in iobj.disks:
11029 self.cfg.SetDiskID(disk, pnode_name)
11030 if self.op.mode == constants.INSTANCE_CREATE:
11031 if not self.op.no_install:
11032 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11033 not self.op.wait_for_sync)
11035 feedback_fn("* pausing disk sync to install instance OS")
11036 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11039 for idx, success in enumerate(result.payload):
11041 logging.warn("pause-sync of instance %s for disk %d failed",
11044 feedback_fn("* running the instance OS create scripts...")
11045 # FIXME: pass debug option from opcode to backend
11047 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11048 self.op.debug_level)
11050 feedback_fn("* resuming disk sync")
11051 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11054 for idx, success in enumerate(result.payload):
11056 logging.warn("resume-sync of instance %s for disk %d failed",
11059 os_add_result.Raise("Could not add os for instance %s"
11060 " on node %s" % (instance, pnode_name))
11063 if self.op.mode == constants.INSTANCE_IMPORT:
11064 feedback_fn("* running the instance OS import scripts...")
11068 for idx, image in enumerate(self.src_images):
11072 # FIXME: pass debug option from opcode to backend
11073 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11074 constants.IEIO_FILE, (image, ),
11075 constants.IEIO_SCRIPT,
11076 (iobj.disks[idx], idx),
11078 transfers.append(dt)
11081 masterd.instance.TransferInstanceData(self, feedback_fn,
11082 self.op.src_node, pnode_name,
11083 self.pnode.secondary_ip,
11085 if not compat.all(import_result):
11086 self.LogWarning("Some disks for instance %s on node %s were not"
11087 " imported successfully" % (instance, pnode_name))
11089 rename_from = self._old_instance_name
11091 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11092 feedback_fn("* preparing remote import...")
11093 # The source cluster will stop the instance before attempting to make
11094 # a connection. In some cases stopping an instance can take a long
11095 # time, hence the shutdown timeout is added to the connection timeout.
11097 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11098 self.op.source_shutdown_timeout)
11099 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11101 assert iobj.primary_node == self.pnode.name
11103 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11104 self.source_x509_ca,
11105 self._cds, timeouts)
11106 if not compat.all(disk_results):
11107 # TODO: Should the instance still be started, even if some disks
11108 # failed to import (valid for local imports, too)?
11109 self.LogWarning("Some disks for instance %s on node %s were not"
11110 " imported successfully" % (instance, pnode_name))
11112 rename_from = self.source_instance_name
11115 # also checked in the prereq part
11116 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11119 # Run rename script on newly imported instance
11120 assert iobj.name == instance
11121 feedback_fn("Running rename script for %s" % instance)
11122 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11124 self.op.debug_level)
11125 if result.fail_msg:
11126 self.LogWarning("Failed to run rename script for %s on node"
11127 " %s: %s" % (instance, pnode_name, result.fail_msg))
11129 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11132 iobj.admin_state = constants.ADMINST_UP
11133 self.cfg.Update(iobj, feedback_fn)
11134 logging.info("Starting instance %s on node %s", instance, pnode_name)
11135 feedback_fn("* starting instance...")
11136 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11138 result.Raise("Could not start instance")
11140 return list(iobj.all_nodes)
11143 class LUInstanceMultiAlloc(NoHooksLU):
11144 """Allocates multiple instances at the same time.
11149 def CheckArguments(self):
11150 """Check arguments.
11154 for inst in self.op.instances:
11155 if inst.iallocator is not None:
11156 raise errors.OpPrereqError("iallocator are not allowed to be set on"
11157 " instance objects", errors.ECODE_INVAL)
11158 nodes.append(bool(inst.pnode))
11159 if inst.disk_template in constants.DTS_INT_MIRROR:
11160 nodes.append(bool(inst.snode))
11162 has_nodes = compat.any(nodes)
11163 if compat.all(nodes) ^ has_nodes:
11164 raise errors.OpPrereqError("There are instance objects providing"
11165 " pnode/snode while others do not",
11166 errors.ECODE_INVAL)
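# Illustrative evaluation of the check above (assumed values): with
# nodes == [True, False], compat.all(nodes) is False while has_nodes is
# True, so the XOR is true and the error is raised; [True, True] and
# [False, False] both pass, i.e. either every instance specifies its
# nodes or none of them does.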
11168 if self.op.iallocator is None:
11169 default_iallocator = self.cfg.GetDefaultIAllocator()
11170 if default_iallocator and has_nodes:
11171 self.op.iallocator = default_iallocator
11173 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11174 " given and no cluster-wide default"
11175 " iallocator found; please specify either"
11176 " an iallocator or nodes on the instances"
11177 " or set a cluster-wide default iallocator",
11178 errors.ECODE_INVAL)
11180 _CheckOpportunisticLocking(self.op)
11182 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11184 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11185 utils.CommaJoin(dups), errors.ECODE_INVAL)
11187 def ExpandNames(self):
11188 """Calculate the locks.
11191 self.share_locks = _ShareAll()
11192 self.needed_locks = {
11193 # iallocator will select nodes and even if no iallocator is used,
11194 # collisions with LUInstanceCreate should be avoided
11195 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11198 if self.op.iallocator:
11199 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11200 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11202 if self.op.opportunistic_locking:
11203 self.opportunistic_locks[locking.LEVEL_NODE] = True
11204 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11207 for inst in self.op.instances:
11208 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11209 nodeslist.append(inst.pnode)
11210 if inst.snode is not None:
11211 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11212 nodeslist.append(inst.snode)
11214 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11215 # Lock resources of instance's primary and secondary nodes (copy to
11216 # prevent accidental modification)
11217 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11219 def CheckPrereq(self):
11220 """Check prerequisite.
11223 cluster = self.cfg.GetClusterInfo()
11224 default_vg = self.cfg.GetVGName()
11225 ec_id = self.proc.GetECId()
11227 if self.op.opportunistic_locking:
11228 # Only consider nodes for which a lock is held
11229 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11231 node_whitelist = None
11233 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11234 _ComputeNics(op, cluster, None,
11236 _ComputeFullBeParams(op, cluster),
11238 for op in self.op.instances]
11240 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11241 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11243 ial.Run(self.op.iallocator)
11245 if not ial.success:
11246 raise errors.OpPrereqError("Can't compute nodes using"
11247 " iallocator '%s': %s" %
11248 (self.op.iallocator, ial.info),
11249 errors.ECODE_NORES)
11251 self.ia_result = ial.result
11253 if self.op.dry_run:
11254 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11255 constants.JOB_IDS_KEY: [],
11258 def _ConstructPartialResult(self):
11259 """Contructs the partial result.
11262 (allocatable, failed) = self.ia_result
11264 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11265 map(compat.fst, allocatable),
11266 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11269 def Exec(self, feedback_fn):
11270 """Executes the opcode.
11273 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11274 (allocatable, failed) = self.ia_result
11277 for (name, nodes) in allocatable:
11278 op = op2inst.pop(name)
11281 (op.pnode, op.snode) = nodes
11283 (op.pnode,) = nodes
11287 missing = set(op2inst.keys()) - set(failed)
11288 assert not missing, \
11289 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11291 return ResultWithJobs(jobs, **self._ConstructPartialResult())
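# Illustrative shape of the iallocator result handled above (made-up
# names): ia_result == ([("inst1", ["nodeA", "nodeB"])], ["inst2"]) means
# that "inst1" can be allocated on nodeA/nodeB while "inst2" failed; the
# partial result then maps OpInstanceMultiAlloc.ALLOCATABLE_KEY to
# ["inst1"] and OpInstanceMultiAlloc.FAILED_KEY to ["inst2"].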
11294 def _CheckRADOSFreeSpace():
11295 """Compute disk size requirements inside the RADOS cluster.
11298 # For the RADOS cluster we assume there is always enough space.
11302 class LUInstanceConsole(NoHooksLU):
11303 """Connect to an instance's console.
11305 This is somewhat special in that it returns the command line that
11306 you need to run on the master node in order to connect to the console.
11312 def ExpandNames(self):
11313 self.share_locks = _ShareAll()
11314 self._ExpandAndLockInstance()
11316 def CheckPrereq(self):
11317 """Check prerequisites.
11319 This checks that the instance is in the cluster.
11322 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11323 assert self.instance is not None, \
11324 "Cannot retrieve locked instance %s" % self.op.instance_name
11325 _CheckNodeOnline(self, self.instance.primary_node)
11327 def Exec(self, feedback_fn):
11328 """Connect to the console of an instance
11331 instance = self.instance
11332 node = instance.primary_node
11334 node_insts = self.rpc.call_instance_list([node],
11335 [instance.hypervisor])[node]
11336 node_insts.Raise("Can't get node information from %s" % node)
11338 if instance.name not in node_insts.payload:
11339 if instance.admin_state == constants.ADMINST_UP:
11340 state = constants.INSTST_ERRORDOWN
11341 elif instance.admin_state == constants.ADMINST_DOWN:
11342 state = constants.INSTST_ADMINDOWN
11344 state = constants.INSTST_ADMINOFFLINE
11345 raise errors.OpExecError("Instance %s is not running (state %s)" %
11346 (instance.name, state))
11348 logging.debug("Connecting to console of %s on %s", instance.name, node)
11350 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11353 def _GetInstanceConsole(cluster, instance):
11354 """Returns console information for an instance.
11356 @type cluster: L{objects.Cluster}
11357 @type instance: L{objects.Instance}
11361 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11362 # beparams and hvparams are passed separately, to avoid editing the
11363 # instance and then saving the defaults in the instance itself.
11364 hvparams = cluster.FillHV(instance)
11365 beparams = cluster.FillBE(instance)
11366 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11368 assert console.instance == instance.name
11369 assert console.Validate()
11371 return console.ToDict()
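# Hypothetical usage sketch (names assumed, not part of this module):
# given the cluster configuration object "cfg", console information for
# one instance could be obtained with:
#   cluster = cfg.GetClusterInfo()
#   inst = cfg.GetInstanceInfo("instance1.example.com")
#   console_info = _GetInstanceConsole(cluster, inst)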
11374 class LUInstanceReplaceDisks(LogicalUnit):
11375 """Replace the disks of an instance.
11378 HPATH = "mirrors-replace"
11379 HTYPE = constants.HTYPE_INSTANCE
11382 def CheckArguments(self):
11383 """Check arguments.
11386 remote_node = self.op.remote_node
11387 ialloc = self.op.iallocator
11388 if self.op.mode == constants.REPLACE_DISK_CHG:
11389 if remote_node is None and ialloc is None:
11390 raise errors.OpPrereqError("When changing the secondary either an"
11391 " iallocator script must be used or the"
11392 " new node given", errors.ECODE_INVAL)
11394 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11396 elif remote_node is not None or ialloc is not None:
11397 # Not replacing the secondary
11398 raise errors.OpPrereqError("The iallocator and new node options can"
11399 " only be used when changing the"
11400 " secondary node", errors.ECODE_INVAL)
11402 def ExpandNames(self):
11403 self._ExpandAndLockInstance()
11405 assert locking.LEVEL_NODE not in self.needed_locks
11406 assert locking.LEVEL_NODE_RES not in self.needed_locks
11407 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11409 assert self.op.iallocator is None or self.op.remote_node is None, \
11410 "Conflicting options"
11412 if self.op.remote_node is not None:
11413 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11415 # Warning: do not remove the locking of the new secondary here
11416 # unless DRBD8.AddChildren is changed to work in parallel;
11417 # currently it doesn't since parallel invocations of
11418 # FindUnusedMinor will conflict
11419 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11420 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11422 self.needed_locks[locking.LEVEL_NODE] = []
11423 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11425 if self.op.iallocator is not None:
11426 # iallocator will select a new node in the same group
11427 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11428 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11430 self.needed_locks[locking.LEVEL_NODE_RES] = []
11432 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11433 self.op.iallocator, self.op.remote_node,
11434 self.op.disks, self.op.early_release,
11435 self.op.ignore_ipolicy)
11437 self.tasklets = [self.replacer]
11439 def DeclareLocks(self, level):
11440 if level == locking.LEVEL_NODEGROUP:
11441 assert self.op.remote_node is None
11442 assert self.op.iallocator is not None
11443 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11445 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11446 # Lock all groups used by instance optimistically; this requires going
11447 # via the node before it's locked, requiring verification later on
11448 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11449 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11451 elif level == locking.LEVEL_NODE:
11452 if self.op.iallocator is not None:
11453 assert self.op.remote_node is None
11454 assert not self.needed_locks[locking.LEVEL_NODE]
11455 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11457 # Lock member nodes of all locked groups
11458 self.needed_locks[locking.LEVEL_NODE] = \
11460 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11461 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11463 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11465 self._LockInstancesNodes()
11467 elif level == locking.LEVEL_NODE_RES:
11469 self.needed_locks[locking.LEVEL_NODE_RES] = \
11470 self.needed_locks[locking.LEVEL_NODE]
11472 def BuildHooksEnv(self):
11473 """Build hooks env.
11475 This runs on the master, the primary and all the secondaries.
11478 instance = self.replacer.instance
11480 "MODE": self.op.mode,
11481 "NEW_SECONDARY": self.op.remote_node,
11482 "OLD_SECONDARY": instance.secondary_nodes[0],
11484 env.update(_BuildInstanceHookEnvByObject(self, instance))
11487 def BuildHooksNodes(self):
11488 """Build hooks nodes.
11491 instance = self.replacer.instance
11493 self.cfg.GetMasterNode(),
11494 instance.primary_node,
11496 if self.op.remote_node is not None:
11497 nl.append(self.op.remote_node)
11500 def CheckPrereq(self):
11501 """Check prerequisites.
11504 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11505 self.op.iallocator is None)
11507 # Verify if node group locks are still correct
11508 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11510 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11512 return LogicalUnit.CheckPrereq(self)
11515 class TLReplaceDisks(Tasklet):
11516 """Replaces disks for an instance.
11518 Note: Locking is not within the scope of this class.
11521 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11522 disks, early_release, ignore_ipolicy):
11523 """Initializes this class.
11526 Tasklet.__init__(self, lu)
11529 self.instance_name = instance_name
11531 self.iallocator_name = iallocator_name
11532 self.remote_node = remote_node
11534 self.early_release = early_release
11535 self.ignore_ipolicy = ignore_ipolicy
11538 self.instance = None
11539 self.new_node = None
11540 self.target_node = None
11541 self.other_node = None
11542 self.remote_node_info = None
11543 self.node_secondary_ip = None
11546 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11547 """Compute a new secondary node using an IAllocator.
11550 req = iallocator.IAReqRelocate(name=instance_name,
11551 relocate_from=list(relocate_from))
11552 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11554 ial.Run(iallocator_name)
11556 if not ial.success:
11557 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11558 " %s" % (iallocator_name, ial.info),
11559 errors.ECODE_NORES)
11561 remote_node_name = ial.result[0]
11563 lu.LogInfo("Selected new secondary for instance '%s': %s",
11564 instance_name, remote_node_name)
11566 return remote_node_name
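# Hypothetical usage sketch (values assumed): asking an allocator script
# named "hail" for a new secondary for "inst1", excluding its current
# secondary "node2":
#   new_secondary = TLReplaceDisks._RunAllocator(lu, "hail", "inst1",
#                                                ["node2"])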
11568 def _FindFaultyDisks(self, node_name):
11569 """Wrapper for L{_FindFaultyInstanceDisks}.
11572 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11575 def _CheckDisksActivated(self, instance):
11576 """Checks if the instance disks are activated.
11578 @param instance: The instance to check disks
11579 @return: True if they are activated, False otherwise
11582 nodes = instance.all_nodes
11584 for idx, dev in enumerate(instance.disks):
11586 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11587 self.cfg.SetDiskID(dev, node)
11589 result = _BlockdevFind(self, node, dev, instance)
11593 elif result.fail_msg or not result.payload:
11598 def CheckPrereq(self):
11599 """Check prerequisites.
11601 This checks that the instance is in the cluster.
11604 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11605 assert instance is not None, \
11606 "Cannot retrieve locked instance %s" % self.instance_name
11608 if instance.disk_template != constants.DT_DRBD8:
11609 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11610 " instances", errors.ECODE_INVAL)
11612 if len(instance.secondary_nodes) != 1:
11613 raise errors.OpPrereqError("The instance has a strange layout,"
11614 " expected one secondary but found %d" %
11615 len(instance.secondary_nodes),
11616 errors.ECODE_FAULT)
11618 instance = self.instance
11619 secondary_node = instance.secondary_nodes[0]
11621 if self.iallocator_name is None:
11622 remote_node = self.remote_node
11624 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11625 instance.name, instance.secondary_nodes)
11627 if remote_node is None:
11628 self.remote_node_info = None
11630 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11631 "Remote node '%s' is not locked" % remote_node
11633 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11634 assert self.remote_node_info is not None, \
11635 "Cannot retrieve locked node %s" % remote_node
11637 if remote_node == self.instance.primary_node:
11638 raise errors.OpPrereqError("The specified node is the primary node of"
11639 " the instance", errors.ECODE_INVAL)
11641 if remote_node == secondary_node:
11642 raise errors.OpPrereqError("The specified node is already the"
11643 " secondary node of the instance",
11644 errors.ECODE_INVAL)
11646 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11647 constants.REPLACE_DISK_CHG):
11648 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11649 errors.ECODE_INVAL)
11651 if self.mode == constants.REPLACE_DISK_AUTO:
11652 if not self._CheckDisksActivated(instance):
11653 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11654 " first" % self.instance_name,
11655 errors.ECODE_STATE)
11656 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11657 faulty_secondary = self._FindFaultyDisks(secondary_node)
11659 if faulty_primary and faulty_secondary:
11660 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11661 " one node and can not be repaired"
11662 " automatically" % self.instance_name,
11663 errors.ECODE_STATE)
11666 self.disks = faulty_primary
11667 self.target_node = instance.primary_node
11668 self.other_node = secondary_node
11669 check_nodes = [self.target_node, self.other_node]
11670 elif faulty_secondary:
11671 self.disks = faulty_secondary
11672 self.target_node = secondary_node
11673 self.other_node = instance.primary_node
11674 check_nodes = [self.target_node, self.other_node]
11680 # Non-automatic modes
11681 if self.mode == constants.REPLACE_DISK_PRI:
11682 self.target_node = instance.primary_node
11683 self.other_node = secondary_node
11684 check_nodes = [self.target_node, self.other_node]
11686 elif self.mode == constants.REPLACE_DISK_SEC:
11687 self.target_node = secondary_node
11688 self.other_node = instance.primary_node
11689 check_nodes = [self.target_node, self.other_node]
11691 elif self.mode == constants.REPLACE_DISK_CHG:
11692 self.new_node = remote_node
11693 self.other_node = instance.primary_node
11694 self.target_node = secondary_node
11695 check_nodes = [self.new_node, self.other_node]
11697 _CheckNodeNotDrained(self.lu, remote_node)
11698 _CheckNodeVmCapable(self.lu, remote_node)
11700 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11701 assert old_node_info is not None
11702 if old_node_info.offline and not self.early_release:
11703 # doesn't make sense to delay the release
11704 self.early_release = True
11705 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11706 " early-release mode", secondary_node)
11709 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11712 # If not specified all disks should be replaced
11714 self.disks = range(len(self.instance.disks))
11716 # TODO: This is ugly, but right now we can't distinguish between internally
11717 # submitted opcodes and external ones. We should fix that.
11718 if self.remote_node_info:
11719 # We change the node, lets verify it still meets instance policy
11720 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11721 cluster = self.cfg.GetClusterInfo()
11722 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11724 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11725 ignore=self.ignore_ipolicy)
11727 for node in check_nodes:
11728 _CheckNodeOnline(self.lu, node)
11730 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11733 if node_name is not None)
11735 # Release unneeded node and node resource locks
11736 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11737 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11738 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11740 # Release any owned node group
11741 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11743 # Check whether disks are valid
11744 for disk_idx in self.disks:
11745 instance.FindDisk(disk_idx)
11747 # Get secondary node IP addresses
11748 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11749 in self.cfg.GetMultiNodeInfo(touched_nodes))
11751 def Exec(self, feedback_fn):
11752 """Execute disk replacement.
11754 This dispatches the disk replacement to the appropriate handler.
11758 # Verify owned locks before starting operation
11759 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11760 assert set(owned_nodes) == set(self.node_secondary_ip), \
11761 ("Incorrect node locks, owning %s, expected %s" %
11762 (owned_nodes, self.node_secondary_ip.keys()))
11763 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11764 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11765 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11767 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11768 assert list(owned_instances) == [self.instance_name], \
11769 "Instance '%s' not locked" % self.instance_name
11771 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11772 "Should not own any node group lock at this point"
11775 feedback_fn("No disks need replacement for instance '%s'" %
11776 self.instance.name)
11779 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11780 (utils.CommaJoin(self.disks), self.instance.name))
11781 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11782 feedback_fn("Current seconary node: %s" %
11783 utils.CommaJoin(self.instance.secondary_nodes))
11785 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11787 # Activate the instance disks if we're replacing them on a down instance
11789 _StartInstanceDisks(self.lu, self.instance, True)
11792 # Should we replace the secondary node?
11793 if self.new_node is not None:
11794 fn = self._ExecDrbd8Secondary
11796 fn = self._ExecDrbd8DiskOnly
11798 result = fn(feedback_fn)
11800 # Deactivate the instance disks if we're replacing them on a
11803 _SafeShutdownInstanceDisks(self.lu, self.instance)
11805 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11808 # Verify owned locks
11809 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11810 nodes = frozenset(self.node_secondary_ip)
11811 assert ((self.early_release and not owned_nodes) or
11812 (not self.early_release and not (set(owned_nodes) - nodes))), \
11813 ("Not owning the correct locks, early_release=%s, owned=%r,"
11814 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11818 def _CheckVolumeGroup(self, nodes):
11819 self.lu.LogInfo("Checking volume groups")
11821 vgname = self.cfg.GetVGName()
11823 # Make sure volume group exists on all involved nodes
11824 results = self.rpc.call_vg_list(nodes)
11826 raise errors.OpExecError("Can't list volume groups on the nodes")
11829 res = results[node]
11830 res.Raise("Error checking node %s" % node)
11831 if vgname not in res.payload:
11832 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11835 def _CheckDisksExistence(self, nodes):
11836 # Check disk existence
11837 for idx, dev in enumerate(self.instance.disks):
11838 if idx not in self.disks:
11842 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11843 self.cfg.SetDiskID(dev, node)
11845 result = _BlockdevFind(self, node, dev, self.instance)
11847 msg = result.fail_msg
11848 if msg or not result.payload:
11850 msg = "disk not found"
11851 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11854 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11855 for idx, dev in enumerate(self.instance.disks):
11856 if idx not in self.disks:
11859 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11862 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11863 on_primary, ldisk=ldisk):
11864 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11865 " replace disks for instance %s" %
11866 (node_name, self.instance.name))
11868 def _CreateNewStorage(self, node_name):
11869 """Create new storage on the primary or secondary node.
11871 This is only used for same-node replaces, not for changing the
11872 secondary node, hence we don't want to modify the existing disk.
11877 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11878 for idx, dev in enumerate(disks):
11879 if idx not in self.disks:
11882 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11884 self.cfg.SetDiskID(dev, node_name)
11886 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11887 names = _GenerateUniqueNames(self.lu, lv_names)
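# For disk index 0 the suffixes requested above are ".disk0_data" and
# ".disk0_meta"; _GenerateUniqueNames is expected to turn them into
# cluster-unique LV names (illustrative note, the exact naming scheme is
# implemented elsewhere).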
11889 (data_disk, meta_disk) = dev.children
11890 vg_data = data_disk.logical_id[0]
11891 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11892 logical_id=(vg_data, names[0]),
11893 params=data_disk.params)
11894 vg_meta = meta_disk.logical_id[0]
11895 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11896 size=constants.DRBD_META_SIZE,
11897 logical_id=(vg_meta, names[1]),
11898 params=meta_disk.params)
11900 new_lvs = [lv_data, lv_meta]
11901 old_lvs = [child.Copy() for child in dev.children]
11902 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11903 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11905 # we pass force_create=True to force the LVM creation
11906 for new_lv in new_lvs:
11907 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11908 _GetInstanceInfoText(self.instance), False,
11913 def _CheckDevices(self, node_name, iv_names):
11914 for name, (dev, _, _) in iv_names.iteritems():
11915 self.cfg.SetDiskID(dev, node_name)
11917 result = _BlockdevFind(self, node_name, dev, self.instance)
11919 msg = result.fail_msg
11920 if msg or not result.payload:
11922 msg = "disk not found"
11923 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11926 if result.payload.is_degraded:
11927 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11929 def _RemoveOldStorage(self, node_name, iv_names):
11930 for name, (_, old_lvs, _) in iv_names.iteritems():
11931 self.lu.LogInfo("Remove logical volumes for %s", name)
11934 self.cfg.SetDiskID(lv, node_name)
11936 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11938 self.lu.LogWarning("Can't remove old LV: %s", msg,
11939 hint="remove unused LVs manually")
11941 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11942 """Replace a disk on the primary or secondary for DRBD 8.
11944 The algorithm for replace is quite complicated:
11946 1. for each disk to be replaced:
11948 1. create new LVs on the target node with unique names
11949 1. detach old LVs from the drbd device
11950 1. rename old LVs to name_replaced.<time_t>
11951 1. rename new LVs to old LVs
11952 1. attach the new LVs (with the old names now) to the drbd device
11954 1. wait for sync across all devices
11956 1. for each modified disk:
11958 1. remove old LVs (which have the name name_replaced.<time_t>)
11960 Failures are not very well handled.
11965 # Step: check device activation
11966 self.lu.LogStep(1, steps_total, "Check device existence")
11967 self._CheckDisksExistence([self.other_node, self.target_node])
11968 self._CheckVolumeGroup([self.target_node, self.other_node])
11970 # Step: check other node consistency
11971 self.lu.LogStep(2, steps_total, "Check peer consistency")
11972 self._CheckDisksConsistency(self.other_node,
11973 self.other_node == self.instance.primary_node,
11976 # Step: create new storage
11977 self.lu.LogStep(3, steps_total, "Allocate new storage")
11978 iv_names = self._CreateNewStorage(self.target_node)
11980 # Step: for each lv, detach+rename*2+attach
11981 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11982 for dev, old_lvs, new_lvs in iv_names.itervalues():
11983 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
11985 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11987 result.Raise("Can't detach drbd from local storage on node"
11988 " %s for device %s" % (self.target_node, dev.iv_name))
11990 #cfg.Update(instance)
11992 # ok, we created the new LVs, so now we know we have the needed
11993 # storage; as such, we proceed on the target node to rename
11994 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11995 # using the assumption that logical_id == physical_id (which in
11996 # turn is the unique_id on that node)
11998 # FIXME(iustin): use a better name for the replaced LVs
11999 temp_suffix = int(time.time())
12000 ren_fn = lambda d, suff: (d.physical_id[0],
12001 d.physical_id[1] + "_replaced-%s" % suff)
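# Illustrative example (made-up values): an old LV with physical_id
# ("xenvg", "disk0_data") and temp_suffix 1400000000 is renamed by ren_fn
# to ("xenvg", "disk0_data_replaced-1400000000"), freeing the original
# name for the newly created LV.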
12003 # Build the rename list based on what LVs exist on the node
12004 rename_old_to_new = []
12005 for to_ren in old_lvs:
12006 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12007 if not result.fail_msg and result.payload:
12009 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12011 self.lu.LogInfo("Renaming the old LVs on the target node")
12012 result = self.rpc.call_blockdev_rename(self.target_node,
12014 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12016 # Now we rename the new LVs to the old LVs
12017 self.lu.LogInfo("Renaming the new LVs on the target node")
12018 rename_new_to_old = [(new, old.physical_id)
12019 for old, new in zip(old_lvs, new_lvs)]
12020 result = self.rpc.call_blockdev_rename(self.target_node,
12022 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12024 # Intermediate steps of in-memory modifications
12025 for old, new in zip(old_lvs, new_lvs):
12026 new.logical_id = old.logical_id
12027 self.cfg.SetDiskID(new, self.target_node)
12029 # We need to modify old_lvs so that removal later removes the
12030 # right LVs, not the newly added ones; note that old_lvs is a copy
12032 for disk in old_lvs:
12033 disk.logical_id = ren_fn(disk, temp_suffix)
12034 self.cfg.SetDiskID(disk, self.target_node)
12036 # Now that the new lvs have the old name, we can add them to the device
12037 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12038 result = self.rpc.call_blockdev_addchildren(self.target_node,
12039 (dev, self.instance), new_lvs)
12040 msg = result.fail_msg
12042 for new_lv in new_lvs:
12043 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12046 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12047 hint=("cleanup manually the unused logical"
12049 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12051 cstep = itertools.count(5)
12053 if self.early_release:
12054 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12055 self._RemoveOldStorage(self.target_node, iv_names)
12056 # TODO: Check if releasing locks early still makes sense
12057 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12059 # Release all resource locks except those used by the instance
12060 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12061 keep=self.node_secondary_ip.keys())
12063 # Release all node locks while waiting for sync
12064 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12066 # TODO: Can the instance lock be downgraded here? Take the optional disk
12067 # shutdown in the caller into consideration.
12070 # This can fail as the old devices are degraded and _WaitForSync
12071 # reports a combined result over all disks, so we don't check its return value
12072 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12073 _WaitForSync(self.lu, self.instance)
12075 # Check all devices manually
12076 self._CheckDevices(self.instance.primary_node, iv_names)
12078 # Step: remove old storage
12079 if not self.early_release:
12080 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12081 self._RemoveOldStorage(self.target_node, iv_names)
12083 def _ExecDrbd8Secondary(self, feedback_fn):
12084 """Replace the secondary node for DRBD 8.
12086 The algorithm for replace is quite complicated:
12087 - for all disks of the instance:
12088 - create new LVs on the new node with same names
12089 - shutdown the drbd device on the old secondary
12090 - disconnect the drbd network on the primary
12091 - create the drbd device on the new secondary
12092 - network attach the drbd on the primary, using an artifice:
12093 the drbd code for Attach() will connect to the network if it
12094 finds a device which is connected to the good local disks but
12095 not network enabled
12096 - wait for sync across all devices
12097 - remove all disks from the old secondary
12099 Failures are not very well handled.
12104 pnode = self.instance.primary_node
12106 # Step: check device activation
12107 self.lu.LogStep(1, steps_total, "Check device existence")
12108 self._CheckDisksExistence([self.instance.primary_node])
12109 self._CheckVolumeGroup([self.instance.primary_node])
12111 # Step: check other node consistency
12112 self.lu.LogStep(2, steps_total, "Check peer consistency")
12113 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12115 # Step: create new storage
12116 self.lu.LogStep(3, steps_total, "Allocate new storage")
12117 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12118 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12119 for idx, dev in enumerate(disks):
12120 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12121 (self.new_node, idx))
12122 # we pass force_create=True to force LVM creation
12123 for new_lv in dev.children:
12124 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12125 True, _GetInstanceInfoText(self.instance), False,
12128 # Step 4: drbd minors and drbd setup changes
12129 # after this, we must manually remove the drbd minors on both the
12130 # error and the success paths
12131 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12132 minors = self.cfg.AllocateDRBDMinor([self.new_node
12133 for dev in self.instance.disks],
12134 self.instance.name)
12135 logging.debug("Allocated minors %r", minors)
12138 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12139 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12140 (self.new_node, idx))
12141 # create new devices on new_node; note that we create two IDs:
12142 # one without port, so the drbd will be activated without
12143 # networking information on the new node at this stage, and one
12144 # with network, for the later activation in step 4
12145 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12146 if self.instance.primary_node == o_node1:
12149 assert self.instance.primary_node == o_node2, "Three-node instance?"
12152 new_alone_id = (self.instance.primary_node, self.new_node, None,
12153 p_minor, new_minor, o_secret)
12154 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12155 p_minor, new_minor, o_secret)
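# Illustrative example (made-up values): with primary "node1", new
# secondary "node3", old DRBD port 11000 (o_port), the primary's minor
# p_minor and a freshly allocated new_minor of 3, the two IDs built above
# are
#   new_alone_id == ("node1", "node3", None, p_minor, 3, o_secret)
#   new_net_id   == ("node1", "node3", 11000, p_minor, 3, o_secret)
# i.e. identical except that only the networked one carries the DRBD port.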
12157 iv_names[idx] = (dev, dev.children, new_net_id)
12158 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12160 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12161 logical_id=new_alone_id,
12162 children=dev.children,
12165 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12168 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12170 _GetInstanceInfoText(self.instance), False,
12172 except errors.GenericError:
12173 self.cfg.ReleaseDRBDMinors(self.instance.name)
12176 # We have new devices, shutdown the drbd on the old secondary
12177 for idx, dev in enumerate(self.instance.disks):
12178 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12179 self.cfg.SetDiskID(dev, self.target_node)
12180 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12181 (dev, self.instance)).fail_msg
12183 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12184 "node: %s" % (idx, msg),
12185 hint=("Please cleanup this device manually as"
12186 " soon as possible"))
12188 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12189 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12190 self.instance.disks)[pnode]
12192 msg = result.fail_msg
12194 # detaches didn't succeed (unlikely)
12195 self.cfg.ReleaseDRBDMinors(self.instance.name)
12196 raise errors.OpExecError("Can't detach the disks from the network on"
12197 " old node: %s" % (msg,))
12199 # if we managed to detach at least one, we update all the disks of
12200 # the instance to point to the new secondary
12201 self.lu.LogInfo("Updating instance configuration")
12202 for dev, _, new_logical_id in iv_names.itervalues():
12203 dev.logical_id = new_logical_id
12204 self.cfg.SetDiskID(dev, self.instance.primary_node)
12206 self.cfg.Update(self.instance, feedback_fn)
12208 # Release all node locks (the configuration has been updated)
12209 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12211 # and now perform the drbd attach
12212 self.lu.LogInfo("Attaching primary drbds to new secondary"
12213 " (standalone => connected)")
12214 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12216 self.node_secondary_ip,
12217 (self.instance.disks, self.instance),
12218 self.instance.name,
12220 for to_node, to_result in result.items():
12221 msg = to_result.fail_msg
12223 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12225 hint=("please do a gnt-instance info to see the"
12226 " status of disks"))
12228 cstep = itertools.count(5)
12230 if self.early_release:
12231 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12232 self._RemoveOldStorage(self.target_node, iv_names)
12233 # TODO: Check if releasing locks early still makes sense
12234 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12236 # Release all resource locks except those used by the instance
12237 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12238 keep=self.node_secondary_ip.keys())
12240 # TODO: Can the instance lock be downgraded here? Take the optional disk
12241 # shutdown in the caller into consideration.
12244 # This can fail as the old devices are degraded and _WaitForSync
12245 # reports a combined result over all disks, so we don't check its return value
12246 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12247 _WaitForSync(self.lu, self.instance)
12249 # Check all devices manually
12250 self._CheckDevices(self.instance.primary_node, iv_names)
12252 # Step: remove old storage
12253 if not self.early_release:
12254 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12255 self._RemoveOldStorage(self.target_node, iv_names)
12258 class LURepairNodeStorage(NoHooksLU):
12259 """Repairs the volume group on a node.
12264 def CheckArguments(self):
12265 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12267 storage_type = self.op.storage_type
12269 if (constants.SO_FIX_CONSISTENCY not in
12270 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12271 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12272 " repaired" % storage_type,
12273 errors.ECODE_INVAL)
12275 def ExpandNames(self):
12276 self.needed_locks = {
12277 locking.LEVEL_NODE: [self.op.node_name],
12280 def _CheckFaultyDisks(self, instance, node_name):
12281 """Ensure faulty disks abort the opcode or at least warn."""
12283 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12285 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12286 " node '%s'" % (instance.name, node_name),
12287 errors.ECODE_STATE)
12288 except errors.OpPrereqError, err:
12289 if self.op.ignore_consistency:
12290 self.LogWarning(str(err.args[0]))
12294 def CheckPrereq(self):
12295 """Check prerequisites.
12298 # Check whether any instance on this node has faulty disks
12299 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12300 if inst.admin_state != constants.ADMINST_UP:
12302 check_nodes = set(inst.all_nodes)
12303 check_nodes.discard(self.op.node_name)
12304 for inst_node_name in check_nodes:
12305 self._CheckFaultyDisks(inst, inst_node_name)
12307 def Exec(self, feedback_fn):
12308 feedback_fn("Repairing storage unit '%s' on %s ..." %
12309 (self.op.name, self.op.node_name))
12311 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12312 result = self.rpc.call_storage_execute(self.op.node_name,
12313 self.op.storage_type, st_args,
12315 constants.SO_FIX_CONSISTENCY)
12316 result.Raise("Failed to repair storage unit '%s' on %s" %
12317 (self.op.name, self.op.node_name))
12320 class LUNodeEvacuate(NoHooksLU):
12321 """Evacuates instances off a list of nodes.
12326 _MODE2IALLOCATOR = {
12327 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12328 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12329 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12331 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12332 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12333 constants.IALLOCATOR_NEVAC_MODES)
12335 def CheckArguments(self):
12336 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12338 def ExpandNames(self):
12339 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12341 if self.op.remote_node is not None:
12342 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12343 assert self.op.remote_node
12345 if self.op.remote_node == self.op.node_name:
12346 raise errors.OpPrereqError("Can not use evacuated node as a new"
12347 " secondary node", errors.ECODE_INVAL)
12349 if self.op.mode != constants.NODE_EVAC_SEC:
12350 raise errors.OpPrereqError("Without the use of an iallocator only"
12351 " secondary instances can be evacuated",
12352 errors.ECODE_INVAL)
12355 self.share_locks = _ShareAll()
12356 self.needed_locks = {
12357 locking.LEVEL_INSTANCE: [],
12358 locking.LEVEL_NODEGROUP: [],
12359 locking.LEVEL_NODE: [],
12362 # Determine nodes (via group) optimistically, needs verification once locks
12363 # have been acquired
12364 self.lock_nodes = self._DetermineNodes()
12366 def _DetermineNodes(self):
12367 """Gets the list of nodes to operate on.
12370 if self.op.remote_node is None:
12371 # Iallocator will choose any node(s) in the same group
12372 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12374 group_nodes = frozenset([self.op.remote_node])
12376 # Determine nodes to be locked
12377 return set([self.op.node_name]) | group_nodes
12379 def _DetermineInstances(self):
12380 """Builds list of instances to operate on.
12383 assert self.op.mode in constants.NODE_EVAC_MODES
12385 if self.op.mode == constants.NODE_EVAC_PRI:
12386 # Primary instances only
12387 inst_fn = _GetNodePrimaryInstances
12388 assert self.op.remote_node is None, \
12389 "Evacuating primary instances requires iallocator"
12390 elif self.op.mode == constants.NODE_EVAC_SEC:
12391 # Secondary instances only
12392 inst_fn = _GetNodeSecondaryInstances
12395 assert self.op.mode == constants.NODE_EVAC_ALL
12396 inst_fn = _GetNodeInstances
12397 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12399 raise errors.OpPrereqError("Due to an issue with the iallocator"
12400 " interface it is not possible to evacuate"
12401 " all instances at once; specify explicitly"
12402 " whether to evacuate primary or secondary"
12404 errors.ECODE_INVAL)
12406 return inst_fn(self.cfg, self.op.node_name)
12408 def DeclareLocks(self, level):
12409 if level == locking.LEVEL_INSTANCE:
12410 # Lock instances optimistically, needs verification once node and group
12411 # locks have been acquired
12412 self.needed_locks[locking.LEVEL_INSTANCE] = \
12413 set(i.name for i in self._DetermineInstances())
12415 elif level == locking.LEVEL_NODEGROUP:
12416 # Lock node groups for all potential target nodes optimistically, needs
12417 # verification once nodes have been acquired
12418 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12419 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12421 elif level == locking.LEVEL_NODE:
12422 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12424 def CheckPrereq(self):
12426 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12427 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12428 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12430 need_nodes = self._DetermineNodes()
12432 if not owned_nodes.issuperset(need_nodes):
12433 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12434 " locks were acquired, current nodes are"
12435 " are '%s', used to be '%s'; retry the"
12437 (self.op.node_name,
12438 utils.CommaJoin(need_nodes),
12439 utils.CommaJoin(owned_nodes)),
12440 errors.ECODE_STATE)
12442 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12443 if owned_groups != wanted_groups:
12444 raise errors.OpExecError("Node groups changed since locks were acquired,"
12445 " current groups are '%s', used to be '%s';"
12446 " retry the operation" %
12447 (utils.CommaJoin(wanted_groups),
12448 utils.CommaJoin(owned_groups)))
12450 # Determine affected instances
12451 self.instances = self._DetermineInstances()
12452 self.instance_names = [i.name for i in self.instances]
12454 if set(self.instance_names) != owned_instances:
12455 raise errors.OpExecError("Instances on node '%s' changed since locks"
12456 " were acquired, current instances are '%s',"
12457 " used to be '%s'; retry the operation" %
12458 (self.op.node_name,
12459 utils.CommaJoin(self.instance_names),
12460 utils.CommaJoin(owned_instances)))
12462 if self.instance_names:
12463 self.LogInfo("Evacuating instances from node '%s': %s",
12465 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12467 self.LogInfo("No instances to evacuate from node '%s'",
12470 if self.op.remote_node is not None:
12471 for i in self.instances:
12472 if i.primary_node == self.op.remote_node:
12473 raise errors.OpPrereqError("Node %s is the primary node of"
12474 " instance %s, cannot use it as"
12476 (self.op.remote_node, i.name),
12477 errors.ECODE_INVAL)
12479 def Exec(self, feedback_fn):
12480 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12482 if not self.instance_names:
12483 # No instances to evacuate
12486 elif self.op.iallocator is not None:
12487 # TODO: Implement relocation to other group
12488 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12489 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12490 instances=list(self.instance_names))
12491 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12493 ial.Run(self.op.iallocator)
12495 if not ial.success:
12496 raise errors.OpPrereqError("Can't compute node evacuation using"
12497 " iallocator '%s': %s" %
12498 (self.op.iallocator, ial.info),
12499 errors.ECODE_NORES)
12501 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12503 elif self.op.remote_node is not None:
12504 assert self.op.mode == constants.NODE_EVAC_SEC
12506 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12507 remote_node=self.op.remote_node,
12509 mode=constants.REPLACE_DISK_CHG,
12510 early_release=self.op.early_release)]
12511 for instance_name in self.instance_names]
12514 raise errors.ProgrammerError("No iallocator or remote node")
12516 return ResultWithJobs(jobs)
12519 def _SetOpEarlyRelease(early_release, op):
12520 """Sets C{early_release} flag on opcodes if available.
12524 op.early_release = early_release
12525 except AttributeError:
12526 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12531 def _NodeEvacDest(use_nodes, group, nodes):
12532 """Returns group or nodes depending on caller's choice.
12536 return utils.CommaJoin(nodes)
12541 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12542 """Unpacks the result of change-group and node-evacuate iallocator requests.
12544 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12545 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12547 @type lu: L{LogicalUnit}
12548 @param lu: Logical unit instance
12549 @type alloc_result: tuple/list
12550 @param alloc_result: Result from iallocator
12551 @type early_release: bool
12552 @param early_release: Whether to release locks early if possible
12553 @type use_nodes: bool
12554 @param use_nodes: Whether to display node names instead of groups
12557 (moved, failed, jobs) = alloc_result
12560 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12561 for (name, reason) in failed)
12562 lu.LogWarning("Unable to evacuate instances %s", failreason)
12563 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12566 lu.LogInfo("Instances to be moved: %s",
12567 utils.CommaJoin("%s (to %s)" %
12568 (name, _NodeEvacDest(use_nodes, group, nodes))
12569 for (name, group, nodes) in moved))
12571 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12572 map(opcodes.OpCode.LoadOpCode, ops))
12576 def _DiskSizeInBytesToMebibytes(lu, size):
12577 """Converts a disk size in bytes to mebibytes.
12579 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12582 (mib, remainder) = divmod(size, 1024 * 1024)
12585 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12586 " to not overwrite existing data (%s bytes will not be"
12587 " wiped)", (1024 * 1024) - remainder)
12593 class LUInstanceGrowDisk(LogicalUnit):
12594 """Grow a disk of an instance.
12597 HPATH = "disk-grow"
12598 HTYPE = constants.HTYPE_INSTANCE
12601 def ExpandNames(self):
12602 self._ExpandAndLockInstance()
12603 self.needed_locks[locking.LEVEL_NODE] = []
12604 self.needed_locks[locking.LEVEL_NODE_RES] = []
12605 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12606 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12608 def DeclareLocks(self, level):
12609 if level == locking.LEVEL_NODE:
12610 self._LockInstancesNodes()
12611 elif level == locking.LEVEL_NODE_RES:
12613 self.needed_locks[locking.LEVEL_NODE_RES] = \
12614 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12616 def BuildHooksEnv(self):
12617 """Build hooks env.
12619 This runs on the master, the primary and all the secondaries.
12623 "DISK": self.op.disk,
12624 "AMOUNT": self.op.amount,
12625 "ABSOLUTE": self.op.absolute,
12627 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12630 def BuildHooksNodes(self):
12631 """Build hooks nodes.
12634 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12637 def CheckPrereq(self):
12638 """Check prerequisites.
12640 This checks that the instance is in the cluster.
12643 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12644 assert instance is not None, \
12645 "Cannot retrieve locked instance %s" % self.op.instance_name
12646 nodenames = list(instance.all_nodes)
12647 for node in nodenames:
12648 _CheckNodeOnline(self, node)
12650 self.instance = instance
12652 if instance.disk_template not in constants.DTS_GROWABLE:
12653 raise errors.OpPrereqError("Instance's disk layout does not support"
12654 " growing", errors.ECODE_INVAL)
12656 self.disk = instance.FindDisk(self.op.disk)
12658 if self.op.absolute:
12659 self.target = self.op.amount
12660 self.delta = self.target - self.disk.size
12662 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12663 "current disk size (%s)" %
12664 (utils.FormatUnit(self.target, "h"),
12665 utils.FormatUnit(self.disk.size, "h")),
12666 errors.ECODE_STATE)
12668 self.delta = self.op.amount
12669 self.target = self.disk.size + self.delta
12671 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12672 utils.FormatUnit(self.delta, "h"),
12673 errors.ECODE_INVAL)
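# Example with illustrative figures: growing a 10240 MiB disk with
# amount=5120 in relative mode gives delta == 5120 and target == 15360;
# requesting amount=15360 with absolute=True yields the same delta of
# 5120, since delta is then computed as the target minus the current size.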
12675 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12677 def _CheckDiskSpace(self, nodenames, req_vgspace):
12678 template = self.instance.disk_template
12679 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12680 # TODO: check the free disk space for file, when that feature will be
12682 nodes = map(self.cfg.GetNodeInfo, nodenames)
12683 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12686 # With exclusive storage we need to do something smarter than just looking
12687 # at free space; for now, let's simply abort the operation.
12688 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12689 " is enabled", errors.ECODE_STATE)
12690 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12692 def Exec(self, feedback_fn):
12693 """Execute disk grow.
12696 instance = self.instance
12699 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12700 assert (self.owned_locks(locking.LEVEL_NODE) ==
12701 self.owned_locks(locking.LEVEL_NODE_RES))
12703 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12705 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12707 raise errors.OpExecError("Cannot activate block device to grow")
12709 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12710 (self.op.disk, instance.name,
12711 utils.FormatUnit(self.delta, "h"),
12712 utils.FormatUnit(self.target, "h")))
12714 # First run all grow ops in dry-run mode
12715 for node in instance.all_nodes:
12716 self.cfg.SetDiskID(disk, node)
12717 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12719 result.Raise("Dry-run grow request failed to node %s" % node)
12722 # Get disk size from primary node for wiping
12723 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12724 result.Raise("Failed to retrieve disk size from node '%s'" %
12725 instance.primary_node)
12727 (disk_size_in_bytes, ) = result.payload
12729 if disk_size_in_bytes is None:
12730 raise errors.OpExecError("Failed to retrieve disk size from primary"
12731 " node '%s'" % instance.primary_node)
12733 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12735 assert old_disk_size >= disk.size, \
12736 ("Retrieved disk size too small (got %s, should be at least %s)" %
12737 (old_disk_size, disk.size))
12739 old_disk_size = None
12741 # We know that (as far as we can test) operations across different
12742 # nodes will succeed; time to run it for real on the backing storage
12743 for node in instance.all_nodes:
12744 self.cfg.SetDiskID(disk, node)
12745 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12747 result.Raise("Grow request failed to node %s" % node)
12749 # And now execute it for logical storage, on the primary node
12750 node = instance.primary_node
12751 self.cfg.SetDiskID(disk, node)
12752 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12754 result.Raise("Grow request failed to node %s" % node)
12756 disk.RecordGrow(self.delta)
12757 self.cfg.Update(instance, feedback_fn)
12759 # Changes have been recorded, release node lock
12760 _ReleaseLocks(self, locking.LEVEL_NODE)
12762 # Downgrade lock while waiting for sync
12763 self.glm.downgrade(locking.LEVEL_INSTANCE)
12765 assert wipe_disks ^ (old_disk_size is None)
12768 assert instance.disks[self.op.disk] == disk
12770 # Wipe newly added disk space
12771 _WipeDisks(self, instance,
12772 disks=[(self.op.disk, disk, old_disk_size)])
12774 if self.op.wait_for_sync:
12775 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12777 self.LogWarning("Disk syncing has not returned a good status; check"
12779 if instance.admin_state != constants.ADMINST_UP:
12780 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12781 elif instance.admin_state != constants.ADMINST_UP:
12782 self.LogWarning("Not shutting down the disk even if the instance is"
12783 " not supposed to be running because no wait for"
12784 " sync mode was requested")
12786 assert self.owned_locks(locking.LEVEL_NODE_RES)
12787 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12790 class LUInstanceQueryData(NoHooksLU):
12791 """Query runtime instance data.
12796 def ExpandNames(self):
12797 self.needed_locks = {}
12799 # Use locking if requested or when non-static information is wanted
12800 if not (self.op.static or self.op.use_locking):
12801 self.LogWarning("Non-static data requested, locks need to be acquired")
12802 self.op.use_locking = True
12804 if self.op.instances or not self.op.use_locking:
12805 # Expand instance names right here
12806 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12808 # Will use acquired locks
12809 self.wanted_names = None
12811 if self.op.use_locking:
12812 self.share_locks = _ShareAll()
12814 if self.wanted_names is None:
12815 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12817 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12819 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12820 self.needed_locks[locking.LEVEL_NODE] = []
12821 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12823 def DeclareLocks(self, level):
12824 if self.op.use_locking:
12825 if level == locking.LEVEL_NODEGROUP:
12826 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12828 # Lock all groups used by instances optimistically; this requires going
12829 # via the node before it's locked, requiring verification later on
12830 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12831 frozenset(group_uuid
12832 for instance_name in owned_instances
12834 self.cfg.GetInstanceNodeGroups(instance_name))
12836 elif level == locking.LEVEL_NODE:
12837 self._LockInstancesNodes()
12839 def CheckPrereq(self):
12840 """Check prerequisites.
12842 This only checks the optional instance list against the existing names.
12845 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12846 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12847 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12849 if self.wanted_names is None:
12850 assert self.op.use_locking, "Locking was not used"
12851 self.wanted_names = owned_instances
12853 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12855 if self.op.use_locking:
12856 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12859 assert not (owned_instances or owned_groups or owned_nodes)
12861 self.wanted_instances = instances.values()
12863 def _ComputeBlockdevStatus(self, node, instance, dev):
12864 """Returns the status of a block device
12867 if self.op.static or not node:
12870 self.cfg.SetDiskID(dev, node)
12872 result = self.rpc.call_blockdev_find(node, dev)
12876 result.Raise("Can't compute disk status for %s" % instance.name)
12878 status = result.payload
12882 return (status.dev_path, status.major, status.minor,
12883 status.sync_percent, status.estimated_time,
12884 status.is_degraded, status.ldisk_status)
12886 def _ComputeDiskStatus(self, instance, snode, dev):
12887 """Compute block device status.
12890 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12892 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12894 def _ComputeDiskStatusInner(self, instance, snode, dev):
12895 """Compute block device status.
12897 @attention: The device has to be annotated already.
12900 if dev.dev_type in constants.LDS_DRBD:
12901 # we change the snode then (otherwise we use the one passed in)
12902 if dev.logical_id[0] == instance.primary_node:
12903 snode = dev.logical_id[1]
12905 snode = dev.logical_id[0]
12907 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12909 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12912 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12919 "iv_name": dev.iv_name,
12920 "dev_type": dev.dev_type,
12921 "logical_id": dev.logical_id,
12922 "physical_id": dev.physical_id,
12923 "pstatus": dev_pstatus,
12924 "sstatus": dev_sstatus,
12925 "children": dev_children,
12930 def Exec(self, feedback_fn):
12931 """Gather and return data"""
12934 cluster = self.cfg.GetClusterInfo()
12936 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12937 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12939 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12940 for node in nodes.values()))
12942 group2name_fn = lambda uuid: groups[uuid].name
12944 for instance in self.wanted_instances:
12945 pnode = nodes[instance.primary_node]
12947 if self.op.static or pnode.offline:
12948 remote_state = None
12950 self.LogWarning("Primary node %s is marked offline, returning static"
12951 " information only for instance %s" %
12952 (pnode.name, instance.name))
12954 remote_info = self.rpc.call_instance_info(instance.primary_node,
12956 instance.hypervisor)
12957 remote_info.Raise("Error checking node %s" % instance.primary_node)
12958 remote_info = remote_info.payload
12959 if remote_info and "state" in remote_info:
12960 remote_state = "up"
12962 if instance.admin_state == constants.ADMINST_UP:
12963 remote_state = "down"
12965 remote_state = instance.admin_state
12967 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12970 snodes_group_uuids = [nodes[snode_name].group
12971 for snode_name in instance.secondary_nodes]
12973 result[instance.name] = {
12974 "name": instance.name,
12975 "config_state": instance.admin_state,
12976 "run_state": remote_state,
12977 "pnode": instance.primary_node,
12978 "pnode_group_uuid": pnode.group,
12979 "pnode_group_name": group2name_fn(pnode.group),
12980 "snodes": instance.secondary_nodes,
12981 "snodes_group_uuids": snodes_group_uuids,
12982 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12984 # this happens to be the same format used for hooks
12985 "nics": _NICListToTuple(self, instance.nics),
12986 "disk_template": instance.disk_template,
12988 "hypervisor": instance.hypervisor,
12989 "network_port": instance.network_port,
12990 "hv_instance": instance.hvparams,
12991 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12992 "be_instance": instance.beparams,
12993 "be_actual": cluster.FillBE(instance),
12994 "os_instance": instance.osparams,
12995 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12996 "serial_no": instance.serial_no,
12997 "mtime": instance.mtime,
12998 "ctime": instance.ctime,
12999 "uuid": instance.uuid,
13005 def PrepareContainerMods(mods, private_fn):
13006 """Prepares a list of container modifications by adding a private data field.
13008 @type mods: list of tuples; (operation, index, parameters)
13009 @param mods: List of modifications
13010 @type private_fn: callable or None
13011 @param private_fn: Callable for constructing a private data field for a modification
13016 if private_fn is None:
13021 return [(op, idx, params, fn()) for (op, idx, params) in mods]
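# Minimal usage sketch (illustrative only; the parameter values are
# hypothetical). The opcode's disk/NIC modification lists are turned into
# 4-tuples that carry a per-modification private object:
#
#   mods = [(constants.DDM_ADD, -1, {constants.INIC_MAC: constants.VALUE_AUTO})]
#   nicmod = PrepareContainerMods(mods, _InstNicModPrivate)
#   # -> [(constants.DDM_ADD, -1, {...}, <_InstNicModPrivate object>)]
#
# With private_fn=None the private field is simply None.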
13024 #: Type description for changes as returned by L{ApplyContainerMods}'s callbacks
13026 _TApplyContModsCbChanges = \
13027 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13028 ht.TNonEmptyString,
13033 def ApplyContainerMods(kind, container, chgdesc, mods,
13034 create_fn, modify_fn, remove_fn):
13035 """Applies descriptions in C{mods} to C{container}.
13038 @param kind: One-word item description
13039 @type container: list
13040 @param container: Container to modify
13041 @type chgdesc: None or list
13042 @param chgdesc: List of applied changes
13044 @param mods: Modifications as returned by L{PrepareContainerMods}
13045 @type create_fn: callable
13046 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13047 receives absolute item index, parameters and private data object as added
13048 by L{PrepareContainerMods}, returns tuple containing new item and changes
13050 @type modify_fn: callable
13051 @param modify_fn: Callback for modifying an existing item
13052 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13053 and private data object as added by L{PrepareContainerMods}, returns
13055 @type remove_fn: callable
13056 @param remove_fn: Callback on removing item; receives absolute item index,
13057 item and private data object as added by L{PrepareContainerMods}
13060 for (op, idx, params, private) in mods:
13063 absidx = len(container) - 1
13065 raise IndexError("Not accepting negative indices other than -1")
13066 elif idx > len(container):
13067 raise IndexError("Got %s index %s, but there are only %s" %
13068 (kind, idx, len(container)))
13074 if op == constants.DDM_ADD:
13075 # Calculate where item will be added
13077 addidx = len(container)
13081 if create_fn is None:
13084 (item, changes) = create_fn(addidx, params, private)
13087 container.append(item)
13090 assert idx <= len(container)
13091 # list.insert does so before the specified index
13092 container.insert(idx, item)
13094 # Retrieve existing item
13096 item = container[absidx]
13098 raise IndexError("Invalid %s index %s" % (kind, idx))
13100 if op == constants.DDM_REMOVE:
13103 if remove_fn is not None:
13104 remove_fn(absidx, item, private)
13106 changes = [("%s/%s" % (kind, absidx), "remove")]
13108 assert container[absidx] == item
13109 del container[absidx]
13110 elif op == constants.DDM_MODIFY:
13111 if modify_fn is not None:
13112 changes = modify_fn(absidx, item, params, private)
13114 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13116 assert _TApplyContModsCbChanges(changes)
13118 if not (chgdesc is None or changes is None):
13119 chgdesc.extend(changes)
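# Rough usage sketch (illustrative; create_fn/modify_fn/remove_fn stand for
# the callbacks defined by LUInstanceSetParams further below). The same
# helper drives both the prereq dry-run and the real modification:
#
#   chgdesc = []
#   ApplyContainerMods("disk", instance.disks, chgdesc, diskmod,
#                      create_fn, modify_fn, remove_fn)
#
# Afterwards chgdesc contains (name, change) pairs matching
# _TApplyContModsCbChanges, e.g. ("disk/0", "add:size=1024,mode=rw") or
# ("disk.mode/1", "ro").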
13122 def _UpdateIvNames(base_index, disks):
13123 """Updates the C{iv_name} attribute of disks.
13125 @type disks: list of L{objects.Disk}
13128 for (idx, disk) in enumerate(disks):
13129 disk.iv_name = "disk/%s" % (base_index + idx, )
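# For example, _UpdateIvNames(0, instance.disks), as used by
# LUInstanceSetParams.Exec below, renumbers the volumes to "disk/0",
# "disk/1", ... after disks have been added or removed.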
13132 class _InstNicModPrivate:
13133 """Data structure for network interface modifications.
13135 Used by L{LUInstanceSetParams}.
13138 def __init__(self):
13143 class LUInstanceSetParams(LogicalUnit):
13144 """Modifies an instances's parameters.
13147 HPATH = "instance-modify"
13148 HTYPE = constants.HTYPE_INSTANCE
13152 def _UpgradeDiskNicMods(kind, mods, verify_fn):
13153 assert ht.TList(mods)
13154 assert not mods or len(mods[0]) in (2, 3)
13156 if mods and len(mods[0]) == 2:
13160 for op, params in mods:
13161 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13162 result.append((op, -1, params))
13166 raise errors.OpPrereqError("Only one %s add or remove operation is"
13167 " supported at a time" % kind,
13168 errors.ECODE_INVAL)
13170 result.append((constants.DDM_MODIFY, op, params))
13172 assert verify_fn(result)
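# Illustrative upgrade (hypothetical parameter values): the legacy 2-tuple
# format
#   [(constants.DDM_ADD, {"size": 1024}), (0, {"mode": "ro"})]
# becomes the 3-tuple format
#   [(constants.DDM_ADD, -1, {"size": 1024}),
#    (constants.DDM_MODIFY, 0, {"mode": "ro"})]
# and only a single add or remove per list is accepted in the legacy form.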
13179 def _CheckMods(kind, mods, key_types, item_fn):
13180 """Ensures requested disk/NIC modifications are valid.
13183 for (op, _, params) in mods:
13184 assert ht.TDict(params)
13186 # If 'key_types' is an empty dict, we assume we have an
13187 # 'ext' template and thus do not ForceDictType
13189 utils.ForceDictType(params, key_types)
13191 if op == constants.DDM_REMOVE:
13193 raise errors.OpPrereqError("No settings should be passed when"
13194 " removing a %s" % kind,
13195 errors.ECODE_INVAL)
13196 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13197 item_fn(op, params)
13199 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13202 def _VerifyDiskModification(op, params):
13203 """Verifies a disk modification.
13206 if op == constants.DDM_ADD:
13207 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13208 if mode not in constants.DISK_ACCESS_SET:
13209 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13210 errors.ECODE_INVAL)
13212 size = params.get(constants.IDISK_SIZE, None)
13214 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13215 constants.IDISK_SIZE, errors.ECODE_INVAL)
13219 except (TypeError, ValueError), err:
13220 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13221 errors.ECODE_INVAL)
13223 params[constants.IDISK_SIZE] = size
13225 elif op == constants.DDM_MODIFY:
13226 if constants.IDISK_SIZE in params:
13227 raise errors.OpPrereqError("Disk size change not possible, use"
13228 " grow-disk", errors.ECODE_INVAL)
13229 if constants.IDISK_MODE not in params:
13230 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13231 " modification supported, but missing",
13232 errors.ECODE_NOENT)
13233 if len(params) > 1:
13234 raise errors.OpPrereqError("Disk modification doesn't support"
13235 " additional arbitrary parameters",
13236 errors.ECODE_INVAL)
13239 def _VerifyNicModification(op, params):
13240 """Verifies a network interface modification.
13243 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13244 ip = params.get(constants.INIC_IP, None)
13245 req_net = params.get(constants.INIC_NETWORK, None)
13246 link = params.get(constants.NIC_LINK, None)
13247 mode = params.get(constants.NIC_MODE, None)
13248 if req_net is not None:
13249 if req_net.lower() == constants.VALUE_NONE:
13250 params[constants.INIC_NETWORK] = None
13252 elif link is not None or mode is not None:
13253 raise errors.OpPrereqError("If network is given"
13254 " mode or link should not",
13255 errors.ECODE_INVAL)
13257 if op == constants.DDM_ADD:
13258 macaddr = params.get(constants.INIC_MAC, None)
13259 if macaddr is None:
13260 params[constants.INIC_MAC] = constants.VALUE_AUTO
13263 if ip.lower() == constants.VALUE_NONE:
13264 params[constants.INIC_IP] = None
13266 if ip.lower() == constants.NIC_IP_POOL:
13267 if op == constants.DDM_ADD and req_net is None:
13268 raise errors.OpPrereqError("If ip=pool, parameter network"
13270 errors.ECODE_INVAL)
13272 if not netutils.IPAddress.IsValid(ip):
13273 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13274 errors.ECODE_INVAL)
13276 if constants.INIC_MAC in params:
13277 macaddr = params[constants.INIC_MAC]
13278 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13279 macaddr = utils.NormalizeAndValidateMac(macaddr)
13281 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13282 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13283 " modifying an existing NIC",
13284 errors.ECODE_INVAL)
13286 def CheckArguments(self):
13287 if not (self.op.nics or self.op.disks or self.op.disk_template or
13288 self.op.hvparams or self.op.beparams or self.op.os_name or
13289 self.op.offline is not None or self.op.runtime_mem):
13290 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13292 if self.op.hvparams:
13293 _CheckGlobalHvParams(self.op.hvparams)
13295 self.op.disks = self._UpgradeDiskNicMods(
13296 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13297 self.op.nics = self._UpgradeDiskNicMods(
13298 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13300 if self.op.disks and self.op.disk_template is not None:
13301 raise errors.OpPrereqError("Disk template conversion and other disk"
13302 " changes not supported at the same time",
13303 errors.ECODE_INVAL)
13305 if (self.op.disk_template and
13306 self.op.disk_template in constants.DTS_INT_MIRROR and
13307 self.op.remote_node is None):
13308 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13309 " one requires specifying a secondary node",
13310 errors.ECODE_INVAL)
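# Hedged example (assuming the standard gnt-instance CLI): a plain-to-drbd
# conversion therefore has to name the new secondary node, roughly
#
#   gnt-instance modify -t drbd -n node2.example.com instance1
#
# while converting back to plain needs no -n argument.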
13312 # Check NIC modifications
13313 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13314 self._VerifyNicModification)
13316 def ExpandNames(self):
13317 self._ExpandAndLockInstance()
13318 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13319 # Can't even acquire node locks in shared mode as upcoming changes in
13320 # Ganeti 2.6 will start to modify the node object on disk conversion
13321 self.needed_locks[locking.LEVEL_NODE] = []
13322 self.needed_locks[locking.LEVEL_NODE_RES] = []
13323 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13324 # Lock the node group to look up the ipolicy
13325 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13327 def DeclareLocks(self, level):
13328 if level == locking.LEVEL_NODEGROUP:
13329 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13330 # Acquire locks for the instance's nodegroups optimistically. Needs
13331 # to be verified in CheckPrereq
13332 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13333 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13334 elif level == locking.LEVEL_NODE:
13335 self._LockInstancesNodes()
13336 if self.op.disk_template and self.op.remote_node:
13337 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13338 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13339 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13341 self.needed_locks[locking.LEVEL_NODE_RES] = \
13342 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13344 def BuildHooksEnv(self):
13345 """Build hooks env.
13347 This runs on the master, primary and secondaries.
13351 if constants.BE_MINMEM in self.be_new:
13352 args["minmem"] = self.be_new[constants.BE_MINMEM]
13353 if constants.BE_MAXMEM in self.be_new:
13354 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13355 if constants.BE_VCPUS in self.be_new:
13356 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13357 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13358 # information at all.
13360 if self._new_nics is not None:
13363 for nic in self._new_nics:
13364 n = copy.deepcopy(nic)
13365 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13366 n.nicparams = nicparams
13367 nics.append(_NICToTuple(self, n))
13369 args["nics"] = nics
13371 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13372 if self.op.disk_template:
13373 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13374 if self.op.runtime_mem:
13375 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13379 def BuildHooksNodes(self):
13380 """Build hooks nodes.
13383 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13386 def _PrepareNicModification(self, params, private, old_ip, old_net,
13387 old_params, cluster, pnode):
13389 update_params_dict = dict([(key, params[key])
13390 for key in constants.NICS_PARAMETERS
13393 req_link = update_params_dict.get(constants.NIC_LINK, None)
13394 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13396 new_net = params.get(constants.INIC_NETWORK, old_net)
13397 if new_net is not None:
13398 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
13399 if netparams is None:
13400 raise errors.OpPrereqError("No netparams found for the network"
13401 " %s, probably not connected" % new_net,
13402 errors.ECODE_INVAL)
13403 new_params = dict(netparams)
13405 new_params = _GetUpdatedParams(old_params, update_params_dict)
13407 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13409 new_filled_params = cluster.SimpleFillNIC(new_params)
13410 objects.NIC.CheckParameterSyntax(new_filled_params)
13412 new_mode = new_filled_params[constants.NIC_MODE]
13413 if new_mode == constants.NIC_MODE_BRIDGED:
13414 bridge = new_filled_params[constants.NIC_LINK]
13415 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13417 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13419 self.warn.append(msg)
13421 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13423 elif new_mode == constants.NIC_MODE_ROUTED:
13424 ip = params.get(constants.INIC_IP, old_ip)
13426 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13427 " on a routed NIC", errors.ECODE_INVAL)
13429 elif new_mode == constants.NIC_MODE_OVS:
13430 # TODO: check OVS link
13431 self.LogInfo("OVS links are currently not checked for correctness")
13433 if constants.INIC_MAC in params:
13434 mac = params[constants.INIC_MAC]
13436 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13437 errors.ECODE_INVAL)
13438 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13439 # otherwise generate the MAC address
13440 params[constants.INIC_MAC] = \
13441 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13443 # or validate/reserve the current one
13445 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13446 except errors.ReservationError:
13447 raise errors.OpPrereqError("MAC address '%s' already in use"
13448 " in cluster" % mac,
13449 errors.ECODE_NOTUNIQUE)
13450 elif new_net != old_net:
13452 def get_net_prefix(net):
13454 uuid = self.cfg.LookupNetwork(net)
13456 nobj = self.cfg.GetNetwork(uuid)
13457 return nobj.mac_prefix
13460 new_prefix = get_net_prefix(new_net)
13461 old_prefix = get_net_prefix(old_net)
13462 if old_prefix != new_prefix:
13463 params[constants.INIC_MAC] = \
13464 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
13466 # if there is a change in nic-network configuration
13467 new_ip = params.get(constants.INIC_IP, old_ip)
13468 if (new_ip, new_net) != (old_ip, old_net):
13471 if new_ip.lower() == constants.NIC_IP_POOL:
13473 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
13474 except errors.ReservationError:
13475 raise errors.OpPrereqError("Unable to get a free IP"
13476 " from the address pool",
13477 errors.ECODE_STATE)
13478 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
13479 params[constants.INIC_IP] = new_ip
13480 elif new_ip != old_ip or new_net != old_net:
13482 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
13483 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
13484 except errors.ReservationError:
13485 raise errors.OpPrereqError("IP %s not available in network %s" %
13487 errors.ECODE_NOTUNIQUE)
13488 elif new_ip.lower() == constants.NIC_IP_POOL:
13489 raise errors.OpPrereqError("ip=pool, but no network found",
13490 errors.ECODE_INVAL)
13493 elif self.op.conflicts_check:
13494 _CheckForConflictingIp(self, new_ip, pnode)
13499 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
13500 except errors.AddressPoolError:
13501 logging.warning("Release IP %s not contained in network %s",
13504 # there are no changes in (net, ip) tuple
13505 elif (old_net is not None and
13506 (req_link is not None or req_mode is not None)):
13507 raise errors.OpPrereqError("Not allowed to change link or mode of"
13508 " a NIC that is connected to a network",
13509 errors.ECODE_INVAL)
13511 private.params = new_params
13512 private.filled = new_filled_params
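# Illustrative parameter sets handled above (all values are hypothetical):
#   {constants.INIC_NETWORK: "net1", constants.INIC_IP: constants.NIC_IP_POOL}
#     -> attach the NIC to "net1" and allocate an address from its pool
#   {constants.INIC_IP: "192.0.2.10"}
#     -> reserve that address in the NIC's (new or current) network, if any
# A routed NIC must keep an IP address, and link/mode cannot be changed on a
# NIC that is connected to a network.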
13514 def _PreCheckDiskTemplate(self, pnode_info):
13515 """CheckPrereq checks related to a new disk template."""
13516 # Arguments are passed to avoid configuration lookups
13517 instance = self.instance
13518 pnode = instance.primary_node
13519 cluster = self.cluster
13520 if instance.disk_template == self.op.disk_template:
13521 raise errors.OpPrereqError("Instance already has disk template %s" %
13522 instance.disk_template, errors.ECODE_INVAL)
13524 if (instance.disk_template,
13525 self.op.disk_template) not in self._DISK_CONVERSIONS:
13526 raise errors.OpPrereqError("Unsupported disk template conversion from"
13527 " %s to %s" % (instance.disk_template,
13528 self.op.disk_template),
13529 errors.ECODE_INVAL)
13530 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13531 msg="cannot change disk template")
13532 if self.op.disk_template in constants.DTS_INT_MIRROR:
13533 if self.op.remote_node == pnode:
13534 raise errors.OpPrereqError("Given new secondary node %s is the same"
13535 " as the primary node of the instance" %
13536 self.op.remote_node, errors.ECODE_STATE)
13537 _CheckNodeOnline(self, self.op.remote_node)
13538 _CheckNodeNotDrained(self, self.op.remote_node)
13539 # FIXME: here we assume that the old instance type is DT_PLAIN
13540 assert instance.disk_template == constants.DT_PLAIN
13541 disks = [{constants.IDISK_SIZE: d.size,
13542 constants.IDISK_VG: d.logical_id[0]}
13543 for d in instance.disks]
13544 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13545 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13547 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13548 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13549 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13551 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13552 ignore=self.op.ignore_ipolicy)
13553 if pnode_info.group != snode_info.group:
13554 self.LogWarning("The primary and secondary nodes are in two"
13555 " different node groups; the disk parameters"
13556 " from the first disk's node group will be"
13559 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
13560 # Make sure none of the nodes require exclusive storage
13561 nodes = [pnode_info]
13562 if self.op.disk_template in constants.DTS_INT_MIRROR:
13564 nodes.append(snode_info)
13565 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13566 if compat.any(map(has_es, nodes)):
13567 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13568 " storage is enabled" % (instance.disk_template,
13569 self.op.disk_template))
13570 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13572 def CheckPrereq(self):
13573 """Check prerequisites.
13575 This only checks the instance list against the existing names.
13578 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13579 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13581 cluster = self.cluster = self.cfg.GetClusterInfo()
13582 assert self.instance is not None, \
13583 "Cannot retrieve locked instance %s" % self.op.instance_name
13585 pnode = instance.primary_node
13586 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13587 nodelist = list(instance.all_nodes)
13588 pnode_info = self.cfg.GetNodeInfo(pnode)
13589 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13591 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13592 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13593 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13595 # dictionary with instance information after the modification
13598 # Check disk modifications. This is done here and not in CheckArguments
13599 # (as with NICs), because we need to know the instance's disk template
13600 if instance.disk_template == constants.DT_EXT:
13601 self._CheckMods("disk", self.op.disks, {},
13602 self._VerifyDiskModification)
13604 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13605 self._VerifyDiskModification)
13607 # Prepare disk/NIC modifications
13608 self.diskmod = PrepareContainerMods(self.op.disks, None)
13609 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13611 # Check the validity of the `provider' parameter
13612 if instance.disk_template == constants.DT_EXT:
13613 for mod in self.diskmod:
13614 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13615 if mod[0] == constants.DDM_ADD:
13616 if ext_provider is None:
13617 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13618 " '%s' missing, during disk add" %
13620 constants.IDISK_PROVIDER),
13621 errors.ECODE_NOENT)
13622 elif mod[0] == constants.DDM_MODIFY:
13624 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13626 constants.IDISK_PROVIDER,
13627 errors.ECODE_INVAL)
13629 for mod in self.diskmod:
13630 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13631 if ext_provider is not None:
13632 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13633 " instances of type '%s'" %
13634 (constants.IDISK_PROVIDER,
13636 errors.ECODE_INVAL)
13639 if self.op.os_name and not self.op.force:
13640 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13641 self.op.force_variant)
13642 instance_os = self.op.os_name
13644 instance_os = instance.os
13646 assert not (self.op.disk_template and self.op.disks), \
13647 "Can't modify disk template and apply disk changes at the same time"
13649 if self.op.disk_template:
13650 self._PreCheckDiskTemplate(pnode_info)
13652 # hvparams processing
13653 if self.op.hvparams:
13654 hv_type = instance.hypervisor
13655 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13656 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13657 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13660 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13661 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13662 self.hv_proposed = self.hv_new = hv_new # the new actual values
13663 self.hv_inst = i_hvdict # the new dict (without defaults)
13665 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13667 self.hv_new = self.hv_inst = {}
13669 # beparams processing
13670 if self.op.beparams:
13671 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13673 objects.UpgradeBeParams(i_bedict)
13674 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13675 be_new = cluster.SimpleFillBE(i_bedict)
13676 self.be_proposed = self.be_new = be_new # the new actual values
13677 self.be_inst = i_bedict # the new dict (without defaults)
13679 self.be_new = self.be_inst = {}
13680 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13681 be_old = cluster.FillBE(instance)
13683 # CPU param validation -- checking every time a parameter is
13684 # changed to cover all cases where either CPU mask or vcpus have been changed
13686 if (constants.BE_VCPUS in self.be_proposed and
13687 constants.HV_CPU_MASK in self.hv_proposed):
13689 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13690 # Verify mask is consistent with number of vCPUs. Can skip this
13691 # test if only 1 entry in the CPU mask, which means same mask
13692 # is applied to all vCPUs.
13693 if (len(cpu_list) > 1 and
13694 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13695 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13697 (self.be_proposed[constants.BE_VCPUS],
13698 self.hv_proposed[constants.HV_CPU_MASK]),
13699 errors.ECODE_INVAL)
13701 # Only perform this test if a new CPU mask is given
13702 if constants.HV_CPU_MASK in self.hv_new:
13703 # Calculate the largest CPU number requested
13704 max_requested_cpu = max(map(max, cpu_list))
13705 # Check that all of the instance's nodes have enough physical CPUs to
13706 # satisfy the requested CPU mask
13707 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13708 max_requested_cpu + 1, instance.hypervisor)
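# Worked example (the mask syntax is illustrative): with BE_VCPUS = 2, a
# per-vCPU mask such as "1-2:4" parses into two entries and is accepted,
# whereas "1:2:3" (three entries) is rejected; the highest CPU number
# referenced (here 4) must also exist on every node of the instance.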
13710 # osparams processing
13711 if self.op.osparams:
13712 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13713 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13714 self.os_inst = i_osdict # the new dict (without defaults)
13720 #TODO(dynmem): do the appropriate check involving MINMEM
13721 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13722 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13723 mem_check_list = [pnode]
13724 if be_new[constants.BE_AUTO_BALANCE]:
13725 # either we changed auto_balance to yes or it was from before
13726 mem_check_list.extend(instance.secondary_nodes)
13727 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13728 instance.hypervisor)
13729 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13730 [instance.hypervisor], False)
13731 pninfo = nodeinfo[pnode]
13732 msg = pninfo.fail_msg
13734 # Assume the primary node is unreachable and go ahead
13735 self.warn.append("Can't get info from primary node %s: %s" %
13738 (_, _, (pnhvinfo, )) = pninfo.payload
13739 if not isinstance(pnhvinfo.get("memory_free", None), int):
13740 self.warn.append("Node data from primary node %s doesn't contain"
13741 " free memory information" % pnode)
13742 elif instance_info.fail_msg:
13743 self.warn.append("Can't get instance runtime information: %s" %
13744 instance_info.fail_msg)
13746 if instance_info.payload:
13747 current_mem = int(instance_info.payload["memory"])
13749 # Assume instance not running
13750 # (there is a slight race condition here, but it's not very
13751 # probable, and we have no other way to check)
13752 # TODO: Describe race condition
13754 #TODO(dynmem): do the appropriate check involving MINMEM
13755 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13756 pnhvinfo["memory_free"])
13758 raise errors.OpPrereqError("This change will prevent the instance"
13759 " from starting, due to %d MB of memory"
13760 " missing on its primary node" %
13761 miss_mem, errors.ECODE_NORES)
13763 if be_new[constants.BE_AUTO_BALANCE]:
13764 for node, nres in nodeinfo.items():
13765 if node not in instance.secondary_nodes:
13767 nres.Raise("Can't get info from secondary node %s" % node,
13768 prereq=True, ecode=errors.ECODE_STATE)
13769 (_, _, (nhvinfo, )) = nres.payload
13770 if not isinstance(nhvinfo.get("memory_free", None), int):
13771 raise errors.OpPrereqError("Secondary node %s didn't return free"
13772 " memory information" % node,
13773 errors.ECODE_STATE)
13774 #TODO(dynmem): do the appropriate check involving MINMEM
13775 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13776 raise errors.OpPrereqError("This change will prevent the instance"
13777 " from failover to its secondary node"
13778 " %s, due to not enough memory" % node,
13779 errors.ECODE_STATE)
13781 if self.op.runtime_mem:
13782 remote_info = self.rpc.call_instance_info(instance.primary_node,
13784 instance.hypervisor)
13785 remote_info.Raise("Error checking node %s" % instance.primary_node)
13786 if not remote_info.payload: # not running already
13787 raise errors.OpPrereqError("Instance %s is not running" %
13788 instance.name, errors.ECODE_STATE)
13790 current_memory = remote_info.payload["memory"]
13791 if (not self.op.force and
13792 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13793 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13794 raise errors.OpPrereqError("Instance %s must have memory between %d"
13795 " and %d MB of memory unless --force is"
13798 self.be_proposed[constants.BE_MINMEM],
13799 self.be_proposed[constants.BE_MAXMEM]),
13800 errors.ECODE_INVAL)
13802 delta = self.op.runtime_mem - current_memory
13804 _CheckNodeFreeMemory(self, instance.primary_node,
13805 "ballooning memory for instance %s" %
13806 instance.name, delta, instance.hypervisor)
13808 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13809 raise errors.OpPrereqError("Disk operations not supported for"
13810 " diskless instances", errors.ECODE_INVAL)
13812 def _PrepareNicCreate(_, params, private):
13813 self._PrepareNicModification(params, private, None, None,
13814 {}, cluster, pnode)
13815 return (None, None)
13817 def _PrepareNicMod(_, nic, params, private):
13818 self._PrepareNicModification(params, private, nic.ip, nic.network,
13819 nic.nicparams, cluster, pnode)
13822 def _PrepareNicRemove(_, params, __):
13823 ip = params.ip
13824 net = params.network
13825 if net is not None and ip is not None:
13826 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13828 # Verify NIC changes (operating on copy)
13829 nics = instance.nics[:]
13830 ApplyContainerMods("NIC", nics, None, self.nicmod,
13831 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13832 if len(nics) > constants.MAX_NICS:
13833 raise errors.OpPrereqError("Instance has too many network interfaces"
13834 " (%d), cannot add more" % constants.MAX_NICS,
13835 errors.ECODE_STATE)
13837 # Verify disk changes (operating on a copy)
13838 disks = instance.disks[:]
13839 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13840 if len(disks) > constants.MAX_DISKS:
13841 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13842 " more" % constants.MAX_DISKS,
13843 errors.ECODE_STATE)
13844 disk_sizes = [disk.size for disk in instance.disks]
13845 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13846 self.diskmod if op == constants.DDM_ADD)
13847 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13848 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13850 if self.op.offline is not None and self.op.offline:
13851 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13852 msg="can't change to offline")
13854 # Pre-compute NIC changes (necessary to use result in hooks)
13855 self._nic_chgdesc = []
13857 # Operate on copies as this is still in prereq
13858 nics = [nic.Copy() for nic in instance.nics]
13859 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13860 self._CreateNewNic, self._ApplyNicMods, None)
13861 self._new_nics = nics
13862 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13864 self._new_nics = None
13865 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13867 if not self.op.ignore_ipolicy:
13868 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13871 # Fill ispec with backend parameters
13872 ispec[constants.ISPEC_SPINDLE_USE] = \
13873 self.be_new.get(constants.BE_SPINDLE_USE, None)
13874 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13877 # Copy ispec to verify parameters with min/max values separately
13878 ispec_max = ispec.copy()
13879 ispec_max[constants.ISPEC_MEM_SIZE] = \
13880 self.be_new.get(constants.BE_MAXMEM, None)
13881 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13882 ispec_min = ispec.copy()
13883 ispec_min[constants.ISPEC_MEM_SIZE] = \
13884 self.be_new.get(constants.BE_MINMEM, None)
13885 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13887 if (res_max or res_min):
13888 # FIXME: Improve error message by including information about whether
13889 # the upper or lower limit of the parameter fails the ipolicy.
13890 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13891 (group_info, group_info.name,
13892 utils.CommaJoin(set(res_max + res_min))))
13893 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13895 def _ConvertPlainToDrbd(self, feedback_fn):
13896 """Converts an instance from plain to drbd.
13899 feedback_fn("Converting template to drbd")
13900 instance = self.instance
13901 pnode = instance.primary_node
13902 snode = self.op.remote_node
13904 assert instance.disk_template == constants.DT_PLAIN
13906 # create a fake disk info for _GenerateDiskTemplate
13907 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13908 constants.IDISK_VG: d.logical_id[0]}
13909 for d in instance.disks]
13910 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13911 instance.name, pnode, [snode],
13912 disk_info, None, None, 0, feedback_fn,
13914 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13916 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13917 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13918 info = _GetInstanceInfoText(instance)
13919 feedback_fn("Creating additional volumes...")
13920 # first, create the missing data and meta devices
13921 for disk in anno_disks:
13922 # unfortunately this is... not too nice
13923 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13924 info, True, p_excl_stor)
13925 for child in disk.children:
13926 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13928 # at this stage, all new LVs have been created, we can rename the old ones
13930 feedback_fn("Renaming original volumes...")
13931 rename_list = [(o, n.children[0].logical_id)
13932 for (o, n) in zip(instance.disks, new_disks)]
13933 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13934 result.Raise("Failed to rename original LVs")
13936 feedback_fn("Initializing DRBD devices...")
13937 # all child devices are in place, we can now create the DRBD devices
13938 for disk in anno_disks:
13939 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13940 f_create = node == pnode
13941 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13944 # at this point, the instance has been modified
13945 instance.disk_template = constants.DT_DRBD8
13946 instance.disks = new_disks
13947 self.cfg.Update(instance, feedback_fn)
13949 # Release node locks while waiting for sync
13950 _ReleaseLocks(self, locking.LEVEL_NODE)
13952 # disks are created, waiting for sync
13953 disk_abort = not _WaitForSync(self, instance,
13954 oneshot=not self.op.wait_for_sync)
13956 raise errors.OpExecError("There are some degraded disks for"
13957 " this instance, please cleanup manually")
13959 # Node resource locks will be released by caller
13961 def _ConvertDrbdToPlain(self, feedback_fn):
13962 """Converts an instance from drbd to plain.
13965 instance = self.instance
13967 assert len(instance.secondary_nodes) == 1
13968 assert instance.disk_template == constants.DT_DRBD8
13970 pnode = instance.primary_node
13971 snode = instance.secondary_nodes[0]
13972 feedback_fn("Converting template to plain")
13974 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13975 new_disks = [d.children[0] for d in instance.disks]
13977 # copy over size and mode
13978 for parent, child in zip(old_disks, new_disks):
13979 child.size = parent.size
13980 child.mode = parent.mode
13982 # this is a DRBD disk, return its port to the pool
13983 # NOTE: this must be done right before the call to cfg.Update!
13984 for disk in old_disks:
13985 tcp_port = disk.logical_id[2]
13986 self.cfg.AddTcpUdpPort(tcp_port)
13988 # update instance structure
13989 instance.disks = new_disks
13990 instance.disk_template = constants.DT_PLAIN
13991 self.cfg.Update(instance, feedback_fn)
13993 # Release locks in case removing disks takes a while
13994 _ReleaseLocks(self, locking.LEVEL_NODE)
13996 feedback_fn("Removing volumes on the secondary node...")
13997 for disk in old_disks:
13998 self.cfg.SetDiskID(disk, snode)
13999 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14001 self.LogWarning("Could not remove block device %s on node %s,"
14002 " continuing anyway: %s", disk.iv_name, snode, msg)
14004 feedback_fn("Removing unneeded volumes on the primary node...")
14005 for idx, disk in enumerate(old_disks):
14006 meta = disk.children[1]
14007 self.cfg.SetDiskID(meta, pnode)
14008 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14010 self.LogWarning("Could not remove metadata for disk %d on node %s,"
14011 " continuing anyway: %s", idx, pnode, msg)
14013 def _CreateNewDisk(self, idx, params, _):
14014 """Creates a new disk.
14017 instance = self.instance
14020 if instance.disk_template in constants.DTS_FILEBASED:
14021 (file_driver, file_path) = instance.disks[0].logical_id
14022 file_path = os.path.dirname(file_path)
14023 else:
14024 file_driver = file_path = None
14027 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14028 instance.primary_node, instance.secondary_nodes,
14029 [params], file_path, file_driver, idx,
14030 self.Log, self.diskparams)[0]
14032 info = _GetInstanceInfoText(instance)
14034 logging.info("Creating volume %s for instance %s",
14035 disk.iv_name, instance.name)
14036 # Note: this needs to be kept in sync with _CreateDisks
14038 for node in instance.all_nodes:
14039 f_create = (node == instance.primary_node)
14041 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14042 except errors.OpExecError, err:
14043 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14044 disk.iv_name, disk, node, err)
14047 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14051 def _ModifyDisk(idx, disk, params, _):
14052 """Modifies a disk.
14055 disk.mode = params[constants.IDISK_MODE]
14058 ("disk.mode/%d" % idx, disk.mode),
14061 def _RemoveDisk(self, idx, root, _):
14065 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14066 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14067 self.cfg.SetDiskID(disk, node)
14068 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14070 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14071 " continuing anyway", idx, node, msg)
14073 # if this is a DRBD disk, return its port to the pool
14074 if root.dev_type in constants.LDS_DRBD:
14075 self.cfg.AddTcpUdpPort(root.logical_id[2])
14078 def _CreateNewNic(idx, params, private):
14079 """Creates data structure for a new network interface.
14082 mac = params[constants.INIC_MAC]
14083 ip = params.get(constants.INIC_IP, None)
14084 net = params.get(constants.INIC_NETWORK, None)
14085 #TODO: not private.filled?? can a nic have no nicparams??
14086 nicparams = private.filled
14088 return (objects.NIC(mac=mac, ip=ip, network=net, nicparams=nicparams), [
14090 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14091 (mac, ip, private.filled[constants.NIC_MODE],
14092 private.filled[constants.NIC_LINK],
14097 def _ApplyNicMods(idx, nic, params, private):
14098 """Modifies a network interface.
14103 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
14105 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14106 setattr(nic, key, params[key])
14109 nic.nicparams = private.filled
14111 for (key, val) in nic.nicparams.items():
14112 changes.append(("nic.%s/%d" % (key, idx), val))
14116 def Exec(self, feedback_fn):
14117 """Modifies an instance.
14119 All parameters take effect only at the next restart of the instance.
14122 # Process here the warnings from CheckPrereq, as we don't have a
14123 # feedback_fn there.
14124 # TODO: Replace with self.LogWarning
14125 for warn in self.warn:
14126 feedback_fn("WARNING: %s" % warn)
14128 assert ((self.op.disk_template is None) ^
14129 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14130 "Not owning any node resource locks"
14133 instance = self.instance
14136 if self.op.runtime_mem:
14137 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14139 self.op.runtime_mem)
14140 rpcres.Raise("Cannot modify instance runtime memory")
14141 result.append(("runtime_memory", self.op.runtime_mem))
14143 # Apply disk changes
14144 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14145 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14146 _UpdateIvNames(0, instance.disks)
14148 if self.op.disk_template:
14150 check_nodes = set(instance.all_nodes)
14151 if self.op.remote_node:
14152 check_nodes.add(self.op.remote_node)
14153 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14154 owned = self.owned_locks(level)
14155 assert not (check_nodes - owned), \
14156 ("Not owning the correct locks, owning %r, expected at least %r" %
14157 (owned, check_nodes))
14159 r_shut = _ShutdownInstanceDisks(self, instance)
14161 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14162 " proceed with disk template conversion")
14163 mode = (instance.disk_template, self.op.disk_template)
14165 self._DISK_CONVERSIONS[mode](self, feedback_fn)
14167 self.cfg.ReleaseDRBDMinors(instance.name)
14169 result.append(("disk_template", self.op.disk_template))
14171 assert instance.disk_template == self.op.disk_template, \
14172 ("Expected disk template '%s', found '%s'" %
14173 (self.op.disk_template, instance.disk_template))
14175 # Release node and resource locks if there are any (they might already have
14176 # been released during disk conversion)
14177 _ReleaseLocks(self, locking.LEVEL_NODE)
14178 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14180 # Apply NIC changes
14181 if self._new_nics is not None:
14182 instance.nics = self._new_nics
14183 result.extend(self._nic_chgdesc)
14186 if self.op.hvparams:
14187 instance.hvparams = self.hv_inst
14188 for key, val in self.op.hvparams.iteritems():
14189 result.append(("hv/%s" % key, val))
14192 if self.op.beparams:
14193 instance.beparams = self.be_inst
14194 for key, val in self.op.beparams.iteritems():
14195 result.append(("be/%s" % key, val))
14198 if self.op.os_name:
14199 instance.os = self.op.os_name
14202 if self.op.osparams:
14203 instance.osparams = self.os_inst
14204 for key, val in self.op.osparams.iteritems():
14205 result.append(("os/%s" % key, val))
14207 if self.op.offline is None:
14210 elif self.op.offline:
14211 # Mark instance as offline
14212 self.cfg.MarkInstanceOffline(instance.name)
14213 result.append(("admin_state", constants.ADMINST_OFFLINE))
14215 # Mark instance as online, but stopped
14216 self.cfg.MarkInstanceDown(instance.name)
14217 result.append(("admin_state", constants.ADMINST_DOWN))
14219 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14221 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14222 self.owned_locks(locking.LEVEL_NODE)), \
14223 "All node locks should have been released by now"
14227 _DISK_CONVERSIONS = {
14228 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14229 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
14233 class LUInstanceChangeGroup(LogicalUnit):
14234 HPATH = "instance-change-group"
14235 HTYPE = constants.HTYPE_INSTANCE
14238 def ExpandNames(self):
14239 self.share_locks = _ShareAll()
14241 self.needed_locks = {
14242 locking.LEVEL_NODEGROUP: [],
14243 locking.LEVEL_NODE: [],
14244 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14247 self._ExpandAndLockInstance()
14249 if self.op.target_groups:
14250 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14251 self.op.target_groups)
14253 self.req_target_uuids = None
14255 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14257 def DeclareLocks(self, level):
14258 if level == locking.LEVEL_NODEGROUP:
14259 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14261 if self.req_target_uuids:
14262 lock_groups = set(self.req_target_uuids)
14264 # Lock all groups used by instance optimistically; this requires going
14265 # via the node before it's locked, requiring verification later on
14266 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14267 lock_groups.update(instance_groups)
14269 # No target groups, need to lock all of them
14270 lock_groups = locking.ALL_SET
14272 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14274 elif level == locking.LEVEL_NODE:
14275 if self.req_target_uuids:
14276 # Lock all nodes used by instances
14277 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14278 self._LockInstancesNodes()
14280 # Lock all nodes in all potential target groups
14281 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14282 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14283 member_nodes = [node_name
14284 for group in lock_groups
14285 for node_name in self.cfg.GetNodeGroup(group).members]
14286 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14288 # Lock all nodes as all groups are potential targets
14289 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14291 def CheckPrereq(self):
14292 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14293 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14294 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14296 assert (self.req_target_uuids is None or
14297 owned_groups.issuperset(self.req_target_uuids))
14298 assert owned_instances == set([self.op.instance_name])
14300 # Get instance information
14301 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14303 # Check if node groups for locked instance are still correct
14304 assert owned_nodes.issuperset(self.instance.all_nodes), \
14305 ("Instance %s's nodes changed while we kept the lock" %
14306 self.op.instance_name)
14308 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14311 if self.req_target_uuids:
14312 # User requested specific target groups
14313 self.target_uuids = frozenset(self.req_target_uuids)
14315 # All groups except those used by the instance are potential targets
14316 self.target_uuids = owned_groups - inst_groups
14318 conflicting_groups = self.target_uuids & inst_groups
14319 if conflicting_groups:
14320 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14321 " used by the instance '%s'" %
14322 (utils.CommaJoin(conflicting_groups),
14323 self.op.instance_name),
14324 errors.ECODE_INVAL)
14326 if not self.target_uuids:
14327 raise errors.OpPrereqError("There are no possible target groups",
14328 errors.ECODE_INVAL)
14330 def BuildHooksEnv(self):
14331 """Build hooks env.
14334 assert self.target_uuids
14337 "TARGET_GROUPS": " ".join(self.target_uuids),
14340 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14344 def BuildHooksNodes(self):
14345 """Build hooks nodes.
14348 mn = self.cfg.GetMasterNode()
14349 return ([mn], [mn])
14351 def Exec(self, feedback_fn):
14352 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14354 assert instances == [self.op.instance_name], "Instance not locked"
14356 req = iallocator.IAReqGroupChange(instances=instances,
14357 target_groups=list(self.target_uuids))
14358 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14360 ial.Run(self.op.iallocator)
14362 if not ial.success:
14363 raise errors.OpPrereqError("Can't compute solution for changing group of"
14364 " instance '%s' using iallocator '%s': %s" %
14365 (self.op.instance_name, self.op.iallocator,
14366 ial.info), errors.ECODE_NORES)
14368 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14370 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14371 " instance '%s'", len(jobs), self.op.instance_name)
14373 return ResultWithJobs(jobs)
14376 class LUBackupQuery(NoHooksLU):
14377 """Query the exports list
14382 def CheckArguments(self):
14383 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14384 ["node", "export"], self.op.use_locking)
14386 def ExpandNames(self):
14387 self.expq.ExpandNames(self)
14389 def DeclareLocks(self, level):
14390 self.expq.DeclareLocks(self, level)
14392 def Exec(self, feedback_fn):
14395 for (node, expname) in self.expq.OldStyleQuery(self):
14396 if expname is None:
14397 result[node] = False
14399 result.setdefault(node, []).append(expname)
14404 class _ExportQuery(_QueryBase):
14405 FIELDS = query.EXPORT_FIELDS
14407 #: The node name is not a unique key for this query
14408 SORT_FIELD = "node"
14410 def ExpandNames(self, lu):
14411 lu.needed_locks = {}
14413 # The following variables interact with _QueryBase._GetNames
14415 self.wanted = _GetWantedNodes(lu, self.names)
14417 self.wanted = locking.ALL_SET
14419 self.do_locking = self.use_locking
14421 if self.do_locking:
14422 lu.share_locks = _ShareAll()
14423 lu.needed_locks = {
14424 locking.LEVEL_NODE: self.wanted,
14428 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14430 def DeclareLocks(self, lu, level):
14433 def _GetQueryData(self, lu):
14434 """Computes the list of nodes and their attributes.
14437 # Locking is not used
14439 assert not (compat.any(lu.glm.is_owned(level)
14440 for level in locking.LEVELS
14441 if level != locking.LEVEL_CLUSTER) or
14442 self.do_locking or self.use_locking)
14444 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14448 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14450 result.append((node, None))
14452 result.extend((node, expname) for expname in nres.payload)
14457 class LUBackupPrepare(NoHooksLU):
14458 """Prepares an instance for an export and returns useful information.
14463 def ExpandNames(self):
14464 self._ExpandAndLockInstance()
14466 def CheckPrereq(self):
14467 """Check prerequisites.
14470 instance_name = self.op.instance_name
14472 self.instance = self.cfg.GetInstanceInfo(instance_name)
14473 assert self.instance is not None, \
14474 "Cannot retrieve locked instance %s" % self.op.instance_name
14475 _CheckNodeOnline(self, self.instance.primary_node)
14477 self._cds = _GetClusterDomainSecret()
14479 def Exec(self, feedback_fn):
14480 """Prepares an instance for an export.
14483 instance = self.instance
14485 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14486 salt = utils.GenerateSecret(8)
14488 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14489 result = self.rpc.call_x509_cert_create(instance.primary_node,
14490 constants.RIE_CERT_VALIDITY)
14491 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14493 (name, cert_pem) = result.payload
14495 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14496 cert_pem)
14498 return {
14499 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14500 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14501 salt),
14502 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14503 }
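# Descriptive note (added): the dictionary returned above is intended to be
# handed back by the client when it submits the actual remote export;
# LUBackupExport.CheckPrereq below re-verifies the HMAC over the X509 key name
# against the cluster domain secret before trusting it.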
14508 class LUBackupExport(LogicalUnit):
14509 """Export an instance to an image in the cluster.
14512 HPATH = "instance-export"
14513 HTYPE = constants.HTYPE_INSTANCE
14516 def CheckArguments(self):
14517 """Check the arguments.
14520 self.x509_key_name = self.op.x509_key_name
14521 self.dest_x509_ca_pem = self.op.destination_x509_ca
14523 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14524 if not self.x509_key_name:
14525 raise errors.OpPrereqError("Missing X509 key name for encryption",
14526 errors.ECODE_INVAL)
14528 if not self.dest_x509_ca_pem:
14529 raise errors.OpPrereqError("Missing destination X509 CA",
14530 errors.ECODE_INVAL)
14532 def ExpandNames(self):
14533 self._ExpandAndLockInstance()
14535 # Lock all nodes for local exports
14536 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14537 # FIXME: lock only instance primary and destination node
14539 # Sad but true, for now we have to lock all nodes, as we don't know where
14540 # the previous export might be, and in this LU we search for it and
14541 # remove it from its current node. In the future we could fix this by:
14542 # - making a tasklet to search (share-lock all), then create the
14543 # new one, then a tasklet to remove the old one afterwards
14544 # - removing the removal operation altogether
14545 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14547 # Allocations should be stopped while this LU runs with node locks, but
14548 # it doesn't have to be exclusive
14549 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14550 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14552 def DeclareLocks(self, level):
14553 """Last minute lock declaration."""
14554 # All nodes are locked anyway, so nothing to do here.
14556 def BuildHooksEnv(self):
14557 """Build hooks env.
14559 This will run on the master, primary node and target node.
14563 "EXPORT_MODE": self.op.mode,
14564 "EXPORT_NODE": self.op.target_node,
14565 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14566 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14567 # TODO: Generic function for boolean env variables
14568 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14571 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14575 def BuildHooksNodes(self):
14576 """Build hooks nodes.
14579 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14581 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14582 nl.append(self.op.target_node)
14584 return (nl, nl)
14586 def CheckPrereq(self):
14587 """Check prerequisites.
14589 This checks that the instance and node names are valid.
14592 instance_name = self.op.instance_name
14594 self.instance = self.cfg.GetInstanceInfo(instance_name)
14595 assert self.instance is not None, \
14596 "Cannot retrieve locked instance %s" % self.op.instance_name
14597 _CheckNodeOnline(self, self.instance.primary_node)
14599 if (self.op.remove_instance and
14600 self.instance.admin_state == constants.ADMINST_UP and
14601 not self.op.shutdown):
14602 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14603 " down first", errors.ECODE_STATE)
14605 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14606 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14607 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14608 assert self.dst_node is not None
14610 _CheckNodeOnline(self, self.dst_node.name)
14611 _CheckNodeNotDrained(self, self.dst_node.name)
14614 self.dest_disk_info = None
14615 self.dest_x509_ca = None
14617 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14618 self.dst_node = None
14620 if len(self.op.target_node) != len(self.instance.disks):
14621 raise errors.OpPrereqError(("Received destination information for %s"
14622 " disks, but instance %s has %s disks") %
14623 (len(self.op.target_node), instance_name,
14624 len(self.instance.disks)),
14625 errors.ECODE_INVAL)
14627 cds = _GetClusterDomainSecret()
14629 # Check X509 key name
14630 try:
14631 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14632 except (TypeError, ValueError), err:
14633 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14634 errors.ECODE_INVAL)
14636 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14637 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14638 errors.ECODE_INVAL)
14640 # Load and verify CA
14641 try:
14642 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14643 except OpenSSL.crypto.Error, err:
14644 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14645 (err, ), errors.ECODE_INVAL)
14647 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14648 if errcode is not None:
14649 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14650 (msg, ), errors.ECODE_INVAL)
14652 self.dest_x509_ca = cert
14654 # Verify target information
14655 disk_info = []
14656 for idx, disk_data in enumerate(self.op.target_node):
14657 try:
14658 (host, port, magic) = \
14659 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14660 except errors.GenericError, err:
14661 raise errors.OpPrereqError("Target info for disk %s: %s" %
14662 (idx, err), errors.ECODE_INVAL)
14664 disk_info.append((host, port, magic))
14666 assert len(disk_info) == len(self.op.target_node)
14667 self.dest_disk_info = disk_info
14669 else:
14670 raise errors.ProgrammerError("Unhandled export mode %r" %
14671 self.op.mode)
14673 # instance disk type verification
14674 # TODO: Implement export support for file-based disks
14675 for disk in self.instance.disks:
14676 if disk.dev_type == constants.LD_FILE:
14677 raise errors.OpPrereqError("Export not supported for instances with"
14678 " file-based disks", errors.ECODE_INVAL)
14680 def _CleanupExports(self, feedback_fn):
14681 """Removes exports of current instance from all other nodes.
14683 If an instance in a cluster with nodes A..D was exported to node C, its
14684 exports will be removed from the nodes A, B and D.
14687 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14689 nodelist = self.cfg.GetNodeList()
14690 nodelist.remove(self.dst_node.name)
14692 # on one-node clusters nodelist will be empty after the removal
14693 # if we proceed the backup would be removed because OpBackupQuery
14694 # substitutes an empty list with the full cluster node list.
14695 iname = self.instance.name
14696 if nodelist:
14697 feedback_fn("Removing old exports for instance %s" % iname)
14698 exportlist = self.rpc.call_export_list(nodelist)
14699 for node in exportlist:
14700 if exportlist[node].fail_msg:
14701 continue
14702 if iname in exportlist[node].payload:
14703 msg = self.rpc.call_export_remove(node, iname).fail_msg
14704 if msg:
14705 self.LogWarning("Could not remove older export for instance %s"
14706 " on node %s: %s", iname, node, msg)
14708 def Exec(self, feedback_fn):
14709 """Export an instance to an image in the cluster.
14712 assert self.op.mode in constants.EXPORT_MODES
14714 instance = self.instance
14715 src_node = instance.primary_node
14717 if self.op.shutdown:
14718 # shutdown the instance, but not the disks
14719 feedback_fn("Shutting down instance %s" % instance.name)
14720 result = self.rpc.call_instance_shutdown(src_node, instance,
14721 self.op.shutdown_timeout)
14722 # TODO: Maybe ignore failures if ignore_remove_failures is set
14723 result.Raise("Could not shut down instance %s on"
14724 " node %s" % (instance.name, src_node))
14726 # set the disks ID correctly since call_instance_start needs the
14727 # correct drbd minor to create the symlinks
14728 for disk in instance.disks:
14729 self.cfg.SetDiskID(disk, src_node)
14731 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14733 if activate_disks:
14734 # Activate the instance disks if we're exporting a stopped instance
14735 feedback_fn("Activating disks for %s" % instance.name)
14736 _StartInstanceDisks(self, instance, None)
14739 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14740 instance)
14742 helper.CreateSnapshots()
14744 if (self.op.shutdown and
14745 instance.admin_state == constants.ADMINST_UP and
14746 not self.op.remove_instance):
14747 assert not activate_disks
14748 feedback_fn("Starting instance %s" % instance.name)
14749 result = self.rpc.call_instance_start(src_node,
14750 (instance, None, None), False)
14751 msg = result.fail_msg
14752 if msg:
14753 feedback_fn("Failed to start instance: %s" % msg)
14754 _ShutdownInstanceDisks(self, instance)
14755 raise errors.OpExecError("Could not start instance: %s" % msg)
14757 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14758 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14759 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14760 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14761 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14763 (key_name, _, _) = self.x509_key_name
14765 dest_ca_pem = \
14766 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14767 self.dest_x509_ca)
14769 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14770 key_name, dest_ca_pem,
14771 timeouts)
14775 # Check for backwards compatibility
14776 assert len(dresults) == len(instance.disks)
14777 assert compat.all(isinstance(i, bool) for i in dresults), \
14778 "Not all results are boolean: %r" % dresults
14781 if activate_disks:
14782 feedback_fn("Deactivating disks for %s" % instance.name)
14783 _ShutdownInstanceDisks(self, instance)
14785 if not (compat.all(dresults) and fin_resu):
14786 failures = []
14787 if not fin_resu:
14788 failures.append("export finalization")
14789 if not compat.all(dresults):
14790 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14791 if not dsk)
14792 failures.append("disk export: disk(s) %s" % fdsk)
14794 raise errors.OpExecError("Export failed, errors in %s" %
14795 utils.CommaJoin(failures))
14797 # At this point, the export was successful, we can cleanup/finish
14799 # Remove instance if requested
14800 if self.op.remove_instance:
14801 feedback_fn("Removing instance %s" % instance.name)
14802 _RemoveInstance(self, feedback_fn, instance,
14803 self.op.ignore_remove_failures)
14805 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14806 self._CleanupExports(feedback_fn)
14808 return fin_resu, dresults
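# Descriptive note (added): the LU result is the (fin_resu, dresults) tuple
# computed above, where fin_resu is the boolean outcome of finalizing the
# export and dresults holds one boolean per instance disk (matching
# instance.disks, as the assertions above require), each indicating whether
# that disk was exported successfully.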
14811 class LUBackupRemove(NoHooksLU):
14812 """Remove exports related to the named instance.
14817 def ExpandNames(self):
14818 self.needed_locks = {
14819 # We need all nodes to be locked in order for RemoveExport to work, but
14820 # we don't need to lock the instance itself, as nothing will happen to it
14821 # (and we can remove exports also for a removed instance)
14822 locking.LEVEL_NODE: locking.ALL_SET,
14824 # Removing backups is quick, so blocking allocations is justified
14825 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14828 # Allocations should be stopped while this LU runs with node locks, but it
14829 # doesn't have to be exclusive
14830 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14832 def Exec(self, feedback_fn):
14833 """Remove any export.
14836 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14837 # If the instance was not found we'll try with the name that was passed in.
14838 # This will only work if it was an FQDN, though.
14839 fqdn_warn = False
14840 if not instance_name:
14841 fqdn_warn = True
14842 instance_name = self.op.instance_name
14844 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14845 exportlist = self.rpc.call_export_list(locked_nodes)
14846 found = False
14847 for node in exportlist:
14848 msg = exportlist[node].fail_msg
14849 if msg:
14850 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14851 continue
14852 if instance_name in exportlist[node].payload:
14853 found = True
14854 result = self.rpc.call_export_remove(node, instance_name)
14855 msg = result.fail_msg
14856 if msg:
14857 logging.error("Could not remove export for instance %s"
14858 " on node %s: %s", instance_name, node, msg)
14860 if fqdn_warn and not found:
14861 feedback_fn("Export not found. If trying to remove an export belonging"
14862 " to a deleted instance please use its Fully Qualified"
14863 " Domain Name.")
14866 class LUGroupAdd(LogicalUnit):
14867 """Logical unit for creating node groups.
14870 HPATH = "group-add"
14871 HTYPE = constants.HTYPE_GROUP
14874 def ExpandNames(self):
14875 # We need the new group's UUID here so that we can create and acquire the
14876 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14877 # that it should not check whether the UUID exists in the configuration.
14878 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14879 self.needed_locks = {}
14880 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14882 def CheckPrereq(self):
14883 """Check prerequisites.
14885 This checks that the given group name is not an existing node group
14889 try:
14890 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14891 except errors.OpPrereqError:
14892 pass
14893 else:
14894 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14895 " node group (UUID: %s)" %
14896 (self.op.group_name, existing_uuid),
14897 errors.ECODE_EXISTS)
14899 if self.op.ndparams:
14900 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14902 if self.op.hv_state:
14903 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14905 self.new_hv_state = None
14907 if self.op.disk_state:
14908 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14910 self.new_disk_state = None
14912 if self.op.diskparams:
14913 for templ in constants.DISK_TEMPLATES:
14914 if templ in self.op.diskparams:
14915 utils.ForceDictType(self.op.diskparams[templ],
14916 constants.DISK_DT_TYPES)
14917 self.new_diskparams = self.op.diskparams
14919 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14920 except errors.OpPrereqError, err:
14921 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14922 errors.ECODE_INVAL)
14924 self.new_diskparams = {}
14926 if self.op.ipolicy:
14927 cluster = self.cfg.GetClusterInfo()
14928 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14930 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14931 except errors.ConfigurationError, err:
14932 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14933 errors.ECODE_INVAL)
14935 def BuildHooksEnv(self):
14936 """Build hooks env.
14940 "GROUP_NAME": self.op.group_name,
14943 def BuildHooksNodes(self):
14944 """Build hooks nodes.
14947 mn = self.cfg.GetMasterNode()
14948 return ([mn], [mn])
14950 def Exec(self, feedback_fn):
14951 """Add the node group to the cluster.
14954 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14955 uuid=self.group_uuid,
14956 alloc_policy=self.op.alloc_policy,
14957 ndparams=self.op.ndparams,
14958 diskparams=self.new_diskparams,
14959 ipolicy=self.op.ipolicy,
14960 hv_state_static=self.new_hv_state,
14961 disk_state_static=self.new_disk_state)
14963 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14964 del self.remove_locks[locking.LEVEL_NODEGROUP]
14967 class LUGroupAssignNodes(NoHooksLU):
14968 """Logical unit for assigning nodes to groups.
14973 def ExpandNames(self):
14974 # These raise errors.OpPrereqError on their own:
14975 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14976 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14978 # We want to lock all the affected nodes and groups. We have readily
14979 # available the list of nodes, and the *destination* group. To gather the
14980 # list of "source" groups, we need to fetch node information later on.
14981 self.needed_locks = {
14982 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14983 locking.LEVEL_NODE: self.op.nodes,
14986 def DeclareLocks(self, level):
14987 if level == locking.LEVEL_NODEGROUP:
14988 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14990 # Try to get all affected nodes' groups without having the group or node
14991 # lock yet. Needs verification later in the code flow.
14992 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14994 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14996 def CheckPrereq(self):
14997 """Check prerequisites.
15000 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15001 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15002 frozenset(self.op.nodes))
15004 expected_locks = (set([self.group_uuid]) |
15005 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15006 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15007 if actual_locks != expected_locks:
15008 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15009 " current groups are '%s', used to be '%s'" %
15010 (utils.CommaJoin(expected_locks),
15011 utils.CommaJoin(actual_locks)))
15013 self.node_data = self.cfg.GetAllNodesInfo()
15014 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15015 instance_data = self.cfg.GetAllInstancesInfo()
15017 if self.group is None:
15018 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15019 (self.op.group_name, self.group_uuid))
15021 (new_splits, previous_splits) = \
15022 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15023 for node in self.op.nodes],
15024 self.node_data, instance_data)
15027 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15029 if not self.op.force:
15030 raise errors.OpExecError("The following instances get split by this"
15031 " change and --force was not given: %s" %
15034 self.LogWarning("This operation will split the following instances: %s",
15037 if previous_splits:
15038 self.LogWarning("In addition, these already-split instances continue"
15039 " to be split across groups: %s",
15040 utils.CommaJoin(utils.NiceSort(previous_splits)))
15042 def Exec(self, feedback_fn):
15043 """Assign nodes to a new group.
15046 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15048 self.cfg.AssignGroupNodes(mods)
15050 @staticmethod
15051 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15052 """Check for split instances after a node assignment.
15054 This method considers a series of node assignments as an atomic operation,
15055 and returns information about split instances after applying the set of
15058 In particular, it returns information about newly split instances, and
15059 instances that were already split, and remain so after the change.
15061 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15064 @type changes: list of (node_name, new_group_uuid) pairs.
15065 @param changes: list of node assignments to consider.
15066 @param node_data: a dict with data for all nodes
15067 @param instance_data: a dict with all instances to consider
15068 @rtype: a two-tuple
15069 @return: a list of instances that were previously okay and result split as a
15070 consequence of this change, and a list of instances that were previously
15071 split and this change does not fix.
15074 changed_nodes = dict((node, group) for node, group in changes
15075 if node_data[node].group != group)
15077 all_split_instances = set()
15078 previously_split_instances = set()
15080 def InstanceNodes(instance):
15081 return [instance.primary_node] + list(instance.secondary_nodes)
15083 for inst in instance_data.values():
15084 if inst.disk_template not in constants.DTS_INT_MIRROR:
15087 instance_nodes = InstanceNodes(inst)
15089 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15090 previously_split_instances.add(inst.name)
15092 if len(set(changed_nodes.get(node, node_data[node].group)
15093 for node in instance_nodes)) > 1:
15094 all_split_instances.add(inst.name)
15096 return (list(all_split_instances - previously_split_instances),
15097 list(previously_split_instances & all_split_instances))
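# Illustrative example (added; node and group names invented): assume node n1
# is in group "g1" and node n2 in group "g2".  A DRBD instance with primary n1
# and secondary n2 spans two groups, so it would appear in the second returned
# list if it was already split before the proposed changes, or in the first
# list if the proposed changes are what split it.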
15100 class _GroupQuery(_QueryBase):
15101 FIELDS = query.GROUP_FIELDS
15103 def ExpandNames(self, lu):
15104 lu.needed_locks = {}
15106 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15107 self._cluster = lu.cfg.GetClusterInfo()
15108 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15111 self.wanted = [name_to_uuid[name]
15112 for name in utils.NiceSort(name_to_uuid.keys())]
15114 # Accept names to be either names or UUIDs.
15117 all_uuid = frozenset(self._all_groups.keys())
15119 for name in self.names:
15120 if name in all_uuid:
15121 self.wanted.append(name)
15122 elif name in name_to_uuid:
15123 self.wanted.append(name_to_uuid[name])
15125 missing.append(name)
15128 raise errors.OpPrereqError("Some groups do not exist: %s" %
15129 utils.CommaJoin(missing),
15130 errors.ECODE_NOENT)
15132 def DeclareLocks(self, lu, level):
15135 def _GetQueryData(self, lu):
15136 """Computes the list of node groups and their attributes.
15139 do_nodes = query.GQ_NODE in self.requested_data
15140 do_instances = query.GQ_INST in self.requested_data
15142 group_to_nodes = None
15143 group_to_instances = None
15145 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15146 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15147 # latter GetAllInstancesInfo() is not enough, for we have to go through
15148 # instance->node. Hence, we will need to process nodes even if we only need
15149 # instance information.
15150 if do_nodes or do_instances:
15151 all_nodes = lu.cfg.GetAllNodesInfo()
15152 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15155 for node in all_nodes.values():
15156 if node.group in group_to_nodes:
15157 group_to_nodes[node.group].append(node.name)
15158 node_to_group[node.name] = node.group
15161 all_instances = lu.cfg.GetAllInstancesInfo()
15162 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15164 for instance in all_instances.values():
15165 node = instance.primary_node
15166 if node in node_to_group:
15167 group_to_instances[node_to_group[node]].append(instance.name)
15170 # Do not pass on node information if it was not requested.
15171 group_to_nodes = None
15173 return query.GroupQueryData(self._cluster,
15174 [self._all_groups[uuid]
15175 for uuid in self.wanted],
15176 group_to_nodes, group_to_instances,
15177 query.GQ_DISKPARAMS in self.requested_data)
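# Descriptive note (added): when node or instance data is requested, the
# mappings built above look roughly like (illustrative values):
#   group_to_nodes     = {"<group uuid>": ["node1", "node2"]}
#   group_to_instances = {"<group uuid>": ["inst1"]}
# keyed by the UUIDs in self.wanted; instances are attributed to the group of
# their primary node.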
15180 class LUGroupQuery(NoHooksLU):
15181 """Logical unit for querying node groups.
15186 def CheckArguments(self):
15187 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15188 self.op.output_fields, False)
15190 def ExpandNames(self):
15191 self.gq.ExpandNames(self)
15193 def DeclareLocks(self, level):
15194 self.gq.DeclareLocks(self, level)
15196 def Exec(self, feedback_fn):
15197 return self.gq.OldStyleQuery(self)
15200 class LUGroupSetParams(LogicalUnit):
15201 """Modifies the parameters of a node group.
15204 HPATH = "group-modify"
15205 HTYPE = constants.HTYPE_GROUP
15208 def CheckArguments(self):
15211 self.op.diskparams,
15212 self.op.alloc_policy,
15214 self.op.disk_state,
15218 if all_changes.count(None) == len(all_changes):
15219 raise errors.OpPrereqError("Please pass at least one modification",
15220 errors.ECODE_INVAL)
15222 def ExpandNames(self):
15223 # This raises errors.OpPrereqError on its own:
15224 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15226 self.needed_locks = {
15227 locking.LEVEL_INSTANCE: [],
15228 locking.LEVEL_NODEGROUP: [self.group_uuid],
15231 self.share_locks[locking.LEVEL_INSTANCE] = 1
15233 def DeclareLocks(self, level):
15234 if level == locking.LEVEL_INSTANCE:
15235 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15237 # Lock instances optimistically, needs verification once group lock has
15239 self.needed_locks[locking.LEVEL_INSTANCE] = \
15240 self.cfg.GetNodeGroupInstances(self.group_uuid)
15243 def _UpdateAndVerifyDiskParams(old, new):
15244 """Updates and verifies disk parameters.
15247 new_params = _GetUpdatedParams(old, new)
15248 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15249 return new_params
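# Illustrative sketch (added; the parameter name is an example only): a call
# like _UpdateAndVerifyDiskParams({"resync-rate": 1000}, {"resync-rate": 2000})
# lays the new values over the old ones and type-checks the merged dict
# against constants.DISK_DT_TYPES before returning it.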
15251 def CheckPrereq(self):
15252 """Check prerequisites.
15255 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15257 # Check if locked instances are still correct
15258 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15260 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15261 cluster = self.cfg.GetClusterInfo()
15263 if self.group is None:
15264 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15265 (self.op.group_name, self.group_uuid))
15267 if self.op.ndparams:
15268 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15269 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15270 self.new_ndparams = new_ndparams
15272 if self.op.diskparams:
15273 diskparams = self.group.diskparams
15274 uavdp = self._UpdateAndVerifyDiskParams
15275 # For each disktemplate subdict update and verify the values
15276 new_diskparams = dict((dt,
15277 uavdp(diskparams.get(dt, {}),
15278 self.op.diskparams[dt]))
15279 for dt in constants.DISK_TEMPLATES
15280 if dt in self.op.diskparams)
15281 # Now that all subdicts of diskparams are ready, let's merge the
15282 # updated subdicts into the actual dict
15283 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15285 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15286 except errors.OpPrereqError, err:
15287 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15288 errors.ECODE_INVAL)
15290 if self.op.hv_state:
15291 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15292 self.group.hv_state_static)
15294 if self.op.disk_state:
15295 self.new_disk_state = \
15296 _MergeAndVerifyDiskState(self.op.disk_state,
15297 self.group.disk_state_static)
15299 if self.op.ipolicy:
15300 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15304 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15305 inst_filter = lambda inst: inst.name in owned_instances
15306 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15307 gmi = ganeti.masterd.instance
15309 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15311 new_ipolicy, instances)
15314 self.LogWarning("After the ipolicy change the following instances"
15315 " violate them: %s",
15316 utils.CommaJoin(violations))
15318 def BuildHooksEnv(self):
15319 """Build hooks env.
15323 "GROUP_NAME": self.op.group_name,
15324 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15327 def BuildHooksNodes(self):
15328 """Build hooks nodes.
15331 mn = self.cfg.GetMasterNode()
15332 return ([mn], [mn])
15334 def Exec(self, feedback_fn):
15335 """Modifies the node group.
15338 result = []
15340 if self.op.ndparams:
15341 self.group.ndparams = self.new_ndparams
15342 result.append(("ndparams", str(self.group.ndparams)))
15344 if self.op.diskparams:
15345 self.group.diskparams = self.new_diskparams
15346 result.append(("diskparams", str(self.group.diskparams)))
15348 if self.op.alloc_policy:
15349 self.group.alloc_policy = self.op.alloc_policy
15351 if self.op.hv_state:
15352 self.group.hv_state_static = self.new_hv_state
15354 if self.op.disk_state:
15355 self.group.disk_state_static = self.new_disk_state
15357 if self.op.ipolicy:
15358 self.group.ipolicy = self.new_ipolicy
15360 self.cfg.Update(self.group, feedback_fn)
15362 return result
15364 class LUGroupRemove(LogicalUnit):
15365 HPATH = "group-remove"
15366 HTYPE = constants.HTYPE_GROUP
15369 def ExpandNames(self):
15370 # This will raise errors.OpPrereqError on its own:
15371 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15372 self.needed_locks = {
15373 locking.LEVEL_NODEGROUP: [self.group_uuid],
15376 def CheckPrereq(self):
15377 """Check prerequisites.
15379 This checks that the given group name exists as a node group, that it is
15380 empty (i.e., contains no nodes), and that it is not the last group of the
15381 cluster.
15384 # Verify that the group is empty.
15385 group_nodes = [node.name
15386 for node in self.cfg.GetAllNodesInfo().values()
15387 if node.group == self.group_uuid]
15389 if group_nodes:
15390 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15391 " nodes: %s" %
15392 (self.op.group_name,
15393 utils.CommaJoin(utils.NiceSort(group_nodes))),
15394 errors.ECODE_STATE)
15396 # Verify the cluster would not be left group-less.
15397 if len(self.cfg.GetNodeGroupList()) == 1:
15398 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15399 " removed" % self.op.group_name,
15400 errors.ECODE_STATE)
15402 def BuildHooksEnv(self):
15403 """Build hooks env.
15407 "GROUP_NAME": self.op.group_name,
15410 def BuildHooksNodes(self):
15411 """Build hooks nodes.
15414 mn = self.cfg.GetMasterNode()
15415 return ([mn], [mn])
15417 def Exec(self, feedback_fn):
15418 """Remove the node group.
15422 self.cfg.RemoveNodeGroup(self.group_uuid)
15423 except errors.ConfigurationError:
15424 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15425 (self.op.group_name, self.group_uuid))
15427 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15430 class LUGroupRename(LogicalUnit):
15431 HPATH = "group-rename"
15432 HTYPE = constants.HTYPE_GROUP
15435 def ExpandNames(self):
15436 # This raises errors.OpPrereqError on its own:
15437 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15439 self.needed_locks = {
15440 locking.LEVEL_NODEGROUP: [self.group_uuid],
15443 def CheckPrereq(self):
15444 """Check prerequisites.
15446 Ensures requested new name is not yet used.
15449 try:
15450 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15451 except errors.OpPrereqError:
15452 pass
15453 else:
15454 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15455 " node group (UUID: %s)" %
15456 (self.op.new_name, new_name_uuid),
15457 errors.ECODE_EXISTS)
15459 def BuildHooksEnv(self):
15460 """Build hooks env.
15464 "OLD_NAME": self.op.group_name,
15465 "NEW_NAME": self.op.new_name,
15468 def BuildHooksNodes(self):
15469 """Build hooks nodes.
15472 mn = self.cfg.GetMasterNode()
15474 all_nodes = self.cfg.GetAllNodesInfo()
15475 all_nodes.pop(mn, None)
15477 run_nodes = [mn]
15478 run_nodes.extend(node.name for node in all_nodes.values()
15479 if node.group == self.group_uuid)
15481 return (run_nodes, run_nodes)
15483 def Exec(self, feedback_fn):
15484 """Rename the node group.
15487 group = self.cfg.GetNodeGroup(self.group_uuid)
15490 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15491 (self.op.group_name, self.group_uuid))
15493 group.name = self.op.new_name
15494 self.cfg.Update(group, feedback_fn)
15496 return self.op.new_name
15499 class LUGroupEvacuate(LogicalUnit):
15500 HPATH = "group-evacuate"
15501 HTYPE = constants.HTYPE_GROUP
15504 def ExpandNames(self):
15505 # This raises errors.OpPrereqError on its own:
15506 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15508 if self.op.target_groups:
15509 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15510 self.op.target_groups)
15512 self.req_target_uuids = []
15514 if self.group_uuid in self.req_target_uuids:
15515 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15516 " as a target group (targets are %s)" %
15517 (self.group_uuid,
15518 utils.CommaJoin(self.req_target_uuids)),
15519 errors.ECODE_INVAL)
15521 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15523 self.share_locks = _ShareAll()
15524 self.needed_locks = {
15525 locking.LEVEL_INSTANCE: [],
15526 locking.LEVEL_NODEGROUP: [],
15527 locking.LEVEL_NODE: [],
15530 def DeclareLocks(self, level):
15531 if level == locking.LEVEL_INSTANCE:
15532 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15534 # Lock instances optimistically, needs verification once node and group
15535 # locks have been acquired
15536 self.needed_locks[locking.LEVEL_INSTANCE] = \
15537 self.cfg.GetNodeGroupInstances(self.group_uuid)
15539 elif level == locking.LEVEL_NODEGROUP:
15540 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15542 if self.req_target_uuids:
15543 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15545 # Lock all groups used by instances optimistically; this requires going
15546 # via the node before it's locked, requiring verification later on
15547 lock_groups.update(group_uuid
15548 for instance_name in
15549 self.owned_locks(locking.LEVEL_INSTANCE)
15551 self.cfg.GetInstanceNodeGroups(instance_name))
15553 # No target groups, need to lock all of them
15554 lock_groups = locking.ALL_SET
15556 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15558 elif level == locking.LEVEL_NODE:
15559 # This will only lock the nodes in the group to be evacuated which
15560 # contain actual instances
15561 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15562 self._LockInstancesNodes()
15564 # Lock all nodes in group to be evacuated and target groups
15565 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15566 assert self.group_uuid in owned_groups
15567 member_nodes = [node_name
15568 for group in owned_groups
15569 for node_name in self.cfg.GetNodeGroup(group).members]
15570 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15572 def CheckPrereq(self):
15573 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15574 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15575 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15577 assert owned_groups.issuperset(self.req_target_uuids)
15578 assert self.group_uuid in owned_groups
15580 # Check if locked instances are still correct
15581 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15583 # Get instance information
15584 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15586 # Check if node groups for locked instances are still correct
15587 _CheckInstancesNodeGroups(self.cfg, self.instances,
15588 owned_groups, owned_nodes, self.group_uuid)
15590 if self.req_target_uuids:
15591 # User requested specific target groups
15592 self.target_uuids = self.req_target_uuids
15594 # All groups except the one to be evacuated are potential targets
15595 self.target_uuids = [group_uuid for group_uuid in owned_groups
15596 if group_uuid != self.group_uuid]
15598 if not self.target_uuids:
15599 raise errors.OpPrereqError("There are no possible target groups",
15600 errors.ECODE_INVAL)
15602 def BuildHooksEnv(self):
15603 """Build hooks env.
15607 "GROUP_NAME": self.op.group_name,
15608 "TARGET_GROUPS": " ".join(self.target_uuids),
15611 def BuildHooksNodes(self):
15612 """Build hooks nodes.
15615 mn = self.cfg.GetMasterNode()
15617 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15619 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15621 return (run_nodes, run_nodes)
15623 def Exec(self, feedback_fn):
15624 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15626 assert self.group_uuid not in self.target_uuids
15628 req = iallocator.IAReqGroupChange(instances=instances,
15629 target_groups=self.target_uuids)
15630 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15632 ial.Run(self.op.iallocator)
15634 if not ial.success:
15635 raise errors.OpPrereqError("Can't compute group evacuation using"
15636 " iallocator '%s': %s" %
15637 (self.op.iallocator, ial.info),
15638 errors.ECODE_NORES)
15640 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15642 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15643 len(jobs), self.op.group_name)
15645 return ResultWithJobs(jobs)
15648 class TagsLU(NoHooksLU): # pylint: disable=W0223
15649 """Generic tags LU.
15651 This is an abstract class which is the parent of all the other tags LUs.
15654 def ExpandNames(self):
15655 self.group_uuid = None
15656 self.needed_locks = {}
15658 if self.op.kind == constants.TAG_NODE:
15659 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15660 lock_level = locking.LEVEL_NODE
15661 lock_name = self.op.name
15662 elif self.op.kind == constants.TAG_INSTANCE:
15663 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15664 lock_level = locking.LEVEL_INSTANCE
15665 lock_name = self.op.name
15666 elif self.op.kind == constants.TAG_NODEGROUP:
15667 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15668 lock_level = locking.LEVEL_NODEGROUP
15669 lock_name = self.group_uuid
15670 elif self.op.kind == constants.TAG_NETWORK:
15671 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15672 lock_level = locking.LEVEL_NETWORK
15673 lock_name = self.network_uuid
15674 else:
15675 lock_level = None
15676 lock_name = None
15678 if lock_level and getattr(self.op, "use_locking", True):
15679 self.needed_locks[lock_level] = lock_name
15681 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15682 # not possible to acquire the BGL based on opcode parameters)
15684 def CheckPrereq(self):
15685 """Check prerequisites.
15688 if self.op.kind == constants.TAG_CLUSTER:
15689 self.target = self.cfg.GetClusterInfo()
15690 elif self.op.kind == constants.TAG_NODE:
15691 self.target = self.cfg.GetNodeInfo(self.op.name)
15692 elif self.op.kind == constants.TAG_INSTANCE:
15693 self.target = self.cfg.GetInstanceInfo(self.op.name)
15694 elif self.op.kind == constants.TAG_NODEGROUP:
15695 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15696 elif self.op.kind == constants.TAG_NETWORK:
15697 self.target = self.cfg.GetNetwork(self.network_uuid)
15699 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15700 str(self.op.kind), errors.ECODE_INVAL)
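# Descriptive note (added): after CheckPrereq, self.target is the
# configuration object whose tags the derived LUs read or modify: the cluster
# object for TAG_CLUSTER, a node object for TAG_NODE, and an instance, node
# group or network object for the remaining kinds.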
15703 class LUTagsGet(TagsLU):
15704 """Returns the tags of a given object.
15709 def ExpandNames(self):
15710 TagsLU.ExpandNames(self)
15712 # Share locks as this is only a read operation
15713 self.share_locks = _ShareAll()
15715 def Exec(self, feedback_fn):
15716 """Returns the tag list.
15719 return list(self.target.GetTags())
15722 class LUTagsSearch(NoHooksLU):
15723 """Searches the tags for a given pattern.
15728 def ExpandNames(self):
15729 self.needed_locks = {}
15731 def CheckPrereq(self):
15732 """Check prerequisites.
15734 This checks the pattern passed for validity by compiling it.
15738 self.re = re.compile(self.op.pattern)
15739 except re.error, err:
15740 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15741 (self.op.pattern, err), errors.ECODE_INVAL)
15743 def Exec(self, feedback_fn):
15744 """Returns the tag list.
15747 cfg = self.cfg
15748 tgts = [("/cluster", cfg.GetClusterInfo())]
15749 ilist = cfg.GetAllInstancesInfo().values()
15750 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15751 nlist = cfg.GetAllNodesInfo().values()
15752 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15753 tgts.extend(("/nodegroup/%s" % n.name, n)
15754 for n in cfg.GetAllNodeGroupsInfo().values())
15755 results = []
15756 for path, target in tgts:
15757 for tag in target.GetTags():
15758 if self.re.search(tag):
15759 results.append((path, tag))
15761 return results
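# Descriptive note (added): the result is a list of (path, tag) pairs, e.g.
# (illustrative values):
#   [("/instances/inst1.example.com", "webserver"), ("/cluster", "production")]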
15763 class LUTagsSet(TagsLU):
15764 """Sets a tag on a given object.
15769 def CheckPrereq(self):
15770 """Check prerequisites.
15772 This checks the type and length of the tag name and value.
15775 TagsLU.CheckPrereq(self)
15776 for tag in self.op.tags:
15777 objects.TaggableObject.ValidateTag(tag)
15779 def Exec(self, feedback_fn):
15783 try:
15784 for tag in self.op.tags:
15785 self.target.AddTag(tag)
15786 except errors.TagError, err:
15787 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15788 self.cfg.Update(self.target, feedback_fn)
15791 class LUTagsDel(TagsLU):
15792 """Delete a list of tags from a given object.
15797 def CheckPrereq(self):
15798 """Check prerequisites.
15800 This checks that we have the given tag.
15803 TagsLU.CheckPrereq(self)
15804 for tag in self.op.tags:
15805 objects.TaggableObject.ValidateTag(tag)
15806 del_tags = frozenset(self.op.tags)
15807 cur_tags = self.target.GetTags()
15809 diff_tags = del_tags - cur_tags
15810 if diff_tags:
15811 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15812 raise errors.OpPrereqError("Tag(s) %s not found" %
15813 (utils.CommaJoin(diff_names), ),
15814 errors.ECODE_NOENT)
15816 def Exec(self, feedback_fn):
15817 """Remove the tag from the object.
15820 for tag in self.op.tags:
15821 self.target.RemoveTag(tag)
15822 self.cfg.Update(self.target, feedback_fn)
15825 class LUTestDelay(NoHooksLU):
15826 """Sleep for a specified amount of time.
15828 This LU sleeps on the master and/or nodes for a specified amount of
15829 time.
15834 def ExpandNames(self):
15835 """Expand names and set required locks.
15837 This expands the node list, if any.
15840 self.needed_locks = {}
15841 if self.op.on_nodes:
15842 # _GetWantedNodes can be used here, but is not always appropriate to use
15843 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15844 # more information.
15845 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15846 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15848 def _TestDelay(self):
15849 """Do the actual sleep.
15852 if self.op.on_master:
15853 if not utils.TestDelay(self.op.duration):
15854 raise errors.OpExecError("Error during master delay test")
15855 if self.op.on_nodes:
15856 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15857 for node, node_result in result.items():
15858 node_result.Raise("Failure during rpc call to node %s" % node)
15860 def Exec(self, feedback_fn):
15861 """Execute the test delay opcode, with the wanted repetitions.
15864 if self.op.repeat == 0:
15865 self._TestDelay()
15866 else:
15867 top_value = self.op.repeat - 1
15868 for i in range(self.op.repeat):
15869 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15870 self._TestDelay()
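# Illustrative sketch (added), using the opcode fields referenced above
# (duration, on_master, on_nodes, repeat): a five second delay on the master
# and two nodes, repeated twice, could be requested with
#   opcodes.OpTestDelay(duration=5.0, on_master=True,
#                       on_nodes=["node1.example.com", "node2.example.com"],
#                       repeat=2)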
15873 class LURestrictedCommand(NoHooksLU):
15874 """Logical unit for executing restricted commands.
15879 def ExpandNames(self):
15881 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15883 self.needed_locks = {
15884 locking.LEVEL_NODE: self.op.nodes,
15885 }
15886 self.share_locks = {
15887 locking.LEVEL_NODE: not self.op.use_locking,
15888 }
15890 def CheckPrereq(self):
15891 """Check prerequisites.
15895 def Exec(self, feedback_fn):
15896 """Execute restricted command and return output.
15899 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15901 # Check if correct locks are held
15902 assert set(self.op.nodes).issubset(owned_nodes)
15904 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15906 result = []
15908 for node_name in self.op.nodes:
15909 nres = rpcres[node_name]
15910 if nres.fail_msg:
15911 msg = ("Command '%s' on node '%s' failed: %s" %
15912 (self.op.command, node_name, nres.fail_msg))
15913 result.append((False, msg))
15914 else:
15915 result.append((True, nres.payload))
15917 return result
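# Descriptive note (added): the result is a list with one
# (success, payload_or_message) pair per requested node, in the order of
# self.op.nodes, e.g. (illustrative values):
#   [(True, "command output"), (False, "Command 'x' on node 'y' failed: ...")]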
15920 class LUTestJqueue(NoHooksLU):
15921 """Utility LU to test some aspects of the job queue.
15926 # Must be lower than default timeout for WaitForJobChange to see whether it
15927 # notices changed jobs
15928 _CLIENT_CONNECT_TIMEOUT = 20.0
15929 _CLIENT_CONFIRM_TIMEOUT = 60.0
15932 def _NotifyUsingSocket(cls, cb, errcls):
15933 """Opens a Unix socket and waits for another program to connect.
15936 @param cb: Callback to send socket name to client
15937 @type errcls: class
15938 @param errcls: Exception class to use for errors
15941 # Using a temporary directory as there's no easy way to create temporary
15942 # sockets without writing a custom loop around tempfile.mktemp and
15943 # socket.bind
15944 tmpdir = tempfile.mkdtemp()
15946 tmpsock = utils.PathJoin(tmpdir, "sock")
15948 logging.debug("Creating temporary socket at %s", tmpsock)
15949 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15954 # Send details to client
15957 # Wait for client to connect before continuing
15958 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15960 (conn, _) = sock.accept()
15961 except socket.error, err:
15962 raise errcls("Client didn't connect in time (%s)" % err)
15966 # Remove as soon as client is connected
15967 shutil.rmtree(tmpdir)
15969 # Wait for client to close
15972 # pylint: disable=E1101
15973 # Instance of '_socketobject' has no ... member
15974 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15976 except socket.error, err:
15977 raise errcls("Client failed to confirm notification (%s)" % err)
15981 def _SendNotification(self, test, arg, sockname):
15982 """Sends a notification to the client.
15985 @param test: Test name
15986 @param arg: Test argument (depends on test)
15987 @type sockname: string
15988 @param sockname: Socket path
15991 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15993 def _Notify(self, prereq, test, arg):
15994 """Notifies the client of a test.
15997 @param prereq: Whether this is a prereq-phase test
15999 @param test: Test name
16000 @param arg: Test argument (depends on test)
16004 errcls = errors.OpPrereqError
16006 errcls = errors.OpExecError
16008 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16012 def CheckArguments(self):
16013 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16014 self.expandnames_calls = 0
16016 def ExpandNames(self):
16017 checkargs_calls = getattr(self, "checkargs_calls", 0)
16018 if checkargs_calls < 1:
16019 raise errors.ProgrammerError("CheckArguments was not called")
16021 self.expandnames_calls += 1
16023 if self.op.notify_waitlock:
16024 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16026 self.LogInfo("Expanding names")
16028 # Get lock on master node (just to get a lock, not for a particular reason)
16029 self.needed_locks = {
16030 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16033 def Exec(self, feedback_fn):
16034 if self.expandnames_calls < 1:
16035 raise errors.ProgrammerError("ExpandNames was not called")
16037 if self.op.notify_exec:
16038 self._Notify(False, constants.JQT_EXEC, None)
16040 self.LogInfo("Executing")
16042 if self.op.log_messages:
16043 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16044 for idx, msg in enumerate(self.op.log_messages):
16045 self.LogInfo("Sending log message %s", idx + 1)
16046 feedback_fn(constants.JQT_MSGPREFIX + msg)
16047 # Report how many test messages have been sent
16048 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16051 raise errors.OpExecError("Opcode failure was requested")
16056 class LUTestAllocator(NoHooksLU):
16057 """Run allocator tests.
16059 This LU runs the allocator tests
16062 def CheckPrereq(self):
16063 """Check prerequisites.
16065 This checks the opcode parameters depending on the direction and mode of the test.
16068 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16069 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16070 for attr in ["memory", "disks", "disk_template",
16071 "os", "tags", "nics", "vcpus"]:
16072 if not hasattr(self.op, attr):
16073 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16074 attr, errors.ECODE_INVAL)
16075 iname = self.cfg.ExpandInstanceName(self.op.name)
16076 if iname is not None:
16077 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16078 iname, errors.ECODE_EXISTS)
16079 if not isinstance(self.op.nics, list):
16080 raise errors.OpPrereqError("Invalid parameter 'nics'",
16081 errors.ECODE_INVAL)
16082 if not isinstance(self.op.disks, list):
16083 raise errors.OpPrereqError("Invalid parameter 'disks'",
16084 errors.ECODE_INVAL)
16085 for row in self.op.disks:
16086 if (not isinstance(row, dict) or
16087 constants.IDISK_SIZE not in row or
16088 not isinstance(row[constants.IDISK_SIZE], int) or
16089 constants.IDISK_MODE not in row or
16090 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16091 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16092 " parameter", errors.ECODE_INVAL)
16093 if self.op.hypervisor is None:
16094 self.op.hypervisor = self.cfg.GetHypervisorType()
16095 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16096 fname = _ExpandInstanceName(self.cfg, self.op.name)
16097 self.op.name = fname
16098 self.relocate_from = \
16099 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16100 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16101 constants.IALLOCATOR_MODE_NODE_EVAC):
16102 if not self.op.instances:
16103 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16104 self.op.instances = _GetWantedInstances(self, self.op.instances)
16105 else:
16106 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16107 self.op.mode, errors.ECODE_INVAL)
16109 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16110 if self.op.iallocator is None:
16111 raise errors.OpPrereqError("Missing allocator name",
16112 errors.ECODE_INVAL)
16113 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16114 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16115 self.op.direction, errors.ECODE_INVAL)
16117 def Exec(self, feedback_fn):
16118 """Run the allocator test.
16121 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16122 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16123 memory=self.op.memory,
16124 disks=self.op.disks,
16125 disk_template=self.op.disk_template,
16129 vcpus=self.op.vcpus,
16130 spindle_use=self.op.spindle_use,
16131 hypervisor=self.op.hypervisor)
16132 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16133 req = iallocator.IAReqRelocate(name=self.op.name,
16134 relocate_from=list(self.relocate_from))
16135 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16136 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16137 target_groups=self.op.target_groups)
16138 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16139 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16140 evac_mode=self.op.evac_mode)
16141 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16142 disk_template = self.op.disk_template
16143 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16144 memory=self.op.memory,
16145 disks=self.op.disks,
16146 disk_template=disk_template,
16150 vcpus=self.op.vcpus,
16151 spindle_use=self.op.spindle_use,
16152 hypervisor=self.op.hypervisor)
16153 for idx in range(self.op.count)]
16154 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16155 else:
16156 raise errors.ProgrammerError("Unhandled mode %s in"
16157 " LUTestAllocator.Exec", self.op.mode)
16159 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16160 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16161 result = ial.in_text
16162 else:
16163 ial.Run(self.op.iallocator, validate=False)
16164 result = ial.out_text
16166 return result
16168 class LUNetworkAdd(LogicalUnit):
16169 """Logical unit for creating networks.
16172 HPATH = "network-add"
16173 HTYPE = constants.HTYPE_NETWORK
16176 def BuildHooksNodes(self):
16177 """Build hooks nodes.
16180 mn = self.cfg.GetMasterNode()
16181 return ([mn], [mn])
16183 def CheckArguments(self):
16184 if self.op.mac_prefix:
16185 self.op.mac_prefix = \
16186 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16188 def ExpandNames(self):
16189 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16191 if self.op.conflicts_check:
16192 self.share_locks[locking.LEVEL_NODE] = 1
16193 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16194 self.needed_locks = {
16195 locking.LEVEL_NODE: locking.ALL_SET,
16196 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16199 self.needed_locks = {}
16201 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16203 def CheckPrereq(self):
16204 if self.op.network is None:
16205 raise errors.OpPrereqError("Network must be given",
16206 errors.ECODE_INVAL)
16208 uuid = self.cfg.LookupNetwork(self.op.network_name)
16210 if uuid:
16211 raise errors.OpPrereqError(("Network with name '%s' already exists" %
16212 self.op.network_name), errors.ECODE_EXISTS)
16214 # Check tag validity
16215 for tag in self.op.tags:
16216 objects.TaggableObject.ValidateTag(tag)
16218 def BuildHooksEnv(self):
16219 """Build hooks env.
16223 "name": self.op.network_name,
16224 "subnet": self.op.network,
16225 "gateway": self.op.gateway,
16226 "network6": self.op.network6,
16227 "gateway6": self.op.gateway6,
16228 "mac_prefix": self.op.mac_prefix,
16229 "network_type": self.op.network_type,
16230 "tags": self.op.tags,
16232 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16234 def Exec(self, feedback_fn):
16235 """Add the ip pool to the cluster.
16238 nobj = objects.Network(name=self.op.network_name,
16239 network=self.op.network,
16240 gateway=self.op.gateway,
16241 network6=self.op.network6,
16242 gateway6=self.op.gateway6,
16243 mac_prefix=self.op.mac_prefix,
16244 network_type=self.op.network_type,
16245 uuid=self.network_uuid,
16246 family=constants.IP4_VERSION)
16247 # Initialize the associated address pool
16249 pool = network.AddressPool.InitializeNetwork(nobj)
16250 except errors.AddressPoolError, e:
16251 raise errors.OpExecError("Cannot create IP pool for this network: %s" % e)
16253 # Check if we need to reserve the nodes and the cluster master IP
16254 # These may not be allocated to any instances in routed mode, as
16255 # they wouldn't function anyway.
16256 if self.op.conflicts_check:
16257 for node in self.cfg.GetAllNodesInfo().values():
16258 for ip in [node.primary_ip, node.secondary_ip]:
16260 if pool.Contains(ip):
16262 self.LogInfo("Reserved IP address of node '%s' (%s)",
16264 except errors.AddressPoolError:
16265 self.LogWarning("Cannot reserve IP address of node '%s' (%s)",
16268 master_ip = self.cfg.GetClusterInfo().master_ip
16270 if pool.Contains(master_ip):
16271 pool.Reserve(master_ip)
16272 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16273 except errors.AddressPoolError:
16274 self.LogWarning("Cannot reserve cluster master IP address (%s)",
16277 if self.op.add_reserved_ips:
16278 for ip in self.op.add_reserved_ips:
16280 pool.Reserve(ip, external=True)
16281 except errors.AddressPoolError, e:
16282 raise errors.OpExecError("Cannot reserve IP %s. %s " % (ip, e))
16285 for tag in self.op.tags:
16286 nobj.AddTag(tag)
16288 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16289 del self.remove_locks[locking.LEVEL_NETWORK]
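# Illustrative sketch (added), using the opcode fields referenced above
# (network_name, network, gateway, add_reserved_ips, conflicts_check, tags):
# a /24 IPv4 network with a gateway and one externally reserved address could
# be added with
#   opcodes.OpNetworkAdd(network_name="net1", network="192.0.2.0/24",
#                        gateway="192.0.2.1",
#                        add_reserved_ips=["192.0.2.10"],
#                        conflicts_check=True, tags=[])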
16292 class LUNetworkRemove(LogicalUnit):
16293 HPATH = "network-remove"
16294 HTYPE = constants.HTYPE_NETWORK
16297 def ExpandNames(self):
16298 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16300 if not self.network_uuid:
16301 raise errors.OpPrereqError(("Network '%s' not found" %
16302 self.op.network_name), errors.ECODE_NOENT)
16304 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16305 self.needed_locks = {
16306 locking.LEVEL_NETWORK: [self.network_uuid],
16307 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16310 def CheckPrereq(self):
16311 """Check prerequisites.
16313 This checks that the given network name exists as a network and that it is
16314 not connected to (i.e. used by) any node group.
16318 # Verify that the network is not connected.
16319 node_groups = [group.name
16320 for group in self.cfg.GetAllNodeGroupsInfo().values()
16321 if self.network_uuid in group.networks]
16323 if node_groups:
16324 self.LogWarning("Network '%s' is connected to the following"
16325 " node groups: %s" %
16326 (self.op.network_name,
16327 utils.CommaJoin(utils.NiceSort(node_groups))))
16328 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16330 def BuildHooksEnv(self):
16331 """Build hooks env.
16335 "NETWORK_NAME": self.op.network_name,
16338 def BuildHooksNodes(self):
16339 """Build hooks nodes.
16342 mn = self.cfg.GetMasterNode()
16343 return ([mn], [mn])
16345 def Exec(self, feedback_fn):
16346 """Remove the network.
16350 self.cfg.RemoveNetwork(self.network_uuid)
16351 except errors.ConfigurationError:
16352 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16353 (self.op.network_name, self.network_uuid))


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError(("Network '%s' not found" %
                                  self.op.network_name), errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.network_type = self.network.network_type
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)
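
    # For each parameter, the special value constants.VALUE_NONE clears the
    # current setting, while any other value replaces it.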
    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.network_type:
      if self.op.network_type == constants.VALUE_NONE:
        self.network_type = None
      else:
        self.network_type = self.op.network_type

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "network_type": self.network_type,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    # extend cfg.ReserveIp/ReleaseIp with the external flag
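    # Changing the gateway swaps the external reservation: the new gateway is
    # reserved in the pool and the old one released, before the new value is
    # stored on the network object.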
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    if self.op.network_type:
      self.network.network_type = self.network_type

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)


class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking
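
    # Translate any requested network names into UUIDs; both the locks and
    # the configuration are keyed by UUID, not by name.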
    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])
            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network:
            net_uuid = name_to_uuid[nic.network]
            if net_uuid in network_uuids:
              network_to_instances[net_uuid].append(instance.name)
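
    # Address pool statistics (free/reserved counts and the reservation map)
    # are computed on the fly from each network's address pool.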
    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
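
  # Example of the dictionary returned by _GetStats (values are illustrative
  # only):
  #   {"free_count": 250, "reserved_count": 6, "map": "XX.........X....",
  #    "external_reservations": "10.0.0.1, 10.0.0.254"}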


class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True
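
    # When requested, verify that no NIC of an instance in this node group
    # already uses an IP address that falls inside the network being
    # connected.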
    if self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to")

  def Exec(self, feedback_fn):
    if self.connected:
      return

    self.group.networks[self.network_uuid] = self.netparams
    self.cfg.Update(self.group, feedback_fn)


def _NetworkConflictCheck(lu, check_fn, action):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  # Check if locked instances are still correct
  owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
  _CheckNodeGroupInstances(lu.cfg, lu.group_uuid, owned_instances)
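
  # Collect, per instance, the (NIC index, IP address) pairs for which
  # check_fn reports a conflict.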
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(owned_instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)


class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    if self.network_uuid is None:
      raise errors.OpPrereqError("Network '%s' does not exist" %
                                 self.network_name, errors.ECODE_NOENT)

    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
    if self.group_uuid is None:
      raise errors.OpPrereqError("Group '%s' does not exist" %
                                 self.group_name, errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False
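
    # When requested, refuse to disconnect while instances in this node group
    # still have NICs configured on the network.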
    if self.op.conflicts_check:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_name,
                            "disconnect from")

  def Exec(self, feedback_fn):
    if not self.connected:
      return

    del self.group.networks[self.network_uuid]
    self.cfg.Update(self.group, feedback_fn)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }
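
# Every query resource that can be accessed through an opcode
# (constants.QR_VIA_OP) must have an implementation in the map above.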
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)


def _CheckForConflictingIp(lu, ip, node):
  """In case of conflicting IP address raise error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)
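
  # No network connected to the node's group claims this address.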
  return (None, None)