4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
#: Admin states in which an instance is considered down
INSTANCE_DOWN = [constants.ADMINST_DOWN]
#: Admin states in which an instance is considered "online" for query purposes
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
#: Admin states in which an instance is not running
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    # Any extra keyword arguments are carried along as additional return values
    self.other = kwargs
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # Subclasses are expected to override these (see docstring above)
  HPATH = None
  HTYPE = None
  REQ_BGL = True
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    By default this is a no-op; subclasses override it as needed.

    """
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of L{ganeti.locking.LEVELS}

    """
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError
  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused argument and could
350 # be a function warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 If should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we're really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # No hooks path/type: the hooks runner skips LUs with HPATH None
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Initializes this class.

    @param lu: the LogicalUnit this tasklet belongs to

    """
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklets.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
class _QueryBase(object):
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  #: Field to sort by
  SORT_FIELD = "name"

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield=self.SORT_FIELD)
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @returns The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  diskparams = cfg.GetInstanceDiskParams(instance)
  return rpc.AnnotateDiskParams(instance.disk_template, devs, diskparams)
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  ndparams = cfg.GetNdParams(node)
  return ndparams[constants.ND_OOB_PROGRAM]
def _IsExclusiveStorageEnabledNode(cfg, node):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The effective value of exclusive_storage

  """
  ndparams = cfg.GetNdParams(node)
  return ndparams[constants.ND_EXCLUSIVE_STORAGE]
def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Whether exclusive_storage is in effect for the given node.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  ni = cfg.GetNodeInfo(nodename)
  if ni is None:
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
                               errors.ECODE_NOENT)
  return _IsExclusiveStorageEnabledNode(cfg, ni)
def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    # ALL_SET is a sentinel, not a list; return it unchanged
    return locking.ALL_SET
  else:
    return names[:]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @param use_default: boolean
794 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @param use_none: boolean
797 @type use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
  """Return the new version of an instance policy.

  @param group_policy: whether this policy applies to a group and thus
    we should support removal of policy entries

  """
  use_none = use_default = group_policy
  ipolicy = copy.deepcopy(old_ipolicy)
  for key, value in new_ipolicy.items():
    if key not in constants.IPOLICY_ALL_KEYS:
      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
                                 errors.ECODE_INVAL)
    if key in constants.IPOLICY_ISPECS:
      utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
      ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
                                       use_none=use_none,
                                       use_default=use_default)
    else:
      if (not value or value == [constants.VALUE_DEFAULT] or
          value == constants.VALUE_DEFAULT):
        if group_policy:
          # Group policies may drop entries to fall back to cluster defaults
          del ipolicy[key]
        else:
          raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
                                     " on the cluster" % key,
                                     errors.ECODE_INVAL)
      else:
        if key in constants.IPOLICY_PARAMETERS:
          # FIXME: we assume all such values are float
          try:
            ipolicy[key] = float(value)
          except (TypeError, ValueError) as err:
            raise errors.OpPrereqError("Invalid value for attribute"
                                       " '%s': '%s', error: %s" %
                                       (key, value, err), errors.ECODE_INVAL)
        else:
          # FIXME: we assume all others are lists; this should be redone
          ipolicy[key] = list(value)
  try:
    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
  except errors.ConfigurationError as err:
    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                               errors.ECODE_INVAL)
  return ipolicy
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    # Merge one sub-dict and type-check the merged result
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one of the object
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one of the object
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  Hook failures are logged as warnings, never propagated: the opcode has
  already finished by the time post-hooks run.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception as err: # pylint: disable=W0703
    lu.LogWarning("Errors occurred running hooks on %s: %s",
                  node_name, err)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: fields requested by the caller
  @raise errors.OpPrereqError: if any selected field is unknown

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1024 """Make sure that none of the given paramters is global.
1026 If a global parameter is found, an L{errors.OpPrereqError} exception is
1027 raised. This is used to avoid setting global parameters for individual nodes.
1029 @type params: dictionary
1030 @param params: Parameters to check
1031 @type glob_pars: dictionary
1032 @param glob_pars: Forbidden parameters
1034 @param kind: Kind of parameters (e.g. "node")
1035 @type bad_levels: string
1036 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
1038 @type good_levels: strings
1039 @param good_levels: Level(s) at which the parameters are allowed (e.g.
1043 used_globals = glob_pars.intersection(params)
1045 msg = ("The following %s parameters are global and cannot"
1046 " be customized at %s level, please modify them at"
1048 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1049 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1052 def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online.
1055 @param lu: the LU on behalf of which we make the check
1056 @param node: the node to check
1057 @param msg: if passed, should be a message to replace the default one
1058 @raise errors.OpPrereqError: if the node is offline
1062 msg = "Can't use offline node"
1063 if lu.cfg.GetNodeInfo(node).offline:
1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1067 def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained.
1070 @param lu: the LU on behalf of which we make the check
1071 @param node: the node to check
1072 @raise errors.OpPrereqError: if the node is drained
1075 if lu.cfg.GetNodeInfo(node).drained:
1076 raise errors.OpPrereqError("Can't use drained node %s" % node,
1080 def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable.
1083 @param lu: the LU on behalf of which we make the check
1084 @param node: the node to check
1085 @raise errors.OpPrereqError: if the node is not vm capable
1088 if not lu.cfg.GetNodeInfo(node).vm_capable:
1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _CheckNodePVs(nresult, exclusive_storage):
  """Check node PVs.

  @param nresult: the remote results for the node
  @param exclusive_storage: whether exclusive storage is enabled on the node
  @return: tuple of (list of error messages, PV info from the exclusive
      storage check or None)

  """
  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
  if pvlist_dict is None:
    return (["Can't get PV list from node"], None)
  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
  errlist = []
  # check that ':' is not present in PV names, since it's a
  # special character for lvcreate (denotes the range of PEs to
  # use on the PV)
  for pv in pvlist:
    if ":" in pv.name:
      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
                     (pv.name, pv.vg_name))
  es_pvinfo = None
  if exclusive_storage:
    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
    errlist.extend(errmsgs)
    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
    if shared_pvs:
      for (pvname, lvlist) in shared_pvs:
        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
        errlist.append("PV %s is shared among unrelated LVs (%s)" %
                       (pvname, utils.CommaJoin(lvlist)))
  return (errlist, es_pvinfo)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  # strict=True: the secret file must contain exactly one non-empty line
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = ("can't use instance from outside %s states" %
           utils.CommaJoin(req_states))
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  # If the instance may not be running, double-check against the hypervisor
  # on the primary node (unless that node is offline and unreachable).
  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an error string describing why the value is out of range

  """
  # Unspecified/auto values are never a violation.
  if value in [None, constants.VALUE_AUTO]:
    return None
  # Missing policy bounds default to the value itself, i.e. no constraint.
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  # Keep only actual violation messages (the compute function returns None
  # for values that are within the policy limits).
  return [res
          for res in (_compute_fn(name, qualifier, ipolicy, value)
                      for (name, qualifier, value) in test_settings)
          if res]
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # Moving within the same group can never introduce a policy violation.
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    # Either warn (ignore=True) or abort with a prerequisite error.
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not violate the old one

  """
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
1374 def _ExpandItemName(fn, name, kind):
1375 """Expand an item name.
1377 @param fn: the function to use for expansion
1378 @param name: requested item name
1379 @param kind: text description ('Node' or 'Instance')
1380 @return: the resolved (full) name
1381 @raise errors.OpPrereqError: if the item is not found
1384 full_name = fn(name)
1385 if full_name is None:
1386 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  @param cfg: the cluster configuration, providing C{ExpandNodeName}
  @param name: the (possibly short) node name to expand
  @return: the resolved full node name

  """
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance.

  @param cfg: the cluster configuration, providing C{ExpandInstanceName}
  @param name: the (possibly short) instance name to expand
  @return: the resolved full instance name

  """
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1401 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1403 """Builds network related env variables for hooks
1405 This builds the hook environment from individual variables.
1408 @param name: the name of the network
1409 @type subnet: string
1410 @param subnet: the ipv4 subnet
1411 @type gateway: string
1412 @param gateway: the ipv4 gateway
1413 @type network6: string
1414 @param network6: the ipv6 subnet
1415 @type gateway6: string
1416 @param gateway6: the ipv6 gateway
1417 @type mac_prefix: string
1418 @param mac_prefix: the mac_prefix
1420 @param tags: the tags of the network
1425 env["NETWORK_NAME"] = name
1427 env["NETWORK_SUBNET"] = subnet
1429 env["NETWORK_GATEWAY"] = gateway
1431 env["NETWORK_SUBNET6"] = network6
1433 env["NETWORK_GATEWAY6"] = gateway6
1435 env["NETWORK_MAC_PREFIX"] = mac_prefix
1437 env["NETWORK_TAGS"] = " ".join(tags)
1442 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1443 minmem, maxmem, vcpus, nics, disk_template, disks,
1444 bep, hvp, hypervisor_name, tags):
1445 """Builds instance related env variables for hooks
1447 This builds the hook environment from individual variables.
1450 @param name: the name of the instance
1451 @type primary_node: string
1452 @param primary_node: the name of the instance's primary node
1453 @type secondary_nodes: list
1454 @param secondary_nodes: list of secondary nodes as strings
1455 @type os_type: string
1456 @param os_type: the name of the instance's OS
1457 @type status: string
1458 @param status: the desired status of the instance
1459 @type minmem: string
1460 @param minmem: the minimum memory size of the instance
1461 @type maxmem: string
1462 @param maxmem: the maximum memory size of the instance
1464 @param vcpus: the count of VCPUs the instance has
1466 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1467 the NICs the instance has
1468 @type disk_template: string
1469 @param disk_template: the disk template of the instance
1471 @param disks: the list of (size, mode) pairs
1473 @param bep: the backend parameters for the instance
1475 @param hvp: the hypervisor parameters for the instance
1476 @type hypervisor_name: string
1477 @param hypervisor_name: the hypervisor for the instance
1479 @param tags: list of instance tags as strings
1481 @return: the hook environment for this instance
1486 "INSTANCE_NAME": name,
1487 "INSTANCE_PRIMARY": primary_node,
1488 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1489 "INSTANCE_OS_TYPE": os_type,
1490 "INSTANCE_STATUS": status,
1491 "INSTANCE_MINMEM": minmem,
1492 "INSTANCE_MAXMEM": maxmem,
1493 # TODO(2.7) remove deprecated "memory" value
1494 "INSTANCE_MEMORY": maxmem,
1495 "INSTANCE_VCPUS": vcpus,
1496 "INSTANCE_DISK_TEMPLATE": disk_template,
1497 "INSTANCE_HYPERVISOR": hypervisor_name,
1500 nic_count = len(nics)
1501 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1504 env["INSTANCE_NIC%d_IP" % idx] = ip
1505 env["INSTANCE_NIC%d_MAC" % idx] = mac
1506 env["INSTANCE_NIC%d_MODE" % idx] = mode
1507 env["INSTANCE_NIC%d_LINK" % idx] = link
1509 nobj = objects.Network.FromDict(netinfo)
1510 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1512 # FIXME: broken network reference: the instance NIC specifies a
1513 # network, but the relevant network entry was not in the config. This
1514 # should be made impossible.
1515 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1516 if mode == constants.NIC_MODE_BRIDGED:
1517 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1521 env["INSTANCE_NIC_COUNT"] = nic_count
1524 disk_count = len(disks)
1525 for idx, (size, mode) in enumerate(disks):
1526 env["INSTANCE_DISK%d_SIZE" % idx] = size
1527 env["INSTANCE_DISK%d_MODE" % idx] = mode
1531 env["INSTANCE_DISK_COUNT"] = disk_count
1536 env["INSTANCE_TAGS"] = " ".join(tags)
1538 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1539 for key, value in source.items():
1540 env["INSTANCE_%s_%s" % (kind, key)] = value
def _NICToTuple(lu, nic):
  """Build a tupple of nic information.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nic: L{objects.NIC}
  @param nic: nic to convert to hooks tuple

  """
  cluster = lu.cfg.GetClusterInfo()
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  # Restored guard: only resolve network info when a network is set.
  netinfo = None
  if nic.network:
    nobj = lu.cfg.GetNetwork(nic.network)
    netinfo = objects.Network.ToDict(nobj)
  return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  for nic in nics:
    hooks_nics.append(_NICToTuple(lu, nic))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  @param lu: the LU on behalf of which we adjust the pool
  @param exceptions: list of node names to be excluded from the calculations

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
1639 def _DecideSelfPromotion(lu, exceptions=None):
1640 """Decide whether I should promote myself as a master candidate.
1643 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1644 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1645 # the new node will increase mc_max with one, so:
1646 mc_should = min(mc_should + 1, cp_size)
1647 return mc_now < mc_should
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: object.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the brigdes needed by a list of nics exist.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: list of NIC objects to check
  @param target_node: the node on which the bridges must exist

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  # Restored guard: skip the RPC entirely when no bridged NICs are present.
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the brigdes needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs we check
  @param node: the node to check on; defaults to the primary node

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    # the OS declares no variants: specifying one is an error
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1710 def _GetNodeInstancesInner(cfg, fn):
1711 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the cluster configuration
  @param storage_type: a storage type constant
  @return: list of extra arguments for the storage backend

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  # all other storage types take no extra arguments
  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of the instance's disks that are faulty on a node.

  @param cfg: the cluster configuration
  @param rpc_runner: the RPC runner to use for the status query
  @param instance: the instance whose disks are checked
  @param node_name: the node on which to check the disks
  @param prereq: whether RPC failures raise a prerequisite or execute error
  @return: list of disk indices whose local-disk status is faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
1804 def _GetDefaultIAllocator(cfg, ialloc):
1805 """Decides on which iallocator to use.
1807 @type cfg: L{config.ConfigWriter}
1808 @param cfg: Cluster configuration object
1809 @type ialloc: string or None
1810 @param ialloc: Iallocator specified in opcode
1812 @return: Iallocator name
1816 # Use default iallocator
1817 ialloc = cfg.GetDefaultIAllocator()
1820 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1821 " opcode nor as a cluster-wide default",
def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do; the hooks are run by the LU machinery.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      # best-effort: log but don't abort the destroy on IP-teardown failure
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception as err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1970 def _GetAllHypervisorParameters(cluster, instances):
1971 """Compute the set of all hypervisor parameters.
1973 @type cluster: L{objects.Cluster}
1974 @param cluster: the cluster object
1975 @param instances: list of L{objects.Instance}
1976 @param instances: additional instances from which to obtain parameters
1977 @rtype: list of (origin, hypervisor, parameters)
1978 @return: a list with all parameters found, indicating the hypervisor they
1979 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1984 for hv_name in cluster.enabled_hypervisors:
1985 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1987 for os_name, os_hvp in cluster.os_hvp.items():
1988 for hv_name, hv_params in os_hvp.items():
1990 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1991 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1993 # TODO: collapse identical parameter values in a single one
1994 for instance in instances:
1995 if instance.hvparams:
1996 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1997 cluster.FillHV(instance)))
2002 class _VerifyErrors(object):
2003 """Mix-in for cluster/group verify LUs.
2005 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2006 self.op and self._feedback_fn to be available.)
2010 ETYPE_FIELD = "code"
2011 ETYPE_ERROR = "ERROR"
2012 ETYPE_WARNING = "WARNING"
2014 def _Error(self, ecode, item, msg, *args, **kwargs):
2015 """Format an error message.
2017 Based on the opcode's error_codes parameter, either format a
2018 parseable error code, or a simpler error string.
2020 This must be called only from Exec and functions called from Exec.
2023 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2024 itype, etxt, _ = ecode
2025 # If the error code is in the list of ignored errors, demote the error to a
2027 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2028 ltype = self.ETYPE_WARNING
2029 # first complete the msg
2032 # then format the whole message
2033 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2034 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2040 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2041 # and finally report it via the feedback_fn
2042 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2043 # do not mark the operation as failed for WARN cases only
2044 if ltype == self.ETYPE_ERROR:
2047 def _ErrorIf(self, cond, *args, **kwargs):
2048 """Log an error message if the passed condition is True.
2052 or self.op.debug_simulate_errors): # pylint: disable=E1101
2053 self._Error(*args, **kwargs)
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      # Verify a single group only; no global config verification needed
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only OpClusterVerifyGroup supports skip_checks
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisorClass(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError as err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in pathutils.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
2195 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2196 """Verifies the status of a node group.
2199 HPATH = "cluster-verify"
2200 HTYPE = constants.HTYPE_CLUSTER
2203 _HOOKS_INDENT_RE = re.compile("^", re.M)
# Per-node status container filled in from both the cluster config ("config"
# fields) and RPC results ("runtime" fields) during group verification.
2205 class NodeImage(object):
2206 """A class representing the logical and physical status of a node.
2209 @ivar name: the node name to which this object refers
2210 @ivar volumes: a structure as returned from
2211 L{ganeti.backend.GetVolumeList} (runtime)
2212 @ivar instances: a list of running instances (runtime)
2213 @ivar pinst: list of configured primary instances (config)
2214 @ivar sinst: list of configured secondary instances (config)
2215 @ivar sbp: dictionary of {primary-node: list of instances} for all
2216 instances for which this node is secondary (config)
2217 @ivar mfree: free memory, as reported by hypervisor (runtime)
2218 @ivar dfree: free disk, as reported by the node (runtime)
2219 @ivar offline: the offline status (config)
2220 @type rpc_fail: boolean
2221 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2222 not whether the individual keys were correct) (runtime)
2223 @type lvm_fail: boolean
2224 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2225 @type hyp_fail: boolean
2226 @ivar hyp_fail: whether the RPC call didn't return the instance list
2227 @type ghost: boolean
2228 @ivar ghost: whether this is a known node or not (config)
2229 @type os_fail: boolean
2230 @ivar os_fail: whether the RPC call didn't return valid OS data
2232 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2233 @type vm_capable: boolean
2234 @ivar vm_capable: whether the node can host instances
2236 @ivar pv_min: size in MiB of the smallest PVs
2238 @ivar pv_max: size in MiB of the biggest PVs
# NOTE(review): this listing omits original lines 2242-2249 of __init__
# (presumably initializers for name/volumes/instances/etc. — confirm against
# the full file); remaining code kept byte-identical.
2241 def __init__(self, offline=False, name=None, vm_capable=True):
2250 self.offline = offline
2251 self.vm_capable = vm_capable
# Failure flags all start False; they are set by the _Update*/_Verify* helpers.
2252 self.rpc_fail = False
2253 self.lvm_fail = False
2254 self.hyp_fail = False
2256 self.os_fail = False
# Resolves the target node group and declares the lock set: the group's
# primary instances, the group itself, nodes (filled in DeclareLocks), and
# the node-allocation lock; all locks are shared.
2261 def ExpandNames(self):
2262 # This raises errors.OpPrereqError on its own:
2263 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2265 # Get instances in node group; this is unsafe and needs verification later
# NOTE(review): the assignment target (`inst_names = \`, original line 2266)
# is missing from this listing.
2267 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2269 self.needed_locks = {
2270 locking.LEVEL_INSTANCE: inst_names,
2271 locking.LEVEL_NODEGROUP: [self.group_uuid],
# Node locks are computed later, in DeclareLocks, from group membership.
2272 locking.LEVEL_NODE: [],
2274 # This opcode is run by watcher every five minutes and acquires all nodes
2275 # for a group. It doesn't run for a long time, so it's better to acquire
2276 # the node allocation lock as well.
2277 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2280 self.share_locks = _ShareAll()
# At the node level, locks the group's member nodes plus the secondary nodes
# of owned mirrored instances that live in other groups (needed for the
# cross-group volume checks in Exec).
2282 def DeclareLocks(self, level):
2283 if level == locking.LEVEL_NODE:
2284 # Get members of node group; this is unsafe and needs verification later
2285 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2287 all_inst_info = self.cfg.GetAllInstancesInfo()
2289 # In Exec(), we warn about mirrored instances that have primary and
2290 # secondary living in separate node groups. To fully verify that
2291 # volumes for these instances are healthy, we will need to do an
2292 # extra call to their secondaries. We ensure here those nodes will
2294 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2295 # Important: access only the instances whose lock is owned
2296 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2297 nodes.update(all_inst_info[inst].secondary_nodes)
2299 self.needed_locks[locking.LEVEL_NODE] = nodes
# Re-validates group membership under the acquired locks (membership may have
# changed between ExpandNames and here) and builds the node/instance maps the
# verification uses; fails if any needed lock is missing.
2301 def CheckPrereq(self):
2302 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2303 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2305 group_nodes = set(self.group_info.members)
# NOTE(review): the assignment target for group_instances (original line
# 2306) is missing from this listing.
2307 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
# NOTE(review): likewise `unlocked_nodes = \` (original ~2309) is missing.
2310 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2312 unlocked_instances = \
2313 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2316 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2317 utils.CommaJoin(unlocked_nodes),
2320 if unlocked_instances:
2321 raise errors.OpPrereqError("Missing lock for instances: %s" %
2322 utils.CommaJoin(unlocked_instances),
2325 self.all_node_info = self.cfg.GetAllNodesInfo()
2326 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2328 self.my_node_names = utils.NiceSort(group_nodes)
2329 self.my_inst_names = utils.NiceSort(group_instances)
# Restrict the cluster-wide maps to this group's nodes/instances.
2331 self.my_node_info = dict((name, self.all_node_info[name])
2332 for name in self.my_node_names)
2334 self.my_inst_info = dict((name, self.all_inst_info[name])
2335 for name in self.my_inst_names)
2337 # We detect here the nodes that will need the extra RPC calls for verifying
2338 # split LV volumes; they should be locked.
2339 extra_lv_nodes = set()
2341 for inst in self.my_inst_info.values():
2342 if inst.disk_template in constants.DTS_INT_MIRROR:
2343 for nname in inst.all_nodes:
2344 if self.all_node_info[nname].group != self.group_uuid:
2345 extra_lv_nodes.add(nname)
2347 unlocked_lv_nodes = \
2348 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2350 if unlocked_lv_nodes:
2351 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2352 utils.CommaJoin(unlocked_lv_nodes),
2354 self.extra_lv_nodes = list(extra_lv_nodes)
# Basic sanity checks on a node's verify-RPC payload: well-formed dict,
# protocol/software version match, per-hypervisor verify results, hypervisor
# parameter validation, and node setup status.
2356 def _VerifyNode(self, ninfo, nresult):
2357 """Perform some basic validation on data returned from a node.
2359 - check the result data structure is well formed and has all the
2361 - check ganeti version
2363 @type ninfo: L{objects.Node}
2364 @param ninfo: the node to check
2365 @param nresult: the results from the node
2367 @return: whether overall this call was successful (and we can expect
2368 reasonable values in the respose)
2372 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374 # main result, nresult should be a non-empty dict
2375 test = not nresult or not isinstance(nresult, dict)
2376 _ErrorIf(test, constants.CV_ENODERPC, node,
2377 "unable to verify node: no data returned")
2381 # compares ganeti version
2382 local_version = constants.PROTOCOL_VERSION
2383 remote_version = nresult.get("version", None)
# Expect a (protocol_version, release_version) pair.
2384 test = not (remote_version and
2385 isinstance(remote_version, (list, tuple)) and
2386 len(remote_version) == 2)
2387 _ErrorIf(test, constants.CV_ENODERPC, node,
2388 "connection to node returned invalid data")
# Protocol mismatch is a hard error ...
2392 test = local_version != remote_version[0]
2393 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2394 "incompatible protocol versions: master %s,"
2395 " node %s", local_version, remote_version[0])
2399 # node seems compatible, we can actually try to look into its results
2401 # full package version
# ... while a mere release-version mismatch is only a warning.
2402 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2403 constants.CV_ENODEVERSION, node,
2404 "software version mismatch: master %s, node %s",
2405 constants.RELEASE_VERSION, remote_version[1],
2406 code=self.ETYPE_WARNING)
2408 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2409 if ninfo.vm_capable and isinstance(hyp_result, dict):
# A non-None per-hypervisor result is the failure message itself.
2410 for hv_name, hv_result in hyp_result.iteritems():
2411 test = hv_result is not None
2412 _ErrorIf(test, constants.CV_ENODEHV, node,
2413 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2415 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2416 if ninfo.vm_capable and isinstance(hvp_result, list):
2417 for item, hv_name, hv_result in hvp_result:
2418 _ErrorIf(True, constants.CV_ENODEHV, node,
2419 "hypervisor %s parameter verify failure (source %s): %s",
2420 hv_name, item, hv_result)
2422 test = nresult.get(constants.NV_NODESETUP,
2423 ["Missing NODESETUP results"])
2424 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
# Checks the node's reported clock against the RPC call window, allowing
# constants.NODE_MAX_CLOCK_SKEW of drift in either direction.
2429 def _VerifyNodeTime(self, ninfo, nresult,
2430 nvinfo_starttime, nvinfo_endtime):
2431 """Check the node time.
2433 @type ninfo: L{objects.Node}
2434 @param ninfo: the node to check
2435 @param nresult: the remote results for the node
2436 @param nvinfo_starttime: the start time of the RPC call
2437 @param nvinfo_endtime: the end time of the RPC call
2441 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2443 ntime = nresult.get(constants.NV_TIME, None)
# NOTE(review): the `try:` opening this handler (original line 2444) is
# missing from this listing; code kept byte-identical.
2445 ntime_merged = utils.MergeTime(ntime)
2446 except (ValueError, TypeError):
2447 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
# Diverges only if outside [start - skew, end + skew].
2450 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2451 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2452 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2453 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2457 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2458 "Node time diverges by at least %s from master node time",
# Validates the node's volume-group data (existence and minimum size), runs
# the PV checks, and records min/max PV sizes on the node image for the later
# cross-node exclusive-storage comparison.
2461 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2462 """Check the node LVM results and update info for cross-node checks.
2464 @type ninfo: L{objects.Node}
2465 @param ninfo: the node to check
2466 @param nresult: the remote results for the node
2467 @param vg_name: the configured VG name
2468 @type nimg: L{NodeImage}
2469 @param nimg: node image
2476 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2478 # checks vg existence and size > 20G
2479 vglist = nresult.get(constants.NV_VGLIST, None)
# NOTE(review): the `test = ...` assignment (original line 2480) is missing
# from this listing.
2481 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
# CheckVolumeGroupSize returns an error string (truthy) on failure.
2483 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2484 constants.MIN_VG_SIZE)
2485 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2488 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2490 self._Error(constants.CV_ENODELVM, node, em)
2491 if pvminmax is not None:
2492 (nimg.pv_min, nimg.pv_max) = pvminmax
# Cross-node LVM consistency check: under exclusive storage, all PVs in the
# group must have (approximately) the same size; compares the group-wide
# smallest and biggest PV.
2494 def _VerifyGroupLVM(self, node_image, vg_name):
2495 """Check cross-node consistency in LVM.
2497 @type node_image: dict
2498 @param node_image: info about nodes, mapping from node to names to
2499 L{NodeImage} objects
2500 @param vg_name: the configured VG name
2506 # Only exlcusive storage needs this kind of checks
2507 if not self._exclusive_storage:
2510 # exclusive_storage wants all PVs to have the same size (approximately),
2511 # if the smallest and the biggest ones are okay, everything is fine.
2512 # pv_min is None iff pv_max is None
2513 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
# Tuples sort by size first, so min/max also pick a witness node name.
2516 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2517 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2518 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2519 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2520 "PV sizes differ too much in the group; smallest (%s MB) is"
2521 " on %s, biggest (%s MB) is on %s",
2522 pvmin, minnode, pvmax, maxnode)
# Verifies the node returned a valid (list-typed) answer to the bridge probe
# and reports any bridges it is missing.
2524 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2525 """Check the node bridges.
2527 @type ninfo: L{objects.Node}
2528 @param ninfo: the node to check
2529 @param nresult: the remote results for the node
2530 @param bridges: the expected list of bridges
2537 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# NV_BRIDGES payload is the list of MISSING bridges; non-list means bad RPC.
2539 missing = nresult.get(constants.NV_BRIDGES, None)
2540 test = not isinstance(missing, list)
2541 _ErrorIf(test, constants.CV_ENODENET, node,
2542 "did not return valid bridge information")
2544 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2545 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
# Reports user scripts that are missing or not executable on the node, as
# listed in the NV_USERSCRIPTS payload.
2547 def _VerifyNodeUserScripts(self, ninfo, nresult):
2548 """Check the results of user scripts presence and executability on the node
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2557 test = not constants.NV_USERSCRIPTS in nresult
2558 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2559 "did not return user scripts information")
# A non-empty list of broken scripts is itself the error condition.
2561 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2563 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2564 "user scripts not present or not executable: %s" %
2565 utils.CommaJoin(sorted(broken_scripts)))
# Network connectivity checks for one node: SSH reachability to other nodes,
# TCP reachability, and master-IP reachability (with a special message when
# the failing node is the master itself).
2567 def _VerifyNodeNetwork(self, ninfo, nresult):
2568 """Check the node network connectivity results.
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2576 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2578 test = constants.NV_NODELIST not in nresult
2579 _ErrorIf(test, constants.CV_ENODESSH, node,
2580 "node hasn't returned node ssh connectivity data")
# NV_NODELIST maps peer node -> failure message; non-empty means failures.
2582 if nresult[constants.NV_NODELIST]:
2583 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2584 _ErrorIf(True, constants.CV_ENODESSH, node,
2585 "ssh communication with node '%s': %s", a_node, a_msg)
2587 test = constants.NV_NODENETTEST not in nresult
2588 _ErrorIf(test, constants.CV_ENODENET, node,
2589 "node hasn't returned node tcp connectivity data")
# NOTE(review): the `for anode in nlist:` line (original ~2593) is missing
# from this listing.
2591 if nresult[constants.NV_NODENETTEST]:
2592 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2594 _ErrorIf(True, constants.CV_ENODENET, node,
2595 "tcp communication with node '%s': %s",
2596 anode, nresult[constants.NV_NODENETTEST][anode])
2598 test = constants.NV_MASTERIP not in nresult
2599 _ErrorIf(test, constants.CV_ENODENET, node,
2600 "node hasn't returned node master IP reachability data")
2602 if not nresult[constants.NV_MASTERIP]:
2603 if node == self.master_node:
2604 msg = "the master node cannot reach the master IP (not configured?)"
2606 msg = "cannot reach the master IP"
2607 _ErrorIf(True, constants.CV_ENODENET, node, msg)
# Full per-instance verification: ipolicy compliance, presence of the
# instance's volumes on its nodes, running-state vs. admin-state, per-disk
# status, node-count layout, exclusive-storage compatibility, split-group
# detection for mirrored templates, and offline/ghost/non-vm_capable nodes.
2609 def _VerifyInstance(self, instance, inst_config, node_image,
2611 """Verify an instance.
2613 This function checks to see if the required block devices are
2614 available on the instance's node, and that the nodes are in the correct
2618 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2619 pnode = inst_config.primary_node
2620 pnode_img = node_image[pnode]
2621 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2623 node_vol_should = {}
2624 inst_config.MapLVsByNode(node_vol_should)
# Instance-policy violations are reported only as warnings.
2626 cluster = self.cfg.GetClusterInfo()
2627 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2629 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
2630 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2631 code=self.ETYPE_WARNING)
2633 for node in node_vol_should:
2634 n_img = node_image[node]
2635 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2636 # ignore missing volumes on offline or broken nodes
2638 for volume in node_vol_should[node]:
2639 test = volume not in n_img.volumes
2640 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2641 "volume %s missing on node %s", volume, node)
2643 if inst_config.admin_state == constants.ADMINST_UP:
2644 test = instance not in pnode_img.instances and not pnode_img.offline
2645 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2646 "instance not running on its primary node %s",
2648 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2649 "instance is marked as running and lives on offline node %s",
# Flatten per-node disk status into (node, success, status, disk-index).
2652 diskdata = [(nname, success, status, idx)
2653 for (nname, disks) in diskstatus.items()
2654 for idx, (success, status) in enumerate(disks)]
2656 for nname, success, bdev_status, idx in diskdata:
2657 # the 'ghost node' construction in Exec() ensures that we have a
2659 snode = node_image[nname]
2660 bad_snode = snode.ghost or snode.offline
2661 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2662 not success and not bad_snode,
2663 constants.CV_EINSTANCEFAULTYDISK, instance,
2664 "couldn't retrieve status for disk/%s on %s: %s",
2665 idx, nname, bdev_status)
2666 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2667 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2668 constants.CV_EINSTANCEFAULTYDISK, instance,
2669 "disk/%s on %s is faulty", idx, nname)
2671 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2672 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2673 " primary node failed", instance)
2675 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2676 constants.CV_EINSTANCELAYOUT,
2677 instance, "instance has multiple secondary nodes: %s",
2678 utils.CommaJoin(inst_config.secondary_nodes),
2679 code=self.ETYPE_WARNING)
2681 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2682 # Disk template not compatible with exclusive_storage: no instance
2683 # node should have the flag set
2684 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2685 inst_config.all_nodes)
2686 es_nodes = [n for (n, es) in es_flags.items()
2688 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2689 "instance has template %s, which is not supported on nodes"
2690 " that have exclusive storage set: %s",
2691 inst_config.disk_template, utils.CommaJoin(es_nodes))
2693 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2694 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2695 instance_groups = {}
# Group the instance's nodes by their node group to detect split instances.
2697 for node in instance_nodes:
2698 instance_groups.setdefault(self.all_node_info[node].group,
2702 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2703 # Sort so that we always list the primary node first.
2704 for group, nodes in sorted(instance_groups.items(),
2705 key=lambda (_, nodes): pnode in nodes,
2708 self._ErrorIf(len(instance_groups) > 1,
2709 constants.CV_EINSTANCESPLITGROUPS,
2710 instance, "instance has primary and secondary nodes in"
2711 " different groups: %s", utils.CommaJoin(pretty_list),
2712 code=self.ETYPE_WARNING)
2714 inst_nodes_offline = []
2715 for snode in inst_config.secondary_nodes:
2716 s_img = node_image[snode]
2717 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2718 snode, "instance %s, connection to secondary node failed",
2722 inst_nodes_offline.append(snode)
2724 # warn that the instance lives on offline nodes
2725 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2726 "instance has offline secondary node(s) %s",
2727 utils.CommaJoin(inst_nodes_offline))
2728 # ... or ghost/non-vm_capable nodes
2729 for node in inst_config.all_nodes:
2730 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2731 instance, "instance lives on ghost node %s", node)
2732 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2733 instance, "instance lives on non-vm_capable node %s", node)
# Flags volumes present on healthy in-group nodes that no instance claims and
# that are not covered by the reserved-volume patterns.
2735 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2736 """Verify if there are any unknown volumes in the cluster.
2738 The .os, .swap and backup volumes are ignored. All other volumes are
2739 reported as unknown.
2741 @type reserved: L{ganeti.utils.FieldSet}
2742 @param reserved: a FieldSet of reserved volume names
2745 for node, n_img in node_image.items():
2746 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2747 self.all_node_info[node].group != self.group_uuid):
2748 # skip non-healthy nodes
2750 for volume in n_img.volumes:
# Orphan = not expected on this node and not matching a reserved pattern.
2751 test = ((node not in node_vol_should or
2752 volume not in node_vol_should[node]) and
2753 not reserved.Matches(volume))
2754 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2755 "volume %s is unknown", volume)
# N+1 redundancy check: for every node acting as secondary, verify it has
# enough free memory to start all auto-balanced instances it would inherit
# should any single primary node fail.
2757 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2758 """Verify N+1 Memory Resilience.
2760 Check that if one single node dies we can still start all the
2761 instances it was primary for.
2764 cluster_info = self.cfg.GetClusterInfo()
2765 for node, n_img in node_image.items():
2766 # This code checks that every node which is now listed as
2767 # secondary has enough memory to host all instances it is
2768 # supposed to should a single other node in the cluster fail.
2769 # FIXME: not ready for failover to an arbitrary node
2770 # FIXME: does not support file-backed instances
2771 # WARNING: we currently take into account down instances as well
2772 # as up ones, considering that even if they're down someone
2773 # might want to start them even in the event of a node failure.
2774 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2775 # we're skipping nodes marked offline and nodes in other groups from
2776 # the N+1 warning, since most likely we don't have good memory
2777 # infromation from them; we already list instances living on such
2778 # nodes, and that's enough warning
2780 #TODO(dynmem): also consider ballooning out other instances
# Sum the minimum memory of each auto-balanced instance per failing primary.
# NOTE(review): the `needed_mem = 0` initializer (original ~2782) is missing
# from this listing.
2781 for prinode, instances in n_img.sbp.items():
2783 for instance in instances:
2784 bep = cluster_info.FillBE(instance_cfg[instance])
2785 if bep[constants.BE_AUTO_BALANCE]:
2786 needed_mem += bep[constants.BE_MINMEM]
2787 test = n_img.mfree < needed_mem
2788 self._ErrorIf(test, constants.CV_ENODEN1, node,
2789 "not enough memory to accomodate instance failovers"
2790 " should node %s fail (%dMiB needed, %dMiB available)",
2791 prinode, needed_mem, n_img.mfree)
# Cross-node file-checksum comparison: builds the expected node set for each
# distributed file, collects per-node checksums, then reports missing,
# unexpected, and inconsistently-versioned files.
# NOTE(review): the @classmethod decorator line (original ~2793) is missing
# from this listing but implied by the `cls` first parameter.
2794 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2795 (files_all, files_opt, files_mc, files_vm)):
2796 """Verifies file checksums collected from all nodes.
2798 @param errorif: Callback for reporting errors
2799 @param nodeinfo: List of L{objects.Node} objects
2800 @param master_node: Name of master node
2801 @param all_nvinfo: RPC results
2804 # Define functions determining which nodes to consider for a file
# NOTE(review): the list opener `files2nodefn = [` and the (files_all, None)
# entry (original ~2805-2806) are missing from this listing.
2807 (files_mc, lambda node: (node.master_candidate or
2808 node.name == master_node)),
2809 (files_vm, lambda node: node.vm_capable),
2812 # Build mapping from filename to list of nodes which should have the file
2814 for (files, fn) in files2nodefn:
# fn is None => all nodes should have the file; otherwise filter by fn.
2816 filenodes = nodeinfo
2818 filenodes = filter(fn, nodeinfo)
2819 nodefiles.update((filename,
2820 frozenset(map(operator.attrgetter("name"), filenodes)))
2821 for filename in files)
2823 assert set(nodefiles) == (files_all | files_mc | files_vm)
2825 fileinfo = dict((filename, {}) for filename in nodefiles)
2826 ignore_nodes = set()
2828 for node in nodeinfo:
2830 ignore_nodes.add(node.name)
2833 nresult = all_nvinfo[node.name]
2835 if nresult.fail_msg or not nresult.payload:
2838 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
# Remote paths come back virtualized; map them to local paths for comparison.
2839 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2840 for (key, value) in fingerprints.items())
2843 test = not (node_files and isinstance(node_files, dict))
2844 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2845 "Node did not return file checksum data")
2847 ignore_nodes.add(node.name)
2850 # Build per-checksum mapping from filename to nodes having it
2851 for (filename, checksum) in node_files.items():
2852 assert filename in nodefiles
2853 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2855 for (filename, checksums) in fileinfo.items():
2856 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2858 # Nodes having the file
2859 with_file = frozenset(node_name
2860 for nodes in fileinfo[filename].values()
2861 for node_name in nodes) - ignore_nodes
2863 expected_nodes = nodefiles[filename] - ignore_nodes
2865 # Nodes missing file
2866 missing_file = expected_nodes - with_file
2868 if filename in files_opt:
# Optional files must be on all expected nodes or on none of them.
2870 errorif(missing_file and missing_file != expected_nodes,
2871 constants.CV_ECLUSTERFILECHECK, None,
2872 "File %s is optional, but it must exist on all or no"
2873 " nodes (not found on %s)",
2874 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2876 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2877 "File %s is missing from node(s) %s", filename,
2878 utils.CommaJoin(utils.NiceSort(missing_file)))
2880 # Warn if a node has a file it shouldn't
2881 unexpected = with_file - expected_nodes
2883 constants.CV_ECLUSTERFILECHECK, None,
2884 "File %s should not exist on node(s) %s",
2885 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2887 # See if there are multiple versions of the file
2888 test = len(checksums) > 1
2890 variants = ["variant %s on %s" %
2891 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2892 for (idx, (checksum, nodes)) in
2893 enumerate(sorted(checksums.items()))]
2897 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2898 "File %s found with %s different checksums (%s)",
2899 filename, len(checksums), "; ".join(variants))
# DRBD verification for one node: checks the usermode helper matches the
# configured one, then cross-checks the minors the node actually uses against
# the minors the cluster config has allocated to it.
2901 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2903 """Verifies and the node DRBD status.
2905 @type ninfo: L{objects.Node}
2906 @param ninfo: the node to check
2907 @param nresult: the remote results for the node
2908 @param instanceinfo: the dict of instances
2909 @param drbd_helper: the configured DRBD usermode helper
2910 @param drbd_map: the DRBD map as returned by
2911 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2915 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2918 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2919 test = (helper_result is None)
2920 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2921 "no drbd usermode helper returned")
# helper_result is a (status, payload) pair; payload is the helper path on
# success, or an error message on failure.
2923 status, payload = helper_result
2925 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2926 "drbd usermode helper check unsuccessful: %s", payload)
2927 test = status and (payload != drbd_helper)
2928 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2929 "wrong drbd usermode helper: %s", payload)
2931 # compute the DRBD minors
# NOTE(review): the `node_drbd = {}` initializer (original ~2932) is missing
# from this listing.
2933 for minor, instance in drbd_map[node].items():
2934 test = instance not in instanceinfo
2935 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2936 "ghost instance '%s' in temporary DRBD map", instance)
2937 # ghost instance should not be running, but otherwise we
2938 # don't give double warnings (both ghost instance and
2939 # unallocated minor in use)
2941 node_drbd[minor] = (instance, False)
2943 instance = instanceinfo[instance]
2944 node_drbd[minor] = (instance.name,
2945 instance.admin_state == constants.ADMINST_UP)
2947 # and now check them
2948 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2949 test = not isinstance(used_minors, (tuple, list))
2950 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2951 "cannot parse drbd status file: %s", str(used_minors))
2953 # we cannot check drbd status
# An allocated minor for a running instance must be in use on the node ...
2956 for minor, (iname, must_exist) in node_drbd.items():
2957 test = minor not in used_minors and must_exist
2958 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2959 "drbd minor %d of instance %s is not active", minor, iname)
# ... and every in-use minor must be one the config knows about.
2960 for minor in used_minors:
2961 test = minor not in node_drbd
2962 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2963 "unallocated drbd minor %d is in use", minor)
# Parses the node's NV_OSLIST payload (7-element records per OS) into
# nimg.oslist, a dict mapping OS name to its diagnosed entries.
2965 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2966 """Builds the node OS structures.
2968 @type ninfo: L{objects.Node}
2969 @param ninfo: the node to check
2970 @param nresult: the remote results for the node
2971 @param nimg: the node image object
2975 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2977 remote_os = nresult.get(constants.NV_OSLIST, None)
# Each OS record must be a 7-element list (name, path, status, diagnose,
# variants, parameters, api_versions).
2978 test = (not isinstance(remote_os, list) or
2979 not compat.all(isinstance(v, list) and len(v) == 7
2980 for v in remote_os))
2982 _ErrorIf(test, constants.CV_ENODEOS, node,
2983 "node hasn't returned valid OS data")
# NOTE(review): the `os_dict = {}` initializer (original ~2984-2991) is
# missing from this listing.
2992 for (name, os_path, status, diagnose,
2993 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2995 if name not in os_dict:
2998 # parameters is a list of lists instead of list of tuples due to
2999 # JSON lacking a real tuple type, fix it:
3000 parameters = [tuple(v) for v in parameters]
3001 os_dict[name].append((os_path, status, diagnose,
3002 set(variants), set(parameters), set(api_ver)))
3004 nimg.oslist = os_dict
# Compares the node's diagnosed OS list against a reference ("base") node:
# invalid OSes, duplicate entries, extra OSes, per-OS API/variant/parameter
# divergence, and OSes missing relative to the reference.
3006 def _VerifyNodeOS(self, ninfo, nimg, base):
3007 """Verifies the node OS list.
3009 @type ninfo: L{objects.Node}
3010 @param ninfo: the node to check
3011 @param nimg: the node image object
3012 @param base: the 'template' node we match against (e.g. from the master)
3016 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3018 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3020 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3021 for os_name, os_data in nimg.oslist.items():
3022 assert os_data, "Empty OS status for OS %s?!" % os_name
# Only the first entry per OS is authoritative; extra ones are shadowed.
3023 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3024 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3025 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3026 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3027 "OS '%s' has multiple entries (first one shadows the rest): %s",
3028 os_name, utils.CommaJoin([v[0] for v in os_data]))
3029 # comparisons with the 'base' image
3030 test = os_name not in base.oslist
3031 _ErrorIf(test, constants.CV_ENODEOS, node,
3032 "Extra OS %s not present on reference node (%s)",
3036 assert base.oslist[os_name], "Base node has empty OS status?"
3037 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3039 # base OS is invalid, skipping
3041 for kind, a, b in [("API version", f_api, b_api),
3042 ("variants list", f_var, b_var),
3043 ("parameters", beautify_params(f_param),
3044 beautify_params(b_param))]:
3045 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3046 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3047 kind, os_name, base.name,
3048 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3050 # check any missing OSes
3051 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3052 _ErrorIf(missing, constants.CV_ENODEOS, node,
3053 "OSes present on reference node %s but missing on this node: %s",
3054 base.name, utils.CommaJoin(missing))
# Checks forbidden file-storage paths: only the master is expected to report
# them (and must report an empty list); any other node returning the key is
# itself an error.
3056 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3057 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3059 @type ninfo: L{objects.Node}
3060 @param ninfo: the node to check
3061 @param nresult: the remote results for the node
3062 @type is_master: bool
3063 @param is_master: Whether node is the master node
# NOTE(review): the `if is_master and (...)` opener (original ~3068) is
# missing from this listing.
3069 (constants.ENABLE_FILE_STORAGE or
3070 constants.ENABLE_SHARED_FILE_STORAGE)):
3072 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3074 # This should never happen
3075 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3076 "Node did not return forbidden file storage paths")
3078 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3079 "Found forbidden file storage paths: %s",
3080 utils.CommaJoin(fspaths))
# Non-master nodes should not even have probed for the key.
3082 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3083 constants.CV_ENODEFILESTORAGEPATHS, node,
3084 "Node should not have returned forbidden file storage"
# Out-of-band path verification; only relevant for master-candidate or
# master-capable nodes since the OOB helper runs on the master.
3087 def _VerifyOob(self, ninfo, nresult):
3088 """Verifies out of band functionality of a node.
3090 @type ninfo: L{objects.Node}
3091 @param ninfo: the node to check
3092 @param nresult: the remote results for the node
3096 # We just have to verify the paths on master and/or master candidates
3097 # as the oob helper is invoked on the master
3098 if ((ninfo.master_candidate or ninfo.master_capable) and
3099 constants.NV_OOB_PATHS in nresult):
# A truthy path_result is the error message for that OOB path.
3100 for path_result in nresult[constants.NV_OOB_PATHS]:
3101 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
# Interprets the NV_LVLIST payload: a string is an LVM error, a non-dict is a
# failed RPC; only a dict clears nimg.lvm_fail and populates nimg.volumes.
3103 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3104 """Verifies and updates the node volume data.
3106 This function will update a L{NodeImage}'s internal structures
3107 with data from the remote call.
3109 @type ninfo: L{objects.Node}
3110 @param ninfo: the node to check
3111 @param nresult: the remote results for the node
3112 @param nimg: the node image object
3113 @param vg_name: the configured VG name
3117 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# Pessimistic default; cleared only when valid volume data is received.
3119 nimg.lvm_fail = True
3120 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3123 elif isinstance(lvdata, basestring):
3124 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3125 utils.SafeEncode(lvdata))
3126 elif not isinstance(lvdata, dict):
3127 _ErrorIf(True, constants.CV_ENODELVM, node,
3128 "rpc call to node failed (lvlist)")
3130 nimg.volumes = lvdata
3131 nimg.lvm_fail = False
# Stores the node's reported running-instance list on the node image, or
# marks the hypervisor query as failed if the payload is not a list.
3133 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3134 """Verifies and updates the node instance list.
3136 If the listing was successful, then updates this node's instance
3137 list. Otherwise, it marks the RPC call as failed for the instance
3140 @type ninfo: L{objects.Node}
3141 @param ninfo: the node to check
3142 @param nresult: the remote results for the node
3143 @param nimg: the node image object
3146 idata = nresult.get(constants.NV_INSTANCELIST, None)
3147 test = not isinstance(idata, list)
3148 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3149 "rpc call to node failed (instancelist): %s",
3150 utils.SafeEncode(str(idata)))
3152 nimg.hyp_fail = True
3154 nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    Fills in the node image's free memory (from the hypervisor) and
    free disk space (from the volume group), reporting errors for
    malformed remote data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    # NOTE(review): the "node = ninfo.name" binding, the "if not test:"
    # guard and the "try:" lines preceding both int() conversions are
    # elided from this excerpt.
      nimg.mfree = int(hv_info["memory_free"])
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODERPC, node,
               "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
        nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid LVM info, check LVM status")
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(succes, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    # NOTE(review): this excerpt elides several lines of the original method
    # (initializations of node_disks/instdisk/devonly, the "if not disks:"
    # and offline/failure branches, and some call continuations); the
    # visible code is preserved unchanged.
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      # All instances with at least one leg on this node
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

        # No need to collect data
      node_disks[nname] = disks

      # _AnnotateDiskParams makes already copies of the disks
      for (inst, dev) in disks:
        (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
        self.cfg.SetDiskID(anno_disk, nname)
        devonly.append(anno_disk)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),

    assert len(result) == len(node_disks)

    for (nname, nres) in result.items():
      disks = node_disks[nname]

        # No data from this node
        data = len(disks) * [(False, "node offline")]

        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        # No data from this node
        data = len(disks) * [(False, msg)]

        for idx, i in enumerate(nres.payload):
          if isinstance(i, (tuple, list)) and len(i) == 2:
            logging.warning("Invalid result from node %s, entry %d: %s",
            data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk

    # Consistency check: every recorded status is a (success, payload) pair
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())

    instdisk_keys = set(instdisk)
    instanceinfo_keys = set(instanceinfo)
    assert instdisk_keys == instanceinfo_keys, \
      ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
       (instdisk_keys, instanceinfo_keys))
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    One cycling iterator is produced per foreign node group, each
    iterating over that group's node names in sorted order.

    """
    # NOTE(review): the @staticmethod decorator, part of the node filter
    # condition, and the groupby() key continuation are elided from this
    # excerpt.
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    # Each online node gets one target from every foreign group's iterator
    # (i.next() is the Python 2 iterator protocol)
    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    # NOTE(review): the initial "env = { ... }" construction and the final
    # "return env" are elided from this excerpt.
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())
3347 def BuildHooksNodes(self):
3348 """Build hooks nodes.
3351 return ([], self.my_node_names)
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various test on nodes.

    """
    # NOTE(review): this excerpt elides a number of lines from the original
    # method (initializations such as user_scripts/bridges/oob_paths/
    # refos_img, several guard statements — "if verbose:", "if bridges:",
    # "return True", counters like "i_offline += 1" — and a few call
    # continuations); all visible code is preserved unchanged below.
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")

    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

    # Checks requested from every node of the group
    node_verify_param = {
      constants.NV_FILELIST:
        map(vcluster.MakeVirtualPath,
            utils.UniqueSequence(filename
                                 for files in filemap
                                 for filename in files)),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]

      node_verify_param[constants.NV_DRBDLIST] = None
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      # Load file storage paths only from master node
      node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node

    # FIXME: this needs to be changed per node-group, not cluster-wide
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]
      if inst_config.admin_state == constants.ADMINST_OFFLINE:

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node: referenced by an instance but not in this group's map
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
    # The value of exclusive_storage should be the same across the group, so if
    # it's True for at least a node, we act as if it were set for all the nodes
    self._exclusive_storage = compat.any(es_flags.values())
    if self._exclusive_storage:
      node_verify_param[constants.NV_EXCLUSIVEPVS] = True

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
        self.rpc.call_node_verify(self.extra_lv_nodes,
                                  {constants.NV_LVLIST: vg_name},
                                  self.cfg.GetClusterName())
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included,
      # excluding the master node (which we already have)
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if (nodeinfo.vm_capable and not nodeinfo.offline and
            node != master_node):
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    for node_i in node_data_list:
      nimg = node_image[node]
        feedback_fn("* Skipping offline node %s" % (node,))

      if node == master_node:
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
        nimg.rpc_fail = True

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)
      self._VerifyFileStoragePaths(node_i, nresult,
                                   node == master_node)

        self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instancies are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    self._VerifyGroupLVM(node_image, vg_name)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant.
      if inst_config.disk_template not in constants.DTS_MIRRORED:
        i_non_redundant.append(instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

      feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)

      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)

      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # NOTE(review): several lines are elided from this excerpt (the early
    # return for empty groups, "msg = res.fail_msg", the per-node
    # "continue" and the "if test:" guard before re-indenting the output).
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
            output = self._HOOKS_INDENT_RE.sub(" ", output)
            feedback_fn("%s" % output)
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  Submits one L{opcodes.OpGroupVerifyDisks} job per node group, so the
  actual disk verification happens per-group.

  """
  # NOTE(review): the "REQ_BGL = False" attribute and the closing brace of
  # the needed_locks dict are elided from this excerpt.

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  # NOTE(review): this excerpt elides "REQ_BGL = False", some dict closers,
  # result-dict initializations (res_nodes/res_missing), "continue"
  # statements and a couple of guard lines; all visible code is preserved
  # unchanged.

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

      # This opcode is acquires all node locks in a group. LUClusterVerifyDisks
      # starts one instance of this opcode for every group, which means all
      # nodes will be locked for a short amount of time, so it's better to
      # acquire the node allocation lock as well.
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on (NOTE(review): the "[group_uuid" list-comprehension
            # opener is elided from this excerpt)
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_instances = set()

    # Only consider disks of instances that should be running
    nv_dict = _MapInstanceDisksToNodes(
      [inst for inst in self.instances.values()
       if inst.admin_state == constants.ADMINST_UP])

      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:

        msg = node_res.fail_msg
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  # NOTE(review): this excerpt elides "REQ_BGL = False", the "else:" in
  # ExpandNames, dict closers, several per-disk guard lines ("if mismatch:",
  # "if size is None:", "continue", "disk.size = size", "size = size >> 20")
  # and the final "return changed"; all visible code is preserved unchanged.

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # Not getting the node allocation lock as only a specific set of
      # instances (and their nodes) is going to be acquired
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,

        # This opcode is acquires the node locks for all instances
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,

    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      locking.LEVEL_NODE_ALLOC: 1,

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   fchild.size, disk.size)
      fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    for node, dskl in per_node_disks.items():
      # Work on copies so the config objects aren't mutated by SetDiskID
      newl = [v[2].Copy() for v in dskl]
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  # NOTE(review): this excerpt elides a few lines (the "return {" / "}" of
  # BuildHooksEnv, "new_ip = self.ip", a try/except/finally skeleton around
  # the config update, call continuations and "return clustername"); all
  # visible code is preserved unchanged.

  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
    if new_ip != old_ip:
      # refuse a new IP that is already live on the network
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
    result.Raise("Could not disable the master role")

      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
        node_list.remove(master_params.name)
      _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
      msg = result.fail_msg
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  # NOTE(review): the "try:" line preceding the next statement is elided
  # from this excerpt.
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  # Validate against the IP class (IPv4/IPv6) matching the cluster family
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
4109 class LUClusterSetParams(LogicalUnit):
4110 """Change the parameters of the cluster.
4113 HPATH = "cluster-modify"
4114 HTYPE = constants.HTYPE_CLUSTER
  def CheckArguments(self):
    """Check parameters.

    Validates UID pools, the master netmask and disk parameters given
    in the opcode.

    """
    # NOTE(review): a few lines are elided from this excerpt (part of the
    # original docstring, the "try:" before VerifyDictOptions and its
    # closing continuation).
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verify diskparams options: %s" % err,
  def ExpandNames(self):
    """Compute the locks needed: all nodes, instances and groups, shared.

    """
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    # FIXME: This opcode changes cluster-wide settings. Is acquiring all
    # resource locks the right thing, shouldn't it be the BGL instead?
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
    # NOTE(review): the closing brace of the dict literal above is elided
    # from this excerpt.
    self.share_locks = _ShareAll()
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    # NOTE(review): the surrounding dict construction ("return {" / "}")
    # is elided from this excerpt; only the two entries are visible.
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    # NOTE(review): the final return statement (presumably
    # "return ([mn], [mn])") is elided from this excerpt — confirm against
    # the full source.
4171 def CheckPrereq(self):
4172 """Check prerequisites.
4174 This checks whether the given params don't conflict and
4175 if the given volume group is valid.
4178 if self.op.vg_name is not None and not self.op.vg_name:
4179 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4180 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4181 " instances exist", errors.ECODE_INVAL)
4183 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4184 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4185 raise errors.OpPrereqError("Cannot disable drbd helper while"
4186 " drbd-based instances exist",
4189 node_list = self.owned_locks(locking.LEVEL_NODE)
4191 # if vg_name not None, checks given volume group on all nodes
4193 vglist = self.rpc.call_vg_list(node_list)
4194 for node in node_list:
4195 msg = vglist[node].fail_msg
4197 # ignoring down node
4198 self.LogWarning("Error while gathering data on node %s"
4199 " (ignoring node): %s", node, msg)
4201 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4203 constants.MIN_VG_SIZE)
4205 raise errors.OpPrereqError("Error on node '%s': %s" %
4206 (node, vgstatus), errors.ECODE_ENVIRON)
4208 if self.op.drbd_helper:
4209 # checks given drbd helper on all nodes
4210 helpers = self.rpc.call_drbd_helper(node_list)
4211 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4213 self.LogInfo("Not checking drbd helper on offline node %s", node)
4215 msg = helpers[node].fail_msg
4217 raise errors.OpPrereqError("Error checking drbd helper on node"
4218 " '%s': %s" % (node, msg),
4219 errors.ECODE_ENVIRON)
4220 node_helper = helpers[node].payload
4221 if node_helper != self.op.drbd_helper:
4222 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4223 (node, node_helper), errors.ECODE_ENVIRON)
4225 self.cluster = cluster = self.cfg.GetClusterInfo()
4226 # validate params changes
4227 if self.op.beparams:
4228 objects.UpgradeBeParams(self.op.beparams)
4229 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4230 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4232 if self.op.ndparams:
4233 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4234 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4236 # TODO: we need a more general way to handle resetting
4237 # cluster-level parameters to default values
4238 if self.new_ndparams["oob_program"] == "":
4239 self.new_ndparams["oob_program"] = \
4240 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4242 if self.op.hv_state:
4243 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4244 self.cluster.hv_state_static)
4245 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4246 for hv, values in new_hv_state.items())
4248 if self.op.disk_state:
4249 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4250 self.cluster.disk_state_static)
4251 self.new_disk_state = \
4252 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4253 for name, values in svalues.items()))
4254 for storage, svalues in new_disk_state.items())
4257 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4260 all_instances = self.cfg.GetAllInstancesInfo().values()
4262 for group in self.cfg.GetAllNodeGroupsInfo().values():
4263 instances = frozenset([inst for inst in all_instances
4264 if compat.any(node in group.members
4265 for node in inst.all_nodes)])
4266 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4267 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4268 new = _ComputeNewInstanceViolations(ipol,
4269 new_ipolicy, instances)
4271 violations.update(new)
4274 self.LogWarning("After the ipolicy change the following instances"
4275 " violate them: %s",
4276 utils.CommaJoin(utils.NiceSort(violations)))
4278 if self.op.nicparams:
4279 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4280 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4281 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4284 # check all instances for consistency
4285 for instance in self.cfg.GetAllInstancesInfo().values():
4286 for nic_idx, nic in enumerate(instance.nics):
4287 params_copy = copy.deepcopy(nic.nicparams)
4288 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4290 # check parameter syntax
4292 objects.NIC.CheckParameterSyntax(params_filled)
4293 except errors.ConfigurationError, err:
4294 nic_errors.append("Instance %s, nic/%d: %s" %
4295 (instance.name, nic_idx, err))
4297 # if we're moving instances to routed, check that they have an ip
4298 target_mode = params_filled[constants.NIC_MODE]
4299 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4300 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4301 " address" % (instance.name, nic_idx))
4303 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4304 "\n".join(nic_errors), errors.ECODE_INVAL)
4306 # hypervisor list/parameters
4307 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4308 if self.op.hvparams:
4309 for hv_name, hv_dict in self.op.hvparams.items():
4310 if hv_name not in self.new_hvparams:
4311 self.new_hvparams[hv_name] = hv_dict
4313 self.new_hvparams[hv_name].update(hv_dict)
4315 # disk template parameters
4316 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4317 if self.op.diskparams:
4318 for dt_name, dt_params in self.op.diskparams.items():
4319 if dt_name not in self.op.diskparams:
4320 self.new_diskparams[dt_name] = dt_params
4322 self.new_diskparams[dt_name].update(dt_params)
4324 # os hypervisor parameters
4325 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4327 for os_name, hvs in self.op.os_hvp.items():
4328 if os_name not in self.new_os_hvp:
4329 self.new_os_hvp[os_name] = hvs
4331 for hv_name, hv_dict in hvs.items():
4333 # Delete if it exists
4334 self.new_os_hvp[os_name].pop(hv_name, None)
4335 elif hv_name not in self.new_os_hvp[os_name]:
4336 self.new_os_hvp[os_name][hv_name] = hv_dict
4338 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4341 self.new_osp = objects.FillDict(cluster.osparams, {})
4342 if self.op.osparams:
4343 for os_name, osp in self.op.osparams.items():
4344 if os_name not in self.new_osp:
4345 self.new_osp[os_name] = {}
4347 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4350 if not self.new_osp[os_name]:
4351 # we removed all parameters
4352 del self.new_osp[os_name]
4354 # check the parameter validity (remote check)
4355 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4356 os_name, self.new_osp[os_name])
4358 # changes to the hypervisor list
4359 if self.op.enabled_hypervisors is not None:
4360 self.hv_list = self.op.enabled_hypervisors
4361 for hv in self.hv_list:
4362 # if the hypervisor doesn't already exist in the cluster
4363 # hvparams, we initialize it to empty, and then (in both
4364 # cases) we make sure to fill the defaults, as we might not
4365 # have a complete defaults list if the hypervisor wasn't
4367 if hv not in new_hvp:
4369 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4370 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4372 self.hv_list = cluster.enabled_hypervisors
4374 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4375 # either the enabled list has changed, or the parameters have, validate
4376 for hv_name, hv_params in self.new_hvparams.items():
4377 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4378 (self.op.enabled_hypervisors and
4379 hv_name in self.op.enabled_hypervisors)):
4380 # either this is a new hypervisor, or its parameters have changed
4381 hv_class = hypervisor.GetHypervisorClass(hv_name)
4382 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4383 hv_class.CheckParameterSyntax(hv_params)
4384 _CheckHVParams(self, node_list, hv_name, hv_params)
4387 # no need to check any newly-enabled hypervisors, since the
4388 # defaults have already been checked in the above code-block
4389 for os_name, os_hvp in self.new_os_hvp.items():
4390 for hv_name, hv_params in os_hvp.items():
4391 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4392 # we need to fill in the new os_hvp on top of the actual hv_p
4393 cluster_defaults = self.new_hvparams.get(hv_name, {})
4394 new_osp = objects.FillDict(cluster_defaults, hv_params)
4395 hv_class = hypervisor.GetHypervisorClass(hv_name)
4396 hv_class.CheckParameterSyntax(new_osp)
4397 _CheckHVParams(self, node_list, hv_name, new_osp)
4399 if self.op.default_iallocator:
4400 alloc_script = utils.FindFile(self.op.default_iallocator,
4401 constants.IALLOCATOR_SEARCH_PATH,
4403 if alloc_script is None:
4404 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4405 " specified" % self.op.default_iallocator,
4408 def Exec(self, feedback_fn):
4409 """Change the parameters of the cluster.
4412 if self.op.vg_name is not None:
4413 new_volume = self.op.vg_name
4416 if new_volume != self.cfg.GetVGName():
4417 self.cfg.SetVGName(new_volume)
4419 feedback_fn("Cluster LVM configuration already in desired"
4420 " state, not changing")
4421 if self.op.drbd_helper is not None:
4422 new_helper = self.op.drbd_helper
4425 if new_helper != self.cfg.GetDRBDHelper():
4426 self.cfg.SetDRBDHelper(new_helper)
4428 feedback_fn("Cluster DRBD helper already in desired state,"
4430 if self.op.hvparams:
4431 self.cluster.hvparams = self.new_hvparams
4433 self.cluster.os_hvp = self.new_os_hvp
4434 if self.op.enabled_hypervisors is not None:
4435 self.cluster.hvparams = self.new_hvparams
4436 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4437 if self.op.beparams:
4438 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4439 if self.op.nicparams:
4440 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4442 self.cluster.ipolicy = self.new_ipolicy
4443 if self.op.osparams:
4444 self.cluster.osparams = self.new_osp
4445 if self.op.ndparams:
4446 self.cluster.ndparams = self.new_ndparams
4447 if self.op.diskparams:
4448 self.cluster.diskparams = self.new_diskparams
4449 if self.op.hv_state:
4450 self.cluster.hv_state_static = self.new_hv_state
4451 if self.op.disk_state:
4452 self.cluster.disk_state_static = self.new_disk_state
4454 if self.op.candidate_pool_size is not None:
4455 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4456 # we need to update the pool size here, otherwise the save will fail
4457 _AdjustCandidatePool(self, [])
4459 if self.op.maintain_node_health is not None:
4460 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4461 feedback_fn("Note: CONFD was disabled at build time, node health"
4462 " maintenance is not useful (still enabling it)")
4463 self.cluster.maintain_node_health = self.op.maintain_node_health
4465 if self.op.prealloc_wipe_disks is not None:
4466 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4468 if self.op.add_uids is not None:
4469 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4471 if self.op.remove_uids is not None:
4472 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4474 if self.op.uid_pool is not None:
4475 self.cluster.uid_pool = self.op.uid_pool
4477 if self.op.default_iallocator is not None:
4478 self.cluster.default_iallocator = self.op.default_iallocator
4480 if self.op.reserved_lvs is not None:
4481 self.cluster.reserved_lvs = self.op.reserved_lvs
4483 if self.op.use_external_mip_script is not None:
4484 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4486 def helper_os(aname, mods, desc):
4488 lst = getattr(self.cluster, aname)
4489 for key, val in mods:
4490 if key == constants.DDM_ADD:
4492 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4495 elif key == constants.DDM_REMOVE:
4499 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4501 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4503 if self.op.hidden_os:
4504 helper_os("hidden_os", self.op.hidden_os, "hidden")
4506 if self.op.blacklisted_os:
4507 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4509 if self.op.master_netdev:
4510 master_params = self.cfg.GetMasterNetworkParameters()
4511 ems = self.cfg.GetUseExternalMipScript()
4512 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4513 self.cluster.master_netdev)
4514 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4516 result.Raise("Could not disable the master ip")
4517 feedback_fn("Changing master_netdev from %s to %s" %
4518 (master_params.netdev, self.op.master_netdev))
4519 self.cluster.master_netdev = self.op.master_netdev
4521 if self.op.master_netmask:
4522 master_params = self.cfg.GetMasterNetworkParameters()
4523 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4524 result = self.rpc.call_node_change_master_netmask(master_params.name,
4525 master_params.netmask,
4526 self.op.master_netmask,
4528 master_params.netdev)
4530 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4533 self.cluster.master_netmask = self.op.master_netmask
4535 self.cfg.Update(self.cluster, feedback_fn)
4537 if self.op.master_netdev:
4538 master_params = self.cfg.GetMasterNetworkParameters()
4539 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4540 self.op.master_netdev)
4541 ems = self.cfg.GetUseExternalMipScript()
4542 result = self.rpc.call_node_activate_master_ip(master_params.name,
4545 self.LogWarning("Could not re-enable the master ip on"
4546 " the master, please restart manually: %s",
# Upload a single file to a list of nodes via the upload_file RPC, turning
# per-node failures into LogWarning messages instead of errors (best-effort
# distribution). A file missing on the master is silently skipped.
4550 def _UploadHelper(lu, nodes, fname):
4551 """Helper for uploading a file and showing warnings.
4554 if os.path.exists(fname):
4555 result = lu.rpc.call_upload_file(nodes, fname)
4556 for to_node, to_result in result.items():
4557 msg = to_result.fail_msg
4559 msg = ("Copy of file %s to node %s failed: %s" %
4560 (fname, to_node, msg))
# Build the four categories of non-config files that must be kept consistent
# across the cluster: (files_all, files_opt, files_mc, files_vm). The three
# asserts at the end enforce the category invariants at runtime.
4564 def _ComputeAncillaryFiles(cluster, redist):
4565 """Compute files external to Ganeti which need to be consistent.
4567 @type redist: boolean
4568 @param redist: Whether to include files which need to be redistributed
4571 # Compute files for all nodes
4573 pathutils.SSH_KNOWN_HOSTS_FILE,
4574 pathutils.CONFD_HMAC_KEY,
4575 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4576 pathutils.SPICE_CERT_FILE,
4577 pathutils.SPICE_CACERT_FILE,
4578 pathutils.RAPI_USERS_FILE,
4582 # we need to ship at least the RAPI certificate
4583 files_all.add(pathutils.RAPI_CERT_FILE)
4585 files_all.update(pathutils.ALL_CERT_FILES)
4586 files_all.update(ssconf.SimpleStore().GetFileList())
4588 if cluster.modify_etc_hosts:
4589 files_all.add(pathutils.ETC_HOSTS)
4591 if cluster.use_external_mip_script:
4592 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4594 # Files which are optional, these must:
4595 # - be present in one other category as well
4596 # - either exist or not exist on all nodes of that category (mc, vm all)
4598 pathutils.RAPI_USERS_FILE,
4601 # Files which should only be on master candidates
4605 files_mc.add(pathutils.CLUSTER_CONF_FILE)
# The file-storage paths file is only shipped when not redistributing (see
# the assert below) and only when some file-based storage is enabled.
4609 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4610 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4611 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4613 # Files which should only be on VM-capable nodes
# Hypervisor ancillary files: index [0] of GetAncillaryFiles() are the
# required files, index [1] the optional ones.
4616 for hv_name in cluster.enabled_hypervisors
4618 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4622 for hv_name in cluster.enabled_hypervisors
4624 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4626 # Filenames in each category must be unique
4627 all_files_set = files_all | files_mc | files_vm
4628 assert (len(all_files_set) ==
4629 sum(map(len, [files_all, files_mc, files_vm]))), \
4630 "Found file listed in more than one file list"
4632 # Optional files must be present in one other category
4633 assert all_files_set.issuperset(files_opt), \
4634 "Optional file not in a different required list"
4636 # This one file should never ever be re-distributed via RPC
4637 assert not (redist and
4638 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4640 return (files_all, files_opt, files_mc, files_vm)
# Push the ancillary files computed by _ComputeAncillaryFiles to all online
# nodes (plus optional extra nodes not yet in the config), via _UploadHelper.
# The master node itself is always excluded from the target lists.
4643 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4644 """Distribute additional files which are part of the cluster configuration.
4646 ConfigWriter takes care of distributing the config and ssconf files, but
4647 there are more files which should be distributed to all nodes. This function
4648 makes sure those are copied.
4650 @param lu: calling logical unit
4651 @param additional_nodes: list of nodes not in the config to distribute to
4652 @type additional_vm: boolean
4653 @param additional_vm: whether the additional nodes are vm-capable or not
4656 # Gather target nodes
4657 cluster = lu.cfg.GetClusterInfo()
4658 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4660 online_nodes = lu.cfg.GetOnlineNodeList()
4661 online_set = frozenset(online_nodes)
4662 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4664 if additional_nodes is not None:
4665 online_nodes.extend(additional_nodes)
4667 vm_nodes.extend(additional_nodes)
4669 # Never distribute to master node
4670 for nodelist in [online_nodes, vm_nodes]:
4671 if master_info.name in nodelist:
4672 nodelist.remove(master_info.name)
# redist=True: master-candidate-only files are handled elsewhere, hence the
# assert below that files_mc is empty here.
4675 (files_all, _, files_mc, files_vm) = \
4676 _ComputeAncillaryFiles(cluster, True)
4678 # Never re-distribute configuration file from here
4679 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4680 pathutils.CLUSTER_CONF_FILE in files_vm)
4681 assert not files_mc, "Master candidates not handled in this function"
4684 (online_nodes, files_all),
4685 (vm_nodes, files_vm),
4689 for (node_list, files) in filemap:
4691 _UploadHelper(lu, node_list, fname)
# LU forcing a full redistribution of the cluster configuration: cfg.Update()
# pushes config/ssconf, then the ancillary files are re-uploaded.
4694 class LUClusterRedistConf(NoHooksLU):
4695 """Force the redistribution of cluster configuration.
4697 This is a very simple LU.
# Shared locks on all nodes: the config is only read here, never modified.
4702 def ExpandNames(self):
4703 self.needed_locks = {
4704 locking.LEVEL_NODE: locking.ALL_SET,
4705 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4707 self.share_locks = _ShareAll()
4709 def Exec(self, feedback_fn):
4710 """Redistribute the configuration.
# Updating the (unchanged) cluster object triggers the config distribution
# machinery; ancillary files are copied separately afterwards.
4713 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4714 _RedistributeAncillaryFiles(self)
# LU bringing the master IP up on the master node via a single RPC; any
# failure is fatal (result.Raise).
4717 class LUClusterActivateMasterIp(NoHooksLU):
4718 """Activate the master IP on the master node.
4721 def Exec(self, feedback_fn):
4722 """Activate the master IP.
4725 master_params = self.cfg.GetMasterNetworkParameters()
# ems: whether an external master-IP setup script should be used.
4726 ems = self.cfg.GetUseExternalMipScript()
4727 result = self.rpc.call_node_activate_master_ip(master_params.name,
4729 result.Raise("Could not activate the master IP")
# Mirror image of LUClusterActivateMasterIp: takes the master IP down on the
# master node via RPC; any failure is fatal.
4732 class LUClusterDeactivateMasterIp(NoHooksLU):
4733 """Deactivate the master IP on the master node.
4736 def Exec(self, feedback_fn):
4737 """Deactivate the master IP.
4740 master_params = self.cfg.GetMasterNetworkParameters()
4741 ems = self.cfg.GetUseExternalMipScript()
4742 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4744 result.Raise("Could not deactivate the master IP")
# Poll the primary node's mirror status for the instance's disks until they
# are in sync (or once, if oneshot). Returns True when no disk is left
# degraded. Unreachable-node errors are retried a bounded number of times
# before raising RemoteError.
4747 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4748 """Sleep and poll for an instance's disk to sync.
# Nothing to do for diskless instances or an explicitly empty disk list.
4751 if not instance.disks or disks is not None and not disks:
4754 disks = _ExpandCheckDisks(instance, disks)
4757 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
# All status queries go to the primary node.
4759 node = instance.primary_node
4762 lu.cfg.SetDiskID(dev, node)
4764 # TODO: Convert to utils.Retry
4767 degr_retries = 10 # in seconds, as we sleep 1 second each time
4771 cumul_degraded = False
4772 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4773 msg = rstats.fail_msg
4775 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
# Retries exhausted: give up on contacting the primary node.
4778 raise errors.RemoteError("Can't contact node %s for mirror data,"
4779 " aborting." % node)
4782 rstats = rstats.payload
4784 for i, mstat in enumerate(rstats):
4786 lu.LogWarning("Can't compute data for node %s/%s",
4787 node, disks[i].iv_name)
# A degraded disk without a sync percentage is not actively resyncing, so
# waiting longer will not help; track it in cumul_degraded.
4790 cumul_degraded = (cumul_degraded or
4791 (mstat.is_degraded and mstat.sync_percent is None))
4792 if mstat.sync_percent is not None:
4794 if mstat.estimated_time is not None:
4795 rem_time = ("%s remaining (estimated)" %
4796 utils.FormatSeconds(mstat.estimated_time))
4797 max_time = mstat.estimated_time
4799 rem_time = "no time estimate"
4800 lu.LogInfo("- device %s: %5.2f%% done, %s",
4801 disks[i].iv_name, mstat.sync_percent, rem_time)
4803 # if we're done but degraded, let's do a few small retries, to
4804 # make sure we see a stable and not transient situation; therefore
4805 # we force restart of the loop
4806 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4807 logging.info("Degraded disks found, %d retries left", degr_retries)
# Sleep proportionally to the estimated remaining time, capped at 60s.
4815 time.sleep(min(60, max_time))
4818 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4820 return not cumul_degraded
# Thin wrapper around the blockdev_find RPC that first annotates the disk
# with the instance's disk parameters (required by the node-side code).
4823 def _BlockdevFind(lu, node, dev, instance):
4824 """Wrapper around call_blockdev_find to annotate diskparams.
4826 @param lu: A reference to the lu object
4827 @param node: The node to call out
4828 @param dev: The device to find
4829 @param instance: The instance object the device belongs to
4830 @returns The result of the rpc call
4833 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4834 return lu.rpc.call_blockdev_find(node, disk)
# Public entry point for the disk-consistency check: annotates the disk with
# its disk parameters before delegating to _CheckDiskConsistencyInner, which
# requires an already-annotated device.
4837 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4838 """Wrapper around L{_CheckDiskConsistencyInner}.
4841 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4842 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
# Recursive consistency check of a disk (and its children) on one node.
# Returns a boolean; missing devices only produce warnings and make the
# result False rather than raising.
4846 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4848 """Check that mirrors are not degraded.
4850 @attention: The device has to be annotated already.
4852 The ldisk parameter, if True, will change the test from the
4853 is_degraded attribute (which represents overall non-ok status for
4854 the device(s)) to the ldisk (representing the local storage status).
4857 lu.cfg.SetDiskID(dev, node)
# Only query devices that are assembled on this node (primary, or a
# secondary whose device type assembles there).
4861 if on_primary or dev.AssembleOnSecondary():
4862 rstats = lu.rpc.call_blockdev_find(node, dev)
4863 msg = rstats.fail_msg
4865 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4867 elif not rstats.payload:
4868 lu.LogWarning("Can't find disk on node %s", node)
4872 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4874 result = result and not rstats.payload.is_degraded
# Recurse into child devices (e.g. the components of a DRBD/mirror device).
4877 for child in dev.children:
4878 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
# LU running an out-of-band (OOB) command on a set of nodes. The OOB program
# itself runs on the master node (call_run_oob) and targets each node in
# turn; results are returned per node in query-result format.
4884 class LUOobCommand(NoHooksLU):
4885 """Logical unit for OOB handling.
# Commands that must not be run against the master node implicitly.
4889 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4891 def ExpandNames(self):
4892 """Gather locks we need.
4895 if self.op.node_names:
4896 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4897 lock_names = self.op.node_names
4899 lock_names = locking.ALL_SET
4901 self.needed_locks = {
4902 locking.LEVEL_NODE: lock_names,
4905 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4907 if not self.op.node_names:
4908 # Acquire node allocation lock only if all nodes are affected
4909 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4911 def CheckPrereq(self):
4912 """Check prerequisites.
4915 - the node exists in the configuration
4918 Any errors are signaled by raising errors.OpPrereqError.
4922 self.master_node = self.cfg.GetMasterNode()
4924 assert self.op.power_delay >= 0.0
# Explicitly naming the master for a power-off/cycle command is refused;
# the error message tells the user how to force it when the master has an
# OOB handler.
4926 if self.op.node_names:
4927 if (self.op.command in self._SKIP_MASTER and
4928 self.master_node in self.op.node_names):
4929 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4930 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4932 if master_oob_handler:
4933 additional_text = ("run '%s %s %s' if you want to operate on the"
4934 " master regardless") % (master_oob_handler,
4938 additional_text = "it does not support out-of-band operations"
4940 raise errors.OpPrereqError(("Operating on the master node %s is not"
4941 " allowed for %s; %s") %
4942 (self.master_node, self.op.command,
4943 additional_text), errors.ECODE_INVAL)
# No explicit node list: operate on all nodes, silently dropping the
# master for power-off/cycle.
4945 self.op.node_names = self.cfg.GetNodeList()
4946 if self.op.command in self._SKIP_MASTER:
4947 self.op.node_names.remove(self.master_node)
4949 if self.op.command in self._SKIP_MASTER:
4950 assert self.master_node not in self.op.node_names
4952 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4954 raise errors.OpPrereqError("Node %s not found" % node_name,
4957 self.nodes.append(node)
# Powering off a node that is still online requires --ignore-status.
4959 if (not self.op.ignore_status and
4960 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4961 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4962 " not marked offline") % node_name,
4965 def Exec(self, feedback_fn):
4966 """Execute OOB and return result if we expect any.
4969 master_node = self.master_node
# Per-node result rows; each row starts with the node name and gains a
# status/payload pair depending on the outcome below.
4972 for idx, node in enumerate(utils.NiceSort(self.nodes,
4973 key=lambda node: node.name)):
4974 node_entry = [(constants.RS_NORMAL, node.name)]
4975 ret.append(node_entry)
4977 oob_program = _SupportsOob(self.cfg, node)
# Node has no OOB support configured: mark as unavailable.
4980 node_entry.append((constants.RS_UNAVAIL, None))
4983 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4984 self.op.command, oob_program, node.name)
# The OOB program is always executed on the master node, targeting the
# current node.
4985 result = self.rpc.call_run_oob(master_node, oob_program,
4986 self.op.command, node.name,
4990 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4991 node.name, result.fail_msg)
4992 node_entry.append((constants.RS_NODATA, None))
4995 self._CheckPayload(result)
4996 except errors.OpExecError, err:
4997 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4999 node_entry.append((constants.RS_NODATA, None))
5001 if self.op.command == constants.OOB_HEALTH:
5002 # For health we should log important events
5003 for item, status in result.payload:
5004 if status in [constants.OOB_STATUS_WARNING,
5005 constants.OOB_STATUS_CRITICAL]:
5006 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5007 item, node.name, status)
# Power commands update the recorded power state of the node object.
5009 if self.op.command == constants.OOB_POWER_ON:
5011 elif self.op.command == constants.OOB_POWER_OFF:
5012 node.powered = False
5013 elif self.op.command == constants.OOB_POWER_STATUS:
5014 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5015 if powered != node.powered:
5016 logging.warning(("Recorded power state (%s) of node '%s' does not"
5017 " match actual power state (%s)"), node.powered,
5020 # For configuration changing commands we should update the node
5021 if self.op.command in (constants.OOB_POWER_ON,
5022 constants.OOB_POWER_OFF):
5023 self.cfg.Update(node, feedback_fn)
5025 node_entry.append((constants.RS_NORMAL, result.payload))
# Stagger power-on between nodes to avoid overloading the power supply.
5027 if (self.op.command == constants.OOB_POWER_ON and
5028 idx < len(self.nodes) - 1):
5029 time.sleep(self.op.power_delay)
# Validate the shape of the OOB program's payload per command; collects all
# problems and raises a single OpExecError listing them.
5033 def _CheckPayload(self, result):
5034 """Checks if the payload is valid.
5036 @param result: RPC result
5037 @raises errors.OpExecError: If payload is not valid
5041 if self.op.command == constants.OOB_HEALTH:
5042 if not isinstance(result.payload, list):
5043 errs.append("command 'health' is expected to return a list but got %s" %
5044 type(result.payload))
5046 for item, status in result.payload:
5047 if status not in constants.OOB_STATUSES:
5048 errs.append("health item '%s' has invalid status '%s'" %
5051 if self.op.command == constants.OOB_POWER_STATUS:
5052 if not isinstance(result.payload, dict):
5053 errs.append("power-status is expected to return a dict but got %s" %
5054 type(result.payload))
5056 if self.op.command in [
5057 constants.OOB_POWER_ON,
5058 constants.OOB_POWER_OFF,
5059 constants.OOB_POWER_CYCLE,
5061 if result.payload is not None:
5062 errs.append("%s is expected to not return payload but got '%s'" %
5063 (self.op.command, result.payload))
5066 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5067 utils.CommaJoin(errs))
# Query backend for OS objects: diagnoses available OSes on all online,
# VM-capable nodes via RPC and reduces the per-node results into one
# query.OsInfo per OS name.
5070 class _OsQuery(_QueryBase):
5071 FIELDS = query.OS_FIELDS
5073 def ExpandNames(self, lu):
5074 # Lock all nodes in shared mode
5075 # Temporary removal of locks, should be reverted later
5076 # TODO: reintroduce locks when they are lighter-weight
5077 lu.needed_locks = {}
5078 #self.share_locks[locking.LEVEL_NODE] = 1
5079 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5081 # The following variables interact with _QueryBase._GetNames
5083 self.wanted = self.names
5085 self.wanted = locking.ALL_SET
5087 self.do_locking = self.use_locking
# No per-level lock declarations needed since no locks are taken.
5089 def DeclareLocks(self, lu, level):
5093 def _DiagnoseByOS(rlist):
5094 """Remaps a per-node return list into an a per-os per-node dictionary
5096 @param rlist: a map with node names as keys and OS objects as values
5099 @return: a dictionary with osnames as keys and as value another
5100 map, with nodes as keys and tuples of (path, status, diagnose,
5101 variants, parameters, api_versions) as values, eg::
5103 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5104 (/srv/..., False, "invalid api")],
5105 "node2": [(/srv/..., True, "", [], [])]}
5110 # we build here the list of nodes that didn't fail the RPC (at RPC
5111 # level), so that nodes with a non-responding node daemon don't
5112 # make all OSes invalid
5113 good_nodes = [node_name for node_name in rlist
5114 if not rlist[node_name].fail_msg]
5115 for node_name, nr in rlist.items():
5116 if nr.fail_msg or not nr.payload:
5118 for (name, path, status, diagnose, variants,
5119 params, api_versions) in nr.payload:
5120 if name not in all_os:
5121 # build a list of nodes for this os containing empty lists
5122 # for each node in node_list
5124 for nname in good_nodes:
5125 all_os[name][nname] = []
5126 # convert params from [name, help] to (name, help)
5127 params = [tuple(v) for v in params]
5128 all_os[name][node_name].append((path, status, diagnose,
5129 variants, params, api_versions))
5132 def _GetQueryData(self, lu):
5133 """Computes the list of nodes and their attributes.
5136 # Locking is not used
5137 assert not (compat.any(lu.glm.is_owned(level)
5138 for level in locking.LEVELS
5139 if level != locking.LEVEL_CLUSTER) or
5140 self.do_locking or self.use_locking)
# Only online, VM-capable nodes are asked to diagnose their OSes.
5142 valid_nodes = [node.name
5143 for node in lu.cfg.GetAllNodesInfo().values()
5144 if not node.offline and node.vm_capable]
5145 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5146 cluster = lu.cfg.GetClusterInfo()
5150 for (os_name, os_data) in pol.items():
5151 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5152 hidden=(os_name in cluster.hidden_os),
5153 blacklisted=(os_name in cluster.blacklisted_os))
5157 api_versions = set()
# An OS is valid only if every node reports a valid first entry; variants,
# parameters and API versions are intersected across nodes so only values
# consistent everywhere survive.
5159 for idx, osl in enumerate(os_data.values()):
5160 info.valid = bool(info.valid and osl and osl[0][1])
5164 (node_variants, node_params, node_api) = osl[0][3:6]
5167 variants.update(node_variants)
5168 parameters.update(node_params)
5169 api_versions.update(node_api)
5171 # Filter out inconsistent values
5172 variants.intersection_update(node_variants)
5173 parameters.intersection_update(node_params)
5174 api_versions.intersection_update(node_api)
5176 info.variants = list(variants)
5177 info.parameters = list(parameters)
5178 info.api_versions = list(api_versions)
5180 data[os_name] = info
5182 # Prepare data in requested order
5183 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
# LU exposing OS diagnosis as an old-style query, delegating the work to
# _OsQuery with a filter built from the requested fields/names.
5187 class LUOsDiagnose(NoHooksLU):
5188 """Logical unit for OS diagnose/query.
5194 def _BuildFilter(fields, names):
5195 """Builds a filter for querying OSes.
5198 name_filter = qlang.MakeSimpleFilter("name", names)
5200 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5201 # respective field is not requested
5202 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5203 for fname in ["hidden", "blacklisted"]
5204 if fname not in fields]
5205 if "valid" not in fields:
5206 status_filter.append([qlang.OP_TRUE, "valid"])
5209 status_filter.insert(0, qlang.OP_AND)
5211 status_filter = None
# Combine the name filter and status filter, using whichever is present.
5213 if name_filter and status_filter:
5214 return [qlang.OP_AND, name_filter, status_filter]
5218 return status_filter
5220 def CheckArguments(self):
5221 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5222 self.op.output_fields, False)
5224 def ExpandNames(self):
5225 self.oq.ExpandNames(self)
5227 def Exec(self, feedback_fn):
5228 return self.oq.OldStyleQuery(self)
class _ExtStorageQuery(_QueryBase):
  FIELDS = query.EXTSTORAGE_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByProvider(rlist):
    """Remaps a per-node return list into a per-provider per-node dictionary.

    @param rlist: a map with node names as keys and ExtStorage objects as values

    @rtype: dict
    @return: a dictionary with extstorage providers as keys and as
        value another map, with nodes as keys and tuples of
        (path, status, diagnose, parameters) as values, eg::

          {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
                         "node2": [(/srv/..., False, "missing file")]
                         "node3": [(/srv/..., True, "", [])]
          }

    """
    all_es = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, params) in nr.payload:
        if name not in all_es:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_es[name] = {}
          for nname in good_nodes:
            all_es[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_es[name][node_name].append((path, status, diagnose, params))

    return all_es

  def _GetQueryData(self, lu):
    """Computes the list of ExtStorage providers and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))

    data = {}

    nodegroup_list = lu.cfg.GetNodeGroupList()

    for (es_name, es_data) in pol.items():
      # For every provider compute the nodegroup validity.
      # To do this we need to check the validity of each node in es_data
      # and then construct the corresponding nodegroup dict:
      #      { nodegroup1: status
      #        nodegroup2: status
      #      }
      ndgrp_data = {}
      for nodegroup in nodegroup_list:
        ndgrp = lu.cfg.GetNodeGroup(nodegroup)

        nodegroup_nodes = ndgrp.members
        nodegroup_name = ndgrp.name
        node_statuses = []

        for node in nodegroup_nodes:
          if node in valid_nodes:
            if es_data[node] != []:
              node_status = es_data[node][0][1]
              node_statuses.append(node_status)
            else:
              node_statuses.append(False)

        if False in node_statuses:
          ndgrp_data[nodegroup_name] = False
        else:
          ndgrp_data[nodegroup_name] = True

      # Compute the provider's parameters
      parameters = set()
      for idx, esl in enumerate(es_data.values()):
        valid = bool(esl and esl[0][1])
        if not valid:
          break

        node_params = esl[0][3]
        if idx == 0:
          # First entry
          parameters.update(node_params)
        else:
          # Filter out inconsistent values
          parameters.intersection_update(node_params)

      params = list(parameters)

      # Now fill all the info for this provider
      info = query.ExtStorageInfo(name=es_name, node_status=es_data,
                                  nodegroup_status=ndgrp_data,
                                  parameters=params)

      data[es_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUExtStorageDiagnose(NoHooksLU):
  """Logical unit for ExtStorage diagnose/query.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
                               self.op.output_fields, False)

  def ExpandNames(self):
    self.eq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.eq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()], es_flags)
      live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      # Note: was "all_info.iteritems()"; switched to .items() for
      # consistency with the rest of this module (same behaviour)
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.items())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()

    if self.op.nodes:
      self.needed_locks = {
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
        }
    else:
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(str(val))

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.needed_locks[locking.LEVEL_NETWORK] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

      elif level == locking.LEVEL_NETWORK:
        lu.needed_locks[locking.LEVEL_NETWORK] = \
          frozenset(net_uuid
                    for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
                    for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      gmi = ganeti.masterd.instance
      disk_usage = dict((inst.name,
                         gmi.ComputeDiskSize(inst.disk_template,
                                             [{constants.IDISK_SIZE: disk.size}
                                              for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    if query.IQ_NETWORKS in self.requested_data:
      net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
                                    for i in instance_list))
      networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
    else:
      networks = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups, networks)
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group, ndparams={})

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
                            "node", "cluster or group")

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    # it a property on the base class.
    rpcrunner = rpc.DnsOnlyRunner()
    result = rpcrunner.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)

    vg_name = cfg.GetVGName()
    if vg_name is not None:
      vparams = {constants.NV_PVLIST: [vg_name]}
      excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
      cname = self.cfg.GetClusterName()
      result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
      (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
      if errmsgs:
        raise errors.OpPrereqError("Checks on node PVs failed: %s" %
                                   "; ".join(errmsgs), errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
6267 class LUNodeSetParams(LogicalUnit):
6268 """Modifies the parameters of a node.
6270 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6271 to the node role (as _ROLE_*)
6272 @cvar _R2F: a dictionary from node role to tuples of flags
6273 @cvar _FLAGS: a list of attribute names corresponding to the flags
6276 HPATH = "node-modify"
6277 HTYPE = constants.HTYPE_NODE
6279 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6281 (True, False, False): _ROLE_CANDIDATE,
6282 (False, True, False): _ROLE_DRAINED,
6283 (False, False, True): _ROLE_OFFLINE,
6284 (False, False, False): _ROLE_REGULAR,
6286 _R2F = dict((v, k) for k, v in _F2R.items())
6287 _FLAGS = ["master_candidate", "drained", "offline"]
6289 def CheckArguments(self):
6290 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6291 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6292 self.op.master_capable, self.op.vm_capable,
6293 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6295 if all_mods.count(None) == len(all_mods):
6296 raise errors.OpPrereqError("Please pass at least one modification",
6298 if all_mods.count(True) > 1:
6299 raise errors.OpPrereqError("Can't set the node into more than one"
6300 " state at the same time",
6303 # Boolean value that tells us whether we might be demoting from MC
6304 self.might_demote = (self.op.master_candidate is False or
6305 self.op.offline is True or
6306 self.op.drained is True or
6307 self.op.master_capable is False)
6309 if self.op.secondary_ip:
6310 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6311 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6312 " address" % self.op.secondary_ip,
6315 self.lock_all = self.op.auto_promote and self.might_demote
6316 self.lock_instances = self.op.secondary_ip is not None
6318 def _InstanceFilter(self, instance):
6319 """Filter for getting affected instances.
6322 return (instance.disk_template in constants.DTS_INT_MIRROR and
6323 self.op.node_name in instance.all_nodes)
  def ExpandNames(self):
    """Declare the locks this LU needs.

    """
    # NOTE(review): the if/else selecting between the two needed_locks
    # assignments (lock_all vs. single node) and the dict closers appear
    # truncated in this chunk.
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        # Block allocations when all nodes are locked
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      self.needed_locks = {
        locking.LEVEL_NODE: self.op.node_name,

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get all locks except nodes in shared mode; they are not used for anything
    # but read-only access
    self.share_locks = _ShareAll()
    self.share_locks[locking.LEVEL_NODE] = 0
    self.share_locks[locking.LEVEL_NODE_RES] = 0
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 0

    if self.lock_instances:
      # Lock every instance matched by _InstanceFilter so a secondary-IP
      # change can be validated against a stable instance set
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    # NOTE(review): the "env = {" opener, dict closer and "return env"
    # appear truncated in this chunk.
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node and on the node being modified.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    # NOTE(review): the trailing "return (nl, nl)" appears truncated in
    # this chunk.
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    NOTE(review): multiple interior lines of this method (guard conditions,
    else-branches, errors.ECODE_* arguments) appear truncated in this
    chunk; compare against the complete file before relying on it.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks: the set of affected instances must not have
      # changed between ExpandNames and now
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,

    if self.op.vm_capable is False:
      # a node hosting instances cannot lose its vm_capable flag
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      (mc_remaining, mc_should, _) = \
        self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)

    # Record the current flag triple and translate it into a role
    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) is False and getattr(node, attr) is False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained is False or self.op.offline is False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable is False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute the new role from the (at most one) flag set to True
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"

    # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from single-homed to multi-homed"
                          " cluster; all nodes will require a secondary IP"
          raise errors.OpPrereqError("Changing the secondary ip on a"
                                     " single-homed cluster requires the"
                                     " --force option to be passed, and the"
                                     " target node to be the master",
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from multi-homed to single-homed"
                          " cluster; secondary IP addresses will have to be"
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
                                     " same as the primary IP on a multi-homed"
                                     " cluster, unless the --force option is"
                                     " passed, and the target node is the"
                                     " master", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      # Merge and type-check the new node parameters
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
                            "node", "cluster or group")
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)
  def Exec(self, feedback_fn):
    """Apply the requested node modifications.

    NOTE(review): interior lines (e.g. the "node = self.node" / result-list
    initialization, several guard conditions and the final return) appear
    truncated in this chunk; verify against the complete file.

    """
    old_role = self.old_role
    new_role = self.new_role

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    # Apply the simple boolean capability flags directly
    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
          self.LogWarning("Node failed to demote itself: %s", msg)

      # Translate the new role back into the flag triple and record changes
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed in either direction
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """

  def CheckArguments(self):
    """Refuse to powercycle the master unless --force is given.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Schedule the powercycle on the target node via RPC.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """

  def ExpandNames(self):
    # Read-only query; no locks needed
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    NOTE(review): the "os_hvp = {}" initializer, the "result = {" opener,
    the dict closer and the final return appear truncated in this chunk.

    """
    cluster = self.cfg.GetClusterInfo()

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  Thin wrapper delegating to _ClusterQuery for the actual work.

  """

  def CheckArguments(self):
    # _ClusterQuery(qfilter, fields, use_locking)
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = self.cq.OldStyleQuery(self)

    # There is exactly one cluster, hence exactly one result row
    assert len(result) == 1
    # NOTE(review): the trailing "return result[0]" appears truncated in
    # this chunk.
class _ClusterQuery(_QueryBase):
  # Query fields supported for the cluster object
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)

  def ExpandNames(self, lu):
    """Declare (no) locks for the query.

    NOTE(review): several else-branches and the guard raising on locking
    appear truncated in this chunk.

    """
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

      raise errors.OpPrereqError("Can not use locking for cluster queries",

  def DeclareLocks(self, lu, level):
    # Nothing to declare; no locks are used

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      master_name = lu.cfg.GetMasterNode()

      result = lu.rpc.call_get_watcher_pause(master_name)
      result.Raise("Can't retrieve watcher pause from master node '%s'" %

      watcher_pause = result.payload
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks are computed later from the instance's node list
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    NOTE(review): the "if not disks_ok:" guard before the first raise and
    the final "return disks_info" appear truncated in this chunk.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync:
      if not _WaitForSync(self, self.instance):
        raise errors.OpExecError("Some disks of the instance are degraded!")
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  NOTE(review): several interior lines (the accumulator initializations,
  the "if ignore_size:"/"if msg:" guards and the second argument lines of
  the RPC calls) appear truncated in this chunk; compare against the
  complete file.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
      msg = result.fail_msg
        is_offline_secondary = (node in instance.secondary_nodes and
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=False, pass=1): %s",
                      inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:

        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
      msg = result.fail_msg
        lu.LogWarning("Could not prepare block device %s on node %s"
                      " (is_primary=True, pass=2): %s",
                      inst_disk.iv_name, node, msg)

        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles all disks and, on failure, shuts them back down before raising.

  NOTE(review): the "if not disks_ok:" guard before the shutdown/raise
  appears truncated in this chunk.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      # only hint at --force when the caller could actually have passed it
      hint=("If the message above refers to a secondary node,"
            " you can retry the operation using '--force'"))
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks are computed later from the instance's node list
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    NOTE(review): the if/else (presumably on self.op.force) selecting
    between the unchecked and the safe shutdown appears truncated in this
    chunk.

    """
    instance = self.instance
      _ShutdownInstanceDisks(self, instance)
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  # Refuse to shut down disks under a running instance
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  NOTE(review): the "if disks is None:" guard before the first return and
  the tail of the raise appear truncated in this chunk.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
    return instance.disks
    # every requested disk must belong to this instance
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  propagated (the overall result becomes failure).

  NOTE(review): the result accumulator, the outer "for disk in disks:"
  loop header, the "if msg:" guard and the final return appear truncated
  in this chunk.

  """
  disks = _ExpandCheckDisks(instance, disks)

    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # a failure only counts when it is on the primary (unless ignored)
        # or on a non-offline secondary
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  NOTE(review): the errors.ECODE_* argument of the last raise and the
  final "return free_mem" appear truncated in this chunk.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  # payload structure: (bootid, (vg_info, ), (hv_info, ))
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  # Delegate the per-VG check for each requested volume group
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  NOTE(review): the errors.ECODE_* argument of the last raise appears
  truncated in this chunk.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    # payload structure: (bootid, (vg_info, ), hv_info)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  NOTE(review): the errors.ECODE_* argument of the last raise appears
  truncated in this chunk.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    # payload structure: (bootid, vg_info, (hv_info, ))
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  NOTE(review): several interior lines of this class (the "env = {" /
  "return env" pair, else-branches around the offline-primary handling and
  the "if msg:" guard in Exec) appear truncated in this chunk; compare
  against the complete file.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      # only the primary node's resources are needed for starting
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
      "FORCE": self.op.force,

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.LogWarning("Overridden parameters are ignored")

      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      # persist the desired "up" state in the configuration
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.LogInfo("Primary node offline, marked instance as started")

      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                     self.op.startup_paused)
      msg = result.fail_msg
        # roll back the disk assembly on failure
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  NOTE(review): several interior lines of this class (the "env = {" /
  "return env" pair, else-branches in Exec and some continuation argument
  lines) appear truncated in this chunk; compare against the complete
  file.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    # Soft/hard reboots are delegated to the node daemon; anything else
    # falls back to a full stop-and-start below
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
        self.LogInfo("Instance %s was already stopped, starting now",
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
        # roll back the disk assembly on failure
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
# Logical unit implementing instance shutdown (with a configurable
# timeout, and optional handling of offline primary nodes).
# NOTE(review): embedded line numbers are discontinuous in this class;
# some guards/returns are missing from the excerpt.
7415 class LUInstanceShutdown(LogicalUnit):
7416   """Shutdown an instance.
7419   HPATH = "instance-stop"
7420   HTYPE = constants.HTYPE_INSTANCE
7423   def ExpandNames(self):
7424     self._ExpandAndLockInstance()
7426   def BuildHooksEnv(self):
7429     This runs on master, primary and secondary nodes of the instance.
# Generic per-instance hook environment plus the shutdown timeout.
7432     env = _BuildInstanceHookEnvByObject(self, self.instance)
7433     env["TIMEOUT"] = self.op.timeout
7436   def BuildHooksNodes(self):
7437     """Build hooks nodes.
7440     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7443   def CheckPrereq(self):
7444     """Check prerequisites.
7446     This checks that the instance is in the cluster.
7449     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7450     assert self.instance is not None, \
7451       "Cannot retrieve locked instance %s" % self.op.instance_name
# The state check is skipped under --force (the warning branch's guard is
# among the missing lines).
7453     if not self.op.force:
7454       _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7456       self.LogWarning("Ignoring offline instance check")
# Remember whether the primary node is offline; Exec uses this to decide
# whether an actual shutdown RPC can be attempted.
7458     self.primary_offline = \
7459       self.cfg.GetNodeInfo(self.instance.primary_node).offline
7461     if self.primary_offline and self.op.ignore_offline_nodes:
7462       self.LogWarning("Ignoring offline primary node")
7464       _CheckNodeOnline(self, self.instance.primary_node)
7466   def Exec(self, feedback_fn):
7467     """Shutdown the instance.
7470     instance = self.instance
7471     node_current = instance.primary_node
7472     timeout = self.op.timeout
7474     # If the instance is offline we shouldn't mark it as down, as that
7475     # resets the offline flag.
7476     if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7477       self.cfg.MarkInstanceDown(instance.name)
# Offline primary: only the config was updated; no RPC is possible.
7479     if self.primary_offline:
7480       assert self.op.ignore_offline_nodes
7481       self.LogInfo("Primary node offline, marked instance as stopped")
7483       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7484       msg = result.fail_msg
# Shutdown failure is only warned about (guard line not visible here);
# the disks are torn down regardless.
7486         self.LogWarning("Could not shutdown instance: %s", msg)
7488       _ShutdownInstanceDisks(self, instance)
# Logical unit implementing OS reinstallation on an existing (stopped)
# instance, optionally switching to a different OS type and OS params.
# NOTE(review): embedded line numbers are discontinuous in this class;
# some else-branches and returns are missing from the excerpt.
7491 class LUInstanceReinstall(LogicalUnit):
7492   """Reinstall an instance.
7495   HPATH = "instance-reinstall"
7496   HTYPE = constants.HTYPE_INSTANCE
7499   def ExpandNames(self):
7500     self._ExpandAndLockInstance()
7502   def BuildHooksEnv(self):
7505     This runs on master, primary and secondary nodes of the instance.
7508     return _BuildInstanceHookEnvByObject(self, self.instance)
7510   def BuildHooksNodes(self):
7511     """Build hooks nodes.
7514     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7517   def CheckPrereq(self):
7518     """Check prerequisites.
7520     This checks that the instance is in the cluster and is not running.
7523     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7524     assert instance is not None, \
7525       "Cannot retrieve locked instance %s" % self.op.instance_name
7526     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7527                      " offline, cannot reinstall")
# Diskless instances have nothing to reinstall onto.
7529     if instance.disk_template == constants.DT_DISKLESS:
7530       raise errors.OpPrereqError("Instance '%s' has no disks" %
7531                                  self.op.instance_name,
# The instance must be administratively down before reinstalling.
7533     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
# If a new OS type was requested, verify the primary node provides it;
# otherwise keep the instance's current OS (else-branch line 7541).
7535     if self.op.os_type is not None:
7537       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7538       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7539       instance_os = self.op.os_type
7541       instance_os = instance.os
7543     nodelist = list(instance.all_nodes)
# Merge and validate any OS parameter overrides against all nodes.
7545     if self.op.osparams:
7546       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7547       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7548       self.os_inst = i_osdict # the new dict (without defaults)
7552     self.instance = instance
7554   def Exec(self, feedback_fn):
7555     """Reinstall the instance.
7558     inst = self.instance
# Persist the OS change (if any) before running the create scripts.
7560     if self.op.os_type is not None:
7561       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7562       inst.os = self.op.os_type
7563       # Write to configuration
7564       self.cfg.Update(inst, feedback_fn)
# Disks must be assembled for the OS create scripts to run; they are
# shut down again at the end (cleanup structure partly not visible).
7566     _StartInstanceDisks(self, inst, None)
7568       feedback_fn("Running the instance OS create scripts...")
7569       # FIXME: pass debug option from opcode to backend
7570       result = self.rpc.call_instance_os_add(inst.primary_node,
7571                                              (inst, self.os_inst), True,
7572                                              self.op.debug_level)
7573       result.Raise("Could not install OS for instance %s on node %s" %
7574                    (inst.name, inst.primary_node))
7576       _ShutdownInstanceDisks(self, inst)
# Logical unit recreating an instance's (missing) disks, optionally on a
# new set of nodes chosen explicitly or via an iallocator.
# NOTE(review): embedded line numbers are discontinuous in this class;
# several guards, else-branches and returns are missing from the excerpt.
7579 class LUInstanceRecreateDisks(LogicalUnit):
7580   """Recreate an instance's missing disks.
7583   HPATH = "instance-recreate-disks"
7584   HTYPE = constants.HTYPE_INSTANCE
# Per-disk parameters a caller is allowed to change while recreating.
7587   _MODIFYABLE = compat.UniqueFrozenset([
7588     constants.IDISK_SIZE,
7589     constants.IDISK_MODE,
7592   # New or changed disk parameters may have different semantics
7593   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7594     constants.IDISK_ADOPT,
7596     # TODO: Implement support changing VG while recreating
7598     constants.IDISK_METAVG,
7599     constants.IDISK_PROVIDER,
7602   def _RunAllocator(self):
7603     """Run the allocator based on input opcode.
7606     be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7609     # The allocator should actually run in "relocate" mode, but current
7610     # allocators don't support relocating all the nodes of an instance at
7611     # the same time. As a workaround we use "allocate" mode, but this is
7612     # suboptimal for two reasons:
7613     # - The instance name passed to the allocator is present in the list of
7614     #   existing instances, so there could be a conflict within the
7615     #   internal structures of the allocator. This doesn't happen with the
7616     #   current allocators, but it's a liability.
7617     # - The allocator counts the resources used by the instance twice: once
7618     #   because the instance exists already, and once because it tries to
7619     #   allocate a new instance.
7620     # The allocator could choose some of the nodes on which the instance is
7621     # running, but that's not a problem. If the instance nodes are broken,
7622     # they should be already be marked as drained or offline, and hence
7623     # skipped by the allocator. If instance disks have been lost for other
7624     # reasons, then recreating the disks on the same nodes should be fine.
7625     disk_template = self.instance.disk_template
7626     spindle_use = be_full[constants.BE_SPINDLE_USE]
# Build an allocation request mirroring the existing instance's specs.
7627     req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7628                                         disk_template=disk_template,
7629                                         tags=list(self.instance.GetTags()),
7630                                         os=self.instance.os,
7632                                         vcpus=be_full[constants.BE_VCPUS],
7633                                         memory=be_full[constants.BE_MAXMEM],
7634                                         spindle_use=spindle_use,
7635                                         disks=[{constants.IDISK_SIZE: d.size,
7636                                                 constants.IDISK_MODE: d.mode}
7637                                                for d in self.instance.disks],
7638                                         hypervisor=self.instance.hypervisor,
7639                                         node_whitelist=None)
7640     ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7642     ial.Run(self.op.iallocator)
7644     assert req.RequiredNodes() == len(self.instance.all_nodes)
# Failure guard for the allocator run is among the missing lines.
7647       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7648                                  " %s" % (self.op.iallocator, ial.info),
# The allocator's choice is stored back into the opcode's node list so
# the rest of the LU treats it like explicitly-specified nodes.
7651     self.op.nodes = ial.result
7652     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7653                  self.op.instance_name, self.op.iallocator,
7654                  utils.CommaJoin(ial.result))
7656   def CheckArguments(self):
7657     if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7658       # Normalize and convert deprecated list of disk indices
7659       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7661     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7663       raise errors.OpPrereqError("Some disks have been specified more than"
7664                                  " once: %s" % utils.CommaJoin(duplicates),
7667     # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7668     # when neither iallocator nor nodes are specified
7669     if self.op.iallocator or self.op.nodes:
7670       _CheckIAllocatorOrNode(self, "iallocator", "nodes")
# Validate each per-disk override dict: right types, only modifiable keys.
7672     for (idx, params) in self.op.disks:
7673       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7674       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7676         raise errors.OpPrereqError("Parameters for disk %s try to change"
7677                                    " unmodifyable parameter(s): %s" %
7678                                    (idx, utils.CommaJoin(unsupported)),
7681   def ExpandNames(self):
7682     self._ExpandAndLockInstance()
7683     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
# Explicit target nodes are expanded and locked up front; otherwise the
# node lock list starts empty and is filled in DeclareLocks.
7686       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7687       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7689       self.needed_locks[locking.LEVEL_NODE] = []
7690       if self.op.iallocator:
7691         # iallocator will select a new node in the same group
7692         self.needed_locks[locking.LEVEL_NODEGROUP] = []
7693         self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7695     self.needed_locks[locking.LEVEL_NODE_RES] = []
7697   def DeclareLocks(self, level):
7698     if level == locking.LEVEL_NODEGROUP:
7699       assert self.op.iallocator is not None
7700       assert not self.op.nodes
7701       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7702       self.share_locks[locking.LEVEL_NODEGROUP] = 1
7703       # Lock the primary group used by the instance optimistically; this
7704       # requires going via the node before it's locked, requiring
7705       # verification later on
7706       self.needed_locks[locking.LEVEL_NODEGROUP] = \
7707         self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7709     elif level == locking.LEVEL_NODE:
7710       # If an allocator is used, then we lock all the nodes in the current
7711       # instance group, as we don't know yet which ones will be selected;
7712       # if we replace the nodes without using an allocator, locks are
7713       # already declared in ExpandNames; otherwise, we need to lock all the
7714       # instance nodes for disk re-creation
7715       if self.op.iallocator:
7716         assert not self.op.nodes
7717         assert not self.needed_locks[locking.LEVEL_NODE]
7718         assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7720         # Lock member nodes of the group of the primary node
7721         for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7722           self.needed_locks[locking.LEVEL_NODE].extend(
7723             self.cfg.GetNodeGroup(group_uuid).members)
7725         assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7726       elif not self.op.nodes:
7727         self._LockInstancesNodes(primary_only=False)
7728     elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror whatever node locks ended up being declared.
7730       self.needed_locks[locking.LEVEL_NODE_RES] = \
7731         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7733   def BuildHooksEnv(self):
7736     This runs on master, primary and secondary nodes of the instance.
7739     return _BuildInstanceHookEnvByObject(self, self.instance)
7741   def BuildHooksNodes(self):
7742     """Build hooks nodes.
7745     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7748   def CheckPrereq(self):
7749     """Check prerequisites.
7751     This checks that the instance is in the cluster and is not running.
7754     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7755     assert instance is not None, \
7756       "Cannot retrieve locked instance %s" % self.op.instance_name
# When nodes were given explicitly, their count must match the
# instance's current node count (the enclosing guard is not visible).
7758       if len(self.op.nodes) != len(instance.all_nodes):
7759         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7760                                    " %d replacement nodes were specified" %
7761                                    (instance.name, len(instance.all_nodes),
7762                                     len(self.op.nodes)),
7764       assert instance.disk_template != constants.DT_DRBD8 or \
7765              len(self.op.nodes) == 2
7766       assert instance.disk_template != constants.DT_PLAIN or \
7767              len(self.op.nodes) == 1
7768       primary_node = self.op.nodes[0]
7770       primary_node = instance.primary_node
7771     if not self.op.iallocator:
7772       _CheckNodeOnline(self, primary_node)
7774     if instance.disk_template == constants.DT_DISKLESS:
7775       raise errors.OpPrereqError("Instance '%s' has no disks" %
7776                                  self.op.instance_name, errors.ECODE_INVAL)
7778     # Verify if node group locks are still correct
7779     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7781       # Node group locks are acquired only for the primary node (and only
7782       # when the allocator is used)
7783       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7786     # if we replace nodes *and* the old primary is offline, we don't
7787     # check the instance state
7788     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7789     if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7790       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7791                           msg="cannot recreate disks")
# self.disks maps disk index -> override dict; default is all disks with
# no overrides (the if/else around these two lines is not visible).
7794       self.disks = dict(self.op.disks)
7796       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7798     maxidx = max(self.disks.keys())
7799     if maxidx >= len(instance.disks):
7800       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
# Changing nodes requires recreating *all* disks, not a subset.
7803     if ((self.op.nodes or self.op.iallocator) and
7804          sorted(self.disks.keys()) != range(len(instance.disks))):
7805       raise errors.OpPrereqError("Can't recreate disks partially and"
7806                                  " change the nodes at the same time",
7809     self.instance = instance
7811     if self.op.iallocator:
7812       self._RunAllocator()
7813       # Release unneeded node and node resource locks
7814       _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7815       _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7816       _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7818     assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7820   def Exec(self, feedback_fn):
7821     """Recreate the disks.
7824     instance = self.instance
7826     assert (self.owned_locks(locking.LEVEL_NODE) ==
7827             self.owned_locks(locking.LEVEL_NODE_RES))
7830     mods = [] # keeps track of needed changes
# First pass: compute all modifications without touching anything, so a
# failed assert cannot leave the config half-updated.
7832     for idx, disk in enumerate(instance.disks):
7834         changes = self.disks[idx]
7836         # Disk should not be recreated
7840       # update secondaries for disks, if needed
7841       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7842         # need to update the nodes and minors
7843         assert len(self.op.nodes) == 2
7844         assert len(disk.logical_id) == 6 # otherwise disk internals
# DRBD logical_id layout: (nodeA, nodeB, port, minorA, minorB, secret);
# keep port and secret, swap in the new node pair and fresh minors.
7846         (_, _, old_port, _, _, old_secret) = disk.logical_id
7847         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7848         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7849                   new_minors[0], new_minors[1], old_secret)
7850         assert len(disk.logical_id) == len(new_id)
7854       mods.append((idx, new_id, changes))
7856     # now that we have passed all asserts above, we can apply the mods
7857     # in a single run (to avoid partial changes)
7858     for idx, new_id, changes in mods:
7859       disk = instance.disks[idx]
7860       if new_id is not None:
7861         assert disk.dev_type == constants.LD_DRBD8
7862         disk.logical_id = new_id
# Apply size/mode overrides (guard around this call is not visible).
7864         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7865                     mode=changes.get(constants.IDISK_MODE, None))
7867     # change primary node, if needed
7869       instance.primary_node = self.op.nodes[0]
7870       self.LogWarning("Changing the instance's nodes, you will have to"
7871                       " remove any disks left on the older nodes manually")
7874       self.cfg.Update(instance, feedback_fn)
7876     # All touched nodes must be locked
7877     mylocks = self.owned_locks(locking.LEVEL_NODE)
7878     assert mylocks.issuperset(frozenset(instance.all_nodes))
7879     _CreateDisks(self, instance, to_skip=to_skip)
# Logical unit renaming a stopped instance: config rename, lock swap,
# optional file-storage directory rename, disk info update and the OS
# rename script.
# NOTE(review): embedded line numbers are discontinuous in this class;
# some guards and returns are missing from the excerpt.
7882 class LUInstanceRename(LogicalUnit):
7883   """Rename an instance.
7886   HPATH = "instance-rename"
7887   HTYPE = constants.HTYPE_INSTANCE
7889   def CheckArguments(self):
# The IP-in-use test resolves the new name, so it cannot run without the
# name check.
7893     if self.op.ip_check and not self.op.name_check:
7894       # TODO: make the ip check more flexible and not depend on the name check
7895       raise errors.OpPrereqError("IP address check requires a name check",
7898   def BuildHooksEnv(self):
7901     This runs on master, primary and secondary nodes of the instance.
7904     env = _BuildInstanceHookEnvByObject(self, self.instance)
7905     env["INSTANCE_NEW_NAME"] = self.op.new_name
7908   def BuildHooksNodes(self):
7909     """Build hooks nodes.
7912     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7915   def CheckPrereq(self):
7916     """Check prerequisites.
7918     This checks that the instance is in the cluster and is not running.
7921     self.op.instance_name = _ExpandInstanceName(self.cfg,
7922                                                 self.op.instance_name)
7923     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7924     assert instance is not None
7925     _CheckNodeOnline(self, instance.primary_node)
7926     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7927                         msg="cannot rename")
7928     self.instance = instance
# Optional sanity check of the new name; a ping answer on the noded port
# means the new name's IP is already taken.
7930     new_name = self.op.new_name
7931     if self.op.name_check:
7932       hostname = _CheckHostnameSane(self, new_name)
7933       new_name = self.op.new_name = hostname.name
7934       if (self.op.ip_check and
7935           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7936         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7937                                    (hostname.ip, new_name),
7938                                    errors.ECODE_NOTUNIQUE)
7940     instance_list = self.cfg.GetInstanceList()
7941     if new_name in instance_list and new_name != instance.name:
7942       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7943                                  new_name, errors.ECODE_EXISTS)
7945   def Exec(self, feedback_fn):
7946     """Rename the instance.
7949     inst = self.instance
7950     old_name = inst.name
# File-based instances keep their name in the storage path, so the
# directory has to be renamed too; remember the old path first.
7952     rename_file_storage = False
7953     if (inst.disk_template in constants.DTS_FILEBASED and
7954         self.op.new_name != inst.name):
7955       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7956       rename_file_storage = True
7958     self.cfg.RenameInstance(inst.name, self.op.new_name)
7959     # Change the instance lock. This is definitely safe while we hold the BGL.
7960     # Otherwise the new lock would have to be added in acquired mode.
7962     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7963     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7964     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7966     # re-read the instance from the configuration after rename
7967     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7969     if rename_file_storage:
7970       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7971       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7972                                                      old_file_storage_dir,
7973                                                      new_file_storage_dir)
7974       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7975                    " (but the instance has been renamed in Ganeti)" %
7976                    (inst.primary_node, old_file_storage_dir,
7977                     new_file_storage_dir))
# Disks are assembled so their metadata text can be refreshed and the OS
# rename script can run; failures setting info are only warned about.
7979     _StartInstanceDisks(self, inst, None)
7980     # update info on disks
7981     info = _GetInstanceInfoText(inst)
7982     for (idx, disk) in enumerate(inst.disks):
7983       for node in inst.all_nodes:
7984         self.cfg.SetDiskID(disk, node)
7985         result = self.rpc.call_blockdev_setinfo(node, disk, info)
7987           self.LogWarning("Error setting info on node %s for disk %s: %s",
7988                           node, idx, result.fail_msg)
7990       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7991                                                  old_name, self.op.debug_level)
7992       msg = result.fail_msg
# A failing OS rename script is non-fatal: the config rename already
# happened, so only a warning is emitted (guard line not visible here).
7994         msg = ("Could not run OS rename script for instance %s on node %s"
7995                " (but the instance has been renamed in Ganeti): %s" %
7996                (inst.name, inst.primary_node, msg))
7997         self.LogWarning(msg)
7999       _ShutdownInstanceDisks(self, inst)
# Logical unit removing an instance: shut it down (optionally ignoring
# failures), then delegate disk and config removal to _RemoveInstance.
# NOTE(review): embedded line numbers are discontinuous in this class;
# some guards and returns are missing from the excerpt.
8004 class LUInstanceRemove(LogicalUnit):
8005   """Remove an instance.
8008   HPATH = "instance-remove"
8009   HTYPE = constants.HTYPE_INSTANCE
8012   def ExpandNames(self):
8013     self._ExpandAndLockInstance()
8014     self.needed_locks[locking.LEVEL_NODE] = []
8015     self.needed_locks[locking.LEVEL_NODE_RES] = []
8016     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8018   def DeclareLocks(self, level):
8019     if level == locking.LEVEL_NODE:
8020       self._LockInstancesNodes()
8021     elif level == locking.LEVEL_NODE_RES:
# Resource locks copy the node locks computed at LEVEL_NODE.
8023       self.needed_locks[locking.LEVEL_NODE_RES] = \
8024         _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8026   def BuildHooksEnv(self):
8029     This runs on master, primary and secondary nodes of the instance.
8032     env = _BuildInstanceHookEnvByObject(self, self.instance)
8033     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8036   def BuildHooksNodes(self):
8037     """Build hooks nodes.
# Pre-hooks run only on the master; post-hooks also on the (former)
# instance nodes.
8040     nl = [self.cfg.GetMasterNode()]
8041     nl_post = list(self.instance.all_nodes) + nl
8042     return (nl, nl_post)
8044   def CheckPrereq(self):
8045     """Check prerequisites.
8047     This checks that the instance is in the cluster.
8050     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8051     assert self.instance is not None, \
8052       "Cannot retrieve locked instance %s" % self.op.instance_name
8054   def Exec(self, feedback_fn):
8055     """Remove the instance.
8058     instance = self.instance
8059     logging.info("Shutting down instance %s on node %s",
8060                  instance.name, instance.primary_node)
8062     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8063                                              self.op.shutdown_timeout)
8064     msg = result.fail_msg
# Shutdown failure: warn and continue under --ignore-failures, abort
# otherwise (the guard around this pair is not visible here).
8066       if self.op.ignore_failures:
8067         feedback_fn("Warning: can't shutdown instance: %s" % msg)
8069         raise errors.OpExecError("Could not shutdown instance %s on"
8071                                  (instance.name, instance.primary_node, msg))
# Sanity: node and node-resource locks must match and cover all nodes
# the instance lives on before anything is deleted.
8073     assert (self.owned_locks(locking.LEVEL_NODE) ==
8074             self.owned_locks(locking.LEVEL_NODE_RES))
8075     assert not (set(instance.all_nodes) -
8076                 self.owned_locks(locking.LEVEL_NODE)), \
8077       "Not owning correct locks"
8079     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Shared helper: remove an instance's disks and then its config entry.
# Used by LUInstanceRemove (and potentially other LUs in the full file).
8082 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8083   """Utility function to remove an instance.
8086   logging.info("Removing block devices for instance %s", instance.name)
# Disk removal failure is fatal unless the caller opted to ignore
# failures, in which case only a warning is reported.
8088   if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8089     if not ignore_failures:
8090       raise errors.OpExecError("Can't remove instance's disks")
8091     feedback_fn("Warning: can't remove instance's disks")
8093   logging.info("Removing instance %s out of cluster config", instance.name)
8095   lu.cfg.RemoveInstance(instance.name)
8097   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8098     "Instance lock removal conflict"
8100   # Remove lock for the instance
# Registering the name here makes the LU framework drop the instance
# lock once this LU finishes.
8101   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# Thin LU wrapper delegating all query work to an _InstanceQuery helper
# built from the opcode's name filter, field list and locking flag.
8104 class LUInstanceQuery(NoHooksLU):
8105   """Logical unit for querying instances.
8108   # pylint: disable=W0142
8111   def CheckArguments(self):
8112     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8113                              self.op.output_fields, self.op.use_locking)
8115   def ExpandNames(self):
8116     self.iq.ExpandNames(self)
8118   def DeclareLocks(self, level):
8119     self.iq.DeclareLocks(self, level)
8121   def Exec(self, feedback_fn):
# Old-style (list-of-rows) query result for backwards compatibility.
8122     return self.iq.OldStyleQuery(self)
# Shared ExpandNames logic for failover/migration LUs: expand the target
# node (if given) and pre-declare empty, later-recalculated lock lists.
8125 def _ExpandNamesForMigration(lu):
8126   """Expands names for use with L{TLMigrateInstance}.
8128   @type lu: L{LogicalUnit}
8131   if lu.op.target_node is not None:
8132     lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
# Node and node-resource locks start empty; LOCKS_REPLACE makes the
# framework recompute them in DeclareLocks.
8134   lu.needed_locks[locking.LEVEL_NODE] = []
8135   lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8137   lu.needed_locks[locking.LEVEL_NODE_RES] = []
8138   lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8140   # The node allocation lock is actually only needed for replicated instances
8141   # (e.g. DRBD8) and if an iallocator is used.
8142   lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
# Shared DeclareLocks logic for failover/migration LUs.
# NOTE(review): embedded line numbers are discontinuous here (e.g. an
# else-branch around lines 8164-8168 is partly missing from the excerpt).
8145 def _DeclareLocksForMigration(lu, level):
8146   """Declares locks for L{TLMigrateInstance}.
8148   @type lu: L{LogicalUnit}
8149   @param level: Lock level
8152   if level == locking.LEVEL_NODE_ALLOC:
8153     assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8155     instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8157     # Node locks are already declared here rather than at LEVEL_NODE as we need
8158     # the instance object anyway to declare the node allocation lock.
# Externally-mirrored disks: without an explicit target node any node may
# be chosen, so all node (and allocation) locks are taken.
8159     if instance.disk_template in constants.DTS_EXT_MIRROR:
8160       if lu.op.target_node is None:
8161         lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8162         lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8164         lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8166       del lu.recalculate_locks[locking.LEVEL_NODE]
# Internally-mirrored disks: lock exactly the instance's own nodes.
8168       lu._LockInstancesNodes() # pylint: disable=W0212
8170   elif level == locking.LEVEL_NODE:
8171     # Node locks are declared together with the node allocation lock
8172     assert (lu.needed_locks[locking.LEVEL_NODE] or
8173             lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8175   elif level == locking.LEVEL_NODE_RES:
# Resource locks copy whatever node locks were declared above.
8177     lu.needed_locks[locking.LEVEL_NODE_RES] = \
8178       _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
# Logical unit implementing instance failover (shutdown on the current
# primary, start on the secondary/target).  The actual work is done by a
# TLMigrateInstance tasklet; this LU only sets up locks and hooks.
# NOTE(review): embedded line numbers are discontinuous in this class.
8181 class LUInstanceFailover(LogicalUnit):
8182   """Failover an instance.
8185   HPATH = "instance-failover"
8186   HTYPE = constants.HTYPE_INSTANCE
8189   def CheckArguments(self):
8190     """Check the arguments.
# getattr with a None default: these opcode fields may be absent.
8193     self.iallocator = getattr(self.op, "iallocator", None)
8194     self.target_node = getattr(self.op, "target_node", None)
8196   def ExpandNames(self):
8197     self._ExpandAndLockInstance()
8198     _ExpandNamesForMigration(self)
# The positional flags passed to TLMigrateInstance configure it for
# failover (as opposed to live migration) — see the tasklet's signature.
8201       TLMigrateInstance(self, self.op.instance_name, False, True, False,
8202                         self.op.ignore_consistency, True,
8203                         self.op.shutdown_timeout, self.op.ignore_ipolicy)
8205     self.tasklets = [self._migrater]
8207   def DeclareLocks(self, level):
8208     _DeclareLocksForMigration(self, level)
8210   def BuildHooksEnv(self):
8213     This runs on master, primary and secondary nodes of the instance.
8216     instance = self._migrater.instance
8217     source_node = instance.primary_node
8218     target_node = self.op.target_node
8220       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8221       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8222       "OLD_PRIMARY": source_node,
8223       "NEW_PRIMARY": target_node,
# For internally-mirrored templates the roles swap: the old secondary
# becomes the new primary's peer and vice versa.
8226     if instance.disk_template in constants.DTS_INT_MIRROR:
8227       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8228       env["NEW_SECONDARY"] = source_node
8230       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8232     env.update(_BuildInstanceHookEnvByObject(self, instance))
8236   def BuildHooksNodes(self):
8237     """Build hooks nodes.
8240     instance = self._migrater.instance
8241     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8242     return (nl, nl + [instance.primary_node])
# Logical unit implementing live migration (no shutdown, unlike
# failover).  As with LUInstanceFailover the heavy lifting lives in a
# TLMigrateInstance tasklet.
# NOTE(review): embedded line numbers are discontinuous in this class.
8245 class LUInstanceMigrate(LogicalUnit):
8246   """Migrate an instance.
8248   This is migration without shutting down, compared to the failover,
8249   which is done with shutdown.
8252   HPATH = "instance-migrate"
8253   HTYPE = constants.HTYPE_INSTANCE
8256   def ExpandNames(self):
8257     self._ExpandAndLockInstance()
8258     _ExpandNamesForMigration(self)
# The tasklet flags here configure migration semantics (cleanup mode,
# failover fallback, runtime changes) — see TLMigrateInstance's
# signature for the parameter meanings.
8261       TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8262                         False, self.op.allow_failover, False,
8263                         self.op.allow_runtime_changes,
8264                         constants.DEFAULT_SHUTDOWN_TIMEOUT,
8265                         self.op.ignore_ipolicy)
8267     self.tasklets = [self._migrater]
8269   def DeclareLocks(self, level):
8270     _DeclareLocksForMigration(self, level)
8272   def BuildHooksEnv(self):
8275     This runs on master, primary and secondary nodes of the instance.
8278     instance = self._migrater.instance
8279     source_node = instance.primary_node
8280     target_node = self.op.target_node
8281     env = _BuildInstanceHookEnvByObject(self, instance)
8283       "MIGRATE_LIVE": self._migrater.live,
8284       "MIGRATE_CLEANUP": self.op.cleanup,
8285       "OLD_PRIMARY": source_node,
8286       "NEW_PRIMARY": target_node,
8287       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
# Role swap for internally-mirrored templates; note the non-mirrored
# fallback uses None here where the failover LU uses "" — presumably
# intentional, but worth confirming against hook consumers.
8290     if instance.disk_template in constants.DTS_INT_MIRROR:
8291       env["OLD_SECONDARY"] = target_node
8292       env["NEW_SECONDARY"] = source_node
8294       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8298   def BuildHooksNodes(self):
8299     """Build hooks nodes.
8302     instance = self._migrater.instance
8303     snodes = list(instance.secondary_nodes)
8304     nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8308 class LUInstanceMove(LogicalUnit):
8309 """Move an instance by data-copying.
8312 HPATH = "instance-move"
8313 HTYPE = constants.HTYPE_INSTANCE
8316 def ExpandNames(self):
8317 self._ExpandAndLockInstance()
8318 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8319 self.op.target_node = target_node
8320 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8321 self.needed_locks[locking.LEVEL_NODE_RES] = []
8322 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8324 def DeclareLocks(self, level):
8325 if level == locking.LEVEL_NODE:
8326 self._LockInstancesNodes(primary_only=True)
8327 elif level == locking.LEVEL_NODE_RES:
8329 self.needed_locks[locking.LEVEL_NODE_RES] = \
8330 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8332 def BuildHooksEnv(self):
8335 This runs on master, primary and secondary nodes of the instance.
8339 "TARGET_NODE": self.op.target_node,
8340 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8342 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8345 def BuildHooksNodes(self):
8346 """Build hooks nodes.
8350 self.cfg.GetMasterNode(),
8351 self.instance.primary_node,
8352 self.op.target_node,
8356 def CheckPrereq(self):
8357 """Check prerequisites.
8359 This checks that the instance is in the cluster.
8362 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8363 assert self.instance is not None, \
8364 "Cannot retrieve locked instance %s" % self.op.instance_name
8366 node = self.cfg.GetNodeInfo(self.op.target_node)
8367 assert node is not None, \
8368 "Cannot retrieve locked node %s" % self.op.target_node
8370 self.target_node = target_node = node.name
8372 if target_node == instance.primary_node:
8373 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8374 (instance.name, target_node),
8377 bep = self.cfg.GetClusterInfo().FillBE(instance)
8379 for idx, dsk in enumerate(instance.disks):
8380 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8381 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8382 " cannot copy" % idx, errors.ECODE_STATE)
8384 _CheckNodeOnline(self, target_node)
8385 _CheckNodeNotDrained(self, target_node)
8386 _CheckNodeVmCapable(self, target_node)
8387 cluster = self.cfg.GetClusterInfo()
8388 group_info = self.cfg.GetNodeGroup(node.group)
8389 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8390 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
8391 ignore=self.op.ignore_ipolicy)
8393 if instance.admin_state == constants.ADMINST_UP:
8394 # check memory requirements on the secondary node
8395 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8396 instance.name, bep[constants.BE_MAXMEM],
8397 instance.hypervisor)
8399 self.LogInfo("Not checking memory on the secondary node as"
8400 " instance will not be started")
8402 # check bridge existance
8403 _CheckInstanceBridgesExist(self, instance, node=target_node)
8405 def Exec(self, feedback_fn):
8406 """Move an instance.
8408 The move is done by shutting it down on its present node, copying
8409 the data over (slow) and starting it on the new node.
8412 instance = self.instance
8414 source_node = instance.primary_node
8415 target_node = self.target_node
8417 self.LogInfo("Shutting down instance %s on source node %s",
8418 instance.name, source_node)
8420 assert (self.owned_locks(locking.LEVEL_NODE) ==
8421 self.owned_locks(locking.LEVEL_NODE_RES))
8423 result = self.rpc.call_instance_shutdown(source_node, instance,
8424 self.op.shutdown_timeout)
8425 msg = result.fail_msg
8427 if self.op.ignore_consistency:
8428 self.LogWarning("Could not shutdown instance %s on node %s."
8429 " Proceeding anyway. Please make sure node"
8430 " %s is down. Error details: %s",
8431 instance.name, source_node, source_node, msg)
8433 raise errors.OpExecError("Could not shutdown instance %s on"
8435 (instance.name, source_node, msg))
8437 # create the target disks
8439 _CreateDisks(self, instance, target_node=target_node)
8440 except errors.OpExecError:
8441 self.LogWarning("Device creation failed, reverting...")
8443 _RemoveDisks(self, instance, target_node=target_node)
8445 self.cfg.ReleaseDRBDMinors(instance.name)
8448 cluster_name = self.cfg.GetClusterInfo().cluster_name
8451 # activate, get path, copy the data over
8452 for idx, disk in enumerate(instance.disks):
8453 self.LogInfo("Copying data for disk %d", idx)
8454 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8455 instance.name, True, idx)
8457 self.LogWarning("Can't assemble newly created disk %d: %s",
8458 idx, result.fail_msg)
8459 errs.append(result.fail_msg)
8461 dev_path = result.payload
8462 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8463 target_node, dev_path,
8466 self.LogWarning("Can't copy data over for disk %d: %s",
8467 idx, result.fail_msg)
8468 errs.append(result.fail_msg)
8472 self.LogWarning("Some disks failed to copy, aborting")
8474 _RemoveDisks(self, instance, target_node=target_node)
8476 self.cfg.ReleaseDRBDMinors(instance.name)
8477 raise errors.OpExecError("Errors during disk copy: %s" %
8480 instance.primary_node = target_node
8481 self.cfg.Update(instance, feedback_fn)
8483 self.LogInfo("Removing the disks on the original node")
8484 _RemoveDisks(self, instance, target_node=source_node)
8486 # Only start the instance if it's marked as up
8487 if instance.admin_state == constants.ADMINST_UP:
8488 self.LogInfo("Starting instance %s on node %s",
8489 instance.name, target_node)
8491 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8492 ignore_secondaries=True)
8494 _ShutdownInstanceDisks(self, instance)
8495 raise errors.OpExecError("Can't activate the instance's disks")
8497 result = self.rpc.call_instance_start(target_node,
8498 (instance, None, None), False)
8499 msg = result.fail_msg
8501 _ShutdownInstanceDisks(self, instance)
8502 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8503 (instance.name, target_node, msg))
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  This LU does not migrate anything itself; it submits one migration
  job per primary instance of the node and returns them to the caller.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
  @type ignore_ipolicy: bool
  @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8601 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8602 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8604 """Initializes this class.
8607 Tasklet.__init__(self, lu)
8610 self.instance_name = instance_name
8611 self.cleanup = cleanup
8612 self.live = False # will be overridden later
8613 self.failover = failover
8614 self.fallback = fallback
8615 self.ignore_consistency = ignore_consistency
8616 self.shutdown_timeout = shutdown_timeout
8617 self.ignore_ipolicy = ignore_ipolicy
8618 self.allow_runtime_changes = allow_runtime_changes
8620 def CheckPrereq(self):
8621 """Check prerequisites.
8623 This checks that the instance is in the cluster.
8626 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8627 instance = self.cfg.GetInstanceInfo(instance_name)
8628 assert instance is not None
8629 self.instance = instance
8630 cluster = self.cfg.GetClusterInfo()
8632 if (not self.cleanup and
8633 not instance.admin_state == constants.ADMINST_UP and
8634 not self.failover and self.fallback):
8635 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8636 " switching to failover")
8637 self.failover = True
8639 if instance.disk_template not in constants.DTS_MIRRORED:
8644 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8645 " %s" % (instance.disk_template, text),
8648 if instance.disk_template in constants.DTS_EXT_MIRROR:
8649 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8651 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8653 if self.lu.op.iallocator:
8654 self._RunAllocator()
8656 # We set set self.target_node as it is required by
8658 self.target_node = self.lu.op.target_node
8660 # Check that the target node is correct in terms of instance policy
8661 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8662 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8663 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8665 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8666 ignore=self.ignore_ipolicy)
8668 # self.target_node is already populated, either directly or by the
8670 target_node = self.target_node
8671 if self.target_node == instance.primary_node:
8672 raise errors.OpPrereqError("Cannot migrate instance %s"
8673 " to its primary (%s)" %
8674 (instance.name, instance.primary_node),
8677 if len(self.lu.tasklets) == 1:
8678 # It is safe to release locks only when we're the only tasklet
8680 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8681 keep=[instance.primary_node, self.target_node])
8682 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8685 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8687 secondary_nodes = instance.secondary_nodes
8688 if not secondary_nodes:
8689 raise errors.ConfigurationError("No secondary node but using"
8690 " %s disk template" %
8691 instance.disk_template)
8692 target_node = secondary_nodes[0]
8693 if self.lu.op.iallocator or (self.lu.op.target_node and
8694 self.lu.op.target_node != target_node):
8696 text = "failed over"
8699 raise errors.OpPrereqError("Instances with disk template %s cannot"
8700 " be %s to arbitrary nodes"
8701 " (neither an iallocator nor a target"
8702 " node can be passed)" %
8703 (instance.disk_template, text),
8705 nodeinfo = self.cfg.GetNodeInfo(target_node)
8706 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8707 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8709 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8710 ignore=self.ignore_ipolicy)
8712 i_be = cluster.FillBE(instance)
8714 # check memory requirements on the secondary node
8715 if (not self.cleanup and
8716 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8717 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8718 "migrating instance %s" %
8720 i_be[constants.BE_MINMEM],
8721 instance.hypervisor)
8723 self.lu.LogInfo("Not checking memory on the secondary node as"
8724 " instance will not be started")
8726 # check if failover must be forced instead of migration
8727 if (not self.cleanup and not self.failover and
8728 i_be[constants.BE_ALWAYS_FAILOVER]):
8729 self.lu.LogInfo("Instance configured to always failover; fallback"
8731 self.failover = True
8733 # check bridge existance
8734 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8736 if not self.cleanup:
8737 _CheckNodeNotDrained(self.lu, target_node)
8738 if not self.failover:
8739 result = self.rpc.call_instance_migratable(instance.primary_node,
8741 if result.fail_msg and self.fallback:
8742 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8744 self.failover = True
8746 result.Raise("Can't migrate, please use failover",
8747 prereq=True, ecode=errors.ECODE_STATE)
8749 assert not (self.failover and self.cleanup)
8751 if not self.failover:
8752 if self.lu.op.live is not None and self.lu.op.mode is not None:
8753 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8754 " parameters are accepted",
8756 if self.lu.op.live is not None:
8758 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8760 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8761 # reset the 'live' parameter to None so that repeated
8762 # invocations of CheckPrereq do not raise an exception
8763 self.lu.op.live = None
8764 elif self.lu.op.mode is None:
8765 # read the default value from the hypervisor
8766 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8767 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8769 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8771 # Failover is never live
8774 if not (self.failover or self.cleanup):
8775 remote_info = self.rpc.call_instance_info(instance.primary_node,
8777 instance.hypervisor)
8778 remote_info.Raise("Error checking instance on node %s" %
8779 instance.primary_node)
8780 instance_running = bool(remote_info.payload)
8781 if instance_running:
8782 self.current_mem = int(remote_info.payload["memory"])
8784 def _RunAllocator(self):
8785 """Run the allocator based on input opcode.
8788 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8790 # FIXME: add a self.ignore_ipolicy option
8791 req = iallocator.IAReqRelocate(name=self.instance_name,
8792 relocate_from=[self.instance.primary_node])
8793 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8795 ial.Run(self.lu.op.iallocator)
8798 raise errors.OpPrereqError("Can't compute nodes using"
8799 " iallocator '%s': %s" %
8800 (self.lu.op.iallocator, ial.info),
8802 self.target_node = ial.result[0]
8803 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8804 self.instance_name, self.lu.op.iallocator,
8805 utils.CommaJoin(ial.result))
8807 def _WaitUntilSync(self):
8808 """Poll with custom rpc for disk sync.
8810 This uses our own step-based rpc call.
8813 self.feedback_fn("* wait until resync is done")
8817 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8819 (self.instance.disks,
8822 for node, nres in result.items():
8823 nres.Raise("Cannot resync disks on node %s" % node)
8824 node_done, node_percent = nres.payload
8825 all_done = all_done and node_done
8826 if node_percent is not None:
8827 min_percent = min(min_percent, node_percent)
8829 if min_percent < 100:
8830 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8833 def _EnsureSecondary(self, node):
8834 """Demote a node to secondary.
8837 self.feedback_fn("* switching node %s to secondary mode" % node)
8839 for dev in self.instance.disks:
8840 self.cfg.SetDiskID(dev, node)
8842 result = self.rpc.call_blockdev_close(node, self.instance.name,
8843 self.instance.disks)
8844 result.Raise("Cannot change disk to secondary on node %s" % node)
8846 def _GoStandalone(self):
8847 """Disconnect from the network.
8850 self.feedback_fn("* changing into standalone mode")
8851 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8852 self.instance.disks)
8853 for node, nres in result.items():
8854 nres.Raise("Cannot disconnect disks node %s" % node)
8856 def _GoReconnect(self, multimaster):
8857 """Reconnect to the network.
8863 msg = "single-master"
8864 self.feedback_fn("* changing disks into %s mode" % msg)
8865 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8866 (self.instance.disks, self.instance),
8867 self.instance.name, multimaster)
8868 for node, nres in result.items():
8869 nres.Raise("Cannot change disks config on node %s" % node)
8871 def _ExecCleanup(self):
8872 """Try to cleanup after a failed migration.
8874 The cleanup is done by:
8875 - check that the instance is running only on one node
8876 (and update the config if needed)
8877 - change disks on its secondary node to secondary
8878 - wait until disks are fully synchronized
8879 - disconnect from the network
8880 - change disks into single-master mode
8881 - wait again until disks are fully synchronized
8884 instance = self.instance
8885 target_node = self.target_node
8886 source_node = self.source_node
8888 # check running on only one node
8889 self.feedback_fn("* checking where the instance actually runs"
8890 " (if this hangs, the hypervisor might be in"
8892 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8893 for node, result in ins_l.items():
8894 result.Raise("Can't contact node %s" % node)
8896 runningon_source = instance.name in ins_l[source_node].payload
8897 runningon_target = instance.name in ins_l[target_node].payload
8899 if runningon_source and runningon_target:
8900 raise errors.OpExecError("Instance seems to be running on two nodes,"
8901 " or the hypervisor is confused; you will have"
8902 " to ensure manually that it runs only on one"
8903 " and restart this operation")
8905 if not (runningon_source or runningon_target):
8906 raise errors.OpExecError("Instance does not seem to be running at all;"
8907 " in this case it's safer to repair by"
8908 " running 'gnt-instance stop' to ensure disk"
8909 " shutdown, and then restarting it")
8911 if runningon_target:
8912 # the migration has actually succeeded, we need to update the config
8913 self.feedback_fn("* instance running on secondary node (%s),"
8914 " updating config" % target_node)
8915 instance.primary_node = target_node
8916 self.cfg.Update(instance, self.feedback_fn)
8917 demoted_node = source_node
8919 self.feedback_fn("* instance confirmed to be running on its"
8920 " primary node (%s)" % source_node)
8921 demoted_node = target_node
8923 if instance.disk_template in constants.DTS_INT_MIRROR:
8924 self._EnsureSecondary(demoted_node)
8926 self._WaitUntilSync()
8927 except errors.OpExecError:
8928 # we ignore here errors, since if the device is standalone, it
8929 # won't be able to sync
8931 self._GoStandalone()
8932 self._GoReconnect(False)
8933 self._WaitUntilSync()
8935 self.feedback_fn("* done")
8937 def _RevertDiskStatus(self):
8938 """Try to revert the disk status after a failed migration.
8941 target_node = self.target_node
8942 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8946 self._EnsureSecondary(target_node)
8947 self._GoStandalone()
8948 self._GoReconnect(False)
8949 self._WaitUntilSync()
8950 except errors.OpExecError, err:
8951 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8952 " please try to recover the instance manually;"
8953 " error '%s'" % str(err))
8955 def _AbortMigration(self):
8956 """Call the hypervisor code to abort a started migration.
8959 instance = self.instance
8960 target_node = self.target_node
8961 source_node = self.source_node
8962 migration_info = self.migration_info
8964 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8968 abort_msg = abort_result.fail_msg
8970 logging.error("Aborting migration failed on target node %s: %s",
8971 target_node, abort_msg)
8972 # Don't raise an exception here, as we stil have to try to revert the
8973 # disk status, even if this step failed.
8975 abort_result = self.rpc.call_instance_finalize_migration_src(
8976 source_node, instance, False, self.live)
8977 abort_msg = abort_result.fail_msg
8979 logging.error("Aborting migration failed on source node %s: %s",
8980 source_node, abort_msg)
8982 def _ExecMigration(self):
8983 """Migrate an instance.
8985 The migrate is done by:
8986 - change the disks into dual-master mode
8987 - wait until disks are fully synchronized again
8988 - migrate the instance
8989 - change disks on the new secondary node (the old primary) to secondary
8990 - wait until disks are fully synchronized
8991 - change disks into single-master mode
8994 instance = self.instance
8995 target_node = self.target_node
8996 source_node = self.source_node
8998 # Check for hypervisor version mismatch and warn the user.
8999 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9000 None, [self.instance.hypervisor], False)
9001 for ninfo in nodeinfo.values():
9002 ninfo.Raise("Unable to retrieve node information from node '%s'" %
9004 (_, _, (src_info, )) = nodeinfo[source_node].payload
9005 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9007 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9008 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9009 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9010 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9011 if src_version != dst_version:
9012 self.feedback_fn("* warning: hypervisor version mismatch between"
9013 " source (%s) and target (%s) node" %
9014 (src_version, dst_version))
9016 self.feedback_fn("* checking disk consistency between source and target")
9017 for (idx, dev) in enumerate(instance.disks):
9018 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9019 raise errors.OpExecError("Disk %s is degraded or not fully"
9020 " synchronized on target node,"
9021 " aborting migration" % idx)
9023 if self.current_mem > self.tgt_free_mem:
9024 if not self.allow_runtime_changes:
9025 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9026 " free memory to fit instance %s on target"
9027 " node %s (have %dMB, need %dMB)" %
9028 (instance.name, target_node,
9029 self.tgt_free_mem, self.current_mem))
9030 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9031 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9034 rpcres.Raise("Cannot modify instance runtime memory")
9036 # First get the migration information from the remote node
9037 result = self.rpc.call_migration_info(source_node, instance)
9038 msg = result.fail_msg
9040 log_err = ("Failed fetching source migration information from %s: %s" %
9042 logging.error(log_err)
9043 raise errors.OpExecError(log_err)
9045 self.migration_info = migration_info = result.payload
9047 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9048 # Then switch the disks to master/master mode
9049 self._EnsureSecondary(target_node)
9050 self._GoStandalone()
9051 self._GoReconnect(True)
9052 self._WaitUntilSync()
9054 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9055 result = self.rpc.call_accept_instance(target_node,
9058 self.nodes_ip[target_node])
9060 msg = result.fail_msg
9062 logging.error("Instance pre-migration failed, trying to revert"
9063 " disk status: %s", msg)
9064 self.feedback_fn("Pre-migration failed, aborting")
9065 self._AbortMigration()
9066 self._RevertDiskStatus()
9067 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9068 (instance.name, msg))
9070 self.feedback_fn("* migrating instance to %s" % target_node)
9071 result = self.rpc.call_instance_migrate(source_node, instance,
9072 self.nodes_ip[target_node],
9074 msg = result.fail_msg
9076 logging.error("Instance migration failed, trying to revert"
9077 " disk status: %s", msg)
9078 self.feedback_fn("Migration failed, aborting")
9079 self._AbortMigration()
9080 self._RevertDiskStatus()
9081 raise errors.OpExecError("Could not migrate instance %s: %s" %
9082 (instance.name, msg))
9084 self.feedback_fn("* starting memory transfer")
9085 last_feedback = time.time()
9087 result = self.rpc.call_instance_get_migration_status(source_node,
9089 msg = result.fail_msg
9090 ms = result.payload # MigrationStatus instance
9091 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9092 logging.error("Instance migration failed, trying to revert"
9093 " disk status: %s", msg)
9094 self.feedback_fn("Migration failed, aborting")
9095 self._AbortMigration()
9096 self._RevertDiskStatus()
9098 msg = "hypervisor returned failure"
9099 raise errors.OpExecError("Could not migrate instance %s: %s" %
9100 (instance.name, msg))
9102 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9103 self.feedback_fn("* memory transfer complete")
9106 if (utils.TimeoutExpired(last_feedback,
9107 self._MIGRATION_FEEDBACK_INTERVAL) and
9108 ms.transferred_ram is not None):
9109 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9110 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9111 last_feedback = time.time()
9113 time.sleep(self._MIGRATION_POLL_INTERVAL)
9115 result = self.rpc.call_instance_finalize_migration_src(source_node,
9119 msg = result.fail_msg
9121 logging.error("Instance migration succeeded, but finalization failed"
9122 " on the source node: %s", msg)
9123 raise errors.OpExecError("Could not finalize instance migration: %s" %
9126 instance.primary_node = target_node
9128 # distribute new instance config to the other nodes
9129 self.cfg.Update(instance, self.feedback_fn)
9131 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9135 msg = result.fail_msg
9137 logging.error("Instance migration succeeded, but finalization failed"
9138 " on the target node: %s", msg)
9139 raise errors.OpExecError("Could not finalize instance migration: %s" %
9142 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9143 self._EnsureSecondary(source_node)
9144 self._WaitUntilSync()
9145 self._GoStandalone()
9146 self._GoReconnect(False)
9147 self._WaitUntilSync()
9149 # If the instance's disk template is `rbd' or `ext' and there was a
9150 # successful migration, unmap the device from the source node.
9151 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9152 disks = _ExpandCheckDisks(instance, instance.disks)
9153 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9155 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9156 msg = result.fail_msg
9158 logging.error("Migration was successful, but couldn't unmap the"
9159 " block device %s on source node %s: %s",
9160 disk.iv_name, source_node, msg)
9161 logging.error("You need to unmap the device %s manually on %s",
9162 disk.iv_name, source_node)
9164 self.feedback_fn("* done")
9166 def _ExecFailover(self):
9167 """Failover an instance.
9169 The failover is done by shutting it down on its present node and
9170 starting it on the secondary.
9173 instance = self.instance
9174 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9176 source_node = instance.primary_node
9177 target_node = self.target_node
9179 if instance.admin_state == constants.ADMINST_UP:
9180 self.feedback_fn("* checking disk consistency between source and target")
9181 for (idx, dev) in enumerate(instance.disks):
9182 # for drbd, these are drbd over lvm
9183 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9185 if primary_node.offline:
9186 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9188 (primary_node.name, idx, target_node))
9189 elif not self.ignore_consistency:
9190 raise errors.OpExecError("Disk %s is degraded on target node,"
9191 " aborting failover" % idx)
9193 self.feedback_fn("* not checking disk consistency as instance is not"
9196 self.feedback_fn("* shutting down instance on source node")
9197 logging.info("Shutting down instance %s on node %s",
9198 instance.name, source_node)
9200 result = self.rpc.call_instance_shutdown(source_node, instance,
9201 self.shutdown_timeout)
9202 msg = result.fail_msg
9204 if self.ignore_consistency or primary_node.offline:
9205 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9206 " proceeding anyway; please make sure node"
9207 " %s is down; error details: %s",
9208 instance.name, source_node, source_node, msg)
9210 raise errors.OpExecError("Could not shutdown instance %s on"
9212 (instance.name, source_node, msg))
9214 self.feedback_fn("* deactivating the instance's disks on source node")
9215 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9216 raise errors.OpExecError("Can't shut down the instance's disks")
9218 instance.primary_node = target_node
9219 # distribute new instance config to the other nodes
9220 self.cfg.Update(instance, self.feedback_fn)
9222 # Only start the instance if it's marked as up
9223 if instance.admin_state == constants.ADMINST_UP:
9224 self.feedback_fn("* activating the instance's disks on target node %s" %
9226 logging.info("Starting instance %s on node %s",
9227 instance.name, target_node)
9229 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9230 ignore_secondaries=True)
9232 _ShutdownInstanceDisks(self.lu, instance)
9233 raise errors.OpExecError("Can't activate the instance's disks")
9235 self.feedback_fn("* starting the instance on the target node %s" %
9237 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9239 msg = result.fail_msg
9241 _ShutdownInstanceDisks(self.lu, instance)
9242 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9243 (instance.name, target_node, msg))
9245 def Exec(self, feedback_fn):
9246 """Perform the migration.
9249 self.feedback_fn = feedback_fn
9250 self.source_node = self.instance.primary_node
9252 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9253 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9254 self.target_node = self.instance.secondary_nodes[0]
9255 # Otherwise self.target_node has been populated either
9256 # directly, or through an iallocator.
9258 self.all_nodes = [self.source_node, self.target_node]
9259 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9260 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9263 feedback_fn("Failover instance %s" % self.instance.name)
9264 self._ExecFailover()
9266 feedback_fn("Migrating instance %s" % self.instance.name)
9269 return self._ExecCleanup()
9271 return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Wrapper around L{_CreateBlockDevInner}.

  This method annotates the root device first.

  """
  (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
  excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
  return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
                              force_open, excl_stor)
def _CreateBlockDevInner(lu, node, instance, device, force_create,
                         info, force_open, excl_stor):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be change to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution
  @type excl_stor: boolean
  @param excl_stor: Whether exclusive_storage is active for the node

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDevInner(lu, node, instance, child, force_create,
                           info, force_open, excl_stor)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
                        excl_stor)
9334 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9336 """Create a single block device on a given node.
9338 This will not recurse over children of the device, so they must be
9341 @param lu: the lu on whose behalf we execute
9342 @param node: the node on which to create the device
9343 @type instance: L{objects.Instance}
9344 @param instance: the instance which owns the device
9345 @type device: L{objects.Disk}
9346 @param device: the device to create
9347 @param info: the extra 'metadata' we should attach to the device
9348 (this will be represented as a LVM tag)
9349 @type force_open: boolean
9350 @param force_open: this parameter will be passes to the
9351 L{backend.BlockdevCreate} function where it specifies
9352 whether we run on primary or not, and it affects both
9353 the child assembly and the device own Open() execution
9354 @type excl_stor: boolean
9355 @param excl_stor: Whether exclusive_storage is active for the node
9358 lu.cfg.SetDiskID(device, node)
9359 result = lu.rpc.call_blockdev_create(node, device, device.size,
9360 instance.name, force_open, info,
9362 result.Raise("Can't create block device %s on"
9363 " node %s for instance %s" % (device, node, instance.name))
9364 if device.physical_id is None:
9365 device.physical_id = result.payload
9368 def _GenerateUniqueNames(lu, exts):
9369 """Generate a suitable LV name.
9371 This will generate a logical volume name for the given instance.
9376 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9377 results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  @param lu: the lu on whose behalf we execute
  @param primary: name of the primary node
  @param secondary: name of the secondary node
  @param size: size of the data device, in mebibytes
  @type vgnames: list of strings
  @param vgnames: volume groups for the data and meta LVs (exactly two)
  @type names: list of strings
  @param names: logical volume names for data and meta (exactly two)
  @param iv_name: the instance-visible name of the device
  @param p_minor: DRBD minor on the primary node
  @param s_minor: DRBD minor on the secondary node
  @rtype: L{objects.Disk}
  @return: the DRBD8 disk object with its two LV children attached

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
                          params={})
  dev_meta = objects.Disk(dev_type=constants.LD_LV,
                          size=constants.DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
                          params={})
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params={})
  return drbd_dev
#: Infix inserted into generated disk names for the given templates;
#: templates absent from this map do not get generated names at all
#: (their logical ids come from elsewhere, e.g. adoption or DRBD)
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
  constants.DT_EXT: ".ext",
  }

#: Map from disk template to the logical-disk device type used when
#: instantiating L{objects.Disk} in L{_GenerateDiskTemplate}; DRBD8 is
#: handled separately there and is deliberately not listed here
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
  constants.DT_EXT: constants.LD_EXT,
  }
def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template (one of constants.DISK_TEMPLATES)
  @param instance_name: name of the instance owning the disks
  @param primary_node: the primary node
  @param secondary_nodes: list of secondary nodes (exactly one for DRBD8,
      empty for all other templates)
  @param disk_info: list of dicts describing the requested disks
  @param file_storage_dir: directory for file-based disks
  @param file_driver: driver for file-based disks
  @param base_index: index offset for naming the generated disks
  @param feedback_fn: function used to report progress to the user
  @param full_disk_params: complete disk parameters, used to compute the
      DRBD default metadata VG
  @return: list of L{objects.Disk} objects

  """
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
                                                       full_disk_params)
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    # two LVs (data + metadata) per DRBD disk
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  else:
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    if template_name == constants.DT_PLAIN:

      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])

    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    elif template_name == constants.DT_EXT:
      def logical_id_fn(idx, _, disk):
        provider = disk.get(constants.IDISK_PROVIDER, None)
        if provider is None:
          # FIX: the message arguments were previously passed as extra
          # positional args to ProgrammerError and never interpolated;
          # use explicit %-formatting so the message is rendered
          raise errors.ProgrammerError("Disk template is %s, but '%s' is"
                                       " not found" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER))
        return (provider, names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" %
                                   template_name)

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      params = {}
      # Only for the Ext template add disk_info to params
      if template_name == constants.DT_EXT:
        params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
        for key in disk:
          if key not in constants.IDISK_PARAMS:
            params[key] = disk[key]
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index,
                                                         disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=params))

  return disks
def _GetInstanceInfoText(instance):
  """Compute that text that should be added to the disk's metadata.

  """
  # the tag simply records the original owner's name
  return "originstname+" + instance.name
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far; must be non-zero
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  # average time per unit written so far, extrapolated over the remainder
  per_unit = time_taken / float(written)
  remaining = total_size - written
  return remaining * per_unit
def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  Synchronization of the disks is paused first, the wipe is performed
  chunk by chunk, and synchronization is resumed in a C{finally} block
  even if the wipe fails.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @param disks: optional list of (index, disk, start offset) tuples;
      defaults to all the instance's disks wiped from offset 0
  @return: the success of the wipe

  """
  node = instance.primary_node

  if disks is None:
    disks = [(idx, disk, 0)
             for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pausing synchronization of disks of instance '%s'",
               instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
                                                   instance),
                                                  True)
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  # a per-disk pause failure is only a warning; the wipe still proceeds
  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("Pausing synchronization of disk %s of instance '%s'"
                   " failed", idx, instance.name)

  try:
    for (idx, device, offset) in disks:
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
      wipe_chunk_size = \
        int(min(constants.MAX_WIPE_CHUNK,
                device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))

      size = device.size
      last_output = 0
      start_time = time.time()

      if offset == 0:
        info_text = ""
      else:
        info_text = (" (from %s to %s)" %
                     (utils.FormatUnit(offset, "h"),
                      utils.FormatUnit(size, "h")))

      lu.LogInfo("* Wiping disk %s%s", idx, info_text)

      logging.info("Wiping disk %d for instance %s on node %s using"
                   " chunk size %s", idx, instance.name, node,
                   wipe_chunk_size)

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)

        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)

        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))

        now = time.time()
        offset += wipe_size
        # rate-limit progress reports to at most one per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s",
                     offset / float(size) * 100, utils.FormatSeconds(eta))
          last_output = now
  finally:
    logging.info("Resuming synchronization of disks for instance '%s'",
                 instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (map(compat.snd, disks),
                                                     instance),
                                                    False)

    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resuming synchronization of disk %s of instance"
                        " '%s' failed", idx, instance.name)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    # creation restricted to the explicitly requested node only
    pnode = target_node
    all_nodes = [pnode]

  # file-based templates need their storage directory created first
  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    for node in all_nodes:
      # force_create/force_open only on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @param ignore_failures: whether to release DRBD ports even if some
      device removals failed
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
      if result.fail_msg:
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        # a failure on an offline secondary is tolerated; anything else
        # marks the whole removal as unsuccessful
        if not (result.offline and node != instance.primary_node):
          all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  # only give ports back when nothing failed (or failures are ignored),
  # otherwise a retry could reuse a port still held by a leftover device
  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template of the instance
  @param disks: list of disk dicts carrying at least
      C{constants.IDISK_VG} and C{constants.IDISK_SIZE}
  @return: dict mapping VG name to required size; empty for templates
      that do not consume VG space
  @raise errors.ProgrammerError: for an unknown disk template

  """
  def _compute(disks, payload):
    """Universal algorithm.

    Sums the sizes (plus a fixed per-disk overhead) grouped by VG.

    """
    vgs = {}
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      # FIX: previously this read "vgs.get(constants.IDISK_VG, 0)",
      # i.e. it looked up the *constant key* instead of the VG name, so
      # sizes never accumulated per volume group
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload
    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @return: the list of vm-capable nodes

  """
  # build the exclusion set once, then keep the input order
  excluded = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in excluded]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  # overlay the requested parameters on top of the cluster defaults
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    # offline nodes cannot be asked to validate anything
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      # empty payload means the OS was not found on the node; only an
      # informational message since "required" was already handled above
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The full filled beparams
  @param node_whitelist: List of nodes which should appear as online to the
    allocator (unless the node is already marked offline)

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       tags=op.tags,
                                       os=op.os_type,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       disks=disks,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor,
                                       node_whitelist=node_whitelist)
def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
  """Computes the nics.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param ec_id: Execution context ID

  @returns: The build up nics

  """
  nics = []
  for nic in op.nics:
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    # fall back to the cluster default NIC mode
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    net = nic.get(constants.INIC_NETWORK, None)
    link = nic.get(constants.NIC_LINK, None)
    ip = nic.get(constants.INIC_IP, None)

    if net is None or net.lower() == constants.VALUE_NONE:
      net = None
    else:
      # a network supplies mode/link itself, so both are mutually
      # exclusive with an explicit network
      if nic_mode_req is not None or link is not None:
        raise errors.OpPrereqError("If network is given, no mode or link"
                                   " is allowed to be passed",
                                   errors.ECODE_INVAL)

    # ip validity checks
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      # We defer pool operations until later, so that the iallocator has
      # filled in the instance's node(s) dimara
      if ip.lower() == constants.NIC_IP_POOL:
        if net is None:
          raise errors.OpPrereqError("if ip=pool, parameter network"
                                     " must be passed too",
                                     errors.ECODE_INVAL)

      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, ec_id)
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters
    nicparams = {}
    if nic_mode_req:
      nicparams[constants.NIC_MODE] = nic_mode
    if link:
      nicparams[constants.NIC_LINK] = link

    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    net_uuid = cfg.LookupNetwork(net)
    nics.append(objects.NIC(mac=mac, ip=nic_ip,
                            network=net_uuid, nicparams=nicparams))

  return nics
9973 def _ComputeDisks(op, default_vg):
9974 """Computes the instance disks.
9976 @param op: The instance opcode
9977 @param default_vg: The default_vg to assume
9979 @return: The computed disks
9983 for disk in op.disks:
9984 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9985 if mode not in constants.DISK_ACCESS_SET:
9986 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9987 mode, errors.ECODE_INVAL)
9988 size = disk.get(constants.IDISK_SIZE, None)
9990 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9993 except (TypeError, ValueError):
9994 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9997 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
9998 if ext_provider and op.disk_template != constants.DT_EXT:
9999 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10000 " disk template, not %s" %
10001 (constants.IDISK_PROVIDER, constants.DT_EXT,
10002 op.disk_template), errors.ECODE_INVAL)
10004 data_vg = disk.get(constants.IDISK_VG, default_vg)
10006 constants.IDISK_SIZE: size,
10007 constants.IDISK_MODE: mode,
10008 constants.IDISK_VG: data_vg,
10011 if constants.IDISK_METAVG in disk:
10012 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10013 if constants.IDISK_ADOPT in disk:
10014 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10016 # For extstorage, demand the `provider' option and add any
10017 # additional parameters (ext-params) to the dict
10018 if op.disk_template == constants.DT_EXT:
10020 new_disk[constants.IDISK_PROVIDER] = ext_provider
10022 if key not in constants.IDISK_PARAMS:
10023 new_disk[key] = disk[key]
10025 raise errors.OpPrereqError("Missing provider for template '%s'" %
10026 constants.DT_EXT, errors.ECODE_INVAL)
10028 disks.append(new_disk)
def _ComputeFullBeParams(op, cluster):
  """Computes the full beparams.

  Note: this modifies C{op.beparams} in place (AUTO values are replaced
  by the cluster defaults and the dict is upgraded/type-checked).

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  default_beparams = cluster.beparams[constants.PP_DEFAULT]
  for param, value in op.beparams.iteritems():
    if value == constants.VALUE_AUTO:
      op.beparams[param] = default_beparams[param]
  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
  return cluster.SimpleFillBE(op.beparams)
def _CheckOpportunisticLocking(op):
  """Generate error if opportunistic locking is not possible.

  """
  if not op.opportunistic_locking:
    return
  if op.iallocator:
    return
  # opportunistic locking without an allocator cannot work: the node
  # set would be fixed but possibly unlocked
  raise errors.OpPrereqError("Opportunistic locking is only available in"
                             " combination with an instance allocator",
                             errors.ECODE_INVAL)
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      if self.op.disk_template != constants.DT_EXT:
        utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    _CheckOpportunisticLocking(self.op)

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      # fixed node(s): lock only those
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # without a source node we must search all nodes for the export,
        # hence lock everything
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(pathutils.EXPORT_DIR, src_path)

    self.needed_locks[locking.LEVEL_NODE_RES] = \
      _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success this sets C{self.op.pnode} (and C{self.op.snode} when the
    allocation needs two nodes) from the allocator's result.

    """
    if self.op.opportunistic_locking:
      # Only consider nodes for which a lock is held
      node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
    else:
      node_whitelist = None

    #TODO Export network to iallocator so that it chooses a pnode
    #     in a nodegroup that has the desired network connected to
    req = _CreateInstanceAllocRequest(self.op, self.disks,
                                      self.nics, self.be_full,
                                      node_whitelist)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      # When opportunistic locks are used only a temporary failure is
      # generated, so that the job can be retried once more locks become
      # available
      if self.op.opportunistic_locking:
        ecode = errors.ECODE_TEMP_NORES
      else:
        ecode = errors.ECODE_NORES

      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 ecode)

    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

    assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"

    if req.RequiredNodes() == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      # the hooks only get (size, mode) pairs, not the full disk dicts
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env
10365 def BuildHooksNodes(self):
10366 """Build hooks nodes.
10369 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      # no source node given: search all locked nodes for an export
      # matching the (relative) source path
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    @param einfo: the export information (ConfigParser-like object)

    """
    # the OS type always comes from the export
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" %
                                     " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          # collect all known NIC parameters for this NIC index
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          # NICs are numbered consecutively; stop at the first gap
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Any hv/be/nic/os parameter that equals the cluster-level default is
    dropped from the opcode, so the instance does not carry redundant
    per-instance overrides.

    @param cluster: the cluster configuration object

    """
    # hvparams: compare against the cluster defaults for this hypervisor/OS
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]

    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]

    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    Sets C{self.instance_file_storage_dir}; it stays C{None} for
    non-file-based disk templates.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined",
                                   errors.ECODE_STATE)
      joinargs.append(cfg_storagedir)

      # an opcode-supplied subdirectory is optional
      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10563 def CheckPrereq(self): # pylint: disable=R0914
10564 """Check prerequisites.
10567 self._CalculateFileStorageDir()
10569 if self.op.mode == constants.INSTANCE_IMPORT:
10570 export_info = self._ReadExportInfo()
10571 self._ReadExportParams(export_info)
10572 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10574 self._old_instance_name = None
10576 if (not self.cfg.GetVGName() and
10577 self.op.disk_template not in constants.DTS_NOT_LVM):
10578 raise errors.OpPrereqError("Cluster does not support lvm-based"
10579 " instances", errors.ECODE_STATE)
10581 if (self.op.hypervisor is None or
10582 self.op.hypervisor == constants.VALUE_AUTO):
10583 self.op.hypervisor = self.cfg.GetHypervisorType()
10585 cluster = self.cfg.GetClusterInfo()
10586 enabled_hvs = cluster.enabled_hypervisors
10587 if self.op.hypervisor not in enabled_hvs:
10588 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10590 (self.op.hypervisor, ",".join(enabled_hvs)),
10591 errors.ECODE_STATE)
10593 # Check tag validity
10594 for tag in self.op.tags:
10595 objects.TaggableObject.ValidateTag(tag)
10597 # check hypervisor parameter syntax (locally)
10598 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10599 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10601 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10602 hv_type.CheckParameterSyntax(filled_hvp)
10603 self.hv_full = filled_hvp
10604 # check that we don't specify global parameters on an instance
10605 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10606 "instance", "cluster")
10608 # fill and remember the beparams dict
10609 self.be_full = _ComputeFullBeParams(self.op, cluster)
10611 # build os parameters
10612 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10614 # now that hvp/bep are in final format, let's reset to defaults,
10616 if self.op.identify_defaults:
10617 self._RevertToDefaults(cluster)
10620 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10621 self.proc.GetECId())
10623 # disk checks/pre-build
10624 default_vg = self.cfg.GetVGName()
10625 self.disks = _ComputeDisks(self.op, default_vg)
10627 if self.op.mode == constants.INSTANCE_IMPORT:
10629 for idx in range(len(self.disks)):
10630 option = "disk%d_dump" % idx
10631 if export_info.has_option(constants.INISECT_INS, option):
10632 # FIXME: are the old os-es, disk sizes, etc. useful?
10633 export_name = export_info.get(constants.INISECT_INS, option)
10634 image = utils.PathJoin(self.op.src_path, export_name)
10635 disk_images.append(image)
10637 disk_images.append(False)
10639 self.src_images = disk_images
10641 if self.op.instance_name == self._old_instance_name:
10642 for idx, nic in enumerate(self.nics):
10643 if nic.mac == constants.VALUE_AUTO:
10644 nic_mac_ini = "nic%d_mac" % idx
10645 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10647 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10649 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10650 if self.op.ip_check:
10651 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10652 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10653 (self.check_ip, self.op.instance_name),
10654 errors.ECODE_NOTUNIQUE)
10656 #### mac address generation
10657 # By generating here the mac address both the allocator and the hooks get
10658 # the real final mac address rather than the 'auto' or 'generate' value.
10659 # There is a race condition between the generation and the instance object
10660 # creation, which means that we know the mac is valid now, but we're not
10661 # sure it will be when we actually add the instance. If things go bad
10662 # adding the instance will abort because of a duplicate mac, and the
10663 # creation job will fail.
10664 for nic in self.nics:
10665 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10666 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10670 if self.op.iallocator is not None:
10671 self._RunAllocator()
10673 # Release all unneeded node locks
10674 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10675 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10676 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10677 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10679 assert (self.owned_locks(locking.LEVEL_NODE) ==
10680 self.owned_locks(locking.LEVEL_NODE_RES)), \
10681 "Node locks differ from node resource locks"
10683 #### node related checks
10685 # check primary node
10686 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10687 assert self.pnode is not None, \
10688 "Cannot retrieve locked node %s" % self.op.pnode
10690 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10691 pnode.name, errors.ECODE_STATE)
10693 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10694 pnode.name, errors.ECODE_STATE)
10695 if not pnode.vm_capable:
10696 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10697 " '%s'" % pnode.name, errors.ECODE_STATE)
10699 self.secondaries = []
10701 # Fill in any IPs from IP pools. This must happen here, because we need to
10702 # know the nic's primary node, as specified by the iallocator
10703 for idx, nic in enumerate(self.nics):
10704 net_uuid = nic.network
10705 if net_uuid is not None:
10706 nobj = self.cfg.GetNetwork(net_uuid)
10707 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10708 if netparams is None:
10709 raise errors.OpPrereqError("No netparams found for network"
10710 " %s. Propably not connected to"
10711 " node's %s nodegroup" %
10712 (nobj.name, self.pnode.name),
10713 errors.ECODE_INVAL)
10714 self.LogInfo("NIC/%d inherits netparams %s" %
10715 (idx, netparams.values()))
10716 nic.nicparams = dict(netparams)
10717 if nic.ip is not None:
10718 if nic.ip.lower() == constants.NIC_IP_POOL:
10720 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10721 except errors.ReservationError:
10722 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10723 " from the address pool" % idx,
10724 errors.ECODE_STATE)
10725 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10728 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10729 except errors.ReservationError:
10730 raise errors.OpPrereqError("IP address %s already in use"
10731 " or does not belong to network %s" %
10732 (nic.ip, nobj.name),
10733 errors.ECODE_NOTUNIQUE)
10735 # net is None, ip None or given
10736 elif self.op.conflicts_check:
10737 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10739 # mirror node verification
10740 if self.op.disk_template in constants.DTS_INT_MIRROR:
10741 if self.op.snode == pnode.name:
10742 raise errors.OpPrereqError("The secondary node cannot be the"
10743 " primary node", errors.ECODE_INVAL)
10744 _CheckNodeOnline(self, self.op.snode)
10745 _CheckNodeNotDrained(self, self.op.snode)
10746 _CheckNodeVmCapable(self, self.op.snode)
10747 self.secondaries.append(self.op.snode)
10749 snode = self.cfg.GetNodeInfo(self.op.snode)
10750 if pnode.group != snode.group:
10751 self.LogWarning("The primary and secondary nodes are in two"
10752 " different node groups; the disk parameters"
10753 " from the first disk's node group will be"
10756 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10758 if self.op.disk_template in constants.DTS_INT_MIRROR:
10759 nodes.append(snode)
10760 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10761 if compat.any(map(has_es, nodes)):
10762 raise errors.OpPrereqError("Disk template %s not supported with"
10763 " exclusive storage" % self.op.disk_template,
10764 errors.ECODE_STATE)
10766 nodenames = [pnode.name] + self.secondaries
10768 # Verify instance specs
10769 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10771 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10772 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10773 constants.ISPEC_DISK_COUNT: len(self.disks),
10774 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
10775 constants.ISPEC_NIC_COUNT: len(self.nics),
10776 constants.ISPEC_SPINDLE_USE: spindle_use,
10779 group_info = self.cfg.GetNodeGroup(pnode.group)
10780 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10781 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10782 if not self.op.ignore_ipolicy and res:
10783 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10784 (pnode.group, group_info.name, utils.CommaJoin(res)))
10785 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10787 if not self.adopt_disks:
10788 if self.op.disk_template == constants.DT_RBD:
10789 # _CheckRADOSFreeSpace() is just a placeholder.
10790 # Any function that checks prerequisites can be placed here.
10791 # Check if there is enough space on the RADOS cluster.
10792 _CheckRADOSFreeSpace()
10793 elif self.op.disk_template == constants.DT_EXT:
10794 # FIXME: Function that checks prereqs if needed
10797 # Check lv size requirements, if not adopting
10798 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10799 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10801 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10802 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10803 disk[constants.IDISK_ADOPT])
10804 for disk in self.disks])
10805 if len(all_lvs) != len(self.disks):
10806 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10807 errors.ECODE_INVAL)
10808 for lv_name in all_lvs:
10810 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10811 # to ReserveLV uses the same syntax
10812 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10813 except errors.ReservationError:
10814 raise errors.OpPrereqError("LV named %s used by another instance" %
10815 lv_name, errors.ECODE_NOTUNIQUE)
10817 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10818 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10820 node_lvs = self.rpc.call_lv_list([pnode.name],
10821 vg_names.payload.keys())[pnode.name]
10822 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10823 node_lvs = node_lvs.payload
10825 delta = all_lvs.difference(node_lvs.keys())
10827 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10828 utils.CommaJoin(delta),
10829 errors.ECODE_INVAL)
10830 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10832 raise errors.OpPrereqError("Online logical volumes found, cannot"
10833 " adopt: %s" % utils.CommaJoin(online_lvs),
10834 errors.ECODE_STATE)
10835 # update the size of disk based on what is found
10836 for dsk in self.disks:
10837 dsk[constants.IDISK_SIZE] = \
10838 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10839 dsk[constants.IDISK_ADOPT])][0]))
10841 elif self.op.disk_template == constants.DT_BLOCK:
10842 # Normalize and de-duplicate device paths
10843 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10844 for disk in self.disks])
10845 if len(all_disks) != len(self.disks):
10846 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10847 errors.ECODE_INVAL)
10848 baddisks = [d for d in all_disks
10849 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10851 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10852 " cannot be adopted" %
10853 (utils.CommaJoin(baddisks),
10854 constants.ADOPTABLE_BLOCKDEV_ROOT),
10855 errors.ECODE_INVAL)
10857 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10858 list(all_disks))[pnode.name]
10859 node_disks.Raise("Cannot get block device information from node %s" %
10861 node_disks = node_disks.payload
10862 delta = all_disks.difference(node_disks.keys())
10864 raise errors.OpPrereqError("Missing block device(s): %s" %
10865 utils.CommaJoin(delta),
10866 errors.ECODE_INVAL)
10867 for dsk in self.disks:
10868 dsk[constants.IDISK_SIZE] = \
10869 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10871 # Verify instance specs
10872 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10874 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10875 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10876 constants.ISPEC_DISK_COUNT: len(self.disks),
10877 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10878 for disk in self.disks],
10879 constants.ISPEC_NIC_COUNT: len(self.nics),
10880 constants.ISPEC_SPINDLE_USE: spindle_use,
10883 group_info = self.cfg.GetNodeGroup(pnode.group)
10884 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10885 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10886 if not self.op.ignore_ipolicy and res:
10887 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10888 " policy: %s") % (pnode.group,
10889 utils.CommaJoin(res)),
10890 errors.ECODE_INVAL)
10892 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10894 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10895 # check OS parameters (remotely)
10896 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10898 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10900 #TODO: _CheckExtParams (remotely)
10901 # Check parameters for extstorage
10903 # memory check on primary node
10904 #TODO(dynmem): use MINMEM for checking
10906 _CheckNodeFreeMemory(self, self.pnode.name,
10907 "creating instance %s" % self.op.instance_name,
10908 self.be_full[constants.BE_MAXMEM],
10909 self.op.hypervisor)
10911 self.dry_run_result = list(nodenames)
10913 def Exec(self, feedback_fn):
10914 """Create and add the instance to the cluster.
10917 instance = self.op.instance_name
10918 pnode_name = self.pnode.name
10920 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10921 self.owned_locks(locking.LEVEL_NODE)), \
10922 "Node locks differ from node resource locks"
10923 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10925 ht_kind = self.op.hypervisor
10926 if ht_kind in constants.HTS_REQ_PORT:
10927 network_port = self.cfg.AllocatePort()
10929 network_port = None
10931 # This is ugly but we got a chicken-egg problem here
10932 # We can only take the group disk parameters, as the instance
10933 # has no disks yet (we are generating them right here).
10934 node = self.cfg.GetNodeInfo(pnode_name)
10935 nodegroup = self.cfg.GetNodeGroup(node.group)
10936 disks = _GenerateDiskTemplate(self,
10937 self.op.disk_template,
10938 instance, pnode_name,
10941 self.instance_file_storage_dir,
10942 self.op.file_driver,
10945 self.cfg.GetGroupDiskParams(nodegroup))
10947 iobj = objects.Instance(name=instance, os=self.op.os_type,
10948 primary_node=pnode_name,
10949 nics=self.nics, disks=disks,
10950 disk_template=self.op.disk_template,
10951 admin_state=constants.ADMINST_DOWN,
10952 network_port=network_port,
10953 beparams=self.op.beparams,
10954 hvparams=self.op.hvparams,
10955 hypervisor=self.op.hypervisor,
10956 osparams=self.op.osparams,
10960 for tag in self.op.tags:
10963 if self.adopt_disks:
10964 if self.op.disk_template == constants.DT_PLAIN:
10965 # rename LVs to the newly-generated names; we need to construct
10966 # 'fake' LV disks with the old data, plus the new unique_id
10967 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10969 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10970 rename_to.append(t_dsk.logical_id)
10971 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10972 self.cfg.SetDiskID(t_dsk, pnode_name)
10973 result = self.rpc.call_blockdev_rename(pnode_name,
10974 zip(tmp_disks, rename_to))
10975 result.Raise("Failed to rename adoped LVs")
10977 feedback_fn("* creating instance disks...")
10979 _CreateDisks(self, iobj)
10980 except errors.OpExecError:
10981 self.LogWarning("Device creation failed, reverting...")
10983 _RemoveDisks(self, iobj)
10985 self.cfg.ReleaseDRBDMinors(instance)
10988 feedback_fn("adding instance %s to cluster config" % instance)
10990 self.cfg.AddInstance(iobj, self.proc.GetECId())
10992 # Declare that we don't want to remove the instance lock anymore, as we've
10993 # added the instance to the config
10994 del self.remove_locks[locking.LEVEL_INSTANCE]
10996 if self.op.mode == constants.INSTANCE_IMPORT:
10997 # Release unused nodes
10998 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
11000 # Release all nodes
11001 _ReleaseLocks(self, locking.LEVEL_NODE)
11004 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
11005 feedback_fn("* wiping instance disks...")
11007 _WipeDisks(self, iobj)
11008 except errors.OpExecError, err:
11009 logging.exception("Wiping disks failed")
11010 self.LogWarning("Wiping instance disks failed (%s)", err)
11014 # Something is already wrong with the disks, don't do anything else
11016 elif self.op.wait_for_sync:
11017 disk_abort = not _WaitForSync(self, iobj)
11018 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11019 # make sure the disks are not degraded (still sync-ing is ok)
11020 feedback_fn("* checking mirrors status")
11021 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11026 _RemoveDisks(self, iobj)
11027 self.cfg.RemoveInstance(iobj.name)
11028 # Make sure the instance lock gets removed
11029 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11030 raise errors.OpExecError("There are some degraded disks for"
11033 # Release all node resource locks
11034 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11036 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11037 # we need to set the disks ID to the primary node, since the
11038 # preceding code might or might have not done it, depending on
11039 # disk template and other options
11040 for disk in iobj.disks:
11041 self.cfg.SetDiskID(disk, pnode_name)
11042 if self.op.mode == constants.INSTANCE_CREATE:
11043 if not self.op.no_install:
11044 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11045 not self.op.wait_for_sync)
11047 feedback_fn("* pausing disk sync to install instance OS")
11048 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11051 for idx, success in enumerate(result.payload):
11053 logging.warn("pause-sync of instance %s for disk %d failed",
11056 feedback_fn("* running the instance OS create scripts...")
11057 # FIXME: pass debug option from opcode to backend
11059 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11060 self.op.debug_level)
11062 feedback_fn("* resuming disk sync")
11063 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11066 for idx, success in enumerate(result.payload):
11068 logging.warn("resume-sync of instance %s for disk %d failed",
11071 os_add_result.Raise("Could not add os for instance %s"
11072 " on node %s" % (instance, pnode_name))
11075 if self.op.mode == constants.INSTANCE_IMPORT:
11076 feedback_fn("* running the instance OS import scripts...")
11080 for idx, image in enumerate(self.src_images):
11084 # FIXME: pass debug option from opcode to backend
11085 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11086 constants.IEIO_FILE, (image, ),
11087 constants.IEIO_SCRIPT,
11088 (iobj.disks[idx], idx),
11090 transfers.append(dt)
11093 masterd.instance.TransferInstanceData(self, feedback_fn,
11094 self.op.src_node, pnode_name,
11095 self.pnode.secondary_ip,
11097 if not compat.all(import_result):
11098 self.LogWarning("Some disks for instance %s on node %s were not"
11099 " imported successfully" % (instance, pnode_name))
11101 rename_from = self._old_instance_name
11103 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11104 feedback_fn("* preparing remote import...")
11105 # The source cluster will stop the instance before attempting to make
11106 # a connection. In some cases stopping an instance can take a long
11107 # time, hence the shutdown timeout is added to the connection
11109 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11110 self.op.source_shutdown_timeout)
11111 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11113 assert iobj.primary_node == self.pnode.name
11115 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11116 self.source_x509_ca,
11117 self._cds, timeouts)
11118 if not compat.all(disk_results):
11119 # TODO: Should the instance still be started, even if some disks
11120 # failed to import (valid for local imports, too)?
11121 self.LogWarning("Some disks for instance %s on node %s were not"
11122 " imported successfully" % (instance, pnode_name))
11124 rename_from = self.source_instance_name
11127 # also checked in the prereq part
11128 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11131 # Run rename script on newly imported instance
11132 assert iobj.name == instance
11133 feedback_fn("Running rename script for %s" % instance)
11134 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11136 self.op.debug_level)
11137 if result.fail_msg:
11138 self.LogWarning("Failed to run rename script for %s on node"
11139 " %s: %s" % (instance, pnode_name, result.fail_msg))
11141 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11144 iobj.admin_state = constants.ADMINST_UP
11145 self.cfg.Update(iobj, feedback_fn)
11146 logging.info("Starting instance %s on node %s", instance, pnode_name)
11147 feedback_fn("* starting instance...")
11148 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11150 result.Raise("Could not start instance")
11152 return list(iobj.all_nodes)
class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # collect one boolean per required node slot (pnode, and snode for
    # mirrored templates); either all instances specify nodes or none do
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    _CheckOpportunisticLocking(self.op)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {
      # iallocator will select nodes and even if no iallocator is used,
      # collisions with LUInstanceCreate should be avoided
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET

      if self.op.opportunistic_locking:
        self.opportunistic_locks[locking.LEVEL_NODE] = True
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
    else:
      # nodes were given explicitly on the instances
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    """
    cluster = self.cfg.GetClusterInfo()
    default_vg = self.cfg.GetVGName()
    ec_id = self.proc.GetECId()

    if self.op.opportunistic_locking:
      # Only consider nodes for which a lock is held
      node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
    else:
      node_whitelist = None

    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, ec_id),
                                         _ComputeFullBeParams(op, cluster),
                                         node_whitelist)
             for op in self.op.instances]

    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.ia_result = ial.result

    if self.op.dry_run:
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Contructs the partial result.

    """
    (allocatable, failed) = self.ia_result
    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    """
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    jobs = []
    for (name, nodes) in allocatable:
      op = op2inst.pop(name)

      # the iallocator returns one node for non-mirrored templates and
      # two (primary, secondary) for mirrored ones
      if len(nodes) > 1:
        (op.pnode, op.snode) = nodes
      else:
        (op.pnode,) = nodes

      jobs.append([op])

    # every requested instance must be either allocatable or failed
    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  Placeholder: any prerequisite check for RBD-based disks can be
  added here.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      # instance is not actually running; translate the admin state into
      # the matching user-visible instance state for the error message
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, filled_hv, filled_be)

  # Sanity-check the hypervisor's answer before handing it to the client.
  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
11386 class LUInstanceReplaceDisks(LogicalUnit):
11387 """Replace the disks of an instance.

# Thin LU wrapper: argument/lock handling lives here, the actual disk
# replacement work is delegated to the TLReplaceDisks tasklet below.
11390 HPATH = "mirrors-replace"
11391 HTYPE = constants.HTYPE_INSTANCE

11394 def CheckArguments(self):
11395 """Check arguments.

11398 remote_node = self.op.remote_node
11399 ialloc = self.op.iallocator
# When changing the secondary, exactly one of (iallocator, remote_node)
# must be given; in any other mode neither may be given.
11400 if self.op.mode == constants.REPLACE_DISK_CHG:
11401 if remote_node is None and ialloc is None:
11402 raise errors.OpPrereqError("When changing the secondary either an"
11403 " iallocator script must be used or the"
11404 " new node given", errors.ECODE_INVAL)
11406 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11408 elif remote_node is not None or ialloc is not None:
11409 # Not replacing the secondary
11410 raise errors.OpPrereqError("The iallocator and new node options can"
11411 " only be used when changing the"
11412 " secondary node", errors.ECODE_INVAL)

11414 def ExpandNames(self):
11415 self._ExpandAndLockInstance()

11417 assert locking.LEVEL_NODE not in self.needed_locks
11418 assert locking.LEVEL_NODE_RES not in self.needed_locks
11419 assert locking.LEVEL_NODEGROUP not in self.needed_locks

11421 assert self.op.iallocator is None or self.op.remote_node is None, \
11422 "Conflicting options"

11424 if self.op.remote_node is not None:
11425 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

11427 # Warning: do not remove the locking of the new secondary here
11428 # unless DRBD8.AddChildren is changed to work in parallel;
11429 # currently it doesn't since parallel invocations of
11430 # FindUnusedMinor will conflict
11431 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11432 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
# NOTE(review): the "else:" branch opener (original line ~11433) is
# missing from this extract; the next two lines are the no-remote-node path.
11434 self.needed_locks[locking.LEVEL_NODE] = []
11435 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

11437 if self.op.iallocator is not None:
11438 # iallocator will select a new node in the same group
11439 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11440 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

11442 self.needed_locks[locking.LEVEL_NODE_RES] = []

11444 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11445 self.op.iallocator, self.op.remote_node,
11446 self.op.disks, self.op.early_release,
11447 self.op.ignore_ipolicy)

11449 self.tasklets = [self.replacer]

11451 def DeclareLocks(self, level):
11452 if level == locking.LEVEL_NODEGROUP:
11453 assert self.op.remote_node is None
11454 assert self.op.iallocator is not None
11455 assert not self.needed_locks[locking.LEVEL_NODEGROUP]

11457 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11458 # Lock all groups used by instance optimistically; this requires going
11459 # via the node before it's locked, requiring verification later on
11460 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11461 self.cfg.GetInstanceNodeGroups(self.op.instance_name)

11463 elif level == locking.LEVEL_NODE:
11464 if self.op.iallocator is not None:
11465 assert self.op.remote_node is None
11466 assert not self.needed_locks[locking.LEVEL_NODE]
11467 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)

11469 # Lock member nodes of all locked groups
# NOTE(review): the list-comprehension opener (original line ~11471) is
# missing from this extract.
11470 self.needed_locks[locking.LEVEL_NODE] = \
11472 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11473 for node_name in self.cfg.GetNodeGroup(group_uuid).members]

11475 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

11477 self._LockInstancesNodes()

11479 elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks computed above.
11481 self.needed_locks[locking.LEVEL_NODE_RES] = \
11482 self.needed_locks[locking.LEVEL_NODE]

11484 def BuildHooksEnv(self):
11485 """Build hooks env.

11487 This runs on the master, the primary and all the secondaries.

11490 instance = self.replacer.instance
# NOTE(review): the "env = {" opener (original line ~11491) is missing
# from this extract.
11492 "MODE": self.op.mode,
11493 "NEW_SECONDARY": self.op.remote_node,
11494 "OLD_SECONDARY": instance.secondary_nodes[0],
11496 env.update(_BuildInstanceHookEnvByObject(self, instance))

11499 def BuildHooksNodes(self):
11500 """Build hooks nodes.

11503 instance = self.replacer.instance
# NOTE(review): the "nl = [" opener (original line ~11504) is missing
# from this extract.
11505 self.cfg.GetMasterNode(),
11506 instance.primary_node,
11508 if self.op.remote_node is not None:
11509 nl.append(self.op.remote_node)

11512 def CheckPrereq(self):
11513 """Check prerequisites.

# Node group locks are only taken on the iallocator path (see
# DeclareLocks above), hence this implication.
11516 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11517 self.op.iallocator is None)

11519 # Verify if node group locks are still correct
11520 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11522 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

11524 return LogicalUnit.CheckPrereq(self)
11527 class TLReplaceDisks(Tasklet):
11528 """Replaces disks for an instance.

11530 Note: Locking is not within the scope of this class.

11533 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11534 disks, early_release, ignore_ipolicy):
11535 """Initializes this class.

11538 Tasklet.__init__(self, lu)

# Parameters (mode and disks assignments are missing from this truncated
# extract, original lines ~11539-11545).
11541 self.instance_name = instance_name
11543 self.iallocator_name = iallocator_name
11544 self.remote_node = remote_node
11546 self.early_release = early_release
11547 self.ignore_ipolicy = ignore_ipolicy

# Runtime structures, filled in by CheckPrereq.
11550 self.instance = None
11551 self.new_node = None
11552 self.target_node = None
11553 self.other_node = None
11554 self.remote_node_info = None
11555 self.node_secondary_ip = None
# NOTE(review): a decorator line (original ~11557, presumably
# @staticmethod given the "lu" first parameter) is missing from this
# truncated extract.
11558 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11559 """Compute a new secondary node using an IAllocator.

11562 req = iallocator.IAReqRelocate(name=instance_name,
11563 relocate_from=list(relocate_from))
11564 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)

11566 ial.Run(iallocator_name)

11568 if not ial.success:
11569 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11570 " %s" % (iallocator_name, ial.info),
11571 errors.ECODE_NORES)

# First (and only requested) node from the relocation answer.
11573 remote_node_name = ial.result[0]

11575 lu.LogInfo("Selected new secondary for instance '%s': %s",
11576 instance_name, remote_node_name)

11578 return remote_node_name
11580 def _FindFaultyDisks(self, node_name):
11581 """Wrapper for L{_FindFaultyInstanceDisks}.

# NOTE(review): the trailing arguments of this call (original line
# ~11585, presumably node_name) are missing from this truncated extract.
11584 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11587 def _CheckDisksActivated(self, instance):
11588 """Checks if the instance disks are activated.

11590 @param instance: The instance to check disks
11591 @return: True if they are activated, False otherwise

11594 nodes = instance.all_nodes

# Probe every disk on every node of the instance.
11596 for idx, dev in enumerate(instance.disks):
# NOTE(review): the inner "for node in nodes:" loop header (original line
# ~11597) is missing from this truncated extract.
11598 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11599 self.cfg.SetDiskID(dev, node)

11601 result = _BlockdevFind(self, node, dev, instance)

# NOTE(review): the surrounding branches and the final returns (original
# lines ~11602-11609) are missing from this extract; a failed or empty
# lookup presumably yields False.
11605 elif result.fail_msg or not result.payload:
11610 def CheckPrereq(self):
11611 """Check prerequisites.

11613 This checks that the instance is in the cluster.

11616 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11617 assert instance is not None, \
11618 "Cannot retrieve locked instance %s" % self.instance_name

# Disk replacement only applies to DRBD8 instances with exactly one
# secondary (the standard two-node mirror layout).
11620 if instance.disk_template != constants.DT_DRBD8:
11621 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11622 " instances", errors.ECODE_INVAL)

11624 if len(instance.secondary_nodes) != 1:
11625 raise errors.OpPrereqError("The instance has a strange layout,"
11626 " expected one secondary but found %d" %
11627 len(instance.secondary_nodes),
11628 errors.ECODE_FAULT)

11630 instance = self.instance
11631 secondary_node = instance.secondary_nodes[0]

# Either take the user-supplied remote node or ask the iallocator.
11633 if self.iallocator_name is None:
11634 remote_node = self.remote_node
# NOTE(review): the "else:" opener (original line ~11635) is missing
# from this truncated extract.
11636 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11637 instance.name, instance.secondary_nodes)

11639 if remote_node is None:
11640 self.remote_node_info = None
# NOTE(review): "else:" (original line ~11641) missing from this extract.
11642 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11643 "Remote node '%s' is not locked" % remote_node

11645 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11646 assert self.remote_node_info is not None, \
11647 "Cannot retrieve locked node %s" % remote_node

11649 if remote_node == self.instance.primary_node:
11650 raise errors.OpPrereqError("The specified node is the primary node of"
11651 " the instance", errors.ECODE_INVAL)

11653 if remote_node == secondary_node:
11654 raise errors.OpPrereqError("The specified node is already the"
11655 " secondary node of the instance",
11656 errors.ECODE_INVAL)

# Explicit disk lists are only meaningful when keeping the nodes.
11658 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11659 constants.REPLACE_DISK_CHG):
11660 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11661 errors.ECODE_INVAL)

11663 if self.mode == constants.REPLACE_DISK_AUTO:
11664 if not self._CheckDisksActivated(instance):
11665 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11666 " first" % self.instance_name,
11667 errors.ECODE_STATE)
11668 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11669 faulty_secondary = self._FindFaultyDisks(secondary_node)

# Auto mode can repair faults on one side only.
11671 if faulty_primary and faulty_secondary:
11672 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11673 " one node and can not be repaired"
11674 " automatically" % self.instance_name,
11675 errors.ECODE_STATE)

# NOTE(review): the "if faulty_primary:" opener (original line ~11677)
# is missing from this truncated extract.
11678 self.disks = faulty_primary
11679 self.target_node = instance.primary_node
11680 self.other_node = secondary_node
11681 check_nodes = [self.target_node, self.other_node]
11682 elif faulty_secondary:
11683 self.disks = faulty_secondary
11684 self.target_node = secondary_node
11685 self.other_node = instance.primary_node
11686 check_nodes = [self.target_node, self.other_node]
# NOTE(review): the no-faulty-disks branch (original lines ~11687-11691)
# is missing from this extract.

11692 # Non-automatic modes
11693 if self.mode == constants.REPLACE_DISK_PRI:
11694 self.target_node = instance.primary_node
11695 self.other_node = secondary_node
11696 check_nodes = [self.target_node, self.other_node]

11698 elif self.mode == constants.REPLACE_DISK_SEC:
11699 self.target_node = secondary_node
11700 self.other_node = instance.primary_node
11701 check_nodes = [self.target_node, self.other_node]

11703 elif self.mode == constants.REPLACE_DISK_CHG:
11704 self.new_node = remote_node
11705 self.other_node = instance.primary_node
11706 self.target_node = secondary_node
11707 check_nodes = [self.new_node, self.other_node]

11709 _CheckNodeNotDrained(self.lu, remote_node)
11710 _CheckNodeVmCapable(self.lu, remote_node)

11712 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11713 assert old_node_info is not None
# If the old secondary is gone there is nothing to delay releasing for.
11714 if old_node_info.offline and not self.early_release:
11715 # doesn't make sense to delay the release
11716 self.early_release = True
11717 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11718 " early-release mode", secondary_node)

# NOTE(review): the "else:" opener before this ProgrammerError (original
# lines ~11719-11720) is missing from this extract.
11721 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %

11724 # If not specified all disks should be replaced
# NOTE(review): the guarding "if not self.disks:" (original line ~11725)
# is missing from this extract.
11726 self.disks = range(len(self.instance.disks))

11728 # TODO: This is ugly, but right now we can't distinguish between internal
11729 # submitted opcode and external one. We should fix that.
11730 if self.remote_node_info:
11731 # We change the node, lets verify it still meets instance policy
11732 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11733 cluster = self.cfg.GetClusterInfo()
11734 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11736 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11737 ignore=self.ignore_ipolicy)

11739 for node in check_nodes:
11740 _CheckNodeOnline(self.lu, node)

11742 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11745 if node_name is not None)

11747 # Release unneeded node and node resource locks
11748 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11749 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11750 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

11752 # Release any owned node group
11753 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

11755 # Check whether disks are valid
11756 for disk_idx in self.disks:
11757 instance.FindDisk(disk_idx)

11759 # Get secondary node IP addresses
11760 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11761 in self.cfg.GetMultiNodeInfo(touched_nodes))
11763 def Exec(self, feedback_fn):
11764 """Execute disk replacement.

11766 This dispatches the disk replacement to the appropriate handler.

11770 # Verify owned locks before starting operation
11771 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11772 assert set(owned_nodes) == set(self.node_secondary_ip), \
11773 ("Incorrect node locks, owning %s, expected %s" %
11774 (owned_nodes, self.node_secondary_ip.keys()))
11775 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11776 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11777 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

11779 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11780 assert list(owned_instances) == [self.instance_name], \
11781 "Instance '%s' not locked" % self.instance_name

11783 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11784 "Should not own any node group lock at this point"

# NOTE(review): the "if not self.disks:" guard and its early return
# (original lines ~11785-11790) are partly missing from this extract.
11787 feedback_fn("No disks need replacement for instance '%s'" %
11788 self.instance.name)

11791 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11792 (utils.CommaJoin(self.disks), self.instance.name))
11793 feedback_fn("Current primary node: %s" % self.instance.primary_node)
# NOTE(review): "seconary" typo is in the runtime message; changing it
# would alter behavior, so it is left untouched in this doc-only edit.
11794 feedback_fn("Current seconary node: %s" %
11795 utils.CommaJoin(self.instance.secondary_nodes))

11797 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

11799 # Activate the instance disks if we're replacing them on a down instance
11801 _StartInstanceDisks(self.lu, self.instance, True)

# NOTE(review): the try: opener around the dispatch (original lines
# ~11802-11803) is missing from this extract.
11804 # Should we replace the secondary node?
11805 if self.new_node is not None:
11806 fn = self._ExecDrbd8Secondary
11808 fn = self._ExecDrbd8DiskOnly

11810 result = fn(feedback_fn)

11812 # Deactivate the instance disks if we're replacing them on a
11815 _SafeShutdownInstanceDisks(self.lu, self.instance)

11817 assert not self.lu.owned_locks(locking.LEVEL_NODE)

# NOTE(review): the "if __debug__:" guard (original lines ~11818-11819)
# appears to be missing from this extract.
11820 # Verify owned locks
11821 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11822 nodes = frozenset(self.node_secondary_ip)
11823 assert ((self.early_release and not owned_nodes) or
11824 (not self.early_release and not (set(owned_nodes) - nodes))), \
11825 ("Not owning the correct locks, early_release=%s, owned=%r,"
11826 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11830 def _CheckVolumeGroup(self, nodes):
# Verify the cluster's configured volume group exists on all given nodes.
11831 self.lu.LogInfo("Checking volume groups")

11833 vgname = self.cfg.GetVGName()

11835 # Make sure volume group exists on all involved nodes
11836 results = self.rpc.call_vg_list(nodes)
# NOTE(review): the "if not results:" guard and the per-node loop header
# (original lines ~11837 and ~11839-11840) are missing from this extract.
11838 raise errors.OpExecError("Can't list volume groups on the nodes")
11841 res = results[node]
11842 res.Raise("Error checking node %s" % node)
11843 if vgname not in res.payload:
11844 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11847 def _CheckDisksExistence(self, nodes):
11848 # Check disk existence
11849 for idx, dev in enumerate(self.instance.disks):
# Only probe the disks selected for replacement.
11850 if idx not in self.disks:
# NOTE(review): the "continue" and the inner "for node in nodes:" loop
# header (original lines ~11851-11853) are missing from this extract.
11854 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11855 self.cfg.SetDiskID(dev, node)

11857 result = _BlockdevFind(self, node, dev, self.instance)

11859 msg = result.fail_msg
11860 if msg or not result.payload:
# NOTE(review): the "if not msg:" opener (original line ~11861) is
# missing from this extract.
11862 msg = "disk not found"
11863 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11866 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
# Abort the replacement if any selected disk is degraded on node_name.
11867 for idx, dev in enumerate(self.instance.disks):
11868 if idx not in self.disks:
# NOTE(review): the "continue" (original line ~11869) is missing from
# this truncated extract.

11871 self.lu.LogInfo("Checking disk/%d consistency on node %s" %

11874 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11875 on_primary, ldisk=ldisk):
11876 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11877 " replace disks for instance %s" %
11878 (node_name, self.instance.name))
11880 def _CreateNewStorage(self, node_name):
11881 """Create new storage on the primary or secondary node.

11883 This is only used for same-node replaces, not for changing the
11884 secondary node, hence we don't want to modify the existing disk.

# NOTE(review): the "iv_names = {}" initialization (original lines
# ~11885-11888) is missing from this truncated extract.
11889 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11890 for idx, dev in enumerate(disks):
11891 if idx not in self.disks:
# NOTE(review): "continue" (original line ~11892) missing from extract.

11894 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)

11896 self.cfg.SetDiskID(dev, node_name)

# Fresh unique LV names for the replacement data/meta volumes.
11898 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11899 names = _GenerateUniqueNames(self.lu, lv_names)

11901 (data_disk, meta_disk) = dev.children
11902 vg_data = data_disk.logical_id[0]
11903 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11904 logical_id=(vg_data, names[0]),
11905 params=data_disk.params)
11906 vg_meta = meta_disk.logical_id[0]
11907 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11908 size=constants.DRBD_META_SIZE,
11909 logical_id=(vg_meta, names[1]),
11910 params=meta_disk.params)

11912 new_lvs = [lv_data, lv_meta]
# Keep copies of the old children so they can be removed later.
11913 old_lvs = [child.Copy() for child in dev.children]
11914 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11915 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)

11917 # we pass force_create=True to force the LVM creation
11918 for new_lv in new_lvs:
11919 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11920 _GetInstanceInfoText(self.instance), False,
# NOTE(review): trailing call arguments and the "return iv_names"
# (original lines ~11921-11924) are missing from this extract.
11925 def _CheckDevices(self, node_name, iv_names):
# Verify every replaced DRBD device is present and healthy on node_name.
11926 for name, (dev, _, _) in iv_names.iteritems():
11927 self.cfg.SetDiskID(dev, node_name)

11929 result = _BlockdevFind(self, node_name, dev, self.instance)

11931 msg = result.fail_msg
11932 if msg or not result.payload:
# NOTE(review): the "if not msg:" opener (original line ~11933) is
# missing from this extract.
11934 msg = "disk not found"
11935 raise errors.OpExecError("Can't find DRBD device %s: %s" %

11938 if result.payload.is_degraded:
11939 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11941 def _RemoveOldStorage(self, node_name, iv_names):
# Best-effort removal of the pre-replacement LVs; failures only warn.
11942 for name, (_, old_lvs, _) in iv_names.iteritems():
11943 self.lu.LogInfo("Remove logical volumes for %s", name)

# NOTE(review): the inner "for lv in old_lvs:" loop header (original
# lines ~11944-11945) is missing from this extract.
11946 self.cfg.SetDiskID(lv, node_name)

11948 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
# NOTE(review): the "if msg:" guard (original line ~11949) is missing.
11950 self.lu.LogWarning("Can't remove old LV: %s", msg,
11951 hint="remove unused LVs manually")
11953 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11954 """Replace a disk on the primary or secondary for DRBD 8.

11956 The algorithm for replace is quite complicated:

11958 1. for each disk to be replaced:

11960 1. create new LVs on the target node with unique names
11961 1. detach old LVs from the drbd device
11962 1. rename old LVs to name_replaced.<time_t>
11963 1. rename new LVs to old LVs
11964 1. attach the new LVs (with the old names now) to the drbd device

11966 1. wait for sync across all devices

11968 1. for each modified disk:

11970 1. remove old LVs (which have the name name_replaces.<time_t>)

11972 Failures are not very well handled.

# NOTE(review): "steps_total = 6" (original lines ~11973-11976) appears
# to be missing from this truncated extract.
11977 # Step: check device activation
11978 self.lu.LogStep(1, steps_total, "Check device existence")
11979 self._CheckDisksExistence([self.other_node, self.target_node])
11980 self._CheckVolumeGroup([self.target_node, self.other_node])

11982 # Step: check other node consistency
11983 self.lu.LogStep(2, steps_total, "Check peer consistency")
11984 self._CheckDisksConsistency(self.other_node,
11985 self.other_node == self.instance.primary_node,
# NOTE(review): trailing argument (original lines ~11986-11987) missing.

11988 # Step: create new storage
11989 self.lu.LogStep(3, steps_total, "Allocate new storage")
11990 iv_names = self._CreateNewStorage(self.target_node)

11992 # Step: for each lv, detach+rename*2+attach
11993 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11994 for dev, old_lvs, new_lvs in iv_names.itervalues():
11995 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)

11997 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11999 result.Raise("Can't detach drbd from local storage on node"
12000 " %s for device %s" % (self.target_node, dev.iv_name))
12002 #cfg.Update(instance)

12004 # ok, we created the new LVs, so now we know we have the needed
12005 # storage; as such, we proceed on the target node to rename
12006 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12007 # using the assumption that logical_id == physical_id (which in
12008 # turn is the unique_id on that node)

12010 # FIXME(iustin): use a better name for the replaced LVs
12011 temp_suffix = int(time.time())
12012 ren_fn = lambda d, suff: (d.physical_id[0],
12013 d.physical_id[1] + "_replaced-%s" % suff)

12015 # Build the rename list based on what LVs exist on the node
12016 rename_old_to_new = []
12017 for to_ren in old_lvs:
12018 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
# Only rename LVs that actually exist on the target node.
12019 if not result.fail_msg and result.payload:
12021 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

12023 self.lu.LogInfo("Renaming the old LVs on the target node")
12024 result = self.rpc.call_blockdev_rename(self.target_node,
# NOTE(review): rename-list argument (original line ~12025) missing.
12026 result.Raise("Can't rename old LVs on node %s" % self.target_node)

12028 # Now we rename the new LVs to the old LVs
12029 self.lu.LogInfo("Renaming the new LVs on the target node")
12030 rename_new_to_old = [(new, old.physical_id)
12031 for old, new in zip(old_lvs, new_lvs)]
12032 result = self.rpc.call_blockdev_rename(self.target_node,
12034 result.Raise("Can't rename new LVs on node %s" % self.target_node)

12036 # Intermediate steps of in memory modifications
12037 for old, new in zip(old_lvs, new_lvs):
12038 new.logical_id = old.logical_id
12039 self.cfg.SetDiskID(new, self.target_node)

12041 # We need to modify old_lvs so that removal later removes the
12042 # right LVs, not the newly added ones; note that old_lvs is a
12044 for disk in old_lvs:
12045 disk.logical_id = ren_fn(disk, temp_suffix)
12046 self.cfg.SetDiskID(disk, self.target_node)

12048 # Now that the new lvs have the old name, we can add them to the device
12049 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12050 result = self.rpc.call_blockdev_addchildren(self.target_node,
12051 (dev, self.instance), new_lvs)
12052 msg = result.fail_msg
# NOTE(review): "if msg:" rollback guard (original line ~12053) missing.
12054 for new_lv in new_lvs:
12055 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12058 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12059 hint=("cleanup manually the unused logical"
12061 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

# Step counter continues at 5 after the fixed steps above.
12063 cstep = itertools.count(5)

12065 if self.early_release:
12066 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12067 self._RemoveOldStorage(self.target_node, iv_names)
12068 # TODO: Check if releasing locks early still makes sense
12069 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
# NOTE(review): "else:" (original line ~12070) missing from extract.
12071 # Release all resource locks except those used by the instance
12072 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12073 keep=self.node_secondary_ip.keys())

12075 # Release all node locks while waiting for sync
12076 _ReleaseLocks(self.lu, locking.LEVEL_NODE)

12078 # TODO: Can the instance lock be downgraded here? Take the optional disk
12079 # shutdown in the caller into consideration.

12082 # This can fail as the old devices are degraded and _WaitForSync
12083 # does a combined result over all disks, so we don't check its return value
12084 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12085 _WaitForSync(self.lu, self.instance)

12087 # Check all devices manually
12088 self._CheckDevices(self.instance.primary_node, iv_names)

12090 # Step: remove old storage
12091 if not self.early_release:
12092 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12093 self._RemoveOldStorage(self.target_node, iv_names)
12095 def _ExecDrbd8Secondary(self, feedback_fn):
12096 """Replace the secondary node for DRBD 8.

12098 The algorithm for replace is quite complicated:
12099 - for all disks of the instance:
12100 - create new LVs on the new node with same names
12101 - shutdown the drbd device on the old secondary
12102 - disconnect the drbd network on the primary
12103 - create the drbd device on the new secondary
12104 - network attach the drbd on the primary, using an artifice:
12105 the drbd code for Attach() will connect to the network if it
12106 finds a device which is connected to the good local disks but
12107 not network enabled
12108 - wait for sync across all devices
12109 - remove all disks from the old secondary

12111 Failures are not very well handled.

# NOTE(review): "steps_total" and "iv_names" initialization (original
# lines ~12112-12115) are missing from this truncated extract.
12116 pnode = self.instance.primary_node

12118 # Step: check device activation
12119 self.lu.LogStep(1, steps_total, "Check device existence")
12120 self._CheckDisksExistence([self.instance.primary_node])
12121 self._CheckVolumeGroup([self.instance.primary_node])

12123 # Step: check other node consistency
12124 self.lu.LogStep(2, steps_total, "Check peer consistency")
12125 self._CheckDisksConsistency(self.instance.primary_node, True, True)

12127 # Step: create new storage
12128 self.lu.LogStep(3, steps_total, "Allocate new storage")
12129 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12130 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12131 for idx, dev in enumerate(disks):
12132 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12133 (self.new_node, idx))
12134 # we pass force_create=True to force LVM creation
12135 for new_lv in dev.children:
12136 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12137 True, _GetInstanceInfoText(self.instance), False,
# NOTE(review): trailing call arguments (original lines ~12138-12139)
# missing from this extract.

12140 # Step 4: dbrd minors and drbd setups changes
12141 # after this, we must manually remove the drbd minors on both the
12142 # error and the success paths
12143 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12144 minors = self.cfg.AllocateDRBDMinor([self.new_node
12145 for dev in self.instance.disks],
12146 self.instance.name)
12147 logging.debug("Allocated minors %r", minors)

12150 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12151 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12152 (self.new_node, idx))
12153 # create new devices on new_node; note that we create two IDs:
12154 # one without port, so the drbd will be activated without
12155 # networking information on the new node at this stage, and one
12156 # with network, for the latter activation in step 4
12157 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
# Pick the primary's minor out of the old logical id, whichever side
# of the pair the primary node is on.
12158 if self.instance.primary_node == o_node1:
# NOTE(review): the p_minor assignments and "else:" (original lines
# ~12159-12163) are missing from this truncated extract.
12161 assert self.instance.primary_node == o_node2, "Three-node instance?"

12164 new_alone_id = (self.instance.primary_node, self.new_node, None,
12165 p_minor, new_minor, o_secret)
12166 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12167 p_minor, new_minor, o_secret)

12169 iv_names[idx] = (dev, dev.children, new_net_id)
12170 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12172 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12173 logical_id=new_alone_id,
12174 children=dev.children,
# NOTE(review): remaining Disk() kwargs and the try: opener (original
# lines ~12175-12179) are missing from this extract.
12177 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12180 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12182 _GetInstanceInfoText(self.instance), False,
# On any creation error, give back the freshly-allocated minors before
# re-raising.
12184 except errors.GenericError:
12185 self.cfg.ReleaseDRBDMinors(self.instance.name)
# NOTE(review): the re-raise (original lines ~12186-12187) is missing.

12188 # We have new devices, shutdown the drbd on the old secondary
12189 for idx, dev in enumerate(self.instance.disks):
12190 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12191 self.cfg.SetDiskID(dev, self.target_node)
12192 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12193 (dev, self.instance)).fail_msg
# NOTE(review): "if msg:" guard (original line ~12194) missing.
12195 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12196 "node: %s" % (idx, msg),
12197 hint=("Please cleanup this device manually as"
12198 " soon as possible"))

12200 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12201 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12202 self.instance.disks)[pnode]

12204 msg = result.fail_msg
# NOTE(review): "if msg:" guard (original line ~12205) missing.
12206 # detaches didn't succeed (unlikely)
12207 self.cfg.ReleaseDRBDMinors(self.instance.name)
12208 raise errors.OpExecError("Can't detach the disks from the network on"
12209 " old node: %s" % (msg,))

12211 # if we managed to detach at least one, we update all the disks of
12212 # the instance to point to the new secondary
12213 self.lu.LogInfo("Updating instance configuration")
12214 for dev, _, new_logical_id in iv_names.itervalues():
12215 dev.logical_id = new_logical_id
12216 self.cfg.SetDiskID(dev, self.instance.primary_node)

12218 self.cfg.Update(self.instance, feedback_fn)

12220 # Release all node locks (the configuration has been updated)
12221 _ReleaseLocks(self.lu, locking.LEVEL_NODE)

12223 # and now perform the drbd attach
12224 self.lu.LogInfo("Attaching primary drbds to new secondary"
12225 " (standalone => connected)")
12226 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12228 self.node_secondary_ip,
12229 (self.instance.disks, self.instance),
12230 self.instance.name,
# NOTE(review): final call argument (original line ~12231) missing.
12232 for to_node, to_result in result.items():
12233 msg = to_result.fail_msg
# NOTE(review): "if msg:" guard (original line ~12234) missing.
12235 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12237 hint=("please do a gnt-instance info to see the"
12238 " status of disks"))

12240 cstep = itertools.count(5)

12242 if self.early_release:
12243 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12244 self._RemoveOldStorage(self.target_node, iv_names)
12245 # TODO: Check if releasing locks early still makes sense
12246 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
# NOTE(review): "else:" (original line ~12247) missing from extract.
12248 # Release all resource locks except those used by the instance
12249 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12250 keep=self.node_secondary_ip.keys())

12252 # TODO: Can the instance lock be downgraded here? Take the optional disk
12253 # shutdown in the caller into consideration.

12256 # This can fail as the old devices are degraded and _WaitForSync
12257 # does a combined result over all disks, so we don't check its return value
12258 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12259 _WaitForSync(self.lu, self.instance)

12261 # Check all devices manually
12262 self._CheckDevices(self.instance.primary_node, iv_names)

12264 # Step: remove old storage
12265 if not self.early_release:
12266 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12267 self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  Runs the storage backend's "fix consistency" operation on a single node.

  """
  # NOTE(review): this listing appears to have dropped several lines in this
  # class (e.g. the REQ_BGL attribute, the closing brace of needed_locks and
  # the try/else of _CheckFaultyDisks); verify against version control.

  def CheckArguments(self):
    # Canonicalize the node name; raises OpPrereqError if it does not exist
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    # Only storage types that list SO_FIX_CONSISTENCY among their valid
    # operations can be repaired
    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # Lock only the node whose storage is being repaired
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      # NOTE(review): closing brace of this dict is missing from this listing

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    # NOTE(review): the enclosing "try:" line and the remaining arguments of
    # the _FindFaultyInstanceDisks call appear to be missing from this listing
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name),
                                 errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      # With ignore_consistency the prerequisite failure is downgraded to a
      # warning instead of aborting the opcode
      if self.op.ignore_consistency:
        self.LogWarning(str(err.args[0]))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
      # Check the other nodes of the instance, not the node being repaired
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    # Dispatch the consistency-fix operation to the node daemon via RPC
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  # NOTE(review): this listing appears to have dropped lines in this class
  # (closing braces of dicts, several "else:" branches); verify against VCS.

  # Maps opcode evacuation modes to the corresponding iallocator request modes
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
  # The mapping must cover exactly the declared modes on both sides
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    # Exactly one of "iallocator"/"remote_node" must be given
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      # The node being evacuated can't also be the target
      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      # With an explicit target node only secondaries can be moved
      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks; the actual names are filled in by DeclareLocks below
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify that the optimistically-acquired locks still match reality
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " are '%s', used to be '%s'; retry the"
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
      self.LogInfo("No instances to evacuate from node '%s'",

    if self.op.remote_node is not None:
      # An instance's primary can't become its secondary as well
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    # CheckArguments guarantees exactly one of the two was specified
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      # Convert the iallocator answer into submittable job sets
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One replace-disks job per instance, all targeting the given node
      [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                      remote_node=self.op.remote_node,
                                      mode=constants.REPLACE_DISK_CHG,
                                      early_release=self.op.early_release)]
      for instance_name in self.instance_names]
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
12531 def _SetOpEarlyRelease(early_release, op):
12532 """Sets C{early_release} flag on opcodes if available.
12536 op.early_release = early_release
12537 except AttributeError:
12538 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12543 def _NodeEvacDest(use_nodes, group, nodes):
12544 """Returns group or nodes depending on caller's choice.
12548 return utils.CommaJoin(nodes)
12553 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12554 """Unpacks the result of change-group and node-evacuate iallocator requests.
12556 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12557 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12559 @type lu: L{LogicalUnit}
12560 @param lu: Logical unit instance
12561 @type alloc_result: tuple/list
12562 @param alloc_result: Result from iallocator
12563 @type early_release: bool
12564 @param early_release: Whether to release locks early if possible
12565 @type use_nodes: bool
12566 @param use_nodes: Whether to display node names instead of groups
12569 (moved, failed, jobs) = alloc_result
12572 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12573 for (name, reason) in failed)
12574 lu.LogWarning("Unable to evacuate instances %s", failreason)
12575 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12578 lu.LogInfo("Instances to be moved: %s",
12579 utils.CommaJoin("%s (to %s)" %
12580 (name, _NodeEvacDest(use_nodes, group, nodes))
12581 for (name, group, nodes) in moved))
12583 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12584 map(opcodes.OpCode.LoadOpCode, ops))
12588 def _DiskSizeInBytesToMebibytes(lu, size):
12589 """Converts a disk size in bytes to mebibytes.
12591 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12594 (mib, remainder) = divmod(size, 1024 * 1024)
12597 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12598 " to not overwrite existing data (%s bytes will not be"
12599 " wiped)", (1024 * 1024) - remainder)
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  # NOTE(review): this listing appears to have dropped lines in this class
  # (e.g. several "else:"/"if" guards and "return" statements); verify
  # against version control before relying on the control flow shown here.
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks are computed later from the instance's nodes
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Resource locks mirror the node locks computed above
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    "DISK": self.op.disk,
    "AMOUNT": self.op.amount,
    "ABSOLUTE": self.op.absolute,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Master plus every node of the instance
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    # "absolute" means op.amount is the new total size; otherwise it's a delta
    if self.op.absolute:
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
      raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                 "current disk size (%s)" %
                                 (utils.FormatUnit(self.target, "h"),
                                  utils.FormatUnit(self.disk.size, "h")),
                                 errors.ECODE_STATE)
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
      raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                 utils.FormatUnit(self.delta, "h"),
                                 errors.ECODE_INVAL)

    self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))

  def _CheckDiskSpace(self, nodenames, req_vgspace):
    # Verify enough free space exists on every node holding the disk
    template = self.instance.disk_template
    if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
      # TODO: check the free disk space for file, when that feature will be
      nodes = map(self.cfg.GetNodeInfo, nodenames)
      es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
        # With exclusive storage we need to something smarter than just looking
        # at free space; for now, let's simply abort the operation.
        raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
                                   " is enabled", errors.ECODE_STATE)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
      result.Raise("Dry-run grow request failed to node %s" % node)

      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
    result.Raise("Grow request failed to node %s" % node)

    # Record the new size in the configuration
    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    # old_disk_size is only set when wiping was requested
    assert wipe_disks ^ (old_disk_size is None)

      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
        self.LogWarning("Disk syncing has not returned a good status; check"
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.LogWarning("Not shutting down the disk even if the instance is"
                      " not supposed to be running because no wait for"
                      " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  # NOTE(review): this listing appears to have dropped lines in this class
  # (several "else:"/"return" lines, result-dict initialization); verify
  # against version control before relying on the control flow shown here.

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.needed_locks[locking.LEVEL_NETWORK] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking:
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      if level == locking.LEVEL_NODEGROUP:
        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                    self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

      elif level == locking.LEVEL_NETWORK:
        # Lock all networks used by the owned instances
        self.needed_locks[locking.LEVEL_NETWORK] = \
          for instance_name in owned_instances
          self.cfg.GetInstanceNetworks(instance_name))

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      # Verify the optimistically-acquired locks are still consistent
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
      assert not (owned_instances or owned_groups or
                  owned_nodes or owned_networks)

    self.wanted_instances = instances.values()

  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device.

    """
    # Static queries and missing nodes yield no runtime status
    if self.op.static or not node:

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    # Annotate with disk parameters first, then delegate to the inner helper
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)

  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

    # Recurse into child devices
    dev_children = map(compat.partial(self._ComputeDiskStatusInner,

      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))

    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
                                                 for node in nodes.values()))

    # Resolve group UUIDs to human-readable names
    group2name_fn = lambda uuid: groups[uuid].name
    for instance in self.wanted_instances:
      pnode = nodes[instance.primary_node]

      if self.op.static or pnode.offline:
        remote_state = None
        self.LogWarning("Primary node %s is marked offline, returning static"
                        " information only for instance %s" %
                        (pnode.name, instance.name))
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),

      snodes_group_uuids = [nodes[snode_name].group
                            for snode_name in instance.secondary_nodes]

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "pnode_group_uuid": pnode.group,
        "pnode_group_name": group2name_fn(pnode.group),
        "snodes": instance.secondary_nodes,
        "snodes_group_uuids": snodes_group_uuids,
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; if C{None}, the private field is always C{None}
  @rtype: list of tuples; (operation, index, parameters, private)

  """
  if private_fn is None:
    fn = lambda: None
  else:
    fn = private_fn

  # A fresh private object is created per modification
  return [(op, idx, params, fn()) for (op, idx, params) in mods]
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks: an optional list of (description, value) pairs
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    # NOTE(review): the tail of this expression (remaining item type and
    # closing brackets) appears to be missing from this listing
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  # NOTE(review): this listing appears to have dropped several control-flow
  # lines of this function (index-sign branches, try/except around item
  # lookup, "else:" branches); verify against version control.
  for (op, idx, params, private) in mods:
      # Index -1 addresses the last item in the container
      absidx = len(container) - 1
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))

    if op == constants.DDM_ADD:
      # Calculate where item will be added
        addidx = len(container)

      if create_fn is None:
        (item, changes) = create_fn(addidx, params, private)

        container.append(item)
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
      # Retrieve existing item
        item = container[absidx]
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

      # Callbacks must return changes in the declared format
      assert _TApplyContModsCbChanges(changes)

      if not (chgdesc is None or changes is None):
        chgdesc.extend(changes)
13143 def _UpdateIvNames(base_index, disks):
13144 """Updates the C{iv_name} attribute of disks.
13146 @type disks: list of L{objects.Disk}
13149 for (idx, disk) in enumerate(disks):
13150 disk.iv_name = "disk/%s" % (base_index + idx, )
13153 class _InstNicModPrivate:
13154 """Data structure for network interface modifications.
13156 Used by L{LUInstanceSetParams}.
13159 def __init__(self):
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    # Converts legacy two-element disk/NIC modifications into the newer
    # (operation, index, parameters) three-tuple format.
    # NOTE(review): the @staticmethod decorator, "result" initialization and
    # some branches appear to be missing from this listing; verify with VCS.
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          # Add/remove in the old format always address the last item
          result.append((op, -1, params))
          raise errors.OpPrereqError("Only one %s add or remove operation is"
                                     " supported at a time" % kind,
                                     errors.ECODE_INVAL)
          # Everything else is interpreted as "modify item <op>"
          result.append((constants.DDM_MODIFY, op, params))

    assert verify_fn(result)
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    """
    # NOTE(review): presumably a @staticmethod in the full source (decorator
    # not visible in this listing)
    for (op, _, params) in mods:
      assert ht.TDict(params)

      # If 'key_types' is an empty dict, we assume we have an
      # 'ext' template and thus do not ForceDictType
      utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        raise errors.OpPrereqError("No settings should be passed when"
                                   " removing a %s" % kind,
                                   errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        # Per-kind validation of the supplied parameters
        item_fn(op, params)
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    """
    # NOTE(review): this listing appears to have dropped the size-validation
    # "if"/"try" lines around the raises below; verify against VCS.
    if op == constants.DDM_ADD:
      # New disks default to read-write access
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      # Store the canonicalized size back into the parameters
      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY:
      if constants.IDISK_SIZE in params:
        raise errors.OpPrereqError("Disk size change not possible, use"
                                   " grow-disk", errors.ECODE_INVAL)
      if constants.IDISK_MODE not in params:
        raise errors.OpPrereqError("Disk 'mode' is the only kind of"
                                   " modification supported, but missing",
                                   errors.ECODE_NOENT)
      if len(params) > 1:
        raise errors.OpPrereqError("Disk modification doesn't support"
                                   " additional arbitrary parameters",
                                   errors.ECODE_INVAL)
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    """
    # NOTE(review): this listing appears to have dropped some lines in this
    # method (e.g. "elif ip is not None:" style guards); verify against VCS.
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      req_net = params.get(constants.INIC_NETWORK, None)
      link = params.get(constants.NIC_LINK, None)
      mode = params.get(constants.NIC_MODE, None)
      if req_net is not None:
        # "none" clears the network; an explicit network excludes mode/link
        if req_net.lower() == constants.VALUE_NONE:
          params[constants.INIC_NETWORK] = None
        elif link is not None or mode is not None:
          raise errors.OpPrereqError("If network is given"
                                     " mode or link should not",
                                     errors.ECODE_INVAL)

      if op == constants.DDM_ADD:
        # Newly added NICs default to an auto-generated MAC address
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

        if ip.lower() == constants.VALUE_NONE:
          params[constants.INIC_IP] = None
          if ip.lower() == constants.NIC_IP_POOL:
            # Pool-allocated IPs need a network to draw from
            if op == constants.DDM_ADD and req_net is None:
              raise errors.OpPrereqError("If ip=pool, parameter network"
                                         errors.ECODE_INVAL)
            if not netutils.IPAddress.IsValid(ip):
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                         errors.ECODE_INVAL)

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
13307 def CheckArguments(self):
# Early opcode-level validation (no cluster config access beyond opcodes):
# at least one change must be requested, hvparams may not touch global-only
# hypervisor settings, disk/NIC modification lists are upgraded to the
# canonical (op, index, params) form, and mutually exclusive combinations
# are rejected.
13308 if not (self.op.nics or self.op.disks or self.op.disk_template or
13309 self.op.hvparams or self.op.beparams or self.op.os_name or
13310 self.op.offline is not None or self.op.runtime_mem):
13311 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13313 if self.op.hvparams:
# globals (HVC_GLOBALS) can only be set cluster-wide, not per instance
13314 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13315 "hypervisor", "instance", "cluster")
# Convert legacy-style disk/NIC modification lists into the new format
13317 self.op.disks = self._UpgradeDiskNicMods(
13318 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13319 self.op.nics = self._UpgradeDiskNicMods(
13320 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13322 if self.op.disks and self.op.disk_template is not None:
13323 raise errors.OpPrereqError("Disk template conversion and other disk"
13324 " changes not supported at the same time",
13325 errors.ECODE_INVAL)
13327 if (self.op.disk_template and
13328 self.op.disk_template in constants.DTS_INT_MIRROR and
13329 self.op.remote_node is None):
# mirrored templates (e.g. DRBD) need a secondary node to mirror to
13330 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13331 " one requires specifying a secondary node",
13332 errors.ECODE_INVAL)
13334 # Check NIC modifications (disk mods are checked later, in CheckPrereq,
13334 # because they depend on the instance's disk template)
13335 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13336 self._VerifyNicModification)
13338 def ExpandNames(self):
# Declare the locks this LU needs: the instance itself (exclusive),
# its node groups (shared, filled in DeclareLocks), and its nodes and
# node resources (exclusive, recalculated to the instance's nodes).
13339 self._ExpandAndLockInstance()
13340 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13341 # Can't even acquire node locks in shared mode as upcoming changes in
13342 # Ganeti 2.6 will start to modify the node object on disk conversion
13343 self.needed_locks[locking.LEVEL_NODE] = []
13344 self.needed_locks[locking.LEVEL_NODE_RES] = []
13345 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13346 # Lock the node group (shared) so we can look up the ipolicy
13347 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13349 def DeclareLocks(self, level):
# Fill in per-level lock lists declared empty in ExpandNames.
13350 if level == locking.LEVEL_NODEGROUP:
13351 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13352 # Acquire locks for the instance's nodegroups optimistically. Needs
13353 # to be verified in CheckPrereq
13354 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13355 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13356 elif level == locking.LEVEL_NODE:
13357 self._LockInstancesNodes()
13358 if self.op.disk_template and self.op.remote_node:
# the new secondary for a template conversion must be locked as well
13359 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13360 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13361 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
# template conversion needs resource locks on the same nodes; copy the
# list so later mutations of one level don't affect the other
13363 self.needed_locks[locking.LEVEL_NODE_RES] = \
13364 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13366 def BuildHooksEnv(self):
13367 """Build hooks env.
13369 This runs on the master, primary and secondaries.
# Exposes the proposed backend parameters (minmem/maxmem/vcpus), the
# post-modification NIC list (with cluster defaults filled in), and —
# when requested — the new disk template and runtime memory.
13373 if constants.BE_MINMEM in self.be_new:
13374 args["minmem"] = self.be_new[constants.BE_MINMEM]
13375 if constants.BE_MAXMEM in self.be_new:
13376 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13377 if constants.BE_VCPUS in self.be_new:
13378 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13379 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13380 # information at all.
13382 if self._new_nics is not None:
13385 for nic in self._new_nics:
# deep-copy so filling in defaults doesn't modify the computed NICs
13386 n = copy.deepcopy(nic)
13387 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13388 n.nicparams = nicparams
13389 nics.append(_NICToTuple(self, n))
13391 args["nics"] = nics
13393 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13394 if self.op.disk_template:
13395 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13396 if self.op.runtime_mem:
13397 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13401 def BuildHooksNodes(self):
13402 """Build hooks nodes.
# Hooks run on the master node plus all of the instance's nodes
# (primary and secondaries); the return statement is elided in this
# excerpt.
13405 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13408 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13409 old_params, cluster, pnode):
# Validate one NIC add/modify and reserve the resources it needs (MAC,
# IP), storing the resulting parameter dicts on "private" for later
# application.  old_ip/old_net_uuid/old_params describe the existing NIC
# (None/{} for an add); pnode is the primary node the NIC will live on.
13411 update_params_dict = dict([(key, params[key])
13412 for key in constants.NICS_PARAMETERS
13415 req_link = update_params_dict.get(constants.NIC_LINK, None)
13416 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13418 new_net_uuid = None
13419 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13420 if new_net_uuid_or_name:
13421 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13422 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13425 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
# When connected to a network, link/mode come from the node group's
# connection parameters rather than from the request
13428 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13430 raise errors.OpPrereqError("No netparams found for the network"
13431 " %s, probably not connected" %
13432 new_net_obj.name, errors.ECODE_INVAL)
13433 new_params = dict(netparams)
13435 new_params = _GetUpdatedParams(old_params, update_params_dict)
13437 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
# Fill in cluster defaults and syntax-check the fully-filled result
13439 new_filled_params = cluster.SimpleFillNIC(new_params)
13440 objects.NIC.CheckParameterSyntax(new_filled_params)
13442 new_mode = new_filled_params[constants.NIC_MODE]
13443 if new_mode == constants.NIC_MODE_BRIDGED:
# bridged mode: the target bridge must exist on the primary node
13444 bridge = new_filled_params[constants.NIC_LINK]
13445 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13447 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13449 self.warn.append(msg)
13451 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13453 elif new_mode == constants.NIC_MODE_ROUTED:
# routed mode needs an IP to route to
13454 ip = params.get(constants.INIC_IP, old_ip)
13456 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13457 " on a routed NIC", errors.ECODE_INVAL)
13459 elif new_mode == constants.NIC_MODE_OVS:
13460 # TODO: check OVS link
13461 self.LogInfo("OVS links are currently not checked for correctness")
13463 if constants.INIC_MAC in params:
13464 mac = params[constants.INIC_MAC]
13466 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13467 errors.ECODE_INVAL)
13468 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13469 # otherwise generate the MAC address
13470 params[constants.INIC_MAC] = \
13471 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13473 # or validate/reserve the current one
13475 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13476 except errors.ReservationError:
13477 raise errors.OpPrereqError("MAC address '%s' already in use"
13478 " in cluster" % mac,
13479 errors.ECODE_NOTUNIQUE)
13480 elif new_net_uuid != old_net_uuid:
# No explicit MAC, but the network changed: regenerate the MAC if the
# two networks use different MAC prefixes
13482 def get_net_prefix(net_uuid):
13485 nobj = self.cfg.GetNetwork(net_uuid)
13486 mac_prefix = nobj.mac_prefix
13490 new_prefix = get_net_prefix(new_net_uuid)
13491 old_prefix = get_net_prefix(old_net_uuid)
13492 if old_prefix != new_prefix:
13493 params[constants.INIC_MAC] = \
13494 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13496 # if there is a change in (ip, network) tuple
13497 new_ip = params.get(constants.INIC_IP, old_ip)
13498 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13500 # if IP is pool then require a network and generate one IP
13501 if new_ip.lower() == constants.NIC_IP_POOL:
13504 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13505 except errors.ReservationError:
13506 raise errors.OpPrereqError("Unable to get a free IP"
13507 " from the address pool",
13508 errors.ECODE_STATE)
13509 self.LogInfo("Chose IP %s from network %s",
13512 params[constants.INIC_IP] = new_ip
13514 raise errors.OpPrereqError("ip=pool, but no network found",
13515 errors.ECODE_INVAL)
13516 # Reserve new IP if in the new network if any
13519 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13520 self.LogInfo("Reserving IP %s in network %s",
13521 new_ip, new_net_obj.name)
13522 except errors.ReservationError:
13523 raise errors.OpPrereqError("IP %s not available in network %s" %
13524 (new_ip, new_net_obj.name),
13525 errors.ECODE_NOTUNIQUE)
13526 # new network is None so check if new IP is a conflicting IP
13527 elif self.op.conflicts_check:
13528 _CheckForConflictingIp(self, new_ip, pnode)
13530 # release old IP if old network is not None
13531 if old_ip and old_net_uuid:
13533 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13534 except errors.AddressPoolError:
# best-effort release: the IP was not in the pool, just log it
13535 logging.warning("Release IP %s not contained in network %s",
13536 old_ip, old_net_obj.name)
13538 # there are no changes in (ip, network) tuple and old network is not None
13539 elif (old_net_uuid is not None and
13540 (req_link is not None or req_mode is not None)):
13541 raise errors.OpPrereqError("Not allowed to change link or mode of"
13542 " a NIC that is connected to a network",
13543 errors.ECODE_INVAL)
# Hand the validated dicts to the apply phase (_CreateNewNic/_ApplyNicMods)
13545 private.params = new_params
13546 private.filled = new_filled_params
13548 def _PreCheckDiskTemplate(self, pnode_info):
13549 """CheckPrereq checks related to a new disk template."""
13550 # Arguments are passed to avoid configuration lookups
# Verifies the requested template conversion is supported, the instance
# is down, and (for mirrored targets) the chosen secondary node is
# usable: online, not drained, has enough free disk and satisfies the
# group ipolicy.  Also rejects conversion when exclusive storage is on.
13551 instance = self.instance
13552 pnode = instance.primary_node
13553 cluster = self.cluster
13554 if instance.disk_template == self.op.disk_template:
13555 raise errors.OpPrereqError("Instance already has disk template %s" %
13556 instance.disk_template, errors.ECODE_INVAL)
13558 if (instance.disk_template,
13559 self.op.disk_template) not in self._DISK_CONVERSIONS:
# only the conversions registered in _DISK_CONVERSIONS are implemented
13560 raise errors.OpPrereqError("Unsupported disk template conversion from"
13561 " %s to %s" % (instance.disk_template,
13562 self.op.disk_template),
13563 errors.ECODE_INVAL)
13564 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13565 msg="cannot change disk template")
13566 if self.op.disk_template in constants.DTS_INT_MIRROR:
13567 if self.op.remote_node == pnode:
13568 raise errors.OpPrereqError("Given new secondary node %s is the same"
13569 " as the primary node of the instance" %
13570 self.op.remote_node, errors.ECODE_STATE)
13571 _CheckNodeOnline(self, self.op.remote_node)
13572 _CheckNodeNotDrained(self, self.op.remote_node)
13573 # FIXME: here we assume that the old instance type is DT_PLAIN
13574 assert instance.disk_template == constants.DT_PLAIN
13575 disks = [{constants.IDISK_SIZE: d.size,
13576 constants.IDISK_VG: d.logical_id[0]}
13577 for d in instance.disks]
13578 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13579 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13581 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13582 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13583 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13585 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
13586 ignore=self.op.ignore_ipolicy)
13587 if pnode_info.group != snode_info.group:
13588 self.LogWarning("The primary and secondary nodes are in two"
13589 " different node groups; the disk parameters"
13590 " from the first disk's node group will be"
13593 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
13594 # Make sure none of the nodes require exclusive storage
13595 nodes = [pnode_info]
13596 if self.op.disk_template in constants.DTS_INT_MIRROR:
13598 nodes.append(snode_info)
13599 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13600 if compat.any(map(has_es, nodes)):
13601 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13602 " storage is enabled" % (instance.disk_template,
13603 self.op.disk_template))
13604 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13606 def CheckPrereq(self):
13607 """Check prerequisites.
13609 This only checks the instance list against the existing names.
# Validates every requested modification against the live configuration
# and precomputes the new parameter dicts (hv/be/os), the disk/NIC
# modification lists, and the instance spec used for ipolicy checks.
# Side effects: sets self.diskparams, self.diskmod, self.nicmod,
# self.hv_new/hv_inst/hv_proposed, self.be_new/be_inst/be_proposed,
# self.os_inst, self._nic_chgdesc, self._new_nics, and appends to
# self.warn for non-fatal issues.
13612 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13613 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13615 cluster = self.cluster = self.cfg.GetClusterInfo()
13616 assert self.instance is not None, \
13617 "Cannot retrieve locked instance %s" % self.op.instance_name
13619 pnode = instance.primary_node
13620 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13621 nodelist = list(instance.all_nodes)
13622 pnode_info = self.cfg.GetNodeInfo(pnode)
13623 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13625 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13626 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13627 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13629 # dictionary with instance information after the modification
13632 # Check disk modifications. This is done here and not in CheckArguments
13633 # (as with NICs), because we need to know the instance's disk template
13634 if instance.disk_template == constants.DT_EXT:
# ext template: disk params are provider-defined, so no type enforcement
13635 self._CheckMods("disk", self.op.disks, {},
13636 self._VerifyDiskModification)
13638 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13639 self._VerifyDiskModification)
13641 # Prepare disk/NIC modifications
13642 self.diskmod = PrepareContainerMods(self.op.disks, None)
13643 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13645 # Check the validity of the `provider' parameter
# "provider" is mandatory when adding a disk to an ext-template instance,
# rejected on modify, and rejected entirely for any other template
13646 if instance.disk_template in constants.DT_EXT:
13647 for mod in self.diskmod:
13648 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13649 if mod[0] == constants.DDM_ADD:
13650 if ext_provider is None:
13651 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13652 " '%s' missing, during disk add" %
13654 constants.IDISK_PROVIDER),
13655 errors.ECODE_NOENT)
13656 elif mod[0] == constants.DDM_MODIFY:
13658 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13660 constants.IDISK_PROVIDER,
13661 errors.ECODE_INVAL)
13663 for mod in self.diskmod:
13664 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13665 if ext_provider is not None:
13666 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13667 " instances of type '%s'" %
13668 (constants.IDISK_PROVIDER,
13670 errors.ECODE_INVAL)
# OS change: verify the target node has the OS unless --force was given
13673 if self.op.os_name and not self.op.force:
13674 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13675 self.op.force_variant)
13676 instance_os = self.op.os_name
13678 instance_os = instance.os
13680 assert not (self.op.disk_template and self.op.disks), \
13681 "Can't modify disk template and apply disk changes at the same time"
13683 if self.op.disk_template:
13684 self._PreCheckDiskTemplate(pnode_info)
13686 # hvparams processing
13687 if self.op.hvparams:
13688 hv_type = instance.hypervisor
13689 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13690 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13691 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
# local syntax check plus a check on all the instance's nodes
13694 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13695 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13696 self.hv_proposed = self.hv_new = hv_new # the new actual values
13697 self.hv_inst = i_hvdict # the new dict (without defaults)
13699 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13701 self.hv_new = self.hv_inst = {}
13703 # beparams processing
13704 if self.op.beparams:
13705 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13707 objects.UpgradeBeParams(i_bedict)
13708 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13709 be_new = cluster.SimpleFillBE(i_bedict)
13710 self.be_proposed = self.be_new = be_new # the new actual values
13711 self.be_inst = i_bedict # the new dict (without defaults)
13713 self.be_new = self.be_inst = {}
13714 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13715 be_old = cluster.FillBE(instance)
13717 # CPU param validation -- checking every time a parameter is
13718 # changed to cover all cases where either CPU mask or vcpus have
13720 if (constants.BE_VCPUS in self.be_proposed and
13721 constants.HV_CPU_MASK in self.hv_proposed):
13723 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13724 # Verify mask is consistent with number of vCPUs. Can skip this
13725 # test if only 1 entry in the CPU mask, which means same mask
13726 # is applied to all vCPUs.
13727 if (len(cpu_list) > 1 and
13728 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13729 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13731 (self.be_proposed[constants.BE_VCPUS],
13732 self.hv_proposed[constants.HV_CPU_MASK]),
13733 errors.ECODE_INVAL)
13735 # Only perform this test if a new CPU mask is given
13736 if constants.HV_CPU_MASK in self.hv_new:
13737 # Calculate the largest CPU number requested
13738 max_requested_cpu = max(map(max, cpu_list))
13739 # Check that all of the instance's nodes have enough physical CPUs to
13740 # satisfy the requested CPU mask
13741 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13742 max_requested_cpu + 1, instance.hypervisor)
13744 # osparams processing
13745 if self.op.osparams:
13746 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13747 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13748 self.os_inst = i_osdict # the new dict (without defaults)
13754 #TODO(dynmem): do the appropriate check involving MINMEM
# Growing maxmem without --force: make sure the instance could still
# start on its primary (and fail over, when auto_balance is set)
13755 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13756 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13757 mem_check_list = [pnode]
13758 if be_new[constants.BE_AUTO_BALANCE]:
13759 # either we changed auto_balance to yes or it was from before
13760 mem_check_list.extend(instance.secondary_nodes)
13761 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13762 instance.hypervisor)
13763 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13764 [instance.hypervisor], False)
13765 pninfo = nodeinfo[pnode]
13766 msg = pninfo.fail_msg
13768 # Assume the primary node is unreachable and go ahead
13769 self.warn.append("Can't get info from primary node %s: %s" %
13772 (_, _, (pnhvinfo, )) = pninfo.payload
13773 if not isinstance(pnhvinfo.get("memory_free", None), int):
13774 self.warn.append("Node data from primary node %s doesn't contain"
13775 " free memory information" % pnode)
13776 elif instance_info.fail_msg:
13777 self.warn.append("Can't get instance runtime information: %s" %
13778 instance_info.fail_msg)
13780 if instance_info.payload:
13781 current_mem = int(instance_info.payload["memory"])
13783 # Assume instance not running
13784 # (there is a slight race condition here, but it's not very
13785 # probable, and we have no other way to check)
13786 # TODO: Describe race condition
13788 #TODO(dynmem): do the appropriate check involving MINMEM
13789 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13790 pnhvinfo["memory_free"])
13792 raise errors.OpPrereqError("This change will prevent the instance"
13793 " from starting, due to %d MB of memory"
13794 " missing on its primary node" %
13795 miss_mem, errors.ECODE_NORES)
13797 if be_new[constants.BE_AUTO_BALANCE]:
# every secondary must be able to host the instance's new maxmem
13798 for node, nres in nodeinfo.items():
13799 if node not in instance.secondary_nodes:
13801 nres.Raise("Can't get info from secondary node %s" % node,
13802 prereq=True, ecode=errors.ECODE_STATE)
13803 (_, _, (nhvinfo, )) = nres.payload
13804 if not isinstance(nhvinfo.get("memory_free", None), int):
13805 raise errors.OpPrereqError("Secondary node %s didn't return free"
13806 " memory information" % node,
13807 errors.ECODE_STATE)
13808 #TODO(dynmem): do the appropriate check involving MINMEM
13809 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13810 raise errors.OpPrereqError("This change will prevent the instance"
13811 " from failover to its secondary node"
13812 " %s, due to not enough memory" % node,
13813 errors.ECODE_STATE)
# Runtime memory ballooning: instance must be running and the target
# must lie within [minmem, maxmem] unless --force
13815 if self.op.runtime_mem:
13816 remote_info = self.rpc.call_instance_info(instance.primary_node,
13818 instance.hypervisor)
13819 remote_info.Raise("Error checking node %s" % instance.primary_node)
13820 if not remote_info.payload: # not running already
13821 raise errors.OpPrereqError("Instance %s is not running" %
13822 instance.name, errors.ECODE_STATE)
13824 current_memory = remote_info.payload["memory"]
13825 if (not self.op.force and
13826 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13827 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13828 raise errors.OpPrereqError("Instance %s must have memory between %d"
13829 " and %d MB of memory unless --force is"
13832 self.be_proposed[constants.BE_MINMEM],
13833 self.be_proposed[constants.BE_MAXMEM]),
13834 errors.ECODE_INVAL)
13836 delta = self.op.runtime_mem - current_memory
# growing needs free memory on the primary; shrinking always fits
13838 _CheckNodeFreeMemory(self, instance.primary_node,
13839 "ballooning memory for instance %s" %
13840 instance.name, delta, instance.hypervisor)
13842 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13843 raise errors.OpPrereqError("Disk operations not supported for"
13844 " diskless instances", errors.ECODE_INVAL)
# Local callbacks used to dry-run the NIC modifications below
13846 def _PrepareNicCreate(_, params, private):
13847 self._PrepareNicModification(params, private, None, None,
13848 {}, cluster, pnode)
13849 return (None, None)
13851 def _PrepareNicMod(_, nic, params, private):
13852 self._PrepareNicModification(params, private, nic.ip, nic.network,
13853 nic.nicparams, cluster, pnode)
13856 def _PrepareNicRemove(_, params, __):
# removing a NIC attached to a network gives its IP back to the pool
13858 net = params.network
13859 if net is not None and ip is not None:
13860 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13862 # Verify NIC changes (operating on copy)
13863 nics = instance.nics[:]
13864 ApplyContainerMods("NIC", nics, None, self.nicmod,
13865 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13866 if len(nics) > constants.MAX_NICS:
13867 raise errors.OpPrereqError("Instance has too many network interfaces"
13868 " (%d), cannot add more" % constants.MAX_NICS,
13869 errors.ECODE_STATE)
13871 # Verify disk changes (operating on a copy)
13872 disks = instance.disks[:]
13873 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13874 if len(disks) > constants.MAX_DISKS:
13875 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13876 " more" % constants.MAX_DISKS,
13877 errors.ECODE_STATE)
13878 disk_sizes = [disk.size for disk in instance.disks]
13879 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13880 self.diskmod if op == constants.DDM_ADD)
13881 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13882 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13884 if self.op.offline is not None and self.op.offline:
13885 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13886 msg="can't change to offline")
13888 # Pre-compute NIC changes (necessary to use result in hooks)
13889 self._nic_chgdesc = []
13891 # Operate on copies as this is still in prereq
13892 nics = [nic.Copy() for nic in instance.nics]
13893 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13894 self._CreateNewNic, self._ApplyNicMods, None)
13895 self._new_nics = nics
13896 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13898 self._new_nics = None
13899 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13901 if not self.op.ignore_ipolicy:
13902 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13905 # Fill ispec with backend parameters
13906 ispec[constants.ISPEC_SPINDLE_USE] = \
13907 self.be_new.get(constants.BE_SPINDLE_USE, None)
13908 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13911 # Copy ispec to verify parameters with min/max values separately
13912 ispec_max = ispec.copy()
13913 ispec_max[constants.ISPEC_MEM_SIZE] = \
13914 self.be_new.get(constants.BE_MAXMEM, None)
13915 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max)
13916 ispec_min = ispec.copy()
13917 ispec_min[constants.ISPEC_MEM_SIZE] = \
13918 self.be_new.get(constants.BE_MINMEM, None)
13919 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min)
13921 if (res_max or res_min):
13922 # FIXME: Improve error message by including information about whether
13923 # the upper or lower limit of the parameter fails the ipolicy.
13924 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13925 (group_info, group_info.name,
13926 utils.CommaJoin(set(res_max + res_min))))
13927 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13929 def _ConvertPlainToDrbd(self, feedback_fn):
13930 """Converts an instance from plain to drbd.
# Creates the DRBD meta/secondary volumes, renames the existing LVs to
# become the DRBD data devices, assembles the DRBD devices on both
# nodes, updates the config, and waits for the initial sync.
13933 feedback_fn("Converting template to drbd")
13934 instance = self.instance
13935 pnode = instance.primary_node
13936 snode = self.op.remote_node
13938 assert instance.disk_template == constants.DT_PLAIN
13940 # create a fake disk info for _GenerateDiskTemplate
13941 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13942 constants.IDISK_VG: d.logical_id[0]}
13943 for d in instance.disks]
13944 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13945 instance.name, pnode, [snode],
13946 disk_info, None, None, 0, feedback_fn,
13948 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13950 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13951 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13952 info = _GetInstanceInfoText(instance)
13953 feedback_fn("Creating additional volumes...")
13954 # first, create the missing data and meta devices
13955 for disk in anno_disks:
13956 # unfortunately this is... not too nice
# primary gets only the meta device (children[1]); the data LV already
# exists there.  The secondary gets both children.
13957 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13958 info, True, p_excl_stor)
13959 for child in disk.children:
13960 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13962 # at this stage, all new LVs have been created, we can rename the
13964 feedback_fn("Renaming original volumes...")
13965 rename_list = [(o, n.children[0].logical_id)
13966 for (o, n) in zip(instance.disks, new_disks)]
13967 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13968 result.Raise("Failed to rename original LVs")
13970 feedback_fn("Initializing DRBD devices...")
13971 # all child devices are in place, we can now create the DRBD devices
13972 for disk in anno_disks:
13973 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13974 f_create = node == pnode
13975 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13978 # at this point, the instance has been modified
13979 instance.disk_template = constants.DT_DRBD8
13980 instance.disks = new_disks
13981 self.cfg.Update(instance, feedback_fn)
13983 # Release node locks while waiting for sync
13984 _ReleaseLocks(self, locking.LEVEL_NODE)
13986 # disks are created, waiting for sync
13987 disk_abort = not _WaitForSync(self, instance,
13988 oneshot=not self.op.wait_for_sync)
13990 raise errors.OpExecError("There are some degraded disks for"
13991 " this instance, please cleanup manually")
13993 # Node resource locks will be released by caller
13995 def _ConvertDrbdToPlain(self, feedback_fn):
13996 """Converts an instance from drbd to plain.
# Keeps each DRBD disk's data LV (children[0]) as the new plain disk,
# returns the DRBD TCP ports to the pool, updates the config, then
# best-effort removes the old volumes on the secondary and the now
# unneeded meta volumes on the primary.
13999 instance = self.instance
14001 assert len(instance.secondary_nodes) == 1
14002 assert instance.disk_template == constants.DT_DRBD8
14004 pnode = instance.primary_node
14005 snode = instance.secondary_nodes[0]
14006 feedback_fn("Converting template to plain")
14008 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
14009 new_disks = [d.children[0] for d in instance.disks]
14011 # copy over size and mode
14012 for parent, child in zip(old_disks, new_disks):
14013 child.size = parent.size
14014 child.mode = parent.mode
14016 # this is a DRBD disk, return its port to the pool
14017 # NOTE: this must be done right before the call to cfg.Update!
14018 for disk in old_disks:
14019 tcp_port = disk.logical_id[2]
14020 self.cfg.AddTcpUdpPort(tcp_port)
14022 # update instance structure
14023 instance.disks = new_disks
14024 instance.disk_template = constants.DT_PLAIN
14025 self.cfg.Update(instance, feedback_fn)
14027 # Release locks in case removing disks takes a while
14028 _ReleaseLocks(self, locking.LEVEL_NODE)
14030 feedback_fn("Removing volumes on the secondary node...")
14031 for disk in old_disks:
14032 self.cfg.SetDiskID(disk, snode)
14033 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
# removal is best-effort: failures only produce a warning
14035 self.LogWarning("Could not remove block device %s on node %s,"
14036 " continuing anyway: %s", disk.iv_name, snode, msg)
14038 feedback_fn("Removing unneeded volumes on the primary node...")
14039 for idx, disk in enumerate(old_disks):
# children[1] is the DRBD metadata LV, not needed for plain disks
14040 meta = disk.children[1]
14041 self.cfg.SetDiskID(meta, pnode)
14042 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14044 self.LogWarning("Could not remove metadata for disk %d on node %s,"
14045 " continuing anyway: %s", idx, pnode, msg)
14047 def _CreateNewDisk(self, idx, params, _):
14048 """Creates a new disk.
# ApplyContainerMods "create" callback: generates the disk object from
# "params" at index "idx", creates its block devices on all of the
# instance's nodes (best-effort; failures are warnings), and returns a
# change description.  The unused third argument is the private slot.
14051 instance = self.instance
14054 if instance.disk_template in constants.DTS_FILEBASED:
# file-based disks live next to the instance's first disk
14055 (file_driver, file_path) = instance.disks[0].logical_id
14056 file_path = os.path.dirname(file_path)
14058 file_driver = file_path = None
14061 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14062 instance.primary_node, instance.secondary_nodes,
14063 [params], file_path, file_driver, idx,
14064 self.Log, self.diskparams)[0]
14066 info = _GetInstanceInfoText(instance)
14068 logging.info("Creating volume %s for instance %s",
14069 disk.iv_name, instance.name)
14070 # Note: this needs to be kept in sync with _CreateDisks
14072 for node in instance.all_nodes:
14073 f_create = (node == instance.primary_node)
14075 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14076 except errors.OpExecError, err:
# do not abort: an unreachable node should not block the modification
14077 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14078 disk.iv_name, disk, node, err)
14081 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14085 def _ModifyDisk(idx, disk, params, _):
14086 """Modifies a disk.
# Only the access mode can be changed (enforced by
# _VerifyDiskModification); returns the change description entry.
14089 disk.mode = params[constants.IDISK_MODE]
14092 ("disk.mode/%d" % idx, disk.mode),
14095 def _RemoveDisk(self, idx, root, _):
# Remove the disk's block devices from every node in its device tree
# (best-effort, warnings only) and, for DRBD disks, return the TCP port
# to the cluster pool.
14099 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14100 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14101 self.cfg.SetDiskID(disk, node)
14102 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14104 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14105 " continuing anyway", idx, node, msg)
14107 # if this is a DRBD disk, return its port to the pool
14108 if root.dev_type in constants.LDS_DRBD:
14109 self.cfg.AddTcpUdpPort(root.logical_id[2])
14111 def _CreateNewNic(self, idx, params, private):
14112 """Creates data structure for a new network interface.
# Builds the NIC object from the already-validated "params" and the
# filled nicparams computed in _PrepareNicModification (private.filled),
# and returns the change description entry.
14115 mac = params[constants.INIC_MAC]
14116 ip = params.get(constants.INIC_IP, None)
14117 net = params.get(constants.INIC_NETWORK, None)
14118 net_uuid = self.cfg.LookupNetwork(net)
14119 #TODO: not private.filled?? can a nic have no nicparams??
14120 nicparams = private.filled
14121 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams)
14125 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14126 (mac, ip, private.filled[constants.NIC_MODE],
14127 private.filled[constants.NIC_LINK],
14131 def _ApplyNicMods(self, idx, nic, params, private):
14132 """Modifies a network interface.
# Applies the validated modification to an existing NIC object in place
# (MAC/IP, network, and the prepared nicparams) and records one change
# description entry per modified attribute.
14137 for key in [constants.INIC_MAC, constants.INIC_IP]:
14139 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14140 setattr(nic, key, params[key])
14142 new_net = params.get(constants.INIC_NETWORK, nic.network)
14143 new_net_uuid = self.cfg.LookupNetwork(new_net)
14144 if new_net_uuid != nic.network:
14145 changes.append(("nic.network/%d" % idx, new_net))
14146 nic.network = new_net_uuid
# private.filled was computed by _PrepareNicModification in CheckPrereq
14149 nic.nicparams = private.filled
14151 for (key, val) in nic.nicparams.items():
14152 changes.append(("nic.%s/%d" % (key, idx), val))
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    @param feedback_fn: callable used to report progress to the caller
    @rtype: list
    @return: list of (parameter name, new value) pairs describing every
        applied change

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    # TODO: Replace with self.LogWarning
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    # Node resource locks are only needed (and held) for a disk template
    # conversion; for anything else they must not be held
    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    # runtime memory (ballooning on the primary node)
    if self.op.runtime_mem:
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.op.runtime_mem)
      rpcres.Raise("Cannot modify instance runtime memory")
      result.append(("runtime_memory", self.op.runtime_mem))

    # Apply disk changes
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
    _UpdateIvNames(0, instance.disks)

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # Return any reserved DRBD minors on failure before re-raising
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # Apply NIC changes
    if self._new_nics is not None:
      instance.nics = self._new_nics
      result.extend(self._nic_chgdesc)

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    if self.op.offline is None:
      # Ignore: offline status was not requested to change
      pass
    elif self.op.offline:
      # Mark instance as offline
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))
    else:
      # Mark instance as online, but stopped
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))

    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result
  # Dispatch table: (current template, requested template) -> converter;
  # only plain <-> drbd8 conversions are supported
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
class LUInstanceChangeGroup(LogicalUnit):
  """Moves an instance to another node group via the iallocator."""
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared locks for the instance and (later) groups/nodes."""
    self.share_locks = _ShareAll()

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      # Resolve the requested group names/UUIDs early; raises on unknown groups
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    """Computes group and node locks once the instance lock is held."""
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Verifies optimistic locking assumptions and computes target groups."""
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Asks the iallocator for evacuation jobs and submits them."""
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=list(self.target_uuids))
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info), errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Builds the underlying query object from the opcode arguments."""
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
                             ["node", "export"], self.op.use_locking)

  def ExpandNames(self):
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Returns a dict mapping node name to export list (or False on error)."""
    result = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is None:
        # A None export name means the node query failed
        result[node] = False
      else:
        result.setdefault(node, []).append(expname)

    return result
class _ExportQuery(_QueryBase):
  """Query runner listing instance exports per node."""
  FIELDS = query.EXPORT_FIELDS

  #: The node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Computes the wanted node list and (optionally) locks."""
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

    if self.do_locking:
      lu.share_locks = _ShareAll()
      lu.needed_locks = {
        locking.LEVEL_NODE: self.wanted,
        }

      if not self.names:
        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    # Nothing to declare beyond what ExpandNames already requested
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)

    result = []

    for (node, nres) in lu.rpc.call_export_list(nodes).items():
      if nres.fail_msg:
        # RPC failure is encoded as a single (node, None) entry
        result.append((node, None))
      else:
        result.extend((node, expname) for expname in nres.payload)

    return result
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    # Cluster domain secret, used to sign/verify remote-export material
    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @return: for remote exports, a dict with the handshake, the HMAC-signed
        X509 key name and the signed CA; C{None} for any other mode

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have do lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

      # Allocations should be stopped while this LU runs with node locks, but
      # it doesn't have to be exclusive
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      # In remote mode op.target_node carries per-disk destination info
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we'exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      # We need all nodes to be locked in order for RemoveExport to work, but
      # we don't need to lock the instance itself, as nothing will happen to it
      # (and we can remove exports also for a removed instance)
      locking.LEVEL_NODE: locking.ALL_SET,

      # Removing backups is quick, so blocking allocations is justified
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    # Allocations should be stopped while this LU runs with node locks, but it
    # doesn't have to be exclusive
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          # Best-effort removal: log the failure but keep going on other nodes
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      # The lookup failing means the name is free, which is what we want
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
    else:
      self.new_hv_state = None

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
    else:
      self.new_disk_state = None

    if self.op.diskparams:
      for templ in constants.DISK_TEMPLATES:
        if templ in self.op.diskparams:
          utils.ForceDictType(self.op.diskparams[templ],
                              constants.DISK_DT_TYPES)
      self.new_diskparams = self.op.diskparams
      try:
        utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verify diskparams options: %s" % err,
                                   errors.ECODE_INVAL)
    else:
      self.new_diskparams = {}

    if self.op.ipolicy:
      cluster = self.cfg.GetClusterInfo()
      full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
      try:
        objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
      except errors.ConfigurationError, err:
        raise errors.OpPrereqError("Invalid instance policy: %s" % err,
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams,
                                  diskparams=self.new_diskparams,
                                  ipolicy=self.op.ipolicy,
                                  hv_state_static=self.new_hv_state,
                                  disk_state_static=self.new_disk_state)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    # Verify the optimistic group locking done in DeclareLocks: the nodes
    # must not have changed groups in the meantime
    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only keep the assignments that actually move a node to a new group
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      # All nodes an instance lives on: primary plus any secondaries
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      # Split before the change: nodes already span more than one group
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      # Split after applying the (hypothetical) changes
      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  """Query runner for node groups."""
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolves requested group names/UUIDs into the wanted UUID list."""
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    # This query works entirely off the in-memory configuration
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances,
                                query.GQ_DISKPARAMS in self.requested_data)
# LUGroupQuery: thin logical unit that delegates all query work to the
# _GroupQuery helper above.
15220 class LUGroupQuery(NoHooksLU):
15221 """Logical unit for querying node groups.
# Build the query object from the opcode's name filter and output fields.
15226 def CheckArguments(self):
15227 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15228 self.op.output_fields, False)
15230 def ExpandNames(self):
15231 self.gq.ExpandNames(self)
15233 def DeclareLocks(self, level):
15234 self.gq.DeclareLocks(self, level)
# Returns the query result in the old-style (list-of-rows) format.
15236 def Exec(self, feedback_fn):
15237 return self.gq.OldStyleQuery(self)
# LUGroupSetParams: modifies a node group's ndparams, diskparams, allocation
# policy, static hv/disk state and instance policy.  NOTE(review): extract
# has gaps in the embedded numbering; some statements (the "all_changes"
# initializer, a "try:" line, "return new_params") are not visible here.
15240 class LUGroupSetParams(LogicalUnit):
15241 """Modifies the parameters of a node group.
15244 HPATH = "group-modify"
15245 HTYPE = constants.HTYPE_GROUP
# Reject the opcode when no modification at all was requested.
15248 def CheckArguments(self):
15251 self.op.diskparams,
15252 self.op.alloc_policy,
15254 self.op.disk_state,
15258 if all_changes.count(None) == len(all_changes):
15259 raise errors.OpPrereqError("Please pass at least one modification",
15260 errors.ECODE_INVAL)
15262 def ExpandNames(self):
15263 # This raises errors.OpPrereqError on its own:
15264 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15266 self.needed_locks = {
15267 locking.LEVEL_INSTANCE: [],
15268 locking.LEVEL_NODEGROUP: [self.group_uuid],
15271 self.share_locks[locking.LEVEL_INSTANCE] = 1
15273 def DeclareLocks(self, level):
15274 if level == locking.LEVEL_INSTANCE:
15275 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15277 # Lock instances optimistically, needs verification once group lock has
15279 self.needed_locks[locking.LEVEL_INSTANCE] = \
15280 self.cfg.GetNodeGroupInstances(self.group_uuid)
# Helper: merge new disk parameters over old ones and type-check the result
# (presumably a @staticmethod; the decorator line is elided in this extract).
15283 def _UpdateAndVerifyDiskParams(old, new):
15284 """Updates and verifies disk parameters.
15287 new_params = _GetUpdatedParams(old, new)
15288 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15291 def CheckPrereq(self):
15292 """Check prerequisites.
15295 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15297 # Check if locked instances are still correct
15298 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15300 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15301 cluster = self.cfg.GetClusterInfo()
15303 if self.group is None:
15304 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15305 (self.op.group_name, self.group_uuid))
15307 if self.op.ndparams:
15308 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15309 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15310 self.new_ndparams = new_ndparams
15312 if self.op.diskparams:
15313 diskparams = self.group.diskparams
15314 uavdp = self._UpdateAndVerifyDiskParams
15315 # For each disktemplate subdict update and verify the values
15316 new_diskparams = dict((dt,
15317 uavdp(diskparams.get(dt, {}),
15318 self.op.diskparams[dt]))
15319 for dt in constants.DISK_TEMPLATES
15320 if dt in self.op.diskparams)
15321 # As we've all subdicts of diskparams ready, lets merge the actual
15322 # dict with all updated subdicts
15323 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
# NOTE(review): the "try:" opening this handler is elided in this extract.
15325 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15326 except errors.OpPrereqError, err:
15327 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
15328 errors.ECODE_INVAL)
15330 if self.op.hv_state:
15331 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15332 self.group.hv_state_static)
15334 if self.op.disk_state:
15335 self.new_disk_state = \
15336 _MergeAndVerifyDiskState(self.op.disk_state,
15337 self.group.disk_state_static)
# Warn (but do not fail) when the new ipolicy would leave owned instances
# in violation.
15339 if self.op.ipolicy:
15340 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15344 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15345 inst_filter = lambda inst: inst.name in owned_instances
15346 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15347 gmi = ganeti.masterd.instance
15349 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15351 new_ipolicy, instances)
15354 self.LogWarning("After the ipolicy change the following instances"
15355 " violate them: %s",
15356 utils.CommaJoin(violations))
15358 def BuildHooksEnv(self):
15359 """Build hooks env.
15363 "GROUP_NAME": self.op.group_name,
15364 "NEW_ALLOC_POLICY": self.op.alloc_policy,
# Hooks run on the master node only.
15367 def BuildHooksNodes(self):
15368 """Build hooks nodes.
15371 mn = self.cfg.GetMasterNode()
15372 return ([mn], [mn])
# Apply the values computed in CheckPrereq and persist via cfg.Update.
15374 def Exec(self, feedback_fn):
15375 """Modifies the node group.
15380 if self.op.ndparams:
15381 self.group.ndparams = self.new_ndparams
15382 result.append(("ndparams", str(self.group.ndparams)))
15384 if self.op.diskparams:
15385 self.group.diskparams = self.new_diskparams
15386 result.append(("diskparams", str(self.group.diskparams)))
15388 if self.op.alloc_policy:
15389 self.group.alloc_policy = self.op.alloc_policy
15391 if self.op.hv_state:
15392 self.group.hv_state_static = self.new_hv_state
15394 if self.op.disk_state:
15395 self.group.disk_state_static = self.new_disk_state
15397 if self.op.ipolicy:
15398 self.group.ipolicy = self.new_ipolicy
15400 self.cfg.Update(self.group, feedback_fn)
# LUGroupRemove: deletes an empty node group (never the last one).
15404 class LUGroupRemove(LogicalUnit):
15405 HPATH = "group-remove"
15406 HTYPE = constants.HTYPE_GROUP
15409 def ExpandNames(self):
15410 # This will raises errors.OpPrereqError on its own:
15411 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15412 self.needed_locks = {
15413 locking.LEVEL_NODEGROUP: [self.group_uuid],
15416 def CheckPrereq(self):
15417 """Check prerequisites.
15419 This checks that the given group name exists as a node group, that is
15420 empty (i.e., contains no nodes), and that is not the last group of the
15424 # Verify that the group is empty.
15425 group_nodes = [node.name
15426 for node in self.cfg.GetAllNodesInfo().values()
15427 if node.group == self.group_uuid]
# NOTE(review): the "if group_nodes:" guard is elided in this extract.
15430 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15432 (self.op.group_name,
15433 utils.CommaJoin(utils.NiceSort(group_nodes))),
15434 errors.ECODE_STATE)
15436 # Verify the cluster would not be left group-less.
15437 if len(self.cfg.GetNodeGroupList()) == 1:
15438 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15439 " removed" % self.op.group_name,
15440 errors.ECODE_STATE)
15442 def BuildHooksEnv(self):
15443 """Build hooks env.
15447 "GROUP_NAME": self.op.group_name,
# Hooks run on the master node only.
15450 def BuildHooksNodes(self):
15451 """Build hooks nodes.
15454 mn = self.cfg.GetMasterNode()
15455 return ([mn], [mn])
15457 def Exec(self, feedback_fn):
15458 """Remove the node group.
15462 self.cfg.RemoveNodeGroup(self.group_uuid)
15463 except errors.ConfigurationError:
15464 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15465 (self.op.group_name, self.group_uuid))
# Drop the group lock on success; the group no longer exists.
15467 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
# LUGroupRename: renames a node group after checking the new name is free.
15470 class LUGroupRename(LogicalUnit):
15471 HPATH = "group-rename"
15472 HTYPE = constants.HTYPE_GROUP
15475 def ExpandNames(self):
15476 # This raises errors.OpPrereqError on its own:
15477 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15479 self.needed_locks = {
15480 locking.LEVEL_NODEGROUP: [self.group_uuid],
15483 def CheckPrereq(self):
15484 """Check prerequisites.
15486 Ensures requested new name is not yet used.
# NOTE(review): the "try:" line and the "except" success path are elided in
# this extract; a successful lookup of new_name means a clash.
15490 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15491 except errors.OpPrereqError:
15494 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15495 " node group (UUID: %s)" %
15496 (self.op.new_name, new_name_uuid),
15497 errors.ECODE_EXISTS)
15499 def BuildHooksEnv(self):
15500 """Build hooks env.
15504 "OLD_NAME": self.op.group_name,
15505 "NEW_NAME": self.op.new_name,
# Hooks run on the master plus every node outside the renamed group.
15508 def BuildHooksNodes(self):
15509 """Build hooks nodes.
15512 mn = self.cfg.GetMasterNode()
15514 all_nodes = self.cfg.GetAllNodesInfo()
15515 all_nodes.pop(mn, None)
15518 run_nodes.extend(node.name for node in all_nodes.values()
15519 if node.group == self.group_uuid)
15521 return (run_nodes, run_nodes)
15523 def Exec(self, feedback_fn):
15524 """Rename the node group.
15527 group = self.cfg.GetNodeGroup(self.group_uuid)
# NOTE(review): the "if group is None:" guard is elided in this extract.
15530 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15531 (self.op.group_name, self.group_uuid))
15533 group.name = self.op.new_name
15534 self.cfg.Update(group, feedback_fn)
15536 return self.op.new_name
# LUGroupEvacuate: moves all instances out of a node group into the given
# (or all other) target groups, using an iallocator to compute the moves and
# submitting the resulting jobs.
15539 class LUGroupEvacuate(LogicalUnit):
15540 HPATH = "group-evacuate"
15541 HTYPE = constants.HTYPE_GROUP
15544 def ExpandNames(self):
15545 # This raises errors.OpPrereqError on its own:
15546 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15548 if self.op.target_groups:
15549 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15550 self.op.target_groups)
15552 self.req_target_uuids = []
# The evacuated group must not be among its own targets.
15554 if self.group_uuid in self.req_target_uuids:
15555 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15556 " as a target group (targets are %s)" %
15558 utils.CommaJoin(self.req_target_uuids)),
15559 errors.ECODE_INVAL)
15561 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15563 self.share_locks = _ShareAll()
15564 self.needed_locks = {
15565 locking.LEVEL_INSTANCE: [],
15566 locking.LEVEL_NODEGROUP: [],
15567 locking.LEVEL_NODE: [],
15570 def DeclareLocks(self, level):
15571 if level == locking.LEVEL_INSTANCE:
15572 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15574 # Lock instances optimistically, needs verification once node and group
15575 # locks have been acquired
15576 self.needed_locks[locking.LEVEL_INSTANCE] = \
15577 self.cfg.GetNodeGroupInstances(self.group_uuid)
15579 elif level == locking.LEVEL_NODEGROUP:
15580 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15582 if self.req_target_uuids:
15583 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15585 # Lock all groups used by instances optimistically; this requires going
15586 # via the node before it's locked, requiring verification later on
15587 lock_groups.update(group_uuid
15588 for instance_name in
15589 self.owned_locks(locking.LEVEL_INSTANCE)
15591 self.cfg.GetInstanceNodeGroups(instance_name))
# NOTE(review): the "else:" introducing this branch is elided in the extract.
15593 # No target groups, need to lock all of them
15594 lock_groups = locking.ALL_SET
15596 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15598 elif level == locking.LEVEL_NODE:
15599 # This will only lock the nodes in the group to be evacuated which
15600 # contain actual instances
15601 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15602 self._LockInstancesNodes()
15604 # Lock all nodes in group to be evacuated and target groups
15605 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15606 assert self.group_uuid in owned_groups
15607 member_nodes = [node_name
15608 for group in owned_groups
15609 for node_name in self.cfg.GetNodeGroup(group).members]
15610 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15612 def CheckPrereq(self):
15613 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15614 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15615 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15617 assert owned_groups.issuperset(self.req_target_uuids)
15618 assert self.group_uuid in owned_groups
15620 # Check if locked instances are still correct
15621 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15623 # Get instance information
15624 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15626 # Check if node groups for locked instances are still correct
15627 _CheckInstancesNodeGroups(self.cfg, self.instances,
15628 owned_groups, owned_nodes, self.group_uuid)
15630 if self.req_target_uuids:
15631 # User requested specific target groups
15632 self.target_uuids = self.req_target_uuids
15634 # All groups except the one to be evacuated are potential targets
15635 self.target_uuids = [group_uuid for group_uuid in owned_groups
15636 if group_uuid != self.group_uuid]
15638 if not self.target_uuids:
15639 raise errors.OpPrereqError("There are no possible target groups",
15640 errors.ECODE_INVAL)
15642 def BuildHooksEnv(self):
15643 """Build hooks env.
15647 "GROUP_NAME": self.op.group_name,
15648 "TARGET_GROUPS": " ".join(self.target_uuids),
# Hooks run on the master plus every member of the evacuated group.
15651 def BuildHooksNodes(self):
15652 """Build hooks nodes.
15655 mn = self.cfg.GetMasterNode()
15657 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15659 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15661 return (run_nodes, run_nodes)
# Ask the iallocator for a group-change plan and submit the resulting jobs.
15663 def Exec(self, feedback_fn):
15664 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15666 assert self.group_uuid not in self.target_uuids
15668 req = iallocator.IAReqGroupChange(instances=instances,
15669 target_groups=self.target_uuids)
15670 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15672 ial.Run(self.op.iallocator)
15674 if not ial.success:
15675 raise errors.OpPrereqError("Can't compute group evacuation using"
15676 " iallocator '%s': %s" %
15677 (self.op.iallocator, ial.info),
15678 errors.ECODE_NORES)
15680 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15682 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15683 len(jobs), self.op.group_name)
15685 return ResultWithJobs(jobs)
# TagsLU: abstract parent of the tag LUs; resolves the tag target (cluster,
# node, instance, node group or network) and takes the matching lock.
15688 class TagsLU(NoHooksLU): # pylint: disable=W0223
15689 """Generic tags LU.
15691 This is an abstract class which is the parent of all the other tags LUs.
15694 def ExpandNames(self):
15695 self.group_uuid = None
15696 self.needed_locks = {}
15698 if self.op.kind == constants.TAG_NODE:
15699 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15700 lock_level = locking.LEVEL_NODE
15701 lock_name = self.op.name
15702 elif self.op.kind == constants.TAG_INSTANCE:
15703 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15704 lock_level = locking.LEVEL_INSTANCE
15705 lock_name = self.op.name
15706 elif self.op.kind == constants.TAG_NODEGROUP:
15707 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15708 lock_level = locking.LEVEL_NODEGROUP
15709 lock_name = self.group_uuid
15710 elif self.op.kind == constants.TAG_NETWORK:
15711 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15712 lock_level = locking.LEVEL_NETWORK
15713 lock_name = self.network_uuid
# NOTE(review): the fallback "else:" branch (no lock) is elided here.
15718 if lock_level and getattr(self.op, "use_locking", True):
15719 self.needed_locks[lock_level] = lock_name
15721 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15722 # not possible to acquire the BGL based on opcode parameters)
15724 def CheckPrereq(self):
15725 """Check prerequisites.
# Resolve self.target to the config object whose tags will be manipulated.
15728 if self.op.kind == constants.TAG_CLUSTER:
15729 self.target = self.cfg.GetClusterInfo()
15730 elif self.op.kind == constants.TAG_NODE:
15731 self.target = self.cfg.GetNodeInfo(self.op.name)
15732 elif self.op.kind == constants.TAG_INSTANCE:
15733 self.target = self.cfg.GetInstanceInfo(self.op.name)
15734 elif self.op.kind == constants.TAG_NODEGROUP:
15735 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15736 elif self.op.kind == constants.TAG_NETWORK:
15737 self.target = self.cfg.GetNetwork(self.network_uuid)
15739 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15740 str(self.op.kind), errors.ECODE_INVAL)
# LUTagsGet: read-only LU returning the tags of the resolved target.
15743 class LUTagsGet(TagsLU):
15744 """Returns the tags of a given object.
15749 def ExpandNames(self):
15750 TagsLU.ExpandNames(self)
15752 # Share locks as this is only a read operation
15753 self.share_locks = _ShareAll()
15755 def Exec(self, feedback_fn):
15756 """Returns the tag list.
15759 return list(self.target.GetTags())
# LUTagsSearch: regex search over the tags of every cluster object.
15762 class LUTagsSearch(NoHooksLU):
15763 """Searches the tags for a given pattern.
15768 def ExpandNames(self):
15769 self.needed_locks = {}
15771 def CheckPrereq(self):
15772 """Check prerequisites.
15774 This checks the pattern passed for validity by compiling it.
# NOTE(review): the "try:" line opening this handler is elided here.
15778 self.re = re.compile(self.op.pattern)
15779 except re.error, err:
15780 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15781 (self.op.pattern, err), errors.ECODE_INVAL)
# Collect (path, object) pairs for cluster, instances, nodes and node groups,
# then return every (path, tag) whose tag matches the compiled pattern.
15783 def Exec(self, feedback_fn):
15784 """Returns the tag list.
15788 tgts = [("/cluster", cfg.GetClusterInfo())]
15789 ilist = cfg.GetAllInstancesInfo().values()
15790 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15791 nlist = cfg.GetAllNodesInfo().values()
15792 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15793 tgts.extend(("/nodegroup/%s" % n.name, n)
15794 for n in cfg.GetAllNodeGroupsInfo().values())
15796 for path, target in tgts:
15797 for tag in target.GetTags():
15798 if self.re.search(tag):
15799 results.append((path, tag))
# LUTagsSet: validates and adds a list of tags to the resolved target.
15803 class LUTagsSet(TagsLU):
15804 """Sets a tag on a given object.
15809 def CheckPrereq(self):
15810 """Check prerequisites.
15812 This checks the type and length of the tag name and value.
15815 TagsLU.CheckPrereq(self)
15816 for tag in self.op.tags:
15817 objects.TaggableObject.ValidateTag(tag)
# NOTE(review): the "try:" opening the except below is elided in this extract.
15819 def Exec(self, feedback_fn):
15824 for tag in self.op.tags:
15825 self.target.AddTag(tag)
15826 except errors.TagError, err:
15827 raise errors.OpExecError("Error while setting tag: %s" % str(err))
# Persist the modified target object.
15828 self.cfg.Update(self.target, feedback_fn)
# LUTagsDel: removes tags from the target, failing if any are not present.
15831 class LUTagsDel(TagsLU):
15832 """Delete a list of tags from a given object.
15837 def CheckPrereq(self):
15838 """Check prerequisites.
15840 This checks that we have the given tag.
15843 TagsLU.CheckPrereq(self)
15844 for tag in self.op.tags:
15845 objects.TaggableObject.ValidateTag(tag)
15846 del_tags = frozenset(self.op.tags)
15847 cur_tags = self.target.GetTags()
15849 diff_tags = del_tags - cur_tags
# NOTE(review): the "if diff_tags:" guard is elided in this extract.
15851 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15852 raise errors.OpPrereqError("Tag(s) %s not found" %
15853 (utils.CommaJoin(diff_names), ),
15854 errors.ECODE_NOENT)
15856 def Exec(self, feedback_fn):
15857 """Remove the tag from the object.
15860 for tag in self.op.tags:
15861 self.target.RemoveTag(tag)
15862 self.cfg.Update(self.target, feedback_fn)
# LUTestDelay: test LU that sleeps on the master and/or on remote nodes.
15865 class LUTestDelay(NoHooksLU):
15866 """Sleep for a specified amount of time.
15868 This LU sleeps on the master and/or nodes for a specified amount of
15874 def ExpandNames(self):
15875 """Expand names and set required locks.
15877 This expands the node list, if any.
15880 self.needed_locks = {}
15881 if self.op.on_nodes:
15882 # _GetWantedNodes can be used here, but is not always appropriate to use
15883 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15884 # more information.
15885 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15886 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
# Sleep once: locally via utils.TestDelay and/or remotely via RPC.
15888 def _TestDelay(self):
15889 """Do the actual sleep.
15892 if self.op.on_master:
15893 if not utils.TestDelay(self.op.duration):
15894 raise errors.OpExecError("Error during master delay test")
15895 if self.op.on_nodes:
15896 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15897 for node, node_result in result.items():
15898 node_result.Raise("Failure during rpc call to node %s" % node)
15900 def Exec(self, feedback_fn):
15901 """Execute the test delay opcode, with the wanted repetitions.
15904 if self.op.repeat == 0:
# NOTE(review): the single-run branch body and the call to self._TestDelay()
# inside the loop are elided in this extract.
15907 top_value = self.op.repeat - 1
15908 for i in range(self.op.repeat):
15909 self.LogInfo("Test delay iteration %d/%d", i, top_value)
# LURestrictedCommand: runs a whitelisted command on the given nodes via RPC
# and collects per-node (success, output/error) tuples.
15913 class LURestrictedCommand(NoHooksLU):
15914 """Logical unit for executing restricted commands.
15919 def ExpandNames(self):
15921 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15923 self.needed_locks = {
15924 locking.LEVEL_NODE: self.op.nodes,
15926 self.share_locks = {
15927 locking.LEVEL_NODE: not self.op.use_locking,
15930 def CheckPrereq(self):
15931 """Check prerequisites.
15935 def Exec(self, feedback_fn):
15936 """Execute restricted command and return output.
15939 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15941 # Check if correct locks are held
15942 assert set(self.op.nodes).issubset(owned_nodes)
15944 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
# NOTE(review): the "result = []" initializer and the "if nres.fail_msg:"
# guard are elided in this extract.
15948 for node_name in self.op.nodes:
15949 nres = rpcres[node_name]
15951 msg = ("Command '%s' on node '%s' failed: %s" %
15952 (self.op.command, node_name, nres.fail_msg))
15953 result.append((False, msg))
15955 result.append((True, nres.payload))
# LUTestJqueue: test LU exercising the job queue; it can notify an external
# client over a temporary Unix socket at various execution phases.
15960 class LUTestJqueue(NoHooksLU):
15961 """Utility LU to test some aspects of the job queue.
15966 # Must be lower than default timeout for WaitForJobChange to see whether it
15967 # notices changed jobs
15968 _CLIENT_CONNECT_TIMEOUT = 20.0
15969 _CLIENT_CONFIRM_TIMEOUT = 60.0
# Creates a Unix socket in a temp dir, announces it via cb, then waits for a
# client to connect and confirm (presumably a @classmethod; decorator elided).
15972 def _NotifyUsingSocket(cls, cb, errcls):
15973 """Opens a Unix socket and waits for another program to connect.
15976 @param cb: Callback to send socket name to client
15977 @type errcls: class
15978 @param errcls: Exception class to use for errors
15981 # Using a temporary directory as there's no easy way to create temporary
15982 # sockets without writing a custom loop around tempfile.mktemp and
15984 tmpdir = tempfile.mkdtemp()
15986 tmpsock = utils.PathJoin(tmpdir, "sock")
15988 logging.debug("Creating temporary socket at %s", tmpsock)
15989 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
# NOTE(review): the bind/listen calls and try/finally cleanup scaffolding are
# elided in this extract.
15994 # Send details to client
15997 # Wait for client to connect before continuing
15998 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
16000 (conn, _) = sock.accept()
16001 except socket.error, err:
16002 raise errcls("Client didn't connect in time (%s)" % err)
16006 # Remove as soon as client is connected
16007 shutil.rmtree(tmpdir)
16009 # Wait for client to close
16012 # pylint: disable=E1101
16013 # Instance of '_socketobject' has no ... member
16014 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
16016 except socket.error, err:
16017 raise errcls("Client failed to confirm notification (%s)" % err)
# Emit a job-queue test log entry carrying the socket path and test details.
16021 def _SendNotification(self, test, arg, sockname):
16022 """Sends a notification to the client.
16025 @param test: Test name
16026 @param arg: Test argument (depends on test)
16027 @type sockname: string
16028 @param sockname: Socket path
16031 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16033 def _Notify(self, prereq, test, arg):
16034 """Notifies the client of a test.
16037 @param prereq: Whether this is a prereq-phase test
16039 @param test: Test name
16040 @param arg: Test argument (depends on test)
# Prereq-phase failures raise OpPrereqError, exec-phase ones OpExecError.
16044 errcls = errors.OpPrereqError
16046 errcls = errors.OpExecError
16048 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
# Count calls so Exec can verify the LU life-cycle was followed.
16052 def CheckArguments(self):
16053 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16054 self.expandnames_calls = 0
16056 def ExpandNames(self):
16057 checkargs_calls = getattr(self, "checkargs_calls", 0)
16058 if checkargs_calls < 1:
16059 raise errors.ProgrammerError("CheckArguments was not called")
16061 self.expandnames_calls += 1
16063 if self.op.notify_waitlock:
16064 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16066 self.LogInfo("Expanding names")
16068 # Get lock on master node (just to get a lock, not for a particular reason)
16069 self.needed_locks = {
16070 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16073 def Exec(self, feedback_fn):
16074 if self.expandnames_calls < 1:
16075 raise errors.ProgrammerError("ExpandNames was not called")
16077 if self.op.notify_exec:
16078 self._Notify(False, constants.JQT_EXEC, None)
16080 self.LogInfo("Executing")
16082 if self.op.log_messages:
16083 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16084 for idx, msg in enumerate(self.op.log_messages):
16085 self.LogInfo("Sending log message %s", idx + 1)
16086 feedback_fn(constants.JQT_MSGPREFIX + msg)
16087 # Report how many test messages have been sent
16088 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
# NOTE(review): the "if self.op.fail:" guard for this raise is elided here.
16091 raise errors.OpExecError("Opcode failure was requested")
# LUTestAllocator: test LU that builds an iallocator request for the given
# mode and either returns the request text or runs the allocator.
16096 class LUTestAllocator(NoHooksLU):
16097 """Run allocator tests.
16099 This LU runs the allocator tests
16102 def CheckPrereq(self):
16103 """Check prerequisites.
16105 This checks the opcode parameters depending on the director and mode test.
16108 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16109 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16110 for attr in ["memory", "disks", "disk_template",
16111 "os", "tags", "nics", "vcpus"]:
16112 if not hasattr(self.op, attr):
16113 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16114 attr, errors.ECODE_INVAL)
16115 iname = self.cfg.ExpandInstanceName(self.op.name)
16116 if iname is not None:
16117 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16118 iname, errors.ECODE_EXISTS)
16119 if not isinstance(self.op.nics, list):
16120 raise errors.OpPrereqError("Invalid parameter 'nics'",
16121 errors.ECODE_INVAL)
16122 if not isinstance(self.op.disks, list):
16123 raise errors.OpPrereqError("Invalid parameter 'disks'",
16124 errors.ECODE_INVAL)
# Each disk must be a dict with an integer size and a valid access mode.
16125 for row in self.op.disks:
16126 if (not isinstance(row, dict) or
16127 constants.IDISK_SIZE not in row or
16128 not isinstance(row[constants.IDISK_SIZE], int) or
16129 constants.IDISK_MODE not in row or
16130 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16131 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16132 " parameter", errors.ECODE_INVAL)
16133 if self.op.hypervisor is None:
16134 self.op.hypervisor = self.cfg.GetHypervisorType()
16135 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16136 fname = _ExpandInstanceName(self.cfg, self.op.name)
16137 self.op.name = fname
16138 self.relocate_from = \
16139 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16140 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16141 constants.IALLOCATOR_MODE_NODE_EVAC):
16142 if not self.op.instances:
16143 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16144 self.op.instances = _GetWantedInstances(self, self.op.instances)
# NOTE(review): the "else:" introducing this raise is elided in this extract.
16146 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16147 self.op.mode, errors.ECODE_INVAL)
16149 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16150 if self.op.iallocator is None:
16151 raise errors.OpPrereqError("Missing allocator name",
16152 errors.ECODE_INVAL)
16153 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16154 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16155 self.op.direction, errors.ECODE_INVAL)
# Build the mode-specific iallocator request; several keyword-argument lines
# (os, tags, nics, ...) are elided from this extract.
16157 def Exec(self, feedback_fn):
16158 """Run the allocator test.
16161 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16162 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16163 memory=self.op.memory,
16164 disks=self.op.disks,
16165 disk_template=self.op.disk_template,
16169 vcpus=self.op.vcpus,
16170 spindle_use=self.op.spindle_use,
16171 hypervisor=self.op.hypervisor)
16172 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16173 req = iallocator.IAReqRelocate(name=self.op.name,
16174 relocate_from=list(self.relocate_from))
16175 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16176 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16177 target_groups=self.op.target_groups)
16178 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16179 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16180 evac_mode=self.op.evac_mode)
16181 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16182 disk_template = self.op.disk_template
16183 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16184 memory=self.op.memory,
16185 disks=self.op.disks,
16186 disk_template=disk_template,
16190 vcpus=self.op.vcpus,
16191 spindle_use=self.op.spindle_use,
16192 hypervisor=self.op.hypervisor)
16193 for idx in range(self.op.count)]
16194 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16196 raise errors.ProgrammerError("Uncatched mode %s in"
16197 " LUTestAllocator.Exec", self.op.mode)
16199 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16200 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16201 result = ial.in_text
16203 ial.Run(self.op.iallocator, validate=False)
16204 result = ial.out_text
# LUNetworkAdd: creates a network object with an IP address pool, optionally
# reserving node and master IPs that fall inside the pool.
16208 class LUNetworkAdd(LogicalUnit):
16209 """Logical unit for creating networks.
16212 HPATH = "network-add"
16213 HTYPE = constants.HTYPE_NETWORK
# Hooks run on the master node only.
16216 def BuildHooksNodes(self):
16217 """Build hooks nodes.
16220 mn = self.cfg.GetMasterNode()
16221 return ([mn], [mn])
16223 def CheckArguments(self):
16224 if self.op.mac_prefix:
16225 self.op.mac_prefix = \
16226 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16228 def ExpandNames(self):
16229 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
# Conflict checking needs (shared) locks on all nodes.
16231 if self.op.conflicts_check:
16232 self.share_locks[locking.LEVEL_NODE] = 1
16233 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16234 self.needed_locks = {
16235 locking.LEVEL_NODE: locking.ALL_SET,
16236 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16239 self.needed_locks = {}
16241 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16243 def CheckPrereq(self):
16244 if self.op.network is None:
16245 raise errors.OpPrereqError("Network must be given",
16246 errors.ECODE_INVAL)
# NOTE(review): the "try:"/"except" success-path lines are elided in this
# extract; a successful lookup of network_name means a name clash.
16249 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16250 except errors.OpPrereqError:
16253 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16254 " network (UUID: %s)" %
16255 (self.op.network_name, existing_uuid),
16256 errors.ECODE_EXISTS)
16258 # Check tag validity
16259 for tag in self.op.tags:
16260 objects.TaggableObject.ValidateTag(tag)
16262 def BuildHooksEnv(self):
16263 """Build hooks env.
16267 "name": self.op.network_name,
16268 "subnet": self.op.network,
16269 "gateway": self.op.gateway,
16270 "network6": self.op.network6,
16271 "gateway6": self.op.gateway6,
16272 "mac_prefix": self.op.mac_prefix,
16273 "tags": self.op.tags,
16275 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16277 def Exec(self, feedback_fn):
16278 """Add the ip pool to the cluster.
16281 nobj = objects.Network(name=self.op.network_name,
16282 network=self.op.network,
16283 gateway=self.op.gateway,
16284 network6=self.op.network6,
16285 gateway6=self.op.gateway6,
16286 mac_prefix=self.op.mac_prefix,
16287 uuid=self.network_uuid)
16288 # Initialize the associated address pool
16290 pool = network.AddressPool.InitializeNetwork(nobj)
16291 except errors.AddressPoolError, err:
16292 raise errors.OpExecError("Cannot create IP address pool for network"
16293 " '%s': %s" % (self.op.network_name, err))
16295 # Check if we need to reserve the nodes and the cluster master IP
16296 # These may not be allocated to any instances in routed mode, as
16297 # they wouldn't function anyway.
16298 if self.op.conflicts_check:
16299 for node in self.cfg.GetAllNodesInfo().values():
16300 for ip in [node.primary_ip, node.secondary_ip]:
16302 if pool.Contains(ip):
# NOTE(review): the pool.Reserve(ip) call preceding this log line is elided.
16304 self.LogInfo("Reserved IP address of node '%s' (%s)",
16306 except errors.AddressPoolError, err:
16307 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16308 ip, node.name, err)
16310 master_ip = self.cfg.GetClusterInfo().master_ip
16312 if pool.Contains(master_ip):
16313 pool.Reserve(master_ip)
16314 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16315 except errors.AddressPoolError, err:
16316 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
# Externally reserved IPs requested by the caller are fatal on failure.
16319 if self.op.add_reserved_ips:
16320 for ip in self.op.add_reserved_ips:
16322 pool.Reserve(ip, external=True)
16323 except errors.AddressPoolError, err:
16324 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16328 for tag in self.op.tags:
16331 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16332 del self.remove_locks[locking.LEVEL_NETWORK]
class LUNetworkRemove(LogicalUnit):
  """Removes a network from the cluster.

  """
  HPATH = "network-remove"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    # Node group locks are only needed (shared) to verify the network is
    # not connected anywhere
    self.share_locks[locking.LEVEL_NODEGROUP] = 1
    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given network name exists as a network and that it
    is not connected to any node group.

    """
    # Verify that the network is not connected.
    node_groups = [group.name
                   for group in self.cfg.GetAllNodeGroupsInfo().values()
                   if self.network_uuid in group.networks]

    if node_groups:
      self.LogWarning("Network '%s' is connected to the following"
                      " node groups: %s" %
                      (self.op.network_name,
                       utils.CommaJoin(utils.NiceSort(node_groups))))
      raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "NETWORK_NAME": self.op.network_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the network.

    """
    try:
      self.cfg.RemoveNetwork(self.network_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, self.network_uuid))
class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    # Changing the gateway and the reserved-IP list in the same opcode is
    # ambiguous (the gateway itself is an implicit reservation)
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Computes the post-modification values (falling back to the current
    configuration for parameters not being changed) and validates them.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      # VALUE_NONE is the sentinel for "clear this parameter"
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    Note: the environment reflects the values as they will be after the
    modification, except for the (immutable) subnet.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args) # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    # extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          # The new gateway becomes an external reservation ...
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          # ... while the old gateway's reservation is released
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          # Best-effort: reservation problems are reported, not fatal
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          # The gateway reservation is managed via the gateway parameter only
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
class _NetworkQuery(_QueryBase):
  """Query runner for networks.

  """
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      # Translate the requested names into UUIDs, collecting unknown ones
      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      # Instance/group locks are only needed when those fields are requested
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network in network_uuids:
            network_to_instances[nic.network].append(instance.name)
            break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      # The network lock is only needed to look at the address pool
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    if self.op.conflicts_check:
      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      # Connecting twice is not an error, just a no-op with a warning
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True

    # check only if not already connected
    elif self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to", owned_instances)

  def Exec(self, feedback_fn):
    # Connect the network and update the group only if not already connected
    if not self.connected:
      self.group.networks[self.network_uuid] = self.netparams
      self.cfg.Update(self.group, feedback_fn)
def _NetworkConflictCheck(lu, check_fn, action, instances):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
    # Collect (nic index, ip) pairs of this instance's conflicting NICs
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    # NOTE: relies on the caller (LU) providing network_name and group
    # attributes, as LUNetworkConnect/LUNetworkDisconnect do
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found; "
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)
def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  Formats a list of (nic index, ip address) pairs as a comma-joined
  "nicIDX/IP" string.

  """
  entries = ["nic%s/%s" % (nic_idx, nic_ip) for (nic_idx, nic_ip) in details]
  return utils.CommaJoin(entries)
class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network to a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      # Disconnecting an unconnected network is a no-op with a warning
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False

    # We need this check only if network is not already connected
    else:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
                            "disconnect from", owned_instances)

  def Exec(self, feedback_fn):
    # Disconnect the network and update the group only if network is connected
    if self.connected:
      del self.group.networks[self.network_uuid]
      self.cfg.Update(self.group, feedback_fn)
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Every query resource reachable via an opcode must have an implementation
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @raise errors.OpPrereqError: if the query type is unknown

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
def _CheckForConflictingIp(lu, ip, node):
  """In case of conflicting IP address raise error.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name
  @raise errors.OpPrereqError: if the IP belongs to a network connected to
    the node's group

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    # NOTE(review): second format operand reconstructed as the conflicting
    # network returned by CheckIPInNodeGroup — confirm against upstream
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)