4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
@type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing them separately is better because:
- ExpandNames is left as a purely lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
# concurrent, so that old LUs don't need to be changed all at the same time
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
hook should run after the execution. "No nodes" should be expressed as an
empty list (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the "unused argument" and "method
# could be a function" warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
In the future it may grow parameters to lock only some instances' nodes, or
only primary or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
"""Check prerequisites for this tasklet.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
@return: The annotated disk copies
601 @see L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
" locks were acquired, current groups are"
" '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
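
# Illustrative sketch (not part of the original module) of how
# _GetUpdatedParams merges an update dict into existing parameters;
# the keys and values below are made up for the example:
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"b": constants.VALUE_DEFAULT, "c": 3})
#   => {"a": 1, "c": 3}
#
# "b" is reset to its default (i.e. dropped from the result), "c" is
# added, and "a" is carried over unchanged.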
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
"""Return the new version of an instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
832 use_default=use_default)
833 utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
" on the cluster" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
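
# Illustrative sketch (not part of the original module): updating a
# group's vcpu-ratio; the values are made up:
#
#   _GetUpdatedIPolicy({constants.IPOLICY_VCPU_RATIO: 4.0},
#                      {constants.IPOLICY_VCPU_RATIO: "2.5"},
#                      group_policy=True)
#   => {constants.IPOLICY_VCPU_RATIO: 2.5}   # value coerced to float
#
# For a group policy, passing constants.VALUE_DEFAULT for a key instead
# removes it, so the group falls back to the cluster-wide value.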
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @returns: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
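
# Illustrative sketch (not part of the original module): each value in
# "updates" is merged into the corresponding sub-dict of "base" and the
# result is type-checked; the keys and values below are made up:
#
#   _UpdateAndVerifySubDict({"node1": {"cpu_total": 4}},
#                           {"node1": {"mem_total": 1024}},
#                           constants.HVSTS_PARAMETER_TYPES)
#   => {"node1": {"cpu_total": 4, "mem_total": 1024}}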
884 def _MergeAndVerifyHvState(op_input, obj_input):
"""Combines the hv state from an opcode with that of the object.
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
"""Combines the disk state from an opcode with that of the object.
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
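
# Illustrative sketch (not part of the original module) of the two
# calling styles; "lu" and the node names are assumptions:
#
#   # keep only node1's lock, release everything else at that level
#   _ReleaseLocks(lu, locking.LEVEL_NODE, keep=["node1.example.com"])
#
#   # release exactly these two locks, keep the rest
#   _ReleaseLocks(lu, locking.LEVEL_NODE,
#                 names=["node2.example.com", "node3.example.com"])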
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
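
# Illustrative sketch (not part of the original module): the resulting
# mapping looks like (names made up):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com", ...}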
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
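
# Illustrative sketch (not part of the original module); the field names
# are made up:
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("free_mem"),
#                      selected=["name", "bogus"])
#   # raises OpPrereqError: Unknown output fields selected: bogus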
1023 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
"""Make sure that none of the given parameters is global.
1026 If a global parameter is found, an L{errors.OpPrereqError} exception is
1027 raised. This is used to avoid setting global parameters for individual nodes.
1029 @type params: dictionary
1030 @param params: Parameters to check
1031 @type glob_pars: dictionary
1032 @param glob_pars: Forbidden parameters
1034 @param kind: Kind of parameters (e.g. "node")
1035 @type bad_levels: string
1036 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
1038 @type good_levels: strings
1039 @param good_levels: Level(s) at which the parameters are allowed (e.g.
1043 used_globals = glob_pars.intersection(params)
1045 msg = ("The following %s parameters are global and cannot"
1046 " be customized at %s level, please modify them at"
1048 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1049 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
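
# Illustrative sketch (not part of the original module): rejecting an
# attempt to set a global node parameter on a single node; the call is
# an assumption for the example:
#
#   _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS,
#                         "node", "node", "cluster or group")
#   # raises OpPrereqError if e.g. exclusive_storage is in ndparams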
1052 def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online.
1055 @param lu: the LU on behalf of which we make the check
1056 @param node: the node to check
1057 @param msg: if passed, should be a message to replace the default one
1058 @raise errors.OpPrereqError: if the node is offline
1062 msg = "Can't use offline node"
1063 if lu.cfg.GetNodeInfo(node).offline:
1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1067 def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained.
1070 @param lu: the LU on behalf of which we make the check
1071 @param node: the node to check
1072 @raise errors.OpPrereqError: if the node is drained
1075 if lu.cfg.GetNodeInfo(node).drained:
1076 raise errors.OpPrereqError("Can't use drained node %s" % node,
1080 def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable.
1083 @param lu: the LU on behalf of which we make the check
1084 @param node: the node to check
1085 @raise errors.OpPrereqError: if the node is not vm capable
1088 if not lu.cfg.GetNodeInfo(node).vm_capable:
1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1093 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094 """Ensure that a node supports a given OS.
1096 @param lu: the LU on behalf of which we make the check
1097 @param node: the node to check
1098 @param os_name: the OS to query about
1099 @param force_variant: whether to ignore variant errors
1100 @raise errors.OpPrereqError: if the node is not supporting the OS
1103 result = lu.rpc.call_os_get(node, os_name)
1104 result.Raise("OS '%s' not in supported OS list for node %s" %
1106 prereq=True, ecode=errors.ECODE_INVAL)
1107 if not force_variant:
1108 _CheckOSVariant(result.payload, os_name)
1111 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112 """Ensure that a node has the given secondary ip.
1114 @type lu: L{LogicalUnit}
1115 @param lu: the LU on behalf of which we make the check
1117 @param node: the node to check
1118 @type secondary_ip: string
1119 @param secondary_ip: the ip to check
1120 @type prereq: boolean
1121 @param prereq: whether to throw a prerequisite or an execute error
1122 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1123 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1126 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1127 result.Raise("Failure checking secondary ip on node %s" % node,
1128 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1129 if not result.payload:
1130 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1131 " please fix and re-run this command" % secondary_ip)
1133 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1135 raise errors.OpExecError(msg)
1138 def _CheckNodePVs(nresult, exclusive_storage):
1142 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1143 if pvlist_dict is None:
1144 return (["Can't get PV list from node"], None)
1145 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1147 # check that ':' is not present in PV names, since it's a
1148 # special character for lvcreate (denotes the range of PEs to
1152 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1153 (pv.name, pv.vg_name))
1155 if exclusive_storage:
1156 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1157 errlist.extend(errmsgs)
1158 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1160 for (pvname, lvlist) in shared_pvs:
1161 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1162 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1163 (pvname, utils.CommaJoin(lvlist)))
1164 return (errlist, es_pvinfo)
1167 def _GetClusterDomainSecret():
1168 """Reads the cluster domain secret.
1171 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1175 def _CheckInstanceState(lu, instance, req_states, msg=None):
1176 """Ensure that an instance is in one of the required states.
1178 @param lu: the LU on behalf of which we make the check
1179 @param instance: the instance to check
1180 @param msg: if passed, should be a message to replace the default one
1181 @raise errors.OpPrereqError: if the instance is not in the required state
1185 msg = ("can't use instance from outside %s states" %
1186 utils.CommaJoin(req_states))
1187 if instance.admin_state not in req_states:
1188 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1189 (instance.name, instance.admin_state, msg),
1192 if constants.ADMINST_UP not in req_states:
1193 pnode = instance.primary_node
1194 if not lu.cfg.GetNodeInfo(pnode).offline:
1195 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1196 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1197 prereq=True, ecode=errors.ECODE_ENVIRON)
1198 if instance.name in ins_l.payload:
1199 raise errors.OpPrereqError("Instance %s is running, %s" %
1200 (instance.name, msg), errors.ECODE_STATE)
1202 lu.LogWarning("Primary node offline, ignoring check that instance"
1206 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207 """Computes if value is in the desired range.
1209 @param name: name of the parameter for which we perform the check
1210 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1212 @param ipolicy: dictionary containing min, max and std values
1213 @param value: actual value that we want to use
1214 @return: None or element not meeting the criteria
1218 if value in [None, constants.VALUE_AUTO]:
1220 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1221 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1222 if value > max_v or min_v > value:
1224 fqn = "%s/%s" % (name, qualifier)
1227 return ("%s value %s is not in range [%s, %s]" %
1228 (fqn, value, min_v, max_v))
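
# Illustrative sketch (not part of the original module); the policy
# values are made up:
#
#   ipolicy = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#              constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 8192)
#   => an error string stating that 8192 is not in range [128, 4096]
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ipolicy, 512)
#   => None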
1232 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1233 nic_count, disk_sizes, spindle_use,
1235 _compute_fn=_ComputeMinMaxSpec):
1236 """Verifies ipolicy against provided specs.
1239 @param ipolicy: The ipolicy
1241 @param mem_size: The memory size
1242 @type cpu_count: int
1243 @param cpu_count: Used cpu cores
1244 @type disk_count: int
1245 @param disk_count: Number of disks used
1246 @type nic_count: int
1247 @param nic_count: Number of nics used
1248 @type disk_sizes: list of ints
1249 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1250 @type spindle_use: int
1251 @param spindle_use: The number of spindles this instance uses
1252 @type disk_template: string
1253 @param disk_template: The disk template of the instance
1254 @param _compute_fn: The compute function (unittest only)
@return: A list of violations, or an empty list if no violations are found
1258 assert disk_count == len(disk_sizes)
1261 (constants.ISPEC_MEM_SIZE, "", mem_size),
1262 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1263 (constants.ISPEC_NIC_COUNT, "", nic_count),
1264 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1265 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1266 for idx, d in enumerate(disk_sizes)]
1267 if disk_template != constants.DT_DISKLESS:
1268 # This check doesn't make sense for diskless instances
1269 test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
1271 allowed_dts = ipolicy[constants.IPOLICY_DTS]
1272 if disk_template not in allowed_dts:
1273 ret.append("Disk template %s is not allowed (allowed templates: %s)" %
1274 (disk_template, utils.CommaJoin(allowed_dts)))
1276 return ret + filter(None,
1277 (_compute_fn(name, qualifier, ipolicy, value)
1278 for (name, qualifier, value) in test_settings))
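
# Illustrative sketch (not part of the original module) of a call for a
# single-disk plain instance; "ipolicy" is assumed to be a full policy
# dict as stored in the cluster configuration:
#
#   _ComputeIPolicySpecViolation(ipolicy, mem_size=512, cpu_count=1,
#                                disk_count=1, nic_count=1,
#                                disk_sizes=[10240], spindle_use=1,
#                                disk_template=constants.DT_PLAIN)
#   => []   # empty if the values fit the policy and plain disks are allowed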
1281 def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
1282 _compute_fn=_ComputeIPolicySpecViolation):
1283 """Compute if instance meets the specs of ipolicy.
1286 @param ipolicy: The ipolicy to verify against
1287 @type instance: L{objects.Instance}
1288 @param instance: The instance to verify
1289 @type cfg: L{config.ConfigWriter}
1290 @param cfg: Cluster configuration
1291 @param _compute_fn: The function to verify ipolicy (unittest only)
1292 @see: L{_ComputeIPolicySpecViolation}
1295 be_full = cfg.GetClusterInfo().FillBE(instance)
1296 mem_size = be_full[constants.BE_MAXMEM]
1297 cpu_count = be_full[constants.BE_VCPUS]
1298 spindle_use = be_full[constants.BE_SPINDLE_USE]
1299 disk_count = len(instance.disks)
1300 disk_sizes = [disk.size for disk in instance.disks]
1301 nic_count = len(instance.nics)
1302 disk_template = instance.disk_template
1304 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1305 disk_sizes, spindle_use, disk_template)
1308 def _ComputeIPolicyInstanceSpecViolation(
1309 ipolicy, instance_spec, disk_template,
1310 _compute_fn=_ComputeIPolicySpecViolation):
"""Compute if instance specs meet the specs of ipolicy.
@param ipolicy: The ipolicy to verify against
@type instance_spec: dict
1316 @param instance_spec: The instance spec to verify
1317 @type disk_template: string
1318 @param disk_template: the disk template of the instance
1319 @param _compute_fn: The function to verify ipolicy (unittest only)
1320 @see: L{_ComputeIPolicySpecViolation}
1323 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1324 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1325 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1326 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1327 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1328 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1330 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1331 disk_sizes, spindle_use, disk_template)
1334 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1336 _compute_fn=_ComputeIPolicyInstanceViolation):
1337 """Compute if instance meets the specs of the new target group.
1339 @param ipolicy: The ipolicy to verify
1340 @param instance: The instance object to verify
1341 @param current_group: The current group of the instance
1342 @param target_group: The new group of the instance
1343 @type cfg: L{config.ConfigWriter}
1344 @param cfg: Cluster configuration
1345 @param _compute_fn: The function to verify ipolicy (unittest only)
1346 @see: L{_ComputeIPolicySpecViolation}
1349 if current_group == target_group:
1352 return _compute_fn(ipolicy, instance, cfg)
1355 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
1356 _compute_fn=_ComputeIPolicyNodeViolation):
1357 """Checks that the target node is correct in terms of instance policy.
1359 @param ipolicy: The ipolicy to verify
1360 @param instance: The instance object to verify
1361 @param node: The new node to relocate
1362 @type cfg: L{config.ConfigWriter}
1363 @param cfg: Cluster configuration
1364 @param ignore: Ignore violations of the ipolicy
1365 @param _compute_fn: The function to verify ipolicy (unittest only)
1366 @see: L{_ComputeIPolicySpecViolation}
1369 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1370 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
1373 msg = ("Instance does not meet target node group's (%s) instance"
1374 " policy: %s") % (node.group, utils.CommaJoin(res))
1378 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1381 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
"""Computes the set of instances that would violate the new ipolicy.
1384 @param old_ipolicy: The current (still in-place) ipolicy
1385 @param new_ipolicy: The new (to become) ipolicy
1386 @param instances: List of instances to verify
1387 @type cfg: L{config.ConfigWriter}
1388 @param cfg: Cluster configuration
@return: A list of instances which violate the new ipolicy but
1393 return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
1394 _ComputeViolatingInstances(old_ipolicy, instances, cfg))
1397 def _ExpandItemName(fn, name, kind):
1398 """Expand an item name.
1400 @param fn: the function to use for expansion
1401 @param name: requested item name
1402 @param kind: text description ('Node' or 'Instance')
1403 @return: the resolved (full) name
1404 @raise errors.OpPrereqError: if the item is not found
1407 full_name = fn(name)
1408 if full_name is None:
1409 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1414 def _ExpandNodeName(cfg, name):
1415 """Wrapper over L{_ExpandItemName} for nodes."""
1416 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1419 def _ExpandInstanceName(cfg, name):
1420 """Wrapper over L{_ExpandItemName} for instance."""
1421 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1424 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1426 """Builds network related env variables for hooks
1428 This builds the hook environment from individual variables.
1431 @param name: the name of the network
1432 @type subnet: string
1433 @param subnet: the ipv4 subnet
1434 @type gateway: string
1435 @param gateway: the ipv4 gateway
1436 @type network6: string
1437 @param network6: the ipv6 subnet
1438 @type gateway6: string
1439 @param gateway6: the ipv6 gateway
1440 @type mac_prefix: string
1441 @param mac_prefix: the mac_prefix
1443 @param tags: the tags of the network
1448 env["NETWORK_NAME"] = name
1450 env["NETWORK_SUBNET"] = subnet
1452 env["NETWORK_GATEWAY"] = gateway
1454 env["NETWORK_SUBNET6"] = network6
1456 env["NETWORK_GATEWAY6"] = gateway6
1458 env["NETWORK_MAC_PREFIX"] = mac_prefix
1460 env["NETWORK_TAGS"] = " ".join(tags)
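
# Illustrative sketch (not part of the original module): for a network
# named "net1" with an IPv4 subnet, gateway and MAC prefix set (values
# made up), the resulting environment would contain:
#
#   {"NETWORK_NAME": "net1",
#    "NETWORK_SUBNET": "192.0.2.0/24",
#    "NETWORK_GATEWAY": "192.0.2.1",
#    "NETWORK_MAC_PREFIX": "aa:00:00"}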
1465 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1466 minmem, maxmem, vcpus, nics, disk_template, disks,
1467 bep, hvp, hypervisor_name, tags):
1468 """Builds instance related env variables for hooks
1470 This builds the hook environment from individual variables.
1473 @param name: the name of the instance
1474 @type primary_node: string
1475 @param primary_node: the name of the instance's primary node
1476 @type secondary_nodes: list
1477 @param secondary_nodes: list of secondary nodes as strings
1478 @type os_type: string
1479 @param os_type: the name of the instance's OS
1480 @type status: string
1481 @param status: the desired status of the instance
1482 @type minmem: string
1483 @param minmem: the minimum memory size of the instance
1484 @type maxmem: string
1485 @param maxmem: the maximum memory size of the instance
1487 @param vcpus: the count of VCPUs the instance has
1489 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1490 the NICs the instance has
1491 @type disk_template: string
1492 @param disk_template: the disk template of the instance
1494 @param disks: the list of (size, mode) pairs
1496 @param bep: the backend parameters for the instance
1498 @param hvp: the hypervisor parameters for the instance
1499 @type hypervisor_name: string
1500 @param hypervisor_name: the hypervisor for the instance
1502 @param tags: list of instance tags as strings
1504 @return: the hook environment for this instance
1509 "INSTANCE_NAME": name,
1510 "INSTANCE_PRIMARY": primary_node,
1511 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1512 "INSTANCE_OS_TYPE": os_type,
1513 "INSTANCE_STATUS": status,
1514 "INSTANCE_MINMEM": minmem,
1515 "INSTANCE_MAXMEM": maxmem,
1516 # TODO(2.7) remove deprecated "memory" value
1517 "INSTANCE_MEMORY": maxmem,
1518 "INSTANCE_VCPUS": vcpus,
1519 "INSTANCE_DISK_TEMPLATE": disk_template,
1520 "INSTANCE_HYPERVISOR": hypervisor_name,
1523 nic_count = len(nics)
1524 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1527 env["INSTANCE_NIC%d_IP" % idx] = ip
1528 env["INSTANCE_NIC%d_MAC" % idx] = mac
1529 env["INSTANCE_NIC%d_MODE" % idx] = mode
1530 env["INSTANCE_NIC%d_LINK" % idx] = link
1532 nobj = objects.Network.FromDict(netinfo)
1533 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1535 # FIXME: broken network reference: the instance NIC specifies a
1536 # network, but the relevant network entry was not in the config. This
1537 # should be made impossible.
1538 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1539 if mode == constants.NIC_MODE_BRIDGED:
1540 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1544 env["INSTANCE_NIC_COUNT"] = nic_count
1547 disk_count = len(disks)
1548 for idx, (size, mode) in enumerate(disks):
1549 env["INSTANCE_DISK%d_SIZE" % idx] = size
1550 env["INSTANCE_DISK%d_MODE" % idx] = mode
1554 env["INSTANCE_DISK_COUNT"] = disk_count
1559 env["INSTANCE_TAGS"] = " ".join(tags)
1561 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1562 for key, value in source.items():
1563 env["INSTANCE_%s_%s" % (kind, key)] = value
1568 def _NICToTuple(lu, nic):
"""Build a tuple of nic information.
1571 @type lu: L{LogicalUnit}
1572 @param lu: the logical unit on whose behalf we execute
1573 @type nic: L{objects.NIC}
1574 @param nic: nic to convert to hooks tuple
1577 cluster = lu.cfg.GetClusterInfo()
1578 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1579 mode = filled_params[constants.NIC_MODE]
1580 link = filled_params[constants.NIC_LINK]
1583 nobj = lu.cfg.GetNetwork(nic.network)
1584 netinfo = objects.Network.ToDict(nobj)
1585 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1588 def _NICListToTuple(lu, nics):
1589 """Build a list of nic information tuples.
1591 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1592 value in LUInstanceQueryData.
1594 @type lu: L{LogicalUnit}
1595 @param lu: the logical unit on whose behalf we execute
1596 @type nics: list of L{objects.NIC}
1597 @param nics: list of nics to convert to hooks tuples
1602 hooks_nics.append(_NICToTuple(lu, nic))
1606 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1607 """Builds instance related env variables for hooks from an object.
1609 @type lu: L{LogicalUnit}
1610 @param lu: the logical unit on whose behalf we execute
1611 @type instance: L{objects.Instance}
1612 @param instance: the instance for which we should build the
1614 @type override: dict
1615 @param override: dictionary with key/values that will override
1618 @return: the hook environment dictionary
1621 cluster = lu.cfg.GetClusterInfo()
1622 bep = cluster.FillBE(instance)
1623 hvp = cluster.FillHV(instance)
1625 "name": instance.name,
1626 "primary_node": instance.primary_node,
1627 "secondary_nodes": instance.secondary_nodes,
1628 "os_type": instance.os,
1629 "status": instance.admin_state,
1630 "maxmem": bep[constants.BE_MAXMEM],
1631 "minmem": bep[constants.BE_MINMEM],
1632 "vcpus": bep[constants.BE_VCPUS],
1633 "nics": _NICListToTuple(lu, instance.nics),
1634 "disk_template": instance.disk_template,
1635 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1638 "hypervisor_name": instance.hypervisor,
1639 "tags": instance.tags,
1642 args.update(override)
1643 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1646 def _AdjustCandidatePool(lu, exceptions):
1647 """Adjust the candidate pool after node operations.
1650 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1652 lu.LogInfo("Promoted nodes to master candidate role: %s",
1653 utils.CommaJoin(node.name for node in mod_list))
1654 for name in mod_list:
1655 lu.context.ReaddNode(name)
1656 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1658 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1662 def _DecideSelfPromotion(lu, exceptions=None):
1663 """Decide whether I should promote myself as a master candidate.
1666 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1667 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1668 # the new node will increase mc_max with one, so:
1669 mc_should = min(mc_should + 1, cp_size)
1670 return mc_now < mc_should
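
# Illustrative sketch (not part of the original module) of the
# arithmetic: with candidate_pool_size = 10, 3 current candidates and 5
# nodes that should be candidates, the new node promotes itself because
# min(5 + 1, 10) = 6 > 3.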
1673 def _ComputeViolatingInstances(ipolicy, instances, cfg):
"""Computes a set of instances that violate the given ipolicy.
@param ipolicy: The ipolicy to verify
@type instances: list of L{objects.Instance}
1678 @param instances: List of instances to verify
1679 @type cfg: L{config.ConfigWriter}
1680 @param cfg: Cluster configuration
1681 @return: A frozenset of instance names violating the ipolicy
1684 return frozenset([inst.name for inst in instances
1685 if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
1688 def _CheckNicsBridgesExist(lu, target_nics, target_node):
"""Check that the bridges needed by a list of nics exist.
1692 cluster = lu.cfg.GetClusterInfo()
1693 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1694 brlist = [params[constants.NIC_LINK] for params in paramslist
1695 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1697 result = lu.rpc.call_bridges_exist(target_node, brlist)
1698 result.Raise("Error checking bridges on destination node '%s'" %
1699 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1702 def _CheckInstanceBridgesExist(lu, instance, node=None):
"""Check that the bridges needed by an instance exist.
1707 node = instance.primary_node
1708 _CheckNicsBridgesExist(lu, instance.nics, node)
1711 def _CheckOSVariant(os_obj, name):
1712 """Check whether an OS name conforms to the os variants specification.
1714 @type os_obj: L{objects.OS}
1715 @param os_obj: OS object to check
1717 @param name: OS name passed by the user, to check for validity
1720 variant = objects.OS.GetVariant(name)
1721 if not os_obj.supported_variants:
1723 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1724 " passed)" % (os_obj.name, variant),
1728 raise errors.OpPrereqError("OS name must include a variant",
1731 if variant not in os_obj.supported_variants:
1732 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1735 def _GetNodeInstancesInner(cfg, fn):
1736 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1739 def _GetNodeInstances(cfg, node_name):
1740 """Returns a list of all primary and secondary instances on a node.
1744 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1747 def _GetNodePrimaryInstances(cfg, node_name):
1748 """Returns primary instances on a node.
1751 return _GetNodeInstancesInner(cfg,
1752 lambda inst: node_name == inst.primary_node)
1755 def _GetNodeSecondaryInstances(cfg, node_name):
1756 """Returns secondary instances on a node.
1759 return _GetNodeInstancesInner(cfg,
1760 lambda inst: node_name in inst.secondary_nodes)
1763 def _GetStorageTypeArgs(cfg, storage_type):
1764 """Returns the arguments for a storage type.
1767 # Special case for file storage
1768 if storage_type == constants.ST_FILE:
1769 # storage.FileStorage wants a list of storage directories
1770 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1775 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1778 for dev in instance.disks:
1779 cfg.SetDiskID(dev, node_name)
1781 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1783 result.Raise("Failed to get disk status from node %s" % node_name,
1784 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1786 for idx, bdev_status in enumerate(result.payload):
1787 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1793 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1794 """Check the sanity of iallocator and node arguments and use the
1795 cluster-wide iallocator if appropriate.
1797 Check that at most one of (iallocator, node) is specified. If none is
1798 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1799 then the LU's opcode's iallocator slot is filled with the cluster-wide
1802 @type iallocator_slot: string
1803 @param iallocator_slot: the name of the opcode iallocator slot
1804 @type node_slot: string
1805 @param node_slot: the name of the opcode target node slot
1808 node = getattr(lu.op, node_slot, None)
1809 ialloc = getattr(lu.op, iallocator_slot, None)
1813 if node is not None and ialloc is not None:
1814 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1816 elif ((node is None and ialloc is None) or
1817 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1818 default_iallocator = lu.cfg.GetDefaultIAllocator()
1819 if default_iallocator:
1820 setattr(lu.op, iallocator_slot, default_iallocator)
1822 raise errors.OpPrereqError("No iallocator or node given and no"
1823 " cluster-wide default iallocator found;"
1824 " please specify either an iallocator or a"
1825 " node, or set a cluster-wide default"
1826 " iallocator", errors.ECODE_INVAL)
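
# Illustrative sketch (not part of the original module) of the possible
# outcomes; the opcode field values are assumptions:
#
#   op.iallocator  op.node   cluster default   result
#   "hail"         "node1"   -                 OpPrereqError (both given)
#   None           None      "hail"            op.iallocator set to "hail"
#   None           None      None              OpPrereqError (nothing given)
#   None           "node1"   -                 node is used as-is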
1829 def _GetDefaultIAllocator(cfg, ialloc):
1830 """Decides on which iallocator to use.
1832 @type cfg: L{config.ConfigWriter}
1833 @param cfg: Cluster configuration object
1834 @type ialloc: string or None
1835 @param ialloc: Iallocator specified in opcode
1837 @return: Iallocator name
1841 # Use default iallocator
1842 ialloc = cfg.GetDefaultIAllocator()
1845 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1846 " opcode nor as a cluster-wide default",
1852 def _CheckHostnameSane(lu, name):
1853 """Ensures that a given hostname resolves to a 'sane' name.
1855 The given name is required to be a prefix of the resolved hostname,
1856 to prevent accidental mismatches.
1858 @param lu: the logical unit on behalf of which we're checking
1859 @param name: the name we should resolve and check
1860 @return: the resolved hostname object
1863 hostname = netutils.GetHostname(name=name)
1864 if hostname.name != name:
1865 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1866 if not utils.MatchNameComponent(name, [hostname.name]):
1867 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1868 " same as given hostname '%s'") %
1869 (hostname.name, name), errors.ECODE_INVAL)
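
# Illustrative sketch (not part of the original module): if "inst1"
# resolves to "inst1.example.com", the check passes (the given name is a
# prefix component of the resolved name); if it resolved to
# "other.example.com" instead, OpPrereqError would be raised.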
1873 class LUClusterPostInit(LogicalUnit):
1874 """Logical unit for running hooks after cluster initialization.
1877 HPATH = "cluster-init"
1878 HTYPE = constants.HTYPE_CLUSTER
1880 def BuildHooksEnv(self):
1885 "OP_TARGET": self.cfg.GetClusterName(),
1888 def BuildHooksNodes(self):
1889 """Build hooks nodes.
1892 return ([], [self.cfg.GetMasterNode()])
1894 def Exec(self, feedback_fn):
1901 class LUClusterDestroy(LogicalUnit):
1902 """Logical unit for destroying the cluster.
1905 HPATH = "cluster-destroy"
1906 HTYPE = constants.HTYPE_CLUSTER
1908 def BuildHooksEnv(self):
1913 "OP_TARGET": self.cfg.GetClusterName(),
1916 def BuildHooksNodes(self):
1917 """Build hooks nodes.
1922 def CheckPrereq(self):
1923 """Check prerequisites.
1925 This checks whether the cluster is empty.
1927 Any errors are signaled by raising errors.OpPrereqError.
1930 master = self.cfg.GetMasterNode()
1932 nodelist = self.cfg.GetNodeList()
1933 if len(nodelist) != 1 or nodelist[0] != master:
1934 raise errors.OpPrereqError("There are still %d node(s) in"
1935 " this cluster." % (len(nodelist) - 1),
1937 instancelist = self.cfg.GetInstanceList()
1939 raise errors.OpPrereqError("There are still %d instance(s) in"
1940 " this cluster." % len(instancelist),
1943 def Exec(self, feedback_fn):
1944 """Destroys the cluster.
1947 master_params = self.cfg.GetMasterNetworkParameters()
1949 # Run post hooks on master node before it's removed
1950 _RunPostHook(self, master_params.name)
1952 ems = self.cfg.GetUseExternalMipScript()
1953 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1956 self.LogWarning("Error disabling the master IP address: %s",
1959 return master_params.name
1962 def _VerifyCertificate(filename):
1963 """Verifies a certificate for L{LUClusterVerifyConfig}.
1965 @type filename: string
1966 @param filename: Path to PEM file
1970 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1971 utils.ReadFile(filename))
1972 except Exception, err: # pylint: disable=W0703
1973 return (LUClusterVerifyConfig.ETYPE_ERROR,
1974 "Failed to load X509 certificate %s: %s" % (filename, err))
1977 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1978 constants.SSL_CERT_EXPIRATION_ERROR)
1981 fnamemsg = "While verifying %s: %s" % (filename, msg)
1986 return (None, fnamemsg)
1987 elif errcode == utils.CERT_WARNING:
1988 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1989 elif errcode == utils.CERT_ERROR:
1990 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1992 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
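# The (error-type, message) pair returned above is consumed by
# LUClusterVerifyConfig.Exec, which passes the first element both as the
# condition and as the "code" argument of _ErrorIf; a None error type means
# the certificate verified cleanly.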
1995 def _GetAllHypervisorParameters(cluster, instances):
1996 """Compute the set of all hypervisor parameters.
1998 @type cluster: L{objects.Cluster}
1999 @param cluster: the cluster object
2000 @type instances: list of L{objects.Instance}
2001 @param instances: additional instances from which to obtain parameters
2002 @rtype: list of (origin, hypervisor, parameters)
2003 @return: a list with all parameters found, indicating the hypervisor they
2004 apply to, and the origin (can be "cluster", "os X", or "instance Y")
2009 for hv_name in cluster.enabled_hypervisors:
2010 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2012 for os_name, os_hvp in cluster.os_hvp.items():
2013 for hv_name, hv_params in os_hvp.items():
2015 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2016 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2018 # TODO: collapse identical parameter values in a single one
2019 for instance in instances:
2020 if instance.hvparams:
2021 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2022 cluster.FillHV(instance)))
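# Each entry collected above is an (origin, hypervisor, parameters) triple,
# e.g. ("instance web1", "kvm", {...}) for a hypothetical instance "web1";
# the origins "cluster" and "os <name>" come from the two loops further up.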
2027 class _VerifyErrors(object):
2028 """Mix-in for cluster/group verify LUs.
2030 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2031 self.op and self._feedback_fn to be available.)
2035 ETYPE_FIELD = "code"
2036 ETYPE_ERROR = "ERROR"
2037 ETYPE_WARNING = "WARNING"
2039 def _Error(self, ecode, item, msg, *args, **kwargs):
2040 """Format an error message.
2042 Based on the opcode's error_codes parameter, either format a
2043 parseable error code, or a simpler error string.
2045 This must be called only from Exec and functions called from Exec.
2048 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2049 itype, etxt, _ = ecode
2050 # If the error code is in the list of ignored errors, demote the error to a warning
2052 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2053 ltype = self.ETYPE_WARNING
2054 # first complete the msg
2057 # then format the whole message
2058 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2059 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2065 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2066 # and finally report it via the feedback_fn
2067 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2068 # do not mark the operation as failed for WARN cases only
2069 if ltype == self.ETYPE_ERROR:
2072 def _ErrorIf(self, cond, *args, **kwargs):
2073 """Log an error message if the passed condition is True.
2077 or self.op.debug_simulate_errors): # pylint: disable=E1101
2078 self._Error(*args, **kwargs)
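# Typical usage throughout the verification code:
#   self._ErrorIf(test, constants.CV_ENODERPC, node, "message: %s", detail)
# i.e. a boolean condition, a CV_* error code, the affected item and a
# printf-style message; the keyword argument "code" may override the severity.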
2081 class LUClusterVerify(NoHooksLU):
2082 """Submits all jobs necessary to verify the cluster.
2087 def ExpandNames(self):
2088 self.needed_locks = {}
2090 def Exec(self, feedback_fn):
2093 if self.op.group_name:
2094 groups = [self.op.group_name]
2095 depends_fn = lambda: None
2097 groups = self.cfg.GetNodeGroupList()
2099 # Verify global configuration
2101 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2104 # Always depend on global verification
2105 depends_fn = lambda: [(-len(jobs), [])]
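# Note: -len(jobs) is a relative (negative) job index, pointing at the
# configuration-verification job submitted just above, so every per-group
# verification waits for the global config check to finish first.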
2108 [opcodes.OpClusterVerifyGroup(group_name=group,
2109 ignore_errors=self.op.ignore_errors,
2110 depends=depends_fn())]
2111 for group in groups)
2113 # Fix up all parameters
2114 for op in itertools.chain(*jobs): # pylint: disable=W0142
2115 op.debug_simulate_errors = self.op.debug_simulate_errors
2116 op.verbose = self.op.verbose
2117 op.error_codes = self.op.error_codes
2119 op.skip_checks = self.op.skip_checks
2120 except AttributeError:
2121 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2123 return ResultWithJobs(jobs)
2126 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2127 """Verifies the cluster config.
2132 def _VerifyHVP(self, hvp_data):
2133 """Verifies locally the syntax of the hypervisor parameters.
2136 for item, hv_name, hv_params in hvp_data:
2137 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2140 hv_class = hypervisor.GetHypervisorClass(hv_name)
2141 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2142 hv_class.CheckParameterSyntax(hv_params)
2143 except errors.GenericError, err:
2144 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2146 def ExpandNames(self):
2147 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2148 self.share_locks = _ShareAll()
2150 def CheckPrereq(self):
2151 """Check prerequisites.
2154 # Retrieve all information
2155 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2156 self.all_node_info = self.cfg.GetAllNodesInfo()
2157 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2159 def Exec(self, feedback_fn):
2160 """Verify integrity of cluster, performing various tests on nodes.
2164 self._feedback_fn = feedback_fn
2166 feedback_fn("* Verifying cluster config")
2168 for msg in self.cfg.VerifyConfig():
2169 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2171 feedback_fn("* Verifying cluster certificate files")
2173 for cert_filename in pathutils.ALL_CERT_FILES:
2174 (errcode, msg) = _VerifyCertificate(cert_filename)
2175 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2177 feedback_fn("* Verifying hypervisor parameters")
2179 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2180 self.all_inst_info.values()))
2182 feedback_fn("* Verifying all nodes belong to an existing group")
2184 # We do this verification here because, should this bogus circumstance
2185 # occur, it would never be caught by VerifyGroup, which only acts on
2186 # nodes/instances reachable from existing node groups.
2188 dangling_nodes = set(node.name for node in self.all_node_info.values()
2189 if node.group not in self.all_group_info)
2191 dangling_instances = {}
2192 no_node_instances = []
2194 for inst in self.all_inst_info.values():
2195 if inst.primary_node in dangling_nodes:
2196 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2197 elif inst.primary_node not in self.all_node_info:
2198 no_node_instances.append(inst.name)
2203 utils.CommaJoin(dangling_instances.get(node.name,
2205 for node in dangling_nodes]
2207 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2209 "the following nodes (and their instances) belong to a non"
2210 " existing group: %s", utils.CommaJoin(pretty_dangling))
2212 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2214 "the following instances have a non-existing primary-node:"
2215 " %s", utils.CommaJoin(no_node_instances))
2220 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2221 """Verifies the status of a node group.
2224 HPATH = "cluster-verify"
2225 HTYPE = constants.HTYPE_CLUSTER
2228 _HOOKS_INDENT_RE = re.compile("^", re.M)
2230 class NodeImage(object):
2231 """A class representing the logical and physical status of a node.
2234 @ivar name: the node name to which this object refers
2235 @ivar volumes: a structure as returned from
2236 L{ganeti.backend.GetVolumeList} (runtime)
2237 @ivar instances: a list of running instances (runtime)
2238 @ivar pinst: list of configured primary instances (config)
2239 @ivar sinst: list of configured secondary instances (config)
2240 @ivar sbp: dictionary of {primary-node: list of instances} for all
2241 instances for which this node is secondary (config)
2242 @ivar mfree: free memory, as reported by hypervisor (runtime)
2243 @ivar dfree: free disk, as reported by the node (runtime)
2244 @ivar offline: the offline status (config)
2245 @type rpc_fail: boolean
2246 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2247 not whether the individual keys were correct) (runtime)
2248 @type lvm_fail: boolean
2249 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2250 @type hyp_fail: boolean
2251 @ivar hyp_fail: whether the RPC call didn't return the instance list
2252 @type ghost: boolean
2253 @ivar ghost: whether this is a known node or not (config)
2254 @type os_fail: boolean
2255 @ivar os_fail: whether the RPC call didn't return valid OS data
2257 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2258 @type vm_capable: boolean
2259 @ivar vm_capable: whether the node can host instances
2261 @ivar pv_min: size in MiB of the smallest PVs
2263 @ivar pv_max: size in MiB of the biggest PVs
2266 def __init__(self, offline=False, name=None, vm_capable=True):
2275 self.offline = offline
2276 self.vm_capable = vm_capable
2277 self.rpc_fail = False
2278 self.lvm_fail = False
2279 self.hyp_fail = False
2281 self.os_fail = False
2286 def ExpandNames(self):
2287 # This raises errors.OpPrereqError on its own:
2288 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2290 # Get instances in node group; this is unsafe and needs verification later
2292 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2294 self.needed_locks = {
2295 locking.LEVEL_INSTANCE: inst_names,
2296 locking.LEVEL_NODEGROUP: [self.group_uuid],
2297 locking.LEVEL_NODE: [],
2299 # This opcode is run by watcher every five minutes and acquires all nodes
2300 # for a group. It doesn't run for a long time, so it's better to acquire
2301 # the node allocation lock as well.
2302 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2305 self.share_locks = _ShareAll()
2307 def DeclareLocks(self, level):
2308 if level == locking.LEVEL_NODE:
2309 # Get members of node group; this is unsafe and needs verification later
2310 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2312 all_inst_info = self.cfg.GetAllInstancesInfo()
2314 # In Exec(), we warn about mirrored instances that have primary and
2315 # secondary living in separate node groups. To fully verify that
2316 # volumes for these instances are healthy, we will need to do an
2317 # extra call to their secondaries. We ensure here those nodes will be locked.
2319 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2320 # Important: access only the instances whose lock is owned
2321 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2322 nodes.update(all_inst_info[inst].secondary_nodes)
2324 self.needed_locks[locking.LEVEL_NODE] = nodes
2326 def CheckPrereq(self):
2327 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2328 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2330 group_nodes = set(self.group_info.members)
2332 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2335 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2337 unlocked_instances = \
2338 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2341 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2342 utils.CommaJoin(unlocked_nodes),
2345 if unlocked_instances:
2346 raise errors.OpPrereqError("Missing lock for instances: %s" %
2347 utils.CommaJoin(unlocked_instances),
2350 self.all_node_info = self.cfg.GetAllNodesInfo()
2351 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2353 self.my_node_names = utils.NiceSort(group_nodes)
2354 self.my_inst_names = utils.NiceSort(group_instances)
2356 self.my_node_info = dict((name, self.all_node_info[name])
2357 for name in self.my_node_names)
2359 self.my_inst_info = dict((name, self.all_inst_info[name])
2360 for name in self.my_inst_names)
2362 # We detect here the nodes that will need the extra RPC calls for verifying
2363 # split LV volumes; they should be locked.
2364 extra_lv_nodes = set()
2366 for inst in self.my_inst_info.values():
2367 if inst.disk_template in constants.DTS_INT_MIRROR:
2368 for nname in inst.all_nodes:
2369 if self.all_node_info[nname].group != self.group_uuid:
2370 extra_lv_nodes.add(nname)
2372 unlocked_lv_nodes = \
2373 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2375 if unlocked_lv_nodes:
2376 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2377 utils.CommaJoin(unlocked_lv_nodes),
2379 self.extra_lv_nodes = list(extra_lv_nodes)
2381 def _VerifyNode(self, ninfo, nresult):
2382 """Perform some basic validation on data returned from a node.
2384 - check the result data structure is well formed and has all the expected fields
2386 - check ganeti version
2388 @type ninfo: L{objects.Node}
2389 @param ninfo: the node to check
2390 @param nresult: the results from the node
2392 @return: whether overall this call was successful (and we can expect
2393 reasonable values in the response)
2397 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2399 # main result, nresult should be a non-empty dict
2400 test = not nresult or not isinstance(nresult, dict)
2401 _ErrorIf(test, constants.CV_ENODERPC, node,
2402 "unable to verify node: no data returned")
2406 # compares ganeti version
2407 local_version = constants.PROTOCOL_VERSION
2408 remote_version = nresult.get("version", None)
2409 test = not (remote_version and
2410 isinstance(remote_version, (list, tuple)) and
2411 len(remote_version) == 2)
2412 _ErrorIf(test, constants.CV_ENODERPC, node,
2413 "connection to node returned invalid data")
2417 test = local_version != remote_version[0]
2418 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2419 "incompatible protocol versions: master %s,"
2420 " node %s", local_version, remote_version[0])
2424 # node seems compatible, we can actually try to look into its results
2426 # full package version
2427 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2428 constants.CV_ENODEVERSION, node,
2429 "software version mismatch: master %s, node %s",
2430 constants.RELEASE_VERSION, remote_version[1],
2431 code=self.ETYPE_WARNING)
2433 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2434 if ninfo.vm_capable and isinstance(hyp_result, dict):
2435 for hv_name, hv_result in hyp_result.iteritems():
2436 test = hv_result is not None
2437 _ErrorIf(test, constants.CV_ENODEHV, node,
2438 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2440 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2441 if ninfo.vm_capable and isinstance(hvp_result, list):
2442 for item, hv_name, hv_result in hvp_result:
2443 _ErrorIf(True, constants.CV_ENODEHV, node,
2444 "hypervisor %s parameter verify failure (source %s): %s",
2445 hv_name, item, hv_result)
2447 test = nresult.get(constants.NV_NODESETUP,
2448 ["Missing NODESETUP results"])
2449 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2454 def _VerifyNodeTime(self, ninfo, nresult,
2455 nvinfo_starttime, nvinfo_endtime):
2456 """Check the node time.
2458 @type ninfo: L{objects.Node}
2459 @param ninfo: the node to check
2460 @param nresult: the remote results for the node
2461 @param nvinfo_starttime: the start time of the RPC call
2462 @param nvinfo_endtime: the end time of the RPC call
2466 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2468 ntime = nresult.get(constants.NV_TIME, None)
2470 ntime_merged = utils.MergeTime(ntime)
2471 except (ValueError, TypeError):
2472 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2475 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2476 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2477 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2478 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2482 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2483 "Node time diverges by at least %s from master node time",
2486 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2487 """Check the node LVM results and update info for cross-node checks.
2489 @type ninfo: L{objects.Node}
2490 @param ninfo: the node to check
2491 @param nresult: the remote results for the node
2492 @param vg_name: the configured VG name
2493 @type nimg: L{NodeImage}
2494 @param nimg: node image
2501 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2503 # checks vg existence and size > 20G
2504 vglist = nresult.get(constants.NV_VGLIST, None)
2506 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2508 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2509 constants.MIN_VG_SIZE)
2510 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2513 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2515 self._Error(constants.CV_ENODELVM, node, em)
2516 if pvminmax is not None:
2517 (nimg.pv_min, nimg.pv_max) = pvminmax
2519 def _VerifyGroupLVM(self, node_image, vg_name):
2520 """Check cross-node consistency in LVM.
2522 @type node_image: dict
2523 @param node_image: info about nodes, mapping from node to names to
2524 L{NodeImage} objects
2525 @param vg_name: the configured VG name
2531 # Only exclusive storage needs this kind of check
2532 if not self._exclusive_storage:
2535 # exclusive_storage wants all PVs to have the same size (approximately),
2536 # if the smallest and the biggest ones are okay, everything is fine.
2537 # pv_min is None iff pv_max is None
2538 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2541 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2542 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2543 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2544 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2545 "PV sizes differ too much in the group; smallest (%s MB) is"
2546 " on %s, biggest (%s MB) is on %s",
2547 pvmin, minnode, pvmax, maxnode)
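# Only the extreme sizes need to be compared here: if the smallest and the
# biggest PV in the group are within the tolerance checked by
# utils.LvmExclusiveTestBadPvSizes, every PV in between is acceptable too.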
2549 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2550 """Check the node bridges.
2552 @type ninfo: L{objects.Node}
2553 @param ninfo: the node to check
2554 @param nresult: the remote results for the node
2555 @param bridges: the expected list of bridges
2562 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2564 missing = nresult.get(constants.NV_BRIDGES, None)
2565 test = not isinstance(missing, list)
2566 _ErrorIf(test, constants.CV_ENODENET, node,
2567 "did not return valid bridge information")
2569 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2570 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2572 def _VerifyNodeUserScripts(self, ninfo, nresult):
2573 """Check the results of user script presence and executability on the node
2575 @type ninfo: L{objects.Node}
2576 @param ninfo: the node to check
2577 @param nresult: the remote results for the node
2582 test = not constants.NV_USERSCRIPTS in nresult
2583 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2584 "did not return user scripts information")
2586 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2588 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2589 "user scripts not present or not executable: %s" %
2590 utils.CommaJoin(sorted(broken_scripts)))
2592 def _VerifyNodeNetwork(self, ninfo, nresult):
2593 """Check the node network connectivity results.
2595 @type ninfo: L{objects.Node}
2596 @param ninfo: the node to check
2597 @param nresult: the remote results for the node
2601 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2603 test = constants.NV_NODELIST not in nresult
2604 _ErrorIf(test, constants.CV_ENODESSH, node,
2605 "node hasn't returned node ssh connectivity data")
2607 if nresult[constants.NV_NODELIST]:
2608 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2609 _ErrorIf(True, constants.CV_ENODESSH, node,
2610 "ssh communication with node '%s': %s", a_node, a_msg)
2612 test = constants.NV_NODENETTEST not in nresult
2613 _ErrorIf(test, constants.CV_ENODENET, node,
2614 "node hasn't returned node tcp connectivity data")
2616 if nresult[constants.NV_NODENETTEST]:
2617 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2619 _ErrorIf(True, constants.CV_ENODENET, node,
2620 "tcp communication with node '%s': %s",
2621 anode, nresult[constants.NV_NODENETTEST][anode])
2623 test = constants.NV_MASTERIP not in nresult
2624 _ErrorIf(test, constants.CV_ENODENET, node,
2625 "node hasn't returned node master IP reachability data")
2627 if not nresult[constants.NV_MASTERIP]:
2628 if node == self.master_node:
2629 msg = "the master node cannot reach the master IP (not configured?)"
2631 msg = "cannot reach the master IP"
2632 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2634 def _VerifyInstance(self, instance, inst_config, node_image,
2636 """Verify an instance.
2638 This function checks to see if the required block devices are
2639 available on the instance's node, and that the nodes are in the correct state.
2643 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 pnode = inst_config.primary_node
2645 pnode_img = node_image[pnode]
2646 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2648 node_vol_should = {}
2649 inst_config.MapLVsByNode(node_vol_should)
2651 cluster = self.cfg.GetClusterInfo()
2652 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2654 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
2655 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2656 code=self.ETYPE_WARNING)
2658 for node in node_vol_should:
2659 n_img = node_image[node]
2660 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2661 # ignore missing volumes on offline or broken nodes
2663 for volume in node_vol_should[node]:
2664 test = volume not in n_img.volumes
2665 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2666 "volume %s missing on node %s", volume, node)
2668 if inst_config.admin_state == constants.ADMINST_UP:
2669 test = instance not in pnode_img.instances and not pnode_img.offline
2670 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2671 "instance not running on its primary node %s",
2673 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2674 "instance is marked as running and lives on offline node %s",
2677 diskdata = [(nname, success, status, idx)
2678 for (nname, disks) in diskstatus.items()
2679 for idx, (success, status) in enumerate(disks)]
2681 for nname, success, bdev_status, idx in diskdata:
2682 # the 'ghost node' construction in Exec() ensures that we have a node image entry here
2684 snode = node_image[nname]
2685 bad_snode = snode.ghost or snode.offline
2686 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2687 not success and not bad_snode,
2688 constants.CV_EINSTANCEFAULTYDISK, instance,
2689 "couldn't retrieve status for disk/%s on %s: %s",
2690 idx, nname, bdev_status)
2691 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2692 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2693 constants.CV_EINSTANCEFAULTYDISK, instance,
2694 "disk/%s on %s is faulty", idx, nname)
2696 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2697 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2698 " primary node failed", instance)
2700 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2701 constants.CV_EINSTANCELAYOUT,
2702 instance, "instance has multiple secondary nodes: %s",
2703 utils.CommaJoin(inst_config.secondary_nodes),
2704 code=self.ETYPE_WARNING)
2706 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2707 # Disk template not compatible with exclusive_storage: no instance
2708 # node should have the flag set
2709 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2710 inst_config.all_nodes)
2711 es_nodes = [n for (n, es) in es_flags.items()
2713 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2714 "instance has template %s, which is not supported on nodes"
2715 " that have exclusive storage set: %s",
2716 inst_config.disk_template, utils.CommaJoin(es_nodes))
2718 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2719 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2720 instance_groups = {}
2722 for node in instance_nodes:
2723 instance_groups.setdefault(self.all_node_info[node].group,
2727 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2728 # Sort so that we always list the primary node first.
2729 for group, nodes in sorted(instance_groups.items(),
2730 key=lambda (_, nodes): pnode in nodes,
2733 self._ErrorIf(len(instance_groups) > 1,
2734 constants.CV_EINSTANCESPLITGROUPS,
2735 instance, "instance has primary and secondary nodes in"
2736 " different groups: %s", utils.CommaJoin(pretty_list),
2737 code=self.ETYPE_WARNING)
2739 inst_nodes_offline = []
2740 for snode in inst_config.secondary_nodes:
2741 s_img = node_image[snode]
2742 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2743 snode, "instance %s, connection to secondary node failed",
2747 inst_nodes_offline.append(snode)
2749 # warn that the instance lives on offline nodes
2750 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2751 "instance has offline secondary node(s) %s",
2752 utils.CommaJoin(inst_nodes_offline))
2753 # ... or ghost/non-vm_capable nodes
2754 for node in inst_config.all_nodes:
2755 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2756 instance, "instance lives on ghost node %s", node)
2757 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2758 instance, "instance lives on non-vm_capable node %s", node)
2760 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2761 """Verify if there are any unknown volumes in the cluster.
2763 The .os, .swap and backup volumes are ignored. All other volumes are
2764 reported as unknown.
2766 @type reserved: L{ganeti.utils.FieldSet}
2767 @param reserved: a FieldSet of reserved volume names
2770 for node, n_img in node_image.items():
2771 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2772 self.all_node_info[node].group != self.group_uuid):
2773 # skip non-healthy nodes
2775 for volume in n_img.volumes:
2776 test = ((node not in node_vol_should or
2777 volume not in node_vol_should[node]) and
2778 not reserved.Matches(volume))
2779 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2780 "volume %s is unknown", volume)
2782 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2783 """Verify N+1 Memory Resilience.
2785 Check that if one single node dies we can still start all the
2786 instances it was primary for.
2789 cluster_info = self.cfg.GetClusterInfo()
2790 for node, n_img in node_image.items():
2791 # This code checks that every node which is now listed as
2792 # secondary has enough memory to host all instances it is
2793 # supposed to, should a single other node in the cluster fail.
2794 # FIXME: not ready for failover to an arbitrary node
2795 # FIXME: does not support file-backed instances
2796 # WARNING: we currently take into account down instances as well
2797 # as up ones, considering that even if they're down someone
2798 # might want to start them even in the event of a node failure.
2799 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2800 # we're skipping nodes marked offline and nodes in other groups from
2801 # the N+1 warning, since most likely we don't have good memory
2802 # information from them; we already list instances living on such
2803 # nodes, and that's enough warning
2805 #TODO(dynmem): also consider ballooning out other instances
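# n_img.sbp maps each primary node to the instances for which this node acts
# as secondary (see NodeImage), so needed_mem below is the memory this node
# must have free to take over all auto-balanced instances of that primary.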
2806 for prinode, instances in n_img.sbp.items():
2808 for instance in instances:
2809 bep = cluster_info.FillBE(instance_cfg[instance])
2810 if bep[constants.BE_AUTO_BALANCE]:
2811 needed_mem += bep[constants.BE_MINMEM]
2812 test = n_img.mfree < needed_mem
2813 self._ErrorIf(test, constants.CV_ENODEN1, node,
2814 "not enough memory to accommodate instance failovers"
2815 " should node %s fail (%dMiB needed, %dMiB available)",
2816 prinode, needed_mem, n_img.mfree)
2819 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2820 (files_all, files_opt, files_mc, files_vm)):
2821 """Verifies file checksums collected from all nodes.
2823 @param errorif: Callback for reporting errors
2824 @param nodeinfo: List of L{objects.Node} objects
2825 @param master_node: Name of master node
2826 @param all_nvinfo: RPC results
2829 # Define functions determining which nodes to consider for a file
2832 (files_mc, lambda node: (node.master_candidate or
2833 node.name == master_node)),
2834 (files_vm, lambda node: node.vm_capable),
2837 # Build mapping from filename to list of nodes which should have the file
2839 for (files, fn) in files2nodefn:
2841 filenodes = nodeinfo
2843 filenodes = filter(fn, nodeinfo)
2844 nodefiles.update((filename,
2845 frozenset(map(operator.attrgetter("name"), filenodes)))
2846 for filename in files)
2848 assert set(nodefiles) == (files_all | files_mc | files_vm)
2850 fileinfo = dict((filename, {}) for filename in nodefiles)
2851 ignore_nodes = set()
2853 for node in nodeinfo:
2855 ignore_nodes.add(node.name)
2858 nresult = all_nvinfo[node.name]
2860 if nresult.fail_msg or not nresult.payload:
2863 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2864 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2865 for (key, value) in fingerprints.items())
2868 test = not (node_files and isinstance(node_files, dict))
2869 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2870 "Node did not return file checksum data")
2872 ignore_nodes.add(node.name)
2875 # Build per-checksum mapping from filename to nodes having it
2876 for (filename, checksum) in node_files.items():
2877 assert filename in nodefiles
2878 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2880 for (filename, checksums) in fileinfo.items():
2881 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2883 # Nodes having the file
2884 with_file = frozenset(node_name
2885 for nodes in fileinfo[filename].values()
2886 for node_name in nodes) - ignore_nodes
2888 expected_nodes = nodefiles[filename] - ignore_nodes
2890 # Nodes missing file
2891 missing_file = expected_nodes - with_file
2893 if filename in files_opt:
2895 errorif(missing_file and missing_file != expected_nodes,
2896 constants.CV_ECLUSTERFILECHECK, None,
2897 "File %s is optional, but it must exist on all or no"
2898 " nodes (not found on %s)",
2899 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2901 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2902 "File %s is missing from node(s) %s", filename,
2903 utils.CommaJoin(utils.NiceSort(missing_file)))
2905 # Warn if a node has a file it shouldn't
2906 unexpected = with_file - expected_nodes
2908 constants.CV_ECLUSTERFILECHECK, None,
2909 "File %s should not exist on node(s) %s",
2910 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2912 # See if there are multiple versions of the file
2913 test = len(checksums) > 1
2915 variants = ["variant %s on %s" %
2916 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2917 for (idx, (checksum, nodes)) in
2918 enumerate(sorted(checksums.items()))]
2922 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2923 "File %s found with %s different checksums (%s)",
2924 filename, len(checksums), "; ".join(variants))
2926 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2928 """Verifies the node DRBD status.
2930 @type ninfo: L{objects.Node}
2931 @param ninfo: the node to check
2932 @param nresult: the remote results for the node
2933 @param instanceinfo: the dict of instances
2934 @param drbd_helper: the configured DRBD usermode helper
2935 @param drbd_map: the DRBD map as returned by
2936 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2940 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2943 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2944 test = (helper_result is None)
2945 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2946 "no drbd usermode helper returned")
2948 status, payload = helper_result
2950 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2951 "drbd usermode helper check unsuccessful: %s", payload)
2952 test = status and (payload != drbd_helper)
2953 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2954 "wrong drbd usermode helper: %s", payload)
2956 # compute the DRBD minors
2958 for minor, instance in drbd_map[node].items():
2959 test = instance not in instanceinfo
2960 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2961 "ghost instance '%s' in temporary DRBD map", instance)
2962 # ghost instance should not be running, but otherwise we
2963 # don't give double warnings (both ghost instance and
2964 # unallocated minor in use)
2966 node_drbd[minor] = (instance, False)
2968 instance = instanceinfo[instance]
2969 node_drbd[minor] = (instance.name,
2970 instance.admin_state == constants.ADMINST_UP)
2972 # and now check them
2973 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2974 test = not isinstance(used_minors, (tuple, list))
2975 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2976 "cannot parse drbd status file: %s", str(used_minors))
2978 # we cannot check drbd status
2981 for minor, (iname, must_exist) in node_drbd.items():
2982 test = minor not in used_minors and must_exist
2983 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2984 "drbd minor %d of instance %s is not active", minor, iname)
2985 for minor in used_minors:
2986 test = minor not in node_drbd
2987 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2988 "unallocated drbd minor %d is in use", minor)
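# At this point the minors have been checked in both directions: minors that
# the configuration expects to be active (instances administratively up) must
# be in use on the node, and every used minor must be known to the
# configuration.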
2990 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2991 """Builds the node OS structures.
2993 @type ninfo: L{objects.Node}
2994 @param ninfo: the node to check
2995 @param nresult: the remote results for the node
2996 @param nimg: the node image object
3000 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3002 remote_os = nresult.get(constants.NV_OSLIST, None)
3003 test = (not isinstance(remote_os, list) or
3004 not compat.all(isinstance(v, list) and len(v) == 7
3005 for v in remote_os))
3007 _ErrorIf(test, constants.CV_ENODEOS, node,
3008 "node hasn't returned valid OS data")
3017 for (name, os_path, status, diagnose,
3018 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3020 if name not in os_dict:
3023 # parameters is a list of lists instead of list of tuples due to
3024 # JSON lacking a real tuple type, fix it:
3025 parameters = [tuple(v) for v in parameters]
3026 os_dict[name].append((os_path, status, diagnose,
3027 set(variants), set(parameters), set(api_ver)))
3029 nimg.oslist = os_dict
3031 def _VerifyNodeOS(self, ninfo, nimg, base):
3032 """Verifies the node OS list.
3034 @type ninfo: L{objects.Node}
3035 @param ninfo: the node to check
3036 @param nimg: the node image object
3037 @param base: the 'template' node we match against (e.g. from the master)
3041 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3043 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3045 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3046 for os_name, os_data in nimg.oslist.items():
3047 assert os_data, "Empty OS status for OS %s?!" % os_name
3048 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3049 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3050 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3051 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3052 "OS '%s' has multiple entries (first one shadows the rest): %s",
3053 os_name, utils.CommaJoin([v[0] for v in os_data]))
3054 # comparisons with the 'base' image
3055 test = os_name not in base.oslist
3056 _ErrorIf(test, constants.CV_ENODEOS, node,
3057 "Extra OS %s not present on reference node (%s)",
3061 assert base.oslist[os_name], "Base node has empty OS status?"
3062 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3064 # base OS is invalid, skipping
3066 for kind, a, b in [("API version", f_api, b_api),
3067 ("variants list", f_var, b_var),
3068 ("parameters", beautify_params(f_param),
3069 beautify_params(b_param))]:
3070 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3071 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3072 kind, os_name, base.name,
3073 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3075 # check any missing OSes
3076 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3077 _ErrorIf(missing, constants.CV_ENODEOS, node,
3078 "OSes present on reference node %s but missing on this node: %s",
3079 base.name, utils.CommaJoin(missing))
3081 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3082 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3084 @type ninfo: L{objects.Node}
3085 @param ninfo: the node to check
3086 @param nresult: the remote results for the node
3087 @type is_master: bool
3088 @param is_master: Whether node is the master node
3094 (constants.ENABLE_FILE_STORAGE or
3095 constants.ENABLE_SHARED_FILE_STORAGE)):
3097 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3099 # This should never happen
3100 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3101 "Node did not return forbidden file storage paths")
3103 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3104 "Found forbidden file storage paths: %s",
3105 utils.CommaJoin(fspaths))
3107 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3108 constants.CV_ENODEFILESTORAGEPATHS, node,
3109 "Node should not have returned forbidden file storage"
3112 def _VerifyOob(self, ninfo, nresult):
3113 """Verifies out of band functionality of a node.
3115 @type ninfo: L{objects.Node}
3116 @param ninfo: the node to check
3117 @param nresult: the remote results for the node
3121 # We just have to verify the paths on master and/or master candidates
3122 # as the oob helper is invoked on the master
3123 if ((ninfo.master_candidate or ninfo.master_capable) and
3124 constants.NV_OOB_PATHS in nresult):
3125 for path_result in nresult[constants.NV_OOB_PATHS]:
3126 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3128 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3129 """Verifies and updates the node volume data.
3131 This function will update a L{NodeImage}'s internal structures
3132 with data from the remote call.
3134 @type ninfo: L{objects.Node}
3135 @param ninfo: the node to check
3136 @param nresult: the remote results for the node
3137 @param nimg: the node image object
3138 @param vg_name: the configured VG name
3142 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3144 nimg.lvm_fail = True
3145 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3148 elif isinstance(lvdata, basestring):
3149 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3150 utils.SafeEncode(lvdata))
3151 elif not isinstance(lvdata, dict):
3152 _ErrorIf(True, constants.CV_ENODELVM, node,
3153 "rpc call to node failed (lvlist)")
3155 nimg.volumes = lvdata
3156 nimg.lvm_fail = False
3158 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3159 """Verifies and updates the node instance list.
3161 If the listing was successful, then updates this node's instance
3162 list. Otherwise, it marks the RPC call as failed for the instance list.
3165 @type ninfo: L{objects.Node}
3166 @param ninfo: the node to check
3167 @param nresult: the remote results for the node
3168 @param nimg: the node image object
3171 idata = nresult.get(constants.NV_INSTANCELIST, None)
3172 test = not isinstance(idata, list)
3173 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3174 "rpc call to node failed (instancelist): %s",
3175 utils.SafeEncode(str(idata)))
3177 nimg.hyp_fail = True
3179 nimg.instances = idata
3181 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3182 """Verifies and computes a node information map
3184 @type ninfo: L{objects.Node}
3185 @param ninfo: the node to check
3186 @param nresult: the remote results for the node
3187 @param nimg: the node image object
3188 @param vg_name: the configured VG name
3192 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3194 # try to read free memory (from the hypervisor)
3195 hv_info = nresult.get(constants.NV_HVINFO, None)
3196 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3197 _ErrorIf(test, constants.CV_ENODEHV, node,
3198 "rpc call to node failed (hvinfo)")
3201 nimg.mfree = int(hv_info["memory_free"])
3202 except (ValueError, TypeError):
3203 _ErrorIf(True, constants.CV_ENODERPC, node,
3204 "node returned invalid nodeinfo, check hypervisor")
3206 # FIXME: devise a free space model for file based instances as well
3207 if vg_name is not None:
3208 test = (constants.NV_VGLIST not in nresult or
3209 vg_name not in nresult[constants.NV_VGLIST])
3210 _ErrorIf(test, constants.CV_ENODELVM, node,
3211 "node didn't return data for the volume group '%s'"
3212 " - it is either missing or broken", vg_name)
3215 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3216 except (ValueError, TypeError):
3217 _ErrorIf(True, constants.CV_ENODERPC, node,
3218 "node returned invalid LVM info, check LVM status")
3220 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3221 """Gets per-disk status information for all instances.
3223 @type nodelist: list of strings
3224 @param nodelist: Node names
3225 @type node_image: dict of (name, L{objects.Node})
3226 @param node_image: Node objects
3227 @type instanceinfo: dict of (name, L{objects.Instance})
3228 @param instanceinfo: Instance objects
3229 @rtype: {instance: {node: [(success, payload)]}}
3230 @return: a dictionary of per-instance dictionaries with nodes as
3231 keys and disk information as values; the disk information is a
3232 list of tuples (success, payload)
3235 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3238 node_disks_devonly = {}
3239 diskless_instances = set()
3240 diskless = constants.DT_DISKLESS
3242 for nname in nodelist:
3243 node_instances = list(itertools.chain(node_image[nname].pinst,
3244 node_image[nname].sinst))
3245 diskless_instances.update(inst for inst in node_instances
3246 if instanceinfo[inst].disk_template == diskless)
3247 disks = [(inst, disk)
3248 for inst in node_instances
3249 for disk in instanceinfo[inst].disks]
3252 # No need to collect data
3255 node_disks[nname] = disks
3257 # _AnnotateDiskParams already makes copies of the disks
3259 for (inst, dev) in disks:
3260 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3261 self.cfg.SetDiskID(anno_disk, nname)
3262 devonly.append(anno_disk)
3264 node_disks_devonly[nname] = devonly
3266 assert len(node_disks) == len(node_disks_devonly)
3268 # Collect data from all nodes with disks
3269 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3272 assert len(result) == len(node_disks)
3276 for (nname, nres) in result.items():
3277 disks = node_disks[nname]
3280 # No data from this node
3281 data = len(disks) * [(False, "node offline")]
3284 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3285 "while getting disk information: %s", msg)
3287 # No data from this node
3288 data = len(disks) * [(False, msg)]
3291 for idx, i in enumerate(nres.payload):
3292 if isinstance(i, (tuple, list)) and len(i) == 2:
3295 logging.warning("Invalid result from node %s, entry %d: %s",
3297 data.append((False, "Invalid result from the remote node"))
3299 for ((inst, _), status) in zip(disks, data):
3300 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
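# instdisk ends up as {instance: {node: [(success, payload), ...]}} with one
# status tuple per disk, in disk index order, matching the @rtype described
# in the docstring above.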
3302 # Add empty entries for diskless instances.
3303 for inst in diskless_instances:
3304 assert inst not in instdisk
3307 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3308 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3309 compat.all(isinstance(s, (tuple, list)) and
3310 len(s) == 2 for s in statuses)
3311 for inst, nnames in instdisk.items()
3312 for nname, statuses in nnames.items())
3314 instdisk_keys = set(instdisk)
3315 instanceinfo_keys = set(instanceinfo)
3316 assert instdisk_keys == instanceinfo_keys, \
3317 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3318 (instdisk_keys, instanceinfo_keys))
3323 def _SshNodeSelector(group_uuid, all_nodes):
3324 """Create endless iterators for all potential SSH check hosts.
3327 nodes = [node for node in all_nodes
3328 if (node.group != group_uuid and
3330 keyfunc = operator.attrgetter("group")
3332 return map(itertools.cycle,
3333 [sorted(map(operator.attrgetter("name"), names))
3334 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3338 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3339 """Choose which nodes should talk to which other nodes.
3341 We will make nodes contact all nodes in their group, and one node from every other node group.
3344 @warning: This algorithm has a known issue if one node group is much
3345 smaller than others (e.g. just one node). In such a case all other
3346 nodes will talk to the single node.
3349 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3350 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3352 return (online_nodes,
3353 dict((name, sorted([i.next() for i in sel]))
3354 for name in online_nodes))
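# The result is a pair: the sorted list of online nodes in this group, plus a
# mapping that gives each of them one peer picked from every other node group
# (via the cycling iterators built in _SshNodeSelector).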
3356 def BuildHooksEnv(self):
3359 Cluster-Verify hooks run only in the post phase; when they fail, their
3360 output is logged in the verify output and the verification fails.
3364 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3367 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3368 for node in self.my_node_info.values())
3372 def BuildHooksNodes(self):
3373 """Build hooks nodes.
3376 return ([], self.my_node_names)
3378 def Exec(self, feedback_fn):
3379 """Verify integrity of the node group, performing various tests on nodes.
3382 # This method has too many local variables. pylint: disable=R0914
3383 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3385 if not self.my_node_names:
3387 feedback_fn("* Empty node group, skipping verification")
3391 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3392 verbose = self.op.verbose
3393 self._feedback_fn = feedback_fn
3395 vg_name = self.cfg.GetVGName()
3396 drbd_helper = self.cfg.GetDRBDHelper()
3397 cluster = self.cfg.GetClusterInfo()
3398 hypervisors = cluster.enabled_hypervisors
3399 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3401 i_non_redundant = [] # Non redundant instances
3402 i_non_a_balanced = [] # Non auto-balanced instances
3403 i_offline = 0 # Count of offline instances
3404 n_offline = 0 # Count of offline nodes
3405 n_drained = 0 # Count of nodes being drained
3406 node_vol_should = {}
3408 # FIXME: verify OS list
3411 filemap = _ComputeAncillaryFiles(cluster, False)
3413 # do local checksums
3414 master_node = self.master_node = self.cfg.GetMasterNode()
3415 master_ip = self.cfg.GetMasterIP()
3417 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3420 if self.cfg.GetUseExternalMipScript():
3421 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3423 node_verify_param = {
3424 constants.NV_FILELIST:
3425 map(vcluster.MakeVirtualPath,
3426 utils.UniqueSequence(filename
3427 for files in filemap
3428 for filename in files)),
3429 constants.NV_NODELIST:
3430 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3431 self.all_node_info.values()),
3432 constants.NV_HYPERVISOR: hypervisors,
3433 constants.NV_HVPARAMS:
3434 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3435 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3436 for node in node_data_list
3437 if not node.offline],
3438 constants.NV_INSTANCELIST: hypervisors,
3439 constants.NV_VERSION: None,
3440 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3441 constants.NV_NODESETUP: None,
3442 constants.NV_TIME: None,
3443 constants.NV_MASTERIP: (master_node, master_ip),
3444 constants.NV_OSLIST: None,
3445 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3446 constants.NV_USERSCRIPTS: user_scripts,
3449 if vg_name is not None:
3450 node_verify_param[constants.NV_VGLIST] = None
3451 node_verify_param[constants.NV_LVLIST] = vg_name
3452 node_verify_param[constants.NV_PVLIST] = [vg_name]
3455 node_verify_param[constants.NV_DRBDLIST] = None
3456 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3458 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3459 # Load file storage paths only from master node
3460 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3463 # FIXME: this needs to be changed per node-group, not cluster-wide
3465 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3466 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3467 bridges.add(default_nicpp[constants.NIC_LINK])
3468 for instance in self.my_inst_info.values():
3469 for nic in instance.nics:
3470 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3471 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3472 bridges.add(full_nic[constants.NIC_LINK])
3475 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3477 # Build our expected cluster state
3478 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3480 vm_capable=node.vm_capable))
3481 for node in node_data_list)
3485 for node in self.all_node_info.values():
3486 path = _SupportsOob(self.cfg, node)
3487 if path and path not in oob_paths:
3488 oob_paths.append(path)
3491 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3493 for instance in self.my_inst_names:
3494 inst_config = self.my_inst_info[instance]
3495 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3498 for nname in inst_config.all_nodes:
3499 if nname not in node_image:
3500 gnode = self.NodeImage(name=nname)
3501 gnode.ghost = (nname not in self.all_node_info)
3502 node_image[nname] = gnode
3504 inst_config.MapLVsByNode(node_vol_should)
3506 pnode = inst_config.primary_node
3507 node_image[pnode].pinst.append(instance)
3509 for snode in inst_config.secondary_nodes:
3510 nimg = node_image[snode]
3511 nimg.sinst.append(instance)
3512 if pnode not in nimg.sbp:
3513 nimg.sbp[pnode] = []
3514 nimg.sbp[pnode].append(instance)
3516 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3517 # The value of exclusive_storage should be the same across the group, so if
3518 # it's True for at least one node, we act as if it were set for all the nodes
3519 self._exclusive_storage = compat.any(es_flags.values())
3520 if self._exclusive_storage:
3521 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3523 # At this point, we have the in-memory data structures complete,
3524 # except for the runtime information, which we'll gather next
3526 # Due to the way our RPC system works, exact response times cannot be
3527 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3528 # time before and after executing the request, we can at least have a time window.
3530 nvinfo_starttime = time.time()
3531 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3533 self.cfg.GetClusterName())
3534 nvinfo_endtime = time.time()
3536 if self.extra_lv_nodes and vg_name is not None:
3538 self.rpc.call_node_verify(self.extra_lv_nodes,
3539 {constants.NV_LVLIST: vg_name},
3540 self.cfg.GetClusterName())
3542 extra_lv_nvinfo = {}
3544 all_drbd_map = self.cfg.ComputeDRBDMap()
3546 feedback_fn("* Gathering disk information (%s nodes)" %
3547 len(self.my_node_names))
3548 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3551 feedback_fn("* Verifying configuration file consistency")
3553 # If not all nodes are being checked, we need to make sure the master node
3554 # and a non-checked vm_capable node are in the list.
3555 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3557 vf_nvinfo = all_nvinfo.copy()
3558 vf_node_info = list(self.my_node_info.values())
3559 additional_nodes = []
3560 if master_node not in self.my_node_info:
3561 additional_nodes.append(master_node)
3562 vf_node_info.append(self.all_node_info[master_node])
3563 # Add the first vm_capable node we find which is not included,
3564 # excluding the master node (which we already have)
3565 for node in absent_nodes:
3566 nodeinfo = self.all_node_info[node]
3567 if (nodeinfo.vm_capable and not nodeinfo.offline and
3568 node != master_node):
3569 additional_nodes.append(node)
3570 vf_node_info.append(self.all_node_info[node])
3572 key = constants.NV_FILELIST
3573 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3574 {key: node_verify_param[key]},
3575 self.cfg.GetClusterName()))
3577 vf_nvinfo = all_nvinfo
3578 vf_node_info = self.my_node_info.values()
3580 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3582 feedback_fn("* Verifying node status")
3586 for node_i in node_data_list:
3588 nimg = node_image[node]
3592 feedback_fn("* Skipping offline node %s" % (node,))
3596 if node == master_node:
3598 elif node_i.master_candidate:
3599 ntype = "master candidate"
3600 elif node_i.drained:
3606 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3608 msg = all_nvinfo[node].fail_msg
3609 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3612 nimg.rpc_fail = True
3615 nresult = all_nvinfo[node].payload
3617 nimg.call_ok = self._VerifyNode(node_i, nresult)
3618 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3619 self._VerifyNodeNetwork(node_i, nresult)
3620 self._VerifyNodeUserScripts(node_i, nresult)
3621 self._VerifyOob(node_i, nresult)
3622 self._VerifyFileStoragePaths(node_i, nresult,
3623 node == master_node)
3626 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3627 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3630 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3631 self._UpdateNodeInstances(node_i, nresult, nimg)
3632 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3633 self._UpdateNodeOS(node_i, nresult, nimg)
3635 if not nimg.os_fail:
3636 if refos_img is None:
3638 self._VerifyNodeOS(node_i, nimg, refos_img)
3639 self._VerifyNodeBridges(node_i, nresult, bridges)
3641 # Check whether all running instances are primary for the node. (This
3642 # can no longer be done from _VerifyInstance below, since some of the
3643 # wrong instances could be from other node groups.)
3644 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3646 for inst in non_primary_inst:
3647 test = inst in self.all_inst_info
3648 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3649 "instance should not run on node %s", node_i.name)
3650 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3651 "node is running unknown instance %s", inst)
3653 self._VerifyGroupLVM(node_image, vg_name)
3655 for node, result in extra_lv_nvinfo.items():
3656 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3657 node_image[node], vg_name)
3659 feedback_fn("* Verifying instance status")
3660 for instance in self.my_inst_names:
3662 feedback_fn("* Verifying instance %s" % instance)
3663 inst_config = self.my_inst_info[instance]
3664 self._VerifyInstance(instance, inst_config, node_image,
3667 # If the instance is non-redundant we cannot survive losing its primary
3668 # node, so we are not N+1 compliant.
3669 if inst_config.disk_template not in constants.DTS_MIRRORED:
3670 i_non_redundant.append(instance)
3672 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3673 i_non_a_balanced.append(instance)
3675 feedback_fn("* Verifying orphan volumes")
3676 reserved = utils.FieldSet(*cluster.reserved_lvs)
3678 # We will get spurious "unknown volume" warnings if any node of this group
3679 # is secondary for an instance whose primary is in another group. To avoid
3680 # them, we find these instances and add their volumes to node_vol_should.
3681 for inst in self.all_inst_info.values():
3682 for secondary in inst.secondary_nodes:
3683 if (secondary in self.my_node_info
3684 and inst.name not in self.my_inst_info):
3685 inst.MapLVsByNode(node_vol_should)
3688 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3690 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3691 feedback_fn("* Verifying N+1 Memory redundancy")
3692 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3694 feedback_fn("* Other Notes")
3696 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3697 % len(i_non_redundant))
3699 if i_non_a_balanced:
3700 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3701 % len(i_non_a_balanced))
3704 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3707 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3710 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3714 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3715 """Analyze the post-hooks' result
3717 This method analyses the hook result, handles it, and sends some
3718 nicely-formatted feedback back to the user.
3720 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3721 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3722 @param hooks_results: the results of the multi-node hooks rpc call
3723 @param feedback_fn: function used to send feedback back to the caller
3724 @param lu_result: previous Exec result
3725 @return: the new Exec result, based on the previous result
3729 # We only really run POST phase hooks, only for non-empty groups,
3730 # and are only interested in their results
3731 if not self.my_node_names:
3734 elif phase == constants.HOOKS_PHASE_POST:
3735 # Used to change hooks' output to proper indentation
3736 feedback_fn("* Hooks Results")
3737 assert hooks_results, "invalid result from hooks"
3739 for node_name in hooks_results:
3740 res = hooks_results[node_name]
3741 msg = res.fail_msg
3742 test = msg and not res.offline
3743 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3744 "Communication failure in hooks execution: %s", msg)
3745 if res.offline or msg:
3746 # No need to investigate payload if node is offline or gave
3749 for script, hkr, output in res.payload:
3750 test = hkr == constants.HKR_FAIL
3751 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3752 "Script %s failed, output:", script)
3754 output = self._HOOKS_INDENT_RE.sub(" ", output)
3755 feedback_fn("%s" % output)
3761 class LUClusterVerifyDisks(NoHooksLU):
3762 """Verifies the cluster disks status.
3767 def ExpandNames(self):
3768 self.share_locks = _ShareAll()
3769 self.needed_locks = {
3770 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3773 def Exec(self, feedback_fn):
3774 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3776 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3777 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3778 for group in group_names])
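# Example (illustrative sketch, not part of the original code): for a cluster
# with two node groups named "default" and "storage", the result built above
# would correspond to
#
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])
#
# i.e. one single-opcode job per node group; the group names are hypothetical.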
3781 class LUGroupVerifyDisks(NoHooksLU):
3782 """Verifies the status of all disks in a node group.
3787 def ExpandNames(self):
3788 # Raises errors.OpPrereqError on its own if group can't be found
3789 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3791 self.share_locks = _ShareAll()
3792 self.needed_locks = {
3793 locking.LEVEL_INSTANCE: [],
3794 locking.LEVEL_NODEGROUP: [],
3795 locking.LEVEL_NODE: [],
3797 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3798 # starts one instance of this opcode for every group, which means all
3799 # nodes will be locked for a short amount of time, so it's better to
3800 # acquire the node allocation lock as well.
3801 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3804 def DeclareLocks(self, level):
3805 if level == locking.LEVEL_INSTANCE:
3806 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3808 # Lock instances optimistically, needs verification once node and group
3809 # locks have been acquired
3810 self.needed_locks[locking.LEVEL_INSTANCE] = \
3811 self.cfg.GetNodeGroupInstances(self.group_uuid)
3813 elif level == locking.LEVEL_NODEGROUP:
3814 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3816 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3817 set([self.group_uuid] +
3818 # Lock all groups used by instances optimistically; this requires
3819 # going via the node before it's locked, requiring verification
3820 # later on
3821 [group_uuid
3822 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3823 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3825 elif level == locking.LEVEL_NODE:
3826 # This will only lock the nodes in the group to be verified which contain
3827 # actual instances
3828 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3829 self._LockInstancesNodes()
3831 # Lock all nodes in group to be verified
3832 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3833 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3834 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3836 def CheckPrereq(self):
3837 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3838 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3839 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3841 assert self.group_uuid in owned_groups
3843 # Check if locked instances are still correct
3844 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3846 # Get instance information
3847 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3849 # Check if node groups for locked instances are still correct
3850 _CheckInstancesNodeGroups(self.cfg, self.instances,
3851 owned_groups, owned_nodes, self.group_uuid)
3853 def Exec(self, feedback_fn):
3854 """Verify integrity of cluster disks.
3856 @rtype: tuple of three items
3857 @return: a tuple of (dict of node-to-node_error, list of instances
3858 which need activate-disks, dict of instance: (node, volume) for
3859 missing volumes)
3862 res_nodes = {}
3863 res_instances = set()
3864 res_missing = {}
3866 nv_dict = _MapInstanceDisksToNodes(
3867 [inst for inst in self.instances.values()
3868 if inst.admin_state == constants.ADMINST_UP])
3871 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3872 set(self.cfg.GetVmCapableNodeList()))
3874 node_lvs = self.rpc.call_lv_list(nodes, [])
3876 for (node, node_res) in node_lvs.items():
3877 if node_res.offline:
3880 msg = node_res.fail_msg
3882 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3883 res_nodes[node] = msg
3886 for lv_name, (_, _, lv_online) in node_res.payload.items():
3887 inst = nv_dict.pop((node, lv_name), None)
3888 if not (lv_online or inst is None):
3889 res_instances.add(inst)
3891 # any leftover items in nv_dict are missing LVs, let's arrange the data
3893 for key, inst in nv_dict.iteritems():
3894 res_missing.setdefault(inst, []).append(list(key))
3896 return (res_nodes, list(res_instances), res_missing)
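# Example (hypothetical data, added for illustration): the tuple returned by
# Exec above could look like
#
#   ({"node2.example.com": "Error while running lvs: ..."},     # node errors
#    ["instance3.example.com"],                                 # needs activate-disks
#    {"instance4.example.com": [["node5.example.com", "xenvg/lv0"]]})  # missing LVs
#
# where all node, instance and LV names are made up for the example.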
3899 class LUClusterRepairDiskSizes(NoHooksLU):
3900 """Verifies the cluster disks sizes.
3905 def ExpandNames(self):
3906 if self.op.instances:
3907 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3908 # Not getting the node allocation lock as only a specific set of
3909 # instances (and their nodes) is going to be acquired
3910 self.needed_locks = {
3911 locking.LEVEL_NODE_RES: [],
3912 locking.LEVEL_INSTANCE: self.wanted_names,
3914 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3916 self.wanted_names = None
3917 self.needed_locks = {
3918 locking.LEVEL_NODE_RES: locking.ALL_SET,
3919 locking.LEVEL_INSTANCE: locking.ALL_SET,
3921 # This opcode acquires the node locks for all instances
3922 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3925 self.share_locks = {
3926 locking.LEVEL_NODE_RES: 1,
3927 locking.LEVEL_INSTANCE: 0,
3928 locking.LEVEL_NODE_ALLOC: 1,
3931 def DeclareLocks(self, level):
3932 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3933 self._LockInstancesNodes(primary_only=True, level=level)
3935 def CheckPrereq(self):
3936 """Check prerequisites.
3938 This only checks the optional instance list against the existing names.
3941 if self.wanted_names is None:
3942 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3944 self.wanted_instances = \
3945 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3947 def _EnsureChildSizes(self, disk):
3948 """Ensure children of the disk have the needed disk size.
3950 This is valid mainly for DRBD8 and fixes an issue where the
3951 children have a smaller disk size than the parent.
3953 @param disk: an L{ganeti.objects.Disk} object
3956 if disk.dev_type == constants.LD_DRBD8:
3957 assert disk.children, "Empty children for DRBD8?"
3958 fchild = disk.children[0]
3959 mismatch = fchild.size < disk.size
3960 if mismatch:
3961 self.LogInfo("Child disk has size %d, parent %d, fixing",
3962 fchild.size, disk.size)
3963 fchild.size = disk.size
3965 # and we recurse on this child only, not on the metadev
3966 return self._EnsureChildSizes(fchild) or mismatch
3967 else:
3968 return False
3970 def Exec(self, feedback_fn):
3971 """Verify the size of cluster disks.
3974 # TODO: check child disks too
3975 # TODO: check differences in size between primary/secondary nodes
3976 per_node_disks = {}
3977 for instance in self.wanted_instances:
3978 pnode = instance.primary_node
3979 if pnode not in per_node_disks:
3980 per_node_disks[pnode] = []
3981 for idx, disk in enumerate(instance.disks):
3982 per_node_disks[pnode].append((instance, idx, disk))
3984 assert not (frozenset(per_node_disks.keys()) -
3985 self.owned_locks(locking.LEVEL_NODE_RES)), \
3986 "Not owning correct locks"
3987 assert not self.owned_locks(locking.LEVEL_NODE)
3989 changed = []
3990 for node, dskl in per_node_disks.items():
3991 newl = [v[2].Copy() for v in dskl]
3992 for dsk in newl:
3993 self.cfg.SetDiskID(dsk, node)
3994 result = self.rpc.call_blockdev_getsize(node, newl)
3996 self.LogWarning("Failure in blockdev_getsize call to node"
3997 " %s, ignoring", node)
3999 if len(result.payload) != len(dskl):
4000 logging.warning("Invalid result from node %s: len(dskl)=%d,"
4001 " result.payload=%s", node, len(dskl), result.payload)
4002 self.LogWarning("Invalid result from node %s, ignoring node results",
4005 for ((instance, idx, disk), size) in zip(dskl, result.payload):
4006 if size is None:
4007 self.LogWarning("Disk %d of instance %s did not return size"
4008 " information, ignoring", idx, instance.name)
4010 if not isinstance(size, (int, long)):
4011 self.LogWarning("Disk %d of instance %s did not return valid"
4012 " size information, ignoring", idx, instance.name)
4015 if size != disk.size:
4016 self.LogInfo("Disk %d of instance %s has mismatched size,"
4017 " correcting: recorded %d, actual %d", idx,
4018 instance.name, disk.size, size)
4019 disk.size = size
4020 self.cfg.Update(instance, feedback_fn)
4021 changed.append((instance.name, idx, size))
4022 if self._EnsureChildSizes(disk):
4023 self.cfg.Update(instance, feedback_fn)
4024 changed.append((instance.name, idx, disk.size))
4026 return changed
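# Example (assumption, not from the original source): the "changed" list built
# above collects (instance name, disk index, new size in MiB) tuples, e.g.
#
#   [("instance1.example.com", 0, 10240)]
#
# when disk 0 of that (hypothetical) instance had to be corrected to 10240 MiB.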
4028 class LUClusterRename(LogicalUnit):
4029 """Rename the cluster.
4032 HPATH = "cluster-rename"
4033 HTYPE = constants.HTYPE_CLUSTER
4035 def BuildHooksEnv(self):
4040 "OP_TARGET": self.cfg.GetClusterName(),
4041 "NEW_NAME": self.op.name,
4044 def BuildHooksNodes(self):
4045 """Build hooks nodes.
4048 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4050 def CheckPrereq(self):
4051 """Verify that the passed name is a valid one.
4054 hostname = netutils.GetHostname(name=self.op.name,
4055 family=self.cfg.GetPrimaryIPFamily())
4057 new_name = hostname.name
4058 self.ip = new_ip = hostname.ip
4059 old_name = self.cfg.GetClusterName()
4060 old_ip = self.cfg.GetMasterIP()
4061 if new_name == old_name and new_ip == old_ip:
4062 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4063 " cluster has changed",
4065 if new_ip != old_ip:
4066 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4067 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4068 " reachable on the network" %
4069 new_ip, errors.ECODE_NOTUNIQUE)
4071 self.op.name = new_name
4073 def Exec(self, feedback_fn):
4074 """Rename the cluster.
4077 clustername = self.op.name
4080 # shutdown the master IP
4081 master_params = self.cfg.GetMasterNetworkParameters()
4082 ems = self.cfg.GetUseExternalMipScript()
4083 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4085 result.Raise("Could not disable the master role")
4088 cluster = self.cfg.GetClusterInfo()
4089 cluster.cluster_name = clustername
4090 cluster.master_ip = new_ip
4091 self.cfg.Update(cluster, feedback_fn)
4093 # update the known hosts file
4094 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4095 node_list = self.cfg.GetOnlineNodeList()
4097 node_list.remove(master_params.name)
4100 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4102 master_params.ip = new_ip
4103 result = self.rpc.call_node_activate_master_ip(master_params.name,
4105 msg = result.fail_msg
4107 self.LogWarning("Could not re-enable the master role on"
4108 " the master, please restart manually: %s", msg)
4113 def _ValidateNetmask(cfg, netmask):
4114 """Checks if a netmask is valid.
4116 @type cfg: L{config.ConfigWriter}
4117 @param cfg: The cluster configuration
4118 @type netmask: int
4119 @param netmask: the netmask to be verified
4120 @raise errors.OpPrereqError: if the validation fails
4123 ip_family = cfg.GetPrimaryIPFamily()
4125 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4126 except errors.ProgrammerError:
4127 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4128 ip_family, errors.ECODE_INVAL)
4129 if not ipcls.ValidateNetmask(netmask):
4130 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4131 (netmask), errors.ECODE_INVAL)
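# Example (illustrative sketch; the values are hypothetical): the netmask
# checked above is a CIDR prefix length, so for an IPv4 cluster
#
#   _ValidateNetmask(self.cfg, 32)   # accepted
#   _ValidateNetmask(self.cfg, 99)   # raises errors.OpPrereqError
#
# would pass and fail respectively.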
4134 class LUClusterSetParams(LogicalUnit):
4135 """Change the parameters of the cluster.
4138 HPATH = "cluster-modify"
4139 HTYPE = constants.HTYPE_CLUSTER
4142 def CheckArguments(self):
4146 if self.op.uid_pool:
4147 uidpool.CheckUidPool(self.op.uid_pool)
4149 if self.op.add_uids:
4150 uidpool.CheckUidPool(self.op.add_uids)
4152 if self.op.remove_uids:
4153 uidpool.CheckUidPool(self.op.remove_uids)
4155 if self.op.master_netmask is not None:
4156 _ValidateNetmask(self.cfg, self.op.master_netmask)
4158 if self.op.diskparams:
4159 for dt_params in self.op.diskparams.values():
4160 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4161 try:
4162 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4163 except errors.OpPrereqError, err:
4164 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4167 def ExpandNames(self):
4168 # FIXME: in the future maybe other cluster params won't require checking on
4169 # all nodes to be modified.
4170 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4171 # resource locks the right thing, shouldn't it be the BGL instead?
4172 self.needed_locks = {
4173 locking.LEVEL_NODE: locking.ALL_SET,
4174 locking.LEVEL_INSTANCE: locking.ALL_SET,
4175 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4176 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4178 self.share_locks = _ShareAll()
4180 def BuildHooksEnv(self):
4185 "OP_TARGET": self.cfg.GetClusterName(),
4186 "NEW_VG_NAME": self.op.vg_name,
4189 def BuildHooksNodes(self):
4190 """Build hooks nodes.
4193 mn = self.cfg.GetMasterNode()
4196 def CheckPrereq(self):
4197 """Check prerequisites.
4199 This checks whether the given params don't conflict and
4200 if the given volume group is valid.
4203 if self.op.vg_name is not None and not self.op.vg_name:
4204 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4205 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4206 " instances exist", errors.ECODE_INVAL)
4208 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4209 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4210 raise errors.OpPrereqError("Cannot disable drbd helper while"
4211 " drbd-based instances exist",
4214 node_list = self.owned_locks(locking.LEVEL_NODE)
4216 # if vg_name not None, checks given volume group on all nodes
4218 vglist = self.rpc.call_vg_list(node_list)
4219 for node in node_list:
4220 msg = vglist[node].fail_msg
4222 # ignoring down node
4223 self.LogWarning("Error while gathering data on node %s"
4224 " (ignoring node): %s", node, msg)
4226 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4228 constants.MIN_VG_SIZE)
4230 raise errors.OpPrereqError("Error on node '%s': %s" %
4231 (node, vgstatus), errors.ECODE_ENVIRON)
4233 if self.op.drbd_helper:
4234 # checks given drbd helper on all nodes
4235 helpers = self.rpc.call_drbd_helper(node_list)
4236 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4238 self.LogInfo("Not checking drbd helper on offline node %s", node)
4240 msg = helpers[node].fail_msg
4242 raise errors.OpPrereqError("Error checking drbd helper on node"
4243 " '%s': %s" % (node, msg),
4244 errors.ECODE_ENVIRON)
4245 node_helper = helpers[node].payload
4246 if node_helper != self.op.drbd_helper:
4247 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4248 (node, node_helper), errors.ECODE_ENVIRON)
4250 self.cluster = cluster = self.cfg.GetClusterInfo()
4251 # validate params changes
4252 if self.op.beparams:
4253 objects.UpgradeBeParams(self.op.beparams)
4254 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4255 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4257 if self.op.ndparams:
4258 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4259 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4261 # TODO: we need a more general way to handle resetting
4262 # cluster-level parameters to default values
4263 if self.new_ndparams["oob_program"] == "":
4264 self.new_ndparams["oob_program"] = \
4265 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4267 if self.op.hv_state:
4268 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4269 self.cluster.hv_state_static)
4270 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4271 for hv, values in new_hv_state.items())
4273 if self.op.disk_state:
4274 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4275 self.cluster.disk_state_static)
4276 self.new_disk_state = \
4277 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4278 for name, values in svalues.items()))
4279 for storage, svalues in new_disk_state.items())
4282 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4285 all_instances = self.cfg.GetAllInstancesInfo().values()
4287 for group in self.cfg.GetAllNodeGroupsInfo().values():
4288 instances = frozenset([inst for inst in all_instances
4289 if compat.any(node in group.members
4290 for node in inst.all_nodes)])
4291 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4292 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4293 new = _ComputeNewInstanceViolations(ipol,
4294 new_ipolicy, instances, self.cfg)
4296 violations.update(new)
4299 self.LogWarning("After the ipolicy change the following instances"
4300 " violate them: %s",
4301 utils.CommaJoin(utils.NiceSort(violations)))
4303 if self.op.nicparams:
4304 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4305 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4306 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4308 nic_errors = []
4309 # check all instances for consistency
4310 for instance in self.cfg.GetAllInstancesInfo().values():
4311 for nic_idx, nic in enumerate(instance.nics):
4312 params_copy = copy.deepcopy(nic.nicparams)
4313 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4315 # check parameter syntax
4317 objects.NIC.CheckParameterSyntax(params_filled)
4318 except errors.ConfigurationError, err:
4319 nic_errors.append("Instance %s, nic/%d: %s" %
4320 (instance.name, nic_idx, err))
4322 # if we're moving instances to routed, check that they have an ip
4323 target_mode = params_filled[constants.NIC_MODE]
4324 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4325 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4326 " address" % (instance.name, nic_idx))
4328 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4329 "\n".join(nic_errors), errors.ECODE_INVAL)
4331 # hypervisor list/parameters
4332 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4333 if self.op.hvparams:
4334 for hv_name, hv_dict in self.op.hvparams.items():
4335 if hv_name not in self.new_hvparams:
4336 self.new_hvparams[hv_name] = hv_dict
4338 self.new_hvparams[hv_name].update(hv_dict)
4340 # disk template parameters
4341 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4342 if self.op.diskparams:
4343 for dt_name, dt_params in self.op.diskparams.items():
4344 if dt_name not in self.new_diskparams:
4345 self.new_diskparams[dt_name] = dt_params
4347 self.new_diskparams[dt_name].update(dt_params)
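# Note (illustrative, not part of the original code): objects.FillDict(defaults,
# custom) returns a copy of "defaults" updated with "custom"; above it is first
# called with an empty override dict simply to copy the cluster-wide disk
# parameters, after which the per-template values from self.op.diskparams are
# merged in, roughly as in this hypothetical example:
#
#   objects.FillDict({"resync-rate": 1024}, {"resync-rate": 2048})
#   # -> {"resync-rate": 2048}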
4349 # os hypervisor parameters
4350 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4352 for os_name, hvs in self.op.os_hvp.items():
4353 if os_name not in self.new_os_hvp:
4354 self.new_os_hvp[os_name] = hvs
4356 for hv_name, hv_dict in hvs.items():
4357 if hv_dict is None:
4358 # Delete if it exists
4359 self.new_os_hvp[os_name].pop(hv_name, None)
4360 elif hv_name not in self.new_os_hvp[os_name]:
4361 self.new_os_hvp[os_name][hv_name] = hv_dict
4363 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4366 self.new_osp = objects.FillDict(cluster.osparams, {})
4367 if self.op.osparams:
4368 for os_name, osp in self.op.osparams.items():
4369 if os_name not in self.new_osp:
4370 self.new_osp[os_name] = {}
4372 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4375 if not self.new_osp[os_name]:
4376 # we removed all parameters
4377 del self.new_osp[os_name]
4379 # check the parameter validity (remote check)
4380 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4381 os_name, self.new_osp[os_name])
4383 # changes to the hypervisor list
4384 if self.op.enabled_hypervisors is not None:
4385 self.hv_list = self.op.enabled_hypervisors
4386 for hv in self.hv_list:
4387 # if the hypervisor doesn't already exist in the cluster
4388 # hvparams, we initialize it to empty, and then (in both
4389 # cases) we make sure to fill the defaults, as we might not
4390 # have a complete defaults list if the hypervisor wasn't
4392 if hv not in new_hvp:
4393 new_hvp[hv] = {}
4394 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4395 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4397 self.hv_list = cluster.enabled_hypervisors
4399 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4400 # either the enabled list has changed, or the parameters have, validate
4401 for hv_name, hv_params in self.new_hvparams.items():
4402 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4403 (self.op.enabled_hypervisors and
4404 hv_name in self.op.enabled_hypervisors)):
4405 # either this is a new hypervisor, or its parameters have changed
4406 hv_class = hypervisor.GetHypervisorClass(hv_name)
4407 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4408 hv_class.CheckParameterSyntax(hv_params)
4409 _CheckHVParams(self, node_list, hv_name, hv_params)
4411 # FIXME: Regarding enabled_storage_types: If a method is removed
4412 # which is actually currently used by an instance, should removing
4413 # it be prevented?
4416 # no need to check any newly-enabled hypervisors, since the
4417 # defaults have already been checked in the above code-block
4418 for os_name, os_hvp in self.new_os_hvp.items():
4419 for hv_name, hv_params in os_hvp.items():
4420 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4421 # we need to fill in the new os_hvp on top of the actual hv_p
4422 cluster_defaults = self.new_hvparams.get(hv_name, {})
4423 new_osp = objects.FillDict(cluster_defaults, hv_params)
4424 hv_class = hypervisor.GetHypervisorClass(hv_name)
4425 hv_class.CheckParameterSyntax(new_osp)
4426 _CheckHVParams(self, node_list, hv_name, new_osp)
4428 if self.op.default_iallocator:
4429 alloc_script = utils.FindFile(self.op.default_iallocator,
4430 constants.IALLOCATOR_SEARCH_PATH,
4432 if alloc_script is None:
4433 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4434 " specified" % self.op.default_iallocator,
4437 def Exec(self, feedback_fn):
4438 """Change the parameters of the cluster.
4441 if self.op.vg_name is not None:
4442 new_volume = self.op.vg_name
4445 if new_volume != self.cfg.GetVGName():
4446 self.cfg.SetVGName(new_volume)
4448 feedback_fn("Cluster LVM configuration already in desired"
4449 " state, not changing")
4450 if self.op.drbd_helper is not None:
4451 new_helper = self.op.drbd_helper
4454 if new_helper != self.cfg.GetDRBDHelper():
4455 self.cfg.SetDRBDHelper(new_helper)
4457 feedback_fn("Cluster DRBD helper already in desired state,"
4459 if self.op.hvparams:
4460 self.cluster.hvparams = self.new_hvparams
4461 if self.op.os_hvp:
4462 self.cluster.os_hvp = self.new_os_hvp
4463 if self.op.enabled_hypervisors is not None:
4464 self.cluster.hvparams = self.new_hvparams
4465 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4466 if self.op.enabled_storage_types is not None:
4467 self.cluster.enabled_storage_types = \
4468 list(set(self.op.enabled_storage_types))
4469 if self.op.beparams:
4470 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4471 if self.op.nicparams:
4472 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4473 if self.op.ipolicy:
4474 self.cluster.ipolicy = self.new_ipolicy
4475 if self.op.osparams:
4476 self.cluster.osparams = self.new_osp
4477 if self.op.ndparams:
4478 self.cluster.ndparams = self.new_ndparams
4479 if self.op.diskparams:
4480 self.cluster.diskparams = self.new_diskparams
4481 if self.op.hv_state:
4482 self.cluster.hv_state_static = self.new_hv_state
4483 if self.op.disk_state:
4484 self.cluster.disk_state_static = self.new_disk_state
4486 if self.op.candidate_pool_size is not None:
4487 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4488 # we need to update the pool size here, otherwise the save will fail
4489 _AdjustCandidatePool(self, [])
4491 if self.op.maintain_node_health is not None:
4492 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4493 feedback_fn("Note: CONFD was disabled at build time, node health"
4494 " maintenance is not useful (still enabling it)")
4495 self.cluster.maintain_node_health = self.op.maintain_node_health
4497 if self.op.prealloc_wipe_disks is not None:
4498 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4500 if self.op.add_uids is not None:
4501 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4503 if self.op.remove_uids is not None:
4504 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4506 if self.op.uid_pool is not None:
4507 self.cluster.uid_pool = self.op.uid_pool
4509 if self.op.default_iallocator is not None:
4510 self.cluster.default_iallocator = self.op.default_iallocator
4512 if self.op.reserved_lvs is not None:
4513 self.cluster.reserved_lvs = self.op.reserved_lvs
4515 if self.op.use_external_mip_script is not None:
4516 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4518 def helper_os(aname, mods, desc):
4520 lst = getattr(self.cluster, aname)
4521 for key, val in mods:
4522 if key == constants.DDM_ADD:
4524 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4527 elif key == constants.DDM_REMOVE:
4531 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4533 raise errors.ProgrammerError("Invalid modification '%s'" % key)
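# Example (hypothetical values, added for illustration): the modification lists
# handled by helper_os are (action, OS name) pairs, so an opcode field such as
#
#   hidden_os=[(constants.DDM_ADD, "debian-image"),
#              (constants.DDM_REMOVE, "lenny-image")]
#
# would add the first OS to and drop the second from the cluster's hidden OS
# list; both OS names are examples only.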
4535 if self.op.hidden_os:
4536 helper_os("hidden_os", self.op.hidden_os, "hidden")
4538 if self.op.blacklisted_os:
4539 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4541 if self.op.master_netdev:
4542 master_params = self.cfg.GetMasterNetworkParameters()
4543 ems = self.cfg.GetUseExternalMipScript()
4544 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4545 self.cluster.master_netdev)
4546 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4548 result.Raise("Could not disable the master ip")
4549 feedback_fn("Changing master_netdev from %s to %s" %
4550 (master_params.netdev, self.op.master_netdev))
4551 self.cluster.master_netdev = self.op.master_netdev
4553 if self.op.master_netmask:
4554 master_params = self.cfg.GetMasterNetworkParameters()
4555 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4556 result = self.rpc.call_node_change_master_netmask(master_params.name,
4557 master_params.netmask,
4558 self.op.master_netmask,
4560 master_params.netdev)
4562 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4565 self.cluster.master_netmask = self.op.master_netmask
4567 self.cfg.Update(self.cluster, feedback_fn)
4569 if self.op.master_netdev:
4570 master_params = self.cfg.GetMasterNetworkParameters()
4571 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4572 self.op.master_netdev)
4573 ems = self.cfg.GetUseExternalMipScript()
4574 result = self.rpc.call_node_activate_master_ip(master_params.name,
4577 self.LogWarning("Could not re-enable the master ip on"
4578 " the master, please restart manually: %s",
4582 def _UploadHelper(lu, nodes, fname):
4583 """Helper for uploading a file and showing warnings.
4586 if os.path.exists(fname):
4587 result = lu.rpc.call_upload_file(nodes, fname)
4588 for to_node, to_result in result.items():
4589 msg = to_result.fail_msg
4591 msg = ("Copy of file %s to node %s failed: %s" %
4592 (fname, to_node, msg))
4596 def _ComputeAncillaryFiles(cluster, redist):
4597 """Compute files external to Ganeti which need to be consistent.
4599 @type redist: boolean
4600 @param redist: Whether to include files which need to be redistributed
4603 # Compute files for all nodes
4605 pathutils.SSH_KNOWN_HOSTS_FILE,
4606 pathutils.CONFD_HMAC_KEY,
4607 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4608 pathutils.SPICE_CERT_FILE,
4609 pathutils.SPICE_CACERT_FILE,
4610 pathutils.RAPI_USERS_FILE,
4614 # we need to ship at least the RAPI certificate
4615 files_all.add(pathutils.RAPI_CERT_FILE)
4617 files_all.update(pathutils.ALL_CERT_FILES)
4618 files_all.update(ssconf.SimpleStore().GetFileList())
4620 if cluster.modify_etc_hosts:
4621 files_all.add(pathutils.ETC_HOSTS)
4623 if cluster.use_external_mip_script:
4624 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4626 # Files which are optional, these must:
4627 # - be present in one other category as well
4628 # - either exist or not exist on all nodes of that category (mc, vm all)
4630 pathutils.RAPI_USERS_FILE,
4633 # Files which should only be on master candidates
4637 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4641 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4642 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4643 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4645 # Files which should only be on VM-capable nodes
4648 for hv_name in cluster.enabled_hypervisors
4650 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4654 for hv_name in cluster.enabled_hypervisors
4656 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4658 # Filenames in each category must be unique
4659 all_files_set = files_all | files_mc | files_vm
4660 assert (len(all_files_set) ==
4661 sum(map(len, [files_all, files_mc, files_vm]))), \
4662 "Found file listed in more than one file list"
4664 # Optional files must be present in one other category
4665 assert all_files_set.issuperset(files_opt), \
4666 "Optional file not in a different required list"
4668 # This one file should never ever be re-distributed via RPC
4669 assert not (redist and
4670 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
4672 return (files_all, files_opt, files_mc, files_vm)
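# Example (illustrative sketch, not part of the original code): a caller such
# as _RedistributeAncillaryFiles consumes the four file sets returned above
# roughly like this; the comments describe the intended audience of each set:
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(lu.cfg.GetClusterInfo(), True)
#   # files_all: every node (e.g. known_hosts, the confd HMAC key)
#   # files_opt: allowed to be missing (e.g. the RAPI users file)
#   # files_mc:  master candidates only (e.g. the cluster configuration)
#   # files_vm:  VM-capable nodes only (hypervisor ancillary files)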
4675 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4676 """Distribute additional files which are part of the cluster configuration.
4678 ConfigWriter takes care of distributing the config and ssconf files, but
4679 there are more files which should be distributed to all nodes. This function
4680 makes sure those are copied.
4682 @param lu: calling logical unit
4683 @param additional_nodes: list of nodes not in the config to distribute to
4684 @type additional_vm: boolean
4685 @param additional_vm: whether the additional nodes are vm-capable or not
4688 # Gather target nodes
4689 cluster = lu.cfg.GetClusterInfo()
4690 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4692 online_nodes = lu.cfg.GetOnlineNodeList()
4693 online_set = frozenset(online_nodes)
4694 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4696 if additional_nodes is not None:
4697 online_nodes.extend(additional_nodes)
4699 vm_nodes.extend(additional_nodes)
4701 # Never distribute to master node
4702 for nodelist in [online_nodes, vm_nodes]:
4703 if master_info.name in nodelist:
4704 nodelist.remove(master_info.name)
4707 (files_all, _, files_mc, files_vm) = \
4708 _ComputeAncillaryFiles(cluster, True)
4710 # Never re-distribute configuration file from here
4711 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4712 pathutils.CLUSTER_CONF_FILE in files_vm)
4713 assert not files_mc, "Master candidates not handled in this function"
4716 (online_nodes, files_all),
4717 (vm_nodes, files_vm),
4721 for (node_list, files) in filemap:
4722 for fname in files:
4723 _UploadHelper(lu, node_list, fname)
4726 class LUClusterRedistConf(NoHooksLU):
4727 """Force the redistribution of cluster configuration.
4729 This is a very simple LU.
4734 def ExpandNames(self):
4735 self.needed_locks = {
4736 locking.LEVEL_NODE: locking.ALL_SET,
4737 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4739 self.share_locks = _ShareAll()
4741 def Exec(self, feedback_fn):
4742 """Redistribute the configuration.
4745 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4746 _RedistributeAncillaryFiles(self)
4749 class LUClusterActivateMasterIp(NoHooksLU):
4750 """Activate the master IP on the master node.
4753 def Exec(self, feedback_fn):
4754 """Activate the master IP.
4757 master_params = self.cfg.GetMasterNetworkParameters()
4758 ems = self.cfg.GetUseExternalMipScript()
4759 result = self.rpc.call_node_activate_master_ip(master_params.name,
4761 result.Raise("Could not activate the master IP")
4764 class LUClusterDeactivateMasterIp(NoHooksLU):
4765 """Deactivate the master IP on the master node.
4768 def Exec(self, feedback_fn):
4769 """Deactivate the master IP.
4772 master_params = self.cfg.GetMasterNetworkParameters()
4773 ems = self.cfg.GetUseExternalMipScript()
4774 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4776 result.Raise("Could not deactivate the master IP")
4779 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4780 """Sleep and poll for an instance's disk to sync.
4783 if not instance.disks or disks is not None and not disks:
4784 return True
4786 disks = _ExpandCheckDisks(instance, disks)
4789 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4791 node = instance.primary_node
4794 lu.cfg.SetDiskID(dev, node)
4796 # TODO: Convert to utils.Retry
4799 degr_retries = 10 # in seconds, as we sleep 1 second each time
4803 cumul_degraded = False
4804 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4805 msg = rstats.fail_msg
4807 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4810 raise errors.RemoteError("Can't contact node %s for mirror data,"
4811 " aborting." % node)
4814 rstats = rstats.payload
4816 for i, mstat in enumerate(rstats):
4818 lu.LogWarning("Can't compute data for node %s/%s",
4819 node, disks[i].iv_name)
4822 cumul_degraded = (cumul_degraded or
4823 (mstat.is_degraded and mstat.sync_percent is None))
4824 if mstat.sync_percent is not None:
4826 if mstat.estimated_time is not None:
4827 rem_time = ("%s remaining (estimated)" %
4828 utils.FormatSeconds(mstat.estimated_time))
4829 max_time = mstat.estimated_time
4831 rem_time = "no time estimate"
4832 lu.LogInfo("- device %s: %5.2f%% done, %s",
4833 disks[i].iv_name, mstat.sync_percent, rem_time)
4835 # if we're done but degraded, let's do a few small retries, to
4836 # make sure we see a stable and not transient situation; therefore
4837 # we force restart of the loop
4838 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4839 logging.info("Degraded disks found, %d retries left", degr_retries)
4847 time.sleep(min(60, max_time))
4850 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4852 return not cumul_degraded
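# Example (assumed usage, not from the original source): callers typically do
#
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("disks are degraded")   # hypothetical handling
#
# since a True return value means no disk was left degraded; with oneshot=True
# the function roughly reports after a single status poll instead of waiting
# for the synchronisation to finish.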
4855 def _BlockdevFind(lu, node, dev, instance):
4856 """Wrapper around call_blockdev_find to annotate diskparams.
4858 @param lu: A reference to the lu object
4859 @param node: The node to call out
4860 @param dev: The device to find
4861 @param instance: The instance object the device belongs to
4862 @returns The result of the rpc call
4865 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4866 return lu.rpc.call_blockdev_find(node, disk)
4869 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4870 """Wrapper around L{_CheckDiskConsistencyInner}.
4873 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4874 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4878 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4880 """Check that mirrors are not degraded.
4882 @attention: The device has to be annotated already.
4884 The ldisk parameter, if True, will change the test from the
4885 is_degraded attribute (which represents overall non-ok status for
4886 the device(s)) to the ldisk (representing the local storage status).
4889 lu.cfg.SetDiskID(dev, node)
4893 if on_primary or dev.AssembleOnSecondary():
4894 rstats = lu.rpc.call_blockdev_find(node, dev)
4895 msg = rstats.fail_msg
4897 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4899 elif not rstats.payload:
4900 lu.LogWarning("Can't find disk on node %s", node)
4904 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4906 result = result and not rstats.payload.is_degraded
4909 for child in dev.children:
4910 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4916 class LUOobCommand(NoHooksLU):
4917 """Logical unit for OOB handling.
4921 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4923 def ExpandNames(self):
4924 """Gather locks we need.
4927 if self.op.node_names:
4928 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4929 lock_names = self.op.node_names
4931 lock_names = locking.ALL_SET
4933 self.needed_locks = {
4934 locking.LEVEL_NODE: lock_names,
4937 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4939 if not self.op.node_names:
4940 # Acquire node allocation lock only if all nodes are affected
4941 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4943 def CheckPrereq(self):
4944 """Check prerequisites.
4947 - the node exists in the configuration
4950 Any errors are signaled by raising errors.OpPrereqError.
4954 self.master_node = self.cfg.GetMasterNode()
4956 assert self.op.power_delay >= 0.0
4958 if self.op.node_names:
4959 if (self.op.command in self._SKIP_MASTER and
4960 self.master_node in self.op.node_names):
4961 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4962 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4964 if master_oob_handler:
4965 additional_text = ("run '%s %s %s' if you want to operate on the"
4966 " master regardless") % (master_oob_handler,
4970 additional_text = "it does not support out-of-band operations"
4972 raise errors.OpPrereqError(("Operating on the master node %s is not"
4973 " allowed for %s; %s") %
4974 (self.master_node, self.op.command,
4975 additional_text), errors.ECODE_INVAL)
4977 self.op.node_names = self.cfg.GetNodeList()
4978 if self.op.command in self._SKIP_MASTER:
4979 self.op.node_names.remove(self.master_node)
4981 if self.op.command in self._SKIP_MASTER:
4982 assert self.master_node not in self.op.node_names
4984 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4985 if node is None:
4986 raise errors.OpPrereqError("Node %s not found" % node_name,
4989 self.nodes.append(node)
4991 if (not self.op.ignore_status and
4992 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4993 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4994 " not marked offline") % node_name,
4997 def Exec(self, feedback_fn):
4998 """Execute OOB and return result if we expect any.
5001 master_node = self.master_node
5002 ret = []
5004 for idx, node in enumerate(utils.NiceSort(self.nodes,
5005 key=lambda node: node.name)):
5006 node_entry = [(constants.RS_NORMAL, node.name)]
5007 ret.append(node_entry)
5009 oob_program = _SupportsOob(self.cfg, node)
5011 if not oob_program:
5012 node_entry.append((constants.RS_UNAVAIL, None))
5013 continue
5015 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5016 self.op.command, oob_program, node.name)
5017 result = self.rpc.call_run_oob(master_node, oob_program,
5018 self.op.command, node.name,
5022 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5023 node.name, result.fail_msg)
5024 node_entry.append((constants.RS_NODATA, None))
5027 self._CheckPayload(result)
5028 except errors.OpExecError, err:
5029 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5031 node_entry.append((constants.RS_NODATA, None))
5033 if self.op.command == constants.OOB_HEALTH:
5034 # For health we should log important events
5035 for item, status in result.payload:
5036 if status in [constants.OOB_STATUS_WARNING,
5037 constants.OOB_STATUS_CRITICAL]:
5038 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5039 item, node.name, status)
5041 if self.op.command == constants.OOB_POWER_ON:
5042 node.powered = True
5043 elif self.op.command == constants.OOB_POWER_OFF:
5044 node.powered = False
5045 elif self.op.command == constants.OOB_POWER_STATUS:
5046 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5047 if powered != node.powered:
5048 logging.warning(("Recorded power state (%s) of node '%s' does not"
5049 " match actual power state (%s)"), node.powered,
5052 # For configuration changing commands we should update the node
5053 if self.op.command in (constants.OOB_POWER_ON,
5054 constants.OOB_POWER_OFF):
5055 self.cfg.Update(node, feedback_fn)
5057 node_entry.append((constants.RS_NORMAL, result.payload))
5059 if (self.op.command == constants.OOB_POWER_ON and
5060 idx < len(self.nodes) - 1):
5061 time.sleep(self.op.power_delay)
5063 return ret
5065 def _CheckPayload(self, result):
5066 """Checks if the payload is valid.
5068 @param result: RPC result
5069 @raises errors.OpExecError: If payload is not valid
5073 if self.op.command == constants.OOB_HEALTH:
5074 if not isinstance(result.payload, list):
5075 errs.append("command 'health' is expected to return a list but got %s" %
5076 type(result.payload))
5078 for item, status in result.payload:
5079 if status not in constants.OOB_STATUSES:
5080 errs.append("health item '%s' has invalid status '%s'" %
5083 if self.op.command == constants.OOB_POWER_STATUS:
5084 if not isinstance(result.payload, dict):
5085 errs.append("power-status is expected to return a dict but got %s" %
5086 type(result.payload))
5088 if self.op.command in [
5089 constants.OOB_POWER_ON,
5090 constants.OOB_POWER_OFF,
5091 constants.OOB_POWER_CYCLE,
5093 if result.payload is not None:
5094 errs.append("%s is expected to not return payload but got '%s'" %
5095 (self.op.command, result.payload))
5098 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5099 utils.CommaJoin(errs))
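# Example (hypothetical payloads, added for illustration): the per-command
# payload shapes checked above are roughly
#
#   OOB_HEALTH       -> [("PSU0", constants.OOB_STATUS_OK), ...]
#   OOB_POWER_STATUS -> {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON / OOB_POWER_OFF / OOB_POWER_CYCLE -> None (no payload)
#
# the item name "PSU0" is made up for the example.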
5102 class _OsQuery(_QueryBase):
5103 FIELDS = query.OS_FIELDS
5105 def ExpandNames(self, lu):
5106 # Lock all nodes in shared mode
5107 # Temporary removal of locks, should be reverted later
5108 # TODO: reintroduce locks when they are lighter-weight
5109 lu.needed_locks = {}
5110 #self.share_locks[locking.LEVEL_NODE] = 1
5111 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5113 # The following variables interact with _QueryBase._GetNames
5115 self.wanted = self.names
5117 self.wanted = locking.ALL_SET
5119 self.do_locking = self.use_locking
5121 def DeclareLocks(self, lu, level):
5125 def _DiagnoseByOS(rlist):
5126 """Remaps a per-node return list into an a per-os per-node dictionary
5128 @param rlist: a map with node names as keys and OS objects as values
5131 @return: a dictionary with osnames as keys and as value another
5132 map, with nodes as keys and tuples of (path, status, diagnose,
5133 variants, parameters, api_versions) as values, eg::
5135 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5136 (/srv/..., False, "invalid api")],
5137 "node2": [(/srv/..., True, "", [], [])]}
5142 # we build here the list of nodes that didn't fail the RPC (at RPC
5143 # level), so that nodes with a non-responding node daemon don't
5144 # make all OSes invalid
5145 good_nodes = [node_name for node_name in rlist
5146 if not rlist[node_name].fail_msg]
5147 for node_name, nr in rlist.items():
5148 if nr.fail_msg or not nr.payload:
5150 for (name, path, status, diagnose, variants,
5151 params, api_versions) in nr.payload:
5152 if name not in all_os:
5153 # build a list of nodes for this os containing empty lists
5154 # for each node in node_list
5156 for nname in good_nodes:
5157 all_os[name][nname] = []
5158 # convert params from [name, help] to (name, help)
5159 params = [tuple(v) for v in params]
5160 all_os[name][node_name].append((path, status, diagnose,
5161 variants, params, api_versions))
5164 def _GetQueryData(self, lu):
5165 """Computes the list of nodes and their attributes.
5168 # Locking is not used
5169 assert not (compat.any(lu.glm.is_owned(level)
5170 for level in locking.LEVELS
5171 if level != locking.LEVEL_CLUSTER) or
5172 self.do_locking or self.use_locking)
5174 valid_nodes = [node.name
5175 for node in lu.cfg.GetAllNodesInfo().values()
5176 if not node.offline and node.vm_capable]
5177 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5178 cluster = lu.cfg.GetClusterInfo()
5180 data = {}
5182 for (os_name, os_data) in pol.items():
5183 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5184 hidden=(os_name in cluster.hidden_os),
5185 blacklisted=(os_name in cluster.blacklisted_os))
5187 variants = set()
5188 parameters = set()
5189 api_versions = set()
5191 for idx, osl in enumerate(os_data.values()):
5192 info.valid = bool(info.valid and osl and osl[0][1])
5196 (node_variants, node_params, node_api) = osl[0][3:6]
5199 variants.update(node_variants)
5200 parameters.update(node_params)
5201 api_versions.update(node_api)
5203 # Filter out inconsistent values
5204 variants.intersection_update(node_variants)
5205 parameters.intersection_update(node_params)
5206 api_versions.intersection_update(node_api)
5208 info.variants = list(variants)
5209 info.parameters = list(parameters)
5210 info.api_versions = list(api_versions)
5212 data[os_name] = info
5214 # Prepare data in requested order
5215 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5216 if name in data]
5219 class LUOsDiagnose(NoHooksLU):
5220 """Logical unit for OS diagnose/query.
5226 def _BuildFilter(fields, names):
5227 """Builds a filter for querying OSes.
5230 name_filter = qlang.MakeSimpleFilter("name", names)
5232 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5233 # respective field is not requested
5234 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5235 for fname in ["hidden", "blacklisted"]
5236 if fname not in fields]
5237 if "valid" not in fields:
5238 status_filter.append([qlang.OP_TRUE, "valid"])
5241 status_filter.insert(0, qlang.OP_AND)
5243 status_filter = None
5245 if name_filter and status_filter:
5246 return [qlang.OP_AND, name_filter, status_filter]
5250 return status_filter
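# Example (hypothetical input; the exact output of qlang.MakeSimpleFilter is an
# assumption): for fields=["name"] and names=["debootstrap"] the filter built
# above would look roughly like
#
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debootstrap"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]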
5252 def CheckArguments(self):
5253 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5254 self.op.output_fields, False)
5256 def ExpandNames(self):
5257 self.oq.ExpandNames(self)
5259 def Exec(self, feedback_fn):
5260 return self.oq.OldStyleQuery(self)
5263 class _ExtStorageQuery(_QueryBase):
5264 FIELDS = query.EXTSTORAGE_FIELDS
5266 def ExpandNames(self, lu):
5267 # Lock all nodes in shared mode
5268 # Temporary removal of locks, should be reverted later
5269 # TODO: reintroduce locks when they are lighter-weight
5270 lu.needed_locks = {}
5271 #self.share_locks[locking.LEVEL_NODE] = 1
5272 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5274 # The following variables interact with _QueryBase._GetNames
5276 self.wanted = self.names
5278 self.wanted = locking.ALL_SET
5280 self.do_locking = self.use_locking
5282 def DeclareLocks(self, lu, level):
5286 def _DiagnoseByProvider(rlist):
5287 """Remaps a per-node return list into an a per-provider per-node dictionary
5289 @param rlist: a map with node names as keys and ExtStorage objects as values
5292 @return: a dictionary with extstorage providers as keys and as
5293 value another map, with nodes as keys and tuples of
5294 (path, status, diagnose, parameters) as values, eg::
5296 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5297 "node2": [(/srv/..., False, "missing file")]
5298 "node3": [(/srv/..., True, "", [])]
5303 # we build here the list of nodes that didn't fail the RPC (at RPC
5304 # level), so that nodes with a non-responding node daemon don't
5305 # make all OSes invalid
5306 good_nodes = [node_name for node_name in rlist
5307 if not rlist[node_name].fail_msg]
5308 for node_name, nr in rlist.items():
5309 if nr.fail_msg or not nr.payload:
5311 for (name, path, status, diagnose, params) in nr.payload:
5312 if name not in all_es:
5313 # build a list of nodes for this provider containing empty lists
5314 # for each node in node_list
5316 for nname in good_nodes:
5317 all_es[name][nname] = []
5318 # convert params from [name, help] to (name, help)
5319 params = [tuple(v) for v in params]
5320 all_es[name][node_name].append((path, status, diagnose, params))
5323 def _GetQueryData(self, lu):
5324 """Computes the list of nodes and their attributes.
5327 # Locking is not used
5328 assert not (compat.any(lu.glm.is_owned(level)
5329 for level in locking.LEVELS
5330 if level != locking.LEVEL_CLUSTER) or
5331 self.do_locking or self.use_locking)
5333 valid_nodes = [node.name
5334 for node in lu.cfg.GetAllNodesInfo().values()
5335 if not node.offline and node.vm_capable]
5336 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5340 nodegroup_list = lu.cfg.GetNodeGroupList()
5341 data = {}
5342 for (es_name, es_data) in pol.items():
5343 # For every provider compute the nodegroup validity.
5344 # To do this we need to check the validity of each node in es_data
5345 # and then construct the corresponding nodegroup dict:
5346 # { nodegroup1: status
5347 # nodegroup2: status
5348 # }
5349 ndgrp_data = {}
5350 for nodegroup in nodegroup_list:
5351 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5353 nodegroup_nodes = ndgrp.members
5354 nodegroup_name = ndgrp.name
5356 node_statuses = []
5357 for node in nodegroup_nodes:
5358 if node in valid_nodes:
5359 if es_data[node] != []:
5360 node_status = es_data[node][0][1]
5361 node_statuses.append(node_status)
5363 node_statuses.append(False)
5365 if False in node_statuses:
5366 ndgrp_data[nodegroup_name] = False
5368 ndgrp_data[nodegroup_name] = True
5370 # Compute the provider's parameters
5371 parameters = set()
5372 for idx, esl in enumerate(es_data.values()):
5373 valid = bool(esl and esl[0][1])
5377 node_params = esl[0][3]
5380 parameters.update(node_params)
5382 # Filter out inconsistent values
5383 parameters.intersection_update(node_params)
5385 params = list(parameters)
5387 # Now fill all the info for this provider
5388 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5389 nodegroup_status=ndgrp_data,
5392 data[es_name] = info
5394 # Prepare data in requested order
5395 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5396 if name in data]
5399 class LUExtStorageDiagnose(NoHooksLU):
5400 """Logical unit for ExtStorage diagnose/query.
5405 def CheckArguments(self):
5406 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5407 self.op.output_fields, False)
5409 def ExpandNames(self):
5410 self.eq.ExpandNames(self)
5412 def Exec(self, feedback_fn):
5413 return self.eq.OldStyleQuery(self)
5416 class LUNodeRemove(LogicalUnit):
5417 """Logical unit for removing a node.
5420 HPATH = "node-remove"
5421 HTYPE = constants.HTYPE_NODE
5423 def BuildHooksEnv(self):
5428 "OP_TARGET": self.op.node_name,
5429 "NODE_NAME": self.op.node_name,
5432 def BuildHooksNodes(self):
5433 """Build hooks nodes.
5435 This doesn't run on the target node in the pre phase as a failed
5436 node would then be impossible to remove.
5439 all_nodes = self.cfg.GetNodeList()
5441 all_nodes.remove(self.op.node_name)
5444 return (all_nodes, all_nodes)
5446 def CheckPrereq(self):
5447 """Check prerequisites.
5450 - the node exists in the configuration
5451 - it does not have primary or secondary instances
5452 - it's not the master
5454 Any errors are signaled by raising errors.OpPrereqError.
5457 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5458 node = self.cfg.GetNodeInfo(self.op.node_name)
5459 assert node is not None
5461 masternode = self.cfg.GetMasterNode()
5462 if node.name == masternode:
5463 raise errors.OpPrereqError("Node is the master node, failover to another"
5464 " node is required", errors.ECODE_INVAL)
5466 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5467 if node.name in instance.all_nodes:
5468 raise errors.OpPrereqError("Instance %s is still running on the node,"
5469 " please remove first" % instance_name,
5471 self.op.node_name = node.name
5474 def Exec(self, feedback_fn):
5475 """Removes the node from the cluster.
5479 logging.info("Stopping the node daemon and removing configs from node %s",
5482 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5484 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5487 # Promote nodes to master candidate as needed
5488 _AdjustCandidatePool(self, exceptions=[node.name])
5489 self.context.RemoveNode(node.name)
5491 # Run post hooks on the node before it's removed
5492 _RunPostHook(self, node.name)
5494 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5495 msg = result.fail_msg
5497 self.LogWarning("Errors encountered on the remote node while leaving"
5498 " the cluster: %s", msg)
5500 # Remove node from our /etc/hosts
5501 if self.cfg.GetClusterInfo().modify_etc_hosts:
5502 master_node = self.cfg.GetMasterNode()
5503 result = self.rpc.call_etc_hosts_modify(master_node,
5504 constants.ETC_HOSTS_REMOVE,
5506 result.Raise("Can't update hosts file with new host data")
5507 _RedistributeAncillaryFiles(self)
5510 class _NodeQuery(_QueryBase):
5511 FIELDS = query.NODE_FIELDS
5513 def ExpandNames(self, lu):
5514 lu.needed_locks = {}
5515 lu.share_locks = _ShareAll()
5518 self.wanted = _GetWantedNodes(lu, self.names)
5520 self.wanted = locking.ALL_SET
5522 self.do_locking = (self.use_locking and
5523 query.NQ_LIVE in self.requested_data)
5526 # If any non-static field is requested we need to lock the nodes
5527 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5528 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5530 def DeclareLocks(self, lu, level):
5533 def _GetQueryData(self, lu):
5534 """Computes the list of nodes and their attributes.
5537 all_info = lu.cfg.GetAllNodesInfo()
5539 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5541 # Gather data as requested
5542 if query.NQ_LIVE in self.requested_data:
5543 # filter out non-vm_capable nodes
5544 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5546 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5547 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5548 [lu.cfg.GetHypervisorType()], es_flags)
5549 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5550 for (name, nresult) in node_data.items()
5551 if not nresult.fail_msg and nresult.payload)
5555 if query.NQ_INST in self.requested_data:
5556 node_to_primary = dict([(name, set()) for name in nodenames])
5557 node_to_secondary = dict([(name, set()) for name in nodenames])
5559 inst_data = lu.cfg.GetAllInstancesInfo()
5561 for inst in inst_data.values():
5562 if inst.primary_node in node_to_primary:
5563 node_to_primary[inst.primary_node].add(inst.name)
5564 for secnode in inst.secondary_nodes:
5565 if secnode in node_to_secondary:
5566 node_to_secondary[secnode].add(inst.name)
5568 node_to_primary = None
5569 node_to_secondary = None
5571 if query.NQ_OOB in self.requested_data:
5572 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5573 for name, node in all_info.iteritems())
5577 if query.NQ_GROUP in self.requested_data:
5578 groups = lu.cfg.GetAllNodeGroupsInfo()
5582 return query.NodeQueryData([all_info[name] for name in nodenames],
5583 live_data, lu.cfg.GetMasterNode(),
5584 node_to_primary, node_to_secondary, groups,
5585 oob_support, lu.cfg.GetClusterInfo())
5588 class LUNodeQuery(NoHooksLU):
5589 """Logical unit for querying nodes.
5592 # pylint: disable=W0142
5595 def CheckArguments(self):
5596 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5597 self.op.output_fields, self.op.use_locking)
5599 def ExpandNames(self):
5600 self.nq.ExpandNames(self)
5602 def DeclareLocks(self, level):
5603 self.nq.DeclareLocks(self, level)
5605 def Exec(self, feedback_fn):
5606 return self.nq.OldStyleQuery(self)
5609 class LUNodeQueryvols(NoHooksLU):
5610 """Logical unit for getting volumes on node(s).
5614 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5615 _FIELDS_STATIC = utils.FieldSet("node")
5617 def CheckArguments(self):
5618 _CheckOutputFields(static=self._FIELDS_STATIC,
5619 dynamic=self._FIELDS_DYNAMIC,
5620 selected=self.op.output_fields)
5622 def ExpandNames(self):
5623 self.share_locks = _ShareAll()
5626 self.needed_locks = {
5627 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5630 self.needed_locks = {
5631 locking.LEVEL_NODE: locking.ALL_SET,
5632 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5635 def Exec(self, feedback_fn):
5636     """Computes the list of volumes on the nodes and their attributes.
5639 nodenames = self.owned_locks(locking.LEVEL_NODE)
5640 volumes = self.rpc.call_node_volumes(nodenames)
5642 ilist = self.cfg.GetAllInstancesInfo()
5643 vol2inst = _MapInstanceDisksToNodes(ilist.values())
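    # vol2inst maps (node_name, "vg/lv_name") to the owning instance's name;
    # it is used below to fill the "instance" output field ("-" when no
    # instance owns the volume)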
5646 for node in nodenames:
5647 nresult = volumes[node]
5650 msg = nresult.fail_msg
5652 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5655 node_vols = sorted(nresult.payload,
5656 key=operator.itemgetter("dev"))
5658 for vol in node_vols:
5660 for field in self.op.output_fields:
5663 elif field == "phys":
5667 elif field == "name":
5669 elif field == "size":
5670 val = int(float(vol["size"]))
5671 elif field == "instance":
5672 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5674 raise errors.ParameterError(field)
5675 node_output.append(str(val))
5677 output.append(node_output)
5682 class LUNodeQueryStorage(NoHooksLU):
5683 """Logical unit for getting information on storage units on node(s).
5686 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5689 def CheckArguments(self):
5690 _CheckOutputFields(static=self._FIELDS_STATIC,
5691 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5692 selected=self.op.output_fields)
5694 def ExpandNames(self):
5695 self.share_locks = _ShareAll()
5698 self.needed_locks = {
5699 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5702 self.needed_locks = {
5703 locking.LEVEL_NODE: locking.ALL_SET,
5704 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5707 def Exec(self, feedback_fn):
5708     """Computes the list of storage units on the nodes and their attributes.
5711 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5713 # Always get name to sort by
5714 if constants.SF_NAME in self.op.output_fields:
5715 fields = self.op.output_fields[:]
5717 fields = [constants.SF_NAME] + self.op.output_fields
5719     # Never ask for node or type, as they're only known to the LU
5720 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5721 while extra in fields:
5722 fields.remove(extra)
5724 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5725 name_idx = field_idx[constants.SF_NAME]
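    # field_idx maps each requested storage field to its column index in the
    # rows returned by call_storage_list; name_idx is the column later used
    # to sort the rows by name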
5727 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5728 data = self.rpc.call_storage_list(self.nodes,
5729 self.op.storage_type, st_args,
5730 self.op.name, fields)
5734 for node in utils.NiceSort(self.nodes):
5735 nresult = data[node]
5739 msg = nresult.fail_msg
5741 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5744 rows = dict([(row[name_idx], row) for row in nresult.payload])
5746 for name in utils.NiceSort(rows.keys()):
5751 for field in self.op.output_fields:
5752 if field == constants.SF_NODE:
5754 elif field == constants.SF_TYPE:
5755 val = self.op.storage_type
5756 elif field in field_idx:
5757 val = row[field_idx[field]]
5759 raise errors.ParameterError(field)
5768 class _InstanceQuery(_QueryBase):
5769 FIELDS = query.INSTANCE_FIELDS
5771 def ExpandNames(self, lu):
5772 lu.needed_locks = {}
5773 lu.share_locks = _ShareAll()
5776 self.wanted = _GetWantedInstances(lu, self.names)
5778 self.wanted = locking.ALL_SET
5780 self.do_locking = (self.use_locking and
5781 query.IQ_LIVE in self.requested_data)
5783 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5784 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5785 lu.needed_locks[locking.LEVEL_NODE] = []
5786 lu.needed_locks[locking.LEVEL_NETWORK] = []
5787 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5789 self.do_grouplocks = (self.do_locking and
5790 query.IQ_NODES in self.requested_data)
5792 def DeclareLocks(self, lu, level):
5794 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5795 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5797 # Lock all groups used by instances optimistically; this requires going
5798 # via the node before it's locked, requiring verification later on
5799 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5801 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5802 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5803 elif level == locking.LEVEL_NODE:
5804 lu._LockInstancesNodes() # pylint: disable=W0212
5806 elif level == locking.LEVEL_NETWORK:
5807 lu.needed_locks[locking.LEVEL_NETWORK] = \
5809 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5810 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5813 def _CheckGroupLocks(lu):
5814 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5815 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5817 # Check if node groups for locked instances are still correct
5818 for instance_name in owned_instances:
5819 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5821 def _GetQueryData(self, lu):
5822 """Computes the list of instances and their attributes.
5825 if self.do_grouplocks:
5826 self._CheckGroupLocks(lu)
5828 cluster = lu.cfg.GetClusterInfo()
5829 all_info = lu.cfg.GetAllInstancesInfo()
5831 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5833 instance_list = [all_info[name] for name in instance_names]
5834 nodes = frozenset(itertools.chain(*(inst.all_nodes
5835 for inst in instance_list)))
5836 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5839 wrongnode_inst = set()
5841 # Gather data as requested
5842 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5844 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5846 result = node_data[name]
5848 # offline nodes will be in both lists
5849 assert result.fail_msg
5850 offline_nodes.append(name)
5852 bad_nodes.append(name)
5853 elif result.payload:
5854 for inst in result.payload:
5855 if inst in all_info:
5856 if all_info[inst].primary_node == name:
5857 live_data.update(result.payload)
5859 wrongnode_inst.add(inst)
5861 # orphan instance; we don't list it here as we don't
5862 # handle this case yet in the output of instance listing
5863 logging.warning("Orphan instance '%s' found on node %s",
5865 # else no instance is alive
5869 if query.IQ_DISKUSAGE in self.requested_data:
5870 gmi = ganeti.masterd.instance
5871 disk_usage = dict((inst.name,
5872 gmi.ComputeDiskSize(inst.disk_template,
5873 [{constants.IDISK_SIZE: disk.size}
5874 for disk in inst.disks]))
5875 for inst in instance_list)
5879 if query.IQ_CONSOLE in self.requested_data:
5881 for inst in instance_list:
5882 if inst.name in live_data:
5883 # Instance is running
5884 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5886 consinfo[inst.name] = None
5887 assert set(consinfo.keys()) == set(instance_names)
5891 if query.IQ_NODES in self.requested_data:
5892 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5894 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5895 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5896 for uuid in set(map(operator.attrgetter("group"),
5902 if query.IQ_NETWORKS in self.requested_data:
5903 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5904 for i in instance_list))
5905 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5909 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5910 disk_usage, offline_nodes, bad_nodes,
5911 live_data, wrongnode_inst, consinfo,
5912 nodes, groups, networks)
5915 class LUQuery(NoHooksLU):
5916 """Query for resources/items of a certain kind.
5919 # pylint: disable=W0142
5922 def CheckArguments(self):
5923 qcls = _GetQueryImplementation(self.op.what)
5925 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5927 def ExpandNames(self):
5928 self.impl.ExpandNames(self)
5930 def DeclareLocks(self, level):
5931 self.impl.DeclareLocks(self, level)
5933 def Exec(self, feedback_fn):
5934 return self.impl.NewStyleQuery(self)
5937 class LUQueryFields(NoHooksLU):
5938 """Query for resources/items of a certain kind.
5941 # pylint: disable=W0142
5944 def CheckArguments(self):
5945 self.qcls = _GetQueryImplementation(self.op.what)
5947 def ExpandNames(self):
5948 self.needed_locks = {}
5950 def Exec(self, feedback_fn):
5951 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5954 class LUNodeModifyStorage(NoHooksLU):
5955 """Logical unit for modifying a storage volume on a node.
5960 def CheckArguments(self):
5961 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5963 storage_type = self.op.storage_type
5966 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5968       raise errors.OpPrereqError("Storage units of type '%s' cannot be"
5969 " modified" % storage_type,
5972 diff = set(self.op.changes.keys()) - modifiable
5974       raise errors.OpPrereqError("The following fields cannot be modified for"
5975 " storage units of type '%s': %r" %
5976 (storage_type, list(diff)),
5979 def ExpandNames(self):
5980 self.needed_locks = {
5981 locking.LEVEL_NODE: self.op.node_name,
5984 def Exec(self, feedback_fn):
5985     """Modifies a storage volume on the given node.
5988 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5989 result = self.rpc.call_storage_modify(self.op.node_name,
5990 self.op.storage_type, st_args,
5991 self.op.name, self.op.changes)
5992 result.Raise("Failed to modify storage unit '%s' on %s" %
5993 (self.op.name, self.op.node_name))
5996 class LUNodeAdd(LogicalUnit):
5997 """Logical unit for adding node to the cluster.
6001 HTYPE = constants.HTYPE_NODE
6002 _NFLAGS = ["master_capable", "vm_capable"]
6004 def CheckArguments(self):
6005 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
6006 # validate/normalize the node name
6007 self.hostname = netutils.GetHostname(name=self.op.node_name,
6008 family=self.primary_ip_family)
6009 self.op.node_name = self.hostname.name
6011 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
6012 raise errors.OpPrereqError("Cannot readd the master node",
6015 if self.op.readd and self.op.group:
6016 raise errors.OpPrereqError("Cannot pass a node group when a node is"
6017 " being readded", errors.ECODE_INVAL)
6019 def BuildHooksEnv(self):
6022 This will run on all nodes before, and on all nodes + the new node after.
6026 "OP_TARGET": self.op.node_name,
6027 "NODE_NAME": self.op.node_name,
6028 "NODE_PIP": self.op.primary_ip,
6029 "NODE_SIP": self.op.secondary_ip,
6030 "MASTER_CAPABLE": str(self.op.master_capable),
6031 "VM_CAPABLE": str(self.op.vm_capable),
6034 def BuildHooksNodes(self):
6035 """Build hooks nodes.
6038 # Exclude added node
6039 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6040 post_nodes = pre_nodes + [self.op.node_name, ]
6042 return (pre_nodes, post_nodes)
6044 def CheckPrereq(self):
6045 """Check prerequisites.
6048 - the new node is not already in the config
6050      - its parameters (single/dual homed) match the cluster
6052 Any errors are signaled by raising errors.OpPrereqError.
6056 hostname = self.hostname
6057 node = hostname.name
6058 primary_ip = self.op.primary_ip = hostname.ip
6059 if self.op.secondary_ip is None:
6060 if self.primary_ip_family == netutils.IP6Address.family:
6061 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
6062 " IPv4 address must be given as secondary",
6064 self.op.secondary_ip = primary_ip
6066 secondary_ip = self.op.secondary_ip
6067 if not netutils.IP4Address.IsValid(secondary_ip):
6068 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6069 " address" % secondary_ip, errors.ECODE_INVAL)
6071 node_list = cfg.GetNodeList()
6072 if not self.op.readd and node in node_list:
6073 raise errors.OpPrereqError("Node %s is already in the configuration" %
6074 node, errors.ECODE_EXISTS)
6075 elif self.op.readd and node not in node_list:
6076 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6079 self.changed_primary_ip = False
6081 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6082 if self.op.readd and node == existing_node_name:
6083 if existing_node.secondary_ip != secondary_ip:
6084 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6085 " address configuration as before",
6087 if existing_node.primary_ip != primary_ip:
6088 self.changed_primary_ip = True
6092 if (existing_node.primary_ip == primary_ip or
6093 existing_node.secondary_ip == primary_ip or
6094 existing_node.primary_ip == secondary_ip or
6095 existing_node.secondary_ip == secondary_ip):
6096 raise errors.OpPrereqError("New node ip address(es) conflict with"
6097 " existing node %s" % existing_node.name,
6098 errors.ECODE_NOTUNIQUE)
6100 # After this 'if' block, None is no longer a valid value for the
6101 # _capable op attributes
6103 old_node = self.cfg.GetNodeInfo(node)
6104 assert old_node is not None, "Can't retrieve locked node %s" % node
6105 for attr in self._NFLAGS:
6106 if getattr(self.op, attr) is None:
6107 setattr(self.op, attr, getattr(old_node, attr))
6109 for attr in self._NFLAGS:
6110 if getattr(self.op, attr) is None:
6111 setattr(self.op, attr, True)
6113 if self.op.readd and not self.op.vm_capable:
6114 pri, sec = cfg.GetNodeInstances(node)
6116 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6117 " flag set to false, but it already holds"
6118 " instances" % node,
6121 # check that the type of the node (single versus dual homed) is the
6122 # same as for the master
6123 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6124 master_singlehomed = myself.secondary_ip == myself.primary_ip
6125 newbie_singlehomed = secondary_ip == primary_ip
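    # a node is single-homed when its secondary IP equals its primary IP;
    # mixing single-homed and dual-homed nodes in one cluster is not allowed,
    # hence the checks below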
6126 if master_singlehomed != newbie_singlehomed:
6127 if master_singlehomed:
6128 raise errors.OpPrereqError("The master has no secondary ip but the"
6129 " new node has one",
6132 raise errors.OpPrereqError("The master has a secondary ip but the"
6133 " new node doesn't have one",
6136 # checks reachability
6137 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6138 raise errors.OpPrereqError("Node not reachable by ping",
6139 errors.ECODE_ENVIRON)
6141 if not newbie_singlehomed:
6142 # check reachability from my secondary ip to newbie's secondary ip
6143 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6144 source=myself.secondary_ip):
6145 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6146 " based ping to node daemon port",
6147 errors.ECODE_ENVIRON)
6154 if self.op.master_capable:
6155 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6157 self.master_candidate = False
6160 self.new_node = old_node
6162 node_group = cfg.LookupNodeGroup(self.op.group)
6163 self.new_node = objects.Node(name=node,
6164 primary_ip=primary_ip,
6165 secondary_ip=secondary_ip,
6166 master_candidate=self.master_candidate,
6167 offline=False, drained=False,
6168 group=node_group, ndparams={})
6170 if self.op.ndparams:
6171 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6172 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6173 "node", "cluster or group")
6175 if self.op.hv_state:
6176 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6178 if self.op.disk_state:
6179 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6181 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6182 # it a property on the base class.
6183 rpcrunner = rpc.DnsOnlyRunner()
6184 result = rpcrunner.call_version([node])[node]
6185 result.Raise("Can't get version information from node %s" % node)
6186 if constants.PROTOCOL_VERSION == result.payload:
6187 logging.info("Communication to node %s fine, sw version %s match",
6188 node, result.payload)
6190 raise errors.OpPrereqError("Version mismatch master version %s,"
6191 " node version %s" %
6192 (constants.PROTOCOL_VERSION, result.payload),
6193 errors.ECODE_ENVIRON)
6195 vg_name = cfg.GetVGName()
6196 if vg_name is not None:
6197 vparams = {constants.NV_PVLIST: [vg_name]}
6198 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6199 cname = self.cfg.GetClusterName()
6200 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6201 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6203 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6204 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6206 def Exec(self, feedback_fn):
6207 """Adds the new node to the cluster.
6210 new_node = self.new_node
6211 node = new_node.name
6213 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6216     # We are adding a new node, so we assume it's powered
6217 new_node.powered = True
6219 # for re-adds, reset the offline/drained/master-candidate flags;
6220 # we need to reset here, otherwise offline would prevent RPC calls
6221 # later in the procedure; this also means that if the re-add
6222 # fails, we are left with a non-offlined, broken node
6224 new_node.drained = new_node.offline = False # pylint: disable=W0201
6225 self.LogInfo("Readding a node, the offline/drained flags were reset")
6226 # if we demote the node, we do cleanup later in the procedure
6227 new_node.master_candidate = self.master_candidate
6228 if self.changed_primary_ip:
6229 new_node.primary_ip = self.op.primary_ip
6231 # copy the master/vm_capable flags
6232 for attr in self._NFLAGS:
6233 setattr(new_node, attr, getattr(self.op, attr))
6235 # notify the user about any possible mc promotion
6236 if new_node.master_candidate:
6237 self.LogInfo("Node will be a master candidate")
6239 if self.op.ndparams:
6240 new_node.ndparams = self.op.ndparams
6242 new_node.ndparams = {}
6244 if self.op.hv_state:
6245 new_node.hv_state_static = self.new_hv_state
6247 if self.op.disk_state:
6248 new_node.disk_state_static = self.new_disk_state
6250 # Add node to our /etc/hosts, and add key to known_hosts
6251 if self.cfg.GetClusterInfo().modify_etc_hosts:
6252 master_node = self.cfg.GetMasterNode()
6253 result = self.rpc.call_etc_hosts_modify(master_node,
6254 constants.ETC_HOSTS_ADD,
6257 result.Raise("Can't update hosts file with new host data")
6259 if new_node.secondary_ip != new_node.primary_ip:
6260 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6263 node_verify_list = [self.cfg.GetMasterNode()]
6264 node_verify_param = {
6265 constants.NV_NODELIST: ([node], {}),
6266 # TODO: do a node-net-test as well?
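      # NV_NODELIST makes the verifying node (here the master) perform an
      # ssh/hostname check against the newly added node; any failure is
      # reported through feedback_fn below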
6269 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6270 self.cfg.GetClusterName())
6271 for verifier in node_verify_list:
6272 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6273 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6275 for failed in nl_payload:
6276 feedback_fn("ssh/hostname verification failed"
6277 " (checking from %s): %s" %
6278 (verifier, nl_payload[failed]))
6279 raise errors.OpExecError("ssh/hostname verification failed")
6282 _RedistributeAncillaryFiles(self)
6283 self.context.ReaddNode(new_node)
6284 # make sure we redistribute the config
6285 self.cfg.Update(new_node, feedback_fn)
6286 # and make sure the new node will not have old files around
6287 if not new_node.master_candidate:
6288 result = self.rpc.call_node_demote_from_mc(new_node.name)
6289 msg = result.fail_msg
6291 self.LogWarning("Node failed to demote itself from master"
6292 " candidate status: %s" % msg)
6294 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6295 additional_vm=self.op.vm_capable)
6296 self.context.AddNode(new_node, self.proc.GetECId())
6299 class LUNodeSetParams(LogicalUnit):
6300 """Modifies the parameters of a node.
6302 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6303 to the node role (as _ROLE_*)
6304 @cvar _R2F: a dictionary from node role to tuples of flags
6305 @cvar _FLAGS: a list of attribute names corresponding to the flags
6308 HPATH = "node-modify"
6309 HTYPE = constants.HTYPE_NODE
6311 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6313 (True, False, False): _ROLE_CANDIDATE,
6314 (False, True, False): _ROLE_DRAINED,
6315 (False, False, True): _ROLE_OFFLINE,
6316 (False, False, False): _ROLE_REGULAR,
6318 _R2F = dict((v, k) for k, v in _F2R.items())
6319 _FLAGS = ["master_candidate", "drained", "offline"]
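  # Worked example: a node with (master_candidate=True, drained=False,
  # offline=False) maps to _F2R[(True, False, False)] == _ROLE_CANDIDATE,
  # and conversely _R2F[_ROLE_OFFLINE] == (False, False, True)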
6321 def CheckArguments(self):
6322 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6323 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6324 self.op.master_capable, self.op.vm_capable,
6325 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6327 if all_mods.count(None) == len(all_mods):
6328 raise errors.OpPrereqError("Please pass at least one modification",
6330 if all_mods.count(True) > 1:
6331 raise errors.OpPrereqError("Can't set the node into more than one"
6332 " state at the same time",
6335 # Boolean value that tells us whether we might be demoting from MC
6336 self.might_demote = (self.op.master_candidate is False or
6337 self.op.offline is True or
6338 self.op.drained is True or
6339 self.op.master_capable is False)
6341 if self.op.secondary_ip:
6342 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6343 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6344 " address" % self.op.secondary_ip,
6347 self.lock_all = self.op.auto_promote and self.might_demote
6348 self.lock_instances = self.op.secondary_ip is not None
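    # lock_all: auto-promotion may have to touch any node, so everything gets
    # locked; lock_instances: changing the secondary IP requires examining the
    # instances that use this node (see _InstanceFilter and CheckPrereq)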
6350 def _InstanceFilter(self, instance):
6351 """Filter for getting affected instances.
6354 return (instance.disk_template in constants.DTS_INT_MIRROR and
6355 self.op.node_name in instance.all_nodes)
6357 def ExpandNames(self):
6359 self.needed_locks = {
6360 locking.LEVEL_NODE: locking.ALL_SET,
6362 # Block allocations when all nodes are locked
6363 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6366 self.needed_locks = {
6367 locking.LEVEL_NODE: self.op.node_name,
6370 # Since modifying a node can have severe effects on currently running
6371 # operations the resource lock is at least acquired in shared mode
6372 self.needed_locks[locking.LEVEL_NODE_RES] = \
6373 self.needed_locks[locking.LEVEL_NODE]
6375 # Get all locks except nodes in shared mode; they are not used for anything
6376 # but read-only access
6377 self.share_locks = _ShareAll()
6378 self.share_locks[locking.LEVEL_NODE] = 0
6379 self.share_locks[locking.LEVEL_NODE_RES] = 0
6380 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6382 if self.lock_instances:
6383 self.needed_locks[locking.LEVEL_INSTANCE] = \
6384 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6386 def BuildHooksEnv(self):
6389 This runs on the master node.
6393 "OP_TARGET": self.op.node_name,
6394 "MASTER_CANDIDATE": str(self.op.master_candidate),
6395 "OFFLINE": str(self.op.offline),
6396 "DRAINED": str(self.op.drained),
6397 "MASTER_CAPABLE": str(self.op.master_capable),
6398 "VM_CAPABLE": str(self.op.vm_capable),
6401 def BuildHooksNodes(self):
6402 """Build hooks nodes.
6405 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6408 def CheckPrereq(self):
6409 """Check prerequisites.
6411 This only checks the instance list against the existing names.
6414 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6416 if self.lock_instances:
6417 affected_instances = \
6418 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6420 # Verify instance locks
6421 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6422 wanted_instances = frozenset(affected_instances.keys())
6423 if wanted_instances - owned_instances:
6424 raise errors.OpPrereqError("Instances affected by changing node %s's"
6425 " secondary IP address have changed since"
6426 " locks were acquired, wanted '%s', have"
6427 " '%s'; retry the operation" %
6429 utils.CommaJoin(wanted_instances),
6430 utils.CommaJoin(owned_instances)),
6433 affected_instances = None
6435 if (self.op.master_candidate is not None or
6436 self.op.drained is not None or
6437 self.op.offline is not None):
6438 # we can't change the master's node flags
6439 if self.op.node_name == self.cfg.GetMasterNode():
6440 raise errors.OpPrereqError("The master role can be changed"
6441 " only via master-failover",
6444 if self.op.master_candidate and not node.master_capable:
6445 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6446 " it a master candidate" % node.name,
6449 if self.op.vm_capable is False:
6450 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6452 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6453 " the vm_capable flag" % node.name,
6456 if node.master_candidate and self.might_demote and not self.lock_all:
6457 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6458       # check if, after removing the current node, we're missing master candidates
6460 (mc_remaining, mc_should, _) = \
6461 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6462 if mc_remaining < mc_should:
6463 raise errors.OpPrereqError("Not enough master candidates, please"
6464 " pass auto promote option to allow"
6465 " promotion (--auto-promote or RAPI"
6466 " auto_promote=True)", errors.ECODE_STATE)
6468 self.old_flags = old_flags = (node.master_candidate,
6469 node.drained, node.offline)
6470 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6471 self.old_role = old_role = self._F2R[old_flags]
6473 # Check for ineffective changes
6474 for attr in self._FLAGS:
6475 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6476 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6477 setattr(self.op, attr, None)
6479 # Past this point, any flag change to False means a transition
6480 # away from the respective state, as only real changes are kept
6482 # TODO: We might query the real power state if it supports OOB
6483 if _SupportsOob(self.cfg, node):
6484 if self.op.offline is False and not (node.powered or
6485 self.op.powered is True):
6486 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6487 " offline status can be reset") %
6488 self.op.node_name, errors.ECODE_STATE)
6489 elif self.op.powered is not None:
6490 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6491 " as it does not support out-of-band"
6492 " handling") % self.op.node_name,
6495     # If we're being de-offlined, un-drained or made master-capable, we'll promote ourselves to master candidate if needed
6496 if (self.op.drained is False or self.op.offline is False or
6497 (self.op.master_capable and not node.master_capable)):
6498 if _DecideSelfPromotion(self):
6499 self.op.master_candidate = True
6500 self.LogInfo("Auto-promoting node to master candidate")
6502 # If we're no longer master capable, we'll demote ourselves from MC
6503 if self.op.master_capable is False and node.master_candidate:
6504 self.LogInfo("Demoting from master candidate")
6505 self.op.master_candidate = False
6508 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6509 if self.op.master_candidate:
6510 new_role = self._ROLE_CANDIDATE
6511 elif self.op.drained:
6512 new_role = self._ROLE_DRAINED
6513 elif self.op.offline:
6514 new_role = self._ROLE_OFFLINE
6515 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6516 # False is still in new flags, which means we're un-setting (the
6518 new_role = self._ROLE_REGULAR
6519 else: # no new flags, nothing, keep old role
6522 self.new_role = new_role
6524 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6525 # Trying to transition out of offline status
6526 result = self.rpc.call_version([node.name])[node.name]
6528 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6529 " to report its version: %s" %
6530 (node.name, result.fail_msg),
6533 self.LogWarning("Transitioning node from offline to online state"
6534 " without using re-add. Please make sure the node"
6537 # When changing the secondary ip, verify if this is a single-homed to
6538 # multi-homed transition or vice versa, and apply the relevant
6540 if self.op.secondary_ip:
6541 # Ok even without locking, because this can't be changed by any LU
6542 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6543 master_singlehomed = master.secondary_ip == master.primary_ip
6544 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6545 if self.op.force and node.name == master.name:
6546 self.LogWarning("Transitioning from single-homed to multi-homed"
6547 " cluster; all nodes will require a secondary IP"
6550 raise errors.OpPrereqError("Changing the secondary ip on a"
6551 " single-homed cluster requires the"
6552 " --force option to be passed, and the"
6553 " target node to be the master",
6555 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6556 if self.op.force and node.name == master.name:
6557 self.LogWarning("Transitioning from multi-homed to single-homed"
6558 " cluster; secondary IP addresses will have to be"
6561 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6562 " same as the primary IP on a multi-homed"
6563 " cluster, unless the --force option is"
6564 " passed, and the target node is the"
6565 " master", errors.ECODE_INVAL)
6567 assert not (frozenset(affected_instances) -
6568 self.owned_locks(locking.LEVEL_INSTANCE))
6571 if affected_instances:
6572 msg = ("Cannot change secondary IP address: offline node has"
6573 " instances (%s) configured to use it" %
6574 utils.CommaJoin(affected_instances.keys()))
6575 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6577 # On online nodes, check that no instances are running, and that
6578 # the node has the new ip and we can reach it.
6579 for instance in affected_instances.values():
6580 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6581 msg="cannot change secondary ip")
6583 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6584 if master.name != node.name:
6585 # check reachability from master secondary ip to new secondary ip
6586 if not netutils.TcpPing(self.op.secondary_ip,
6587 constants.DEFAULT_NODED_PORT,
6588 source=master.secondary_ip):
6589 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6590 " based ping to node daemon port",
6591 errors.ECODE_ENVIRON)
6593 if self.op.ndparams:
6594 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6595 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6596 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6597 "node", "cluster or group")
6598 self.new_ndparams = new_ndparams
6600 if self.op.hv_state:
6601 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6602 self.node.hv_state_static)
6604 if self.op.disk_state:
6605 self.new_disk_state = \
6606 _MergeAndVerifyDiskState(self.op.disk_state,
6607 self.node.disk_state_static)
6609 def Exec(self, feedback_fn):
6614 old_role = self.old_role
6615 new_role = self.new_role
6619 if self.op.ndparams:
6620 node.ndparams = self.new_ndparams
6622 if self.op.powered is not None:
6623 node.powered = self.op.powered
6625 if self.op.hv_state:
6626 node.hv_state_static = self.new_hv_state
6628 if self.op.disk_state:
6629 node.disk_state_static = self.new_disk_state
6631 for attr in ["master_capable", "vm_capable"]:
6632 val = getattr(self.op, attr)
6634 setattr(node, attr, val)
6635 result.append((attr, str(val)))
6637 if new_role != old_role:
6638 # Tell the node to demote itself, if no longer MC and not offline
6639 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6640 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6642 self.LogWarning("Node failed to demote itself: %s", msg)
6644 new_flags = self._R2F[new_role]
6645 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6647 result.append((desc, str(nf)))
6648 (node.master_candidate, node.drained, node.offline) = new_flags
6650     # we locked all nodes, so we adjust the candidate pool before updating this node
6652 _AdjustCandidatePool(self, [node.name])
6654 if self.op.secondary_ip:
6655 node.secondary_ip = self.op.secondary_ip
6656 result.append(("secondary_ip", self.op.secondary_ip))
6658 # this will trigger configuration file update, if needed
6659 self.cfg.Update(node, feedback_fn)
6661 # this will trigger job queue propagation or cleanup if the mc
6663 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6664 self.context.ReaddNode(node)
6669 class LUNodePowercycle(NoHooksLU):
6670 """Powercycles a node.
6675 def CheckArguments(self):
6676 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6677 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6678 raise errors.OpPrereqError("The node is the master and the force"
6679 " parameter was not set",
6682 def ExpandNames(self):
6683 """Locking for PowercycleNode.
6685 This is a last-resort option and shouldn't block on other
6686 jobs. Therefore, we grab no locks.
6689 self.needed_locks = {}
6691 def Exec(self, feedback_fn):
6695 result = self.rpc.call_node_powercycle(self.op.node_name,
6696 self.cfg.GetHypervisorType())
6697 result.Raise("Failed to schedule the reboot")
6698 return result.payload
6701 class LUClusterQuery(NoHooksLU):
6702 """Query cluster configuration.
6707 def ExpandNames(self):
6708 self.needed_locks = {}
6710 def Exec(self, feedback_fn):
6711 """Return cluster config.
6714 cluster = self.cfg.GetClusterInfo()
6717 # Filter just for enabled hypervisors
6718 for os_name, hv_dict in cluster.os_hvp.items():
6719 os_hvp[os_name] = {}
6720 for hv_name, hv_params in hv_dict.items():
6721 if hv_name in cluster.enabled_hypervisors:
6722 os_hvp[os_name][hv_name] = hv_params
6724 # Convert ip_family to ip_version
6725 primary_ip_version = constants.IP4_VERSION
6726 if cluster.primary_ip_family == netutils.IP6Address.family:
6727 primary_ip_version = constants.IP6_VERSION
6730 "software_version": constants.RELEASE_VERSION,
6731 "protocol_version": constants.PROTOCOL_VERSION,
6732 "config_version": constants.CONFIG_VERSION,
6733 "os_api_version": max(constants.OS_API_VERSIONS),
6734 "export_version": constants.EXPORT_VERSION,
6735 "architecture": runtime.GetArchInfo(),
6736 "name": cluster.cluster_name,
6737 "master": cluster.master_node,
6738 "default_hypervisor": cluster.primary_hypervisor,
6739 "enabled_hypervisors": cluster.enabled_hypervisors,
6740 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6741 for hypervisor_name in cluster.enabled_hypervisors]),
6743 "beparams": cluster.beparams,
6744 "osparams": cluster.osparams,
6745 "ipolicy": cluster.ipolicy,
6746 "nicparams": cluster.nicparams,
6747 "ndparams": cluster.ndparams,
6748 "diskparams": cluster.diskparams,
6749 "candidate_pool_size": cluster.candidate_pool_size,
6750 "master_netdev": cluster.master_netdev,
6751 "master_netmask": cluster.master_netmask,
6752 "use_external_mip_script": cluster.use_external_mip_script,
6753 "volume_group_name": cluster.volume_group_name,
6754 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6755 "file_storage_dir": cluster.file_storage_dir,
6756 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6757 "maintain_node_health": cluster.maintain_node_health,
6758 "ctime": cluster.ctime,
6759 "mtime": cluster.mtime,
6760 "uuid": cluster.uuid,
6761 "tags": list(cluster.GetTags()),
6762 "uid_pool": cluster.uid_pool,
6763 "default_iallocator": cluster.default_iallocator,
6764 "reserved_lvs": cluster.reserved_lvs,
6765 "primary_ip_version": primary_ip_version,
6766 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6767 "hidden_os": cluster.hidden_os,
6768 "blacklisted_os": cluster.blacklisted_os,
6774 class LUClusterConfigQuery(NoHooksLU):
6775 """Return configuration values.
6780 def CheckArguments(self):
6781 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6783 def ExpandNames(self):
6784 self.cq.ExpandNames(self)
6786 def DeclareLocks(self, level):
6787 self.cq.DeclareLocks(self, level)
6789 def Exec(self, feedback_fn):
6790 result = self.cq.OldStyleQuery(self)
6792 assert len(result) == 1
6797 class _ClusterQuery(_QueryBase):
6798 FIELDS = query.CLUSTER_FIELDS
6800 #: Do not sort (there is only one item)
6803 def ExpandNames(self, lu):
6804 lu.needed_locks = {}
6806 # The following variables interact with _QueryBase._GetNames
6807 self.wanted = locking.ALL_SET
6808 self.do_locking = self.use_locking
6811       raise errors.OpPrereqError("Cannot use locking for cluster queries",
6814 def DeclareLocks(self, lu, level):
6817 def _GetQueryData(self, lu):
6818     """Computes the cluster data for the query.
6821 # Locking is not used
6822 assert not (compat.any(lu.glm.is_owned(level)
6823 for level in locking.LEVELS
6824 if level != locking.LEVEL_CLUSTER) or
6825 self.do_locking or self.use_locking)
6827 if query.CQ_CONFIG in self.requested_data:
6828 cluster = lu.cfg.GetClusterInfo()
6830 cluster = NotImplemented
6832 if query.CQ_QUEUE_DRAINED in self.requested_data:
6833 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
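      # the job queue is considered drained iff the drain flag file exists on
      # this node (the master, where cluster queries are executed)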
6835 drain_flag = NotImplemented
6837 if query.CQ_WATCHER_PAUSE in self.requested_data:
6838 master_name = lu.cfg.GetMasterNode()
6840 result = lu.rpc.call_get_watcher_pause(master_name)
6841 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6844 watcher_pause = result.payload
6846 watcher_pause = NotImplemented
6848 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6851 class LUInstanceActivateDisks(NoHooksLU):
6852 """Bring up an instance's disks.
6857 def ExpandNames(self):
6858 self._ExpandAndLockInstance()
6859 self.needed_locks[locking.LEVEL_NODE] = []
6860 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6862 def DeclareLocks(self, level):
6863 if level == locking.LEVEL_NODE:
6864 self._LockInstancesNodes()
6866 def CheckPrereq(self):
6867 """Check prerequisites.
6869 This checks that the instance is in the cluster.
6872 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6873 assert self.instance is not None, \
6874 "Cannot retrieve locked instance %s" % self.op.instance_name
6875 _CheckNodeOnline(self, self.instance.primary_node)
6877 def Exec(self, feedback_fn):
6878 """Activate the disks.
6881 disks_ok, disks_info = \
6882 _AssembleInstanceDisks(self, self.instance,
6883 ignore_size=self.op.ignore_size)
6885 raise errors.OpExecError("Cannot activate block devices")
6887 if self.op.wait_for_sync:
6888 if not _WaitForSync(self, self.instance):
6889 raise errors.OpExecError("Some disks of the instance are degraded!")
6894 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6896 """Prepare the block devices for an instance.
6898 This sets up the block devices on all nodes.
6900 @type lu: L{LogicalUnit}
6901 @param lu: the logical unit on whose behalf we execute
6902 @type instance: L{objects.Instance}
6903 @param instance: the instance for whose disks we assemble
6904 @type disks: list of L{objects.Disk} or None
6905 @param disks: which disks to assemble (or all, if None)
6906 @type ignore_secondaries: boolean
6907 @param ignore_secondaries: if true, errors on secondary nodes
6908 won't result in an error return from the function
6909 @type ignore_size: boolean
6910 @param ignore_size: if true, the current known size of the disk
6911 will not be used during the disk activation, useful for cases
6912 when the size is wrong
6913   @return: a tuple (disks_ok, device_info); disks_ok is False if the
6914       operation failed, otherwise device_info is a list of (host,
6915       instance_visible_name, node_visible_name) tuples with the mapping from node devices to instance devices
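  Example return value (a sketch with hypothetical names):
    (True, [("node1.example.com", "disk/0", "/dev/drbd0")])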
6920 iname = instance.name
6921 disks = _ExpandCheckDisks(instance, disks)
6923   # With the two-pass mechanism we try to reduce the window of
6924   # opportunity for the race condition of switching DRBD to primary
6925   # before the handshake occurred, but we do not eliminate it
6927 # The proper fix would be to wait (with some limits) until the
6928 # connection has been made and drbd transitions from WFConnection
6929 # into any other network-connected state (Connected, SyncTarget,
6932 # 1st pass, assemble on all nodes in secondary mode
6933 for idx, inst_disk in enumerate(disks):
6934 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6936 node_disk = node_disk.Copy()
6937 node_disk.UnsetSize()
6938 lu.cfg.SetDiskID(node_disk, node)
6939 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6941 msg = result.fail_msg
6943 is_offline_secondary = (node in instance.secondary_nodes and
6945 lu.LogWarning("Could not prepare block device %s on node %s"
6946 " (is_primary=False, pass=1): %s",
6947 inst_disk.iv_name, node, msg)
6948 if not (ignore_secondaries or is_offline_secondary):
6951 # FIXME: race condition on drbd migration to primary
6953 # 2nd pass, do only the primary node
6954 for idx, inst_disk in enumerate(disks):
6957 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6958 if node != instance.primary_node:
6961 node_disk = node_disk.Copy()
6962 node_disk.UnsetSize()
6963 lu.cfg.SetDiskID(node_disk, node)
6964 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6966 msg = result.fail_msg
6968 lu.LogWarning("Could not prepare block device %s on node %s"
6969 " (is_primary=True, pass=2): %s",
6970 inst_disk.iv_name, node, msg)
6973 dev_path = result.payload
6975 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6977 # leave the disks configured for the primary node
6978 # this is a workaround that would be fixed better by
6979 # improving the logical/physical id handling
6981 lu.cfg.SetDiskID(disk, instance.primary_node)
6983 return disks_ok, device_info
6986 def _StartInstanceDisks(lu, instance, force):
6987 """Start the disks of an instance.
6990 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6991 ignore_secondaries=force)
6993 _ShutdownInstanceDisks(lu, instance)
6994 if force is not None and not force:
6996 hint=("If the message above refers to a secondary node,"
6997 " you can retry the operation using '--force'"))
6998 raise errors.OpExecError("Disk consistency error")
7001 class LUInstanceDeactivateDisks(NoHooksLU):
7002 """Shutdown an instance's disks.
7007 def ExpandNames(self):
7008 self._ExpandAndLockInstance()
7009 self.needed_locks[locking.LEVEL_NODE] = []
7010 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7012 def DeclareLocks(self, level):
7013 if level == locking.LEVEL_NODE:
7014 self._LockInstancesNodes()
7016 def CheckPrereq(self):
7017 """Check prerequisites.
7019 This checks that the instance is in the cluster.
7022 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7023 assert self.instance is not None, \
7024 "Cannot retrieve locked instance %s" % self.op.instance_name
7026 def Exec(self, feedback_fn):
7027 """Deactivate the disks
7030 instance = self.instance
7032 _ShutdownInstanceDisks(self, instance)
7034 _SafeShutdownInstanceDisks(self, instance)
7037 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7038 """Shutdown block devices of an instance.
7040 This function checks if an instance is running, before calling
7041 _ShutdownInstanceDisks.
7044 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7045 _ShutdownInstanceDisks(lu, instance, disks=disks)
7048 def _ExpandCheckDisks(instance, disks):
7049 """Return the instance disks selected by the disks list
7051 @type disks: list of L{objects.Disk} or None
7052 @param disks: selected disks
7053 @rtype: list of L{objects.Disk}
7054 @return: selected instance disks to act on
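  For example, C{_ExpandCheckDisks(instance, None)} simply returns
  C{instance.disks} unchanged.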
7058 return instance.disks
7060 if not set(disks).issubset(instance.disks):
7061 raise errors.ProgrammerError("Can only act on disks belonging to the"
7066 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7067 """Shutdown block devices of an instance.
7069 This does the shutdown on all nodes of the instance.
7071   If ignore_primary is false, errors on the primary node are not ignored and cause the shutdown to be reported as failed.
7076 disks = _ExpandCheckDisks(instance, disks)
7079 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7080 lu.cfg.SetDiskID(top_disk, node)
7081 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7082 msg = result.fail_msg
7084 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7085 disk.iv_name, node, msg)
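      # an error only counts as a failure if it happened on the primary node
      # without ignore_primary, or on a non-primary node that is not offline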
7086 if ((node == instance.primary_node and not ignore_primary) or
7087 (node != instance.primary_node and not result.offline)):
7092 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7093 """Checks if a node has enough free memory.
7095 This function checks if a given node has the needed amount of free
7096 memory. In case the node has less memory or we cannot get the
7097 information from the node, this function raises an OpPrereqError
7100 @type lu: C{LogicalUnit}
7101 @param lu: a logical unit from which we get configuration data
7103 @param node: the node to check
7104 @type reason: C{str}
7105 @param reason: string to use in the error message
7106 @type requested: C{int}
7107 @param requested: the amount of memory in MiB to check for
7108 @type hypervisor_name: C{str}
7109 @param hypervisor_name: the hypervisor to ask for memory stats
7111 @return: node current free memory
7112 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7113 we cannot check the node
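  A usage sketch (mirroring LUInstanceStartup.CheckPrereq):
    _CheckNodeFreeMemory(self, instance.primary_node,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MINMEM], instance.hypervisor)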
7116 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7117 nodeinfo[node].Raise("Can't get data from node %s" % node,
7118 prereq=True, ecode=errors.ECODE_ENVIRON)
7119 (_, _, (hv_info, )) = nodeinfo[node].payload
7121 free_mem = hv_info.get("memory_free", None)
7122 if not isinstance(free_mem, int):
7123 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7124 " was '%s'" % (node, free_mem),
7125 errors.ECODE_ENVIRON)
7126 if requested > free_mem:
7127 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7128 " needed %s MiB, available %s MiB" %
7129 (node, reason, requested, free_mem),
7134 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7135 """Checks if nodes have enough free disk space in all the VGs.
7137 This function checks if all given nodes have the needed amount of
7138 free disk. In case any node has less disk or we cannot get the
7139 information from the node, this function raises an OpPrereqError
7142 @type lu: C{LogicalUnit}
7143 @param lu: a logical unit from which we get configuration data
7144 @type nodenames: C{list}
7145 @param nodenames: the list of node names to check
7146 @type req_sizes: C{dict}
7147 @param req_sizes: the hash of vg and corresponding amount of disk in
7149 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7150 or we cannot check the node
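  Example (a sketch): C{req_sizes = {"xenvg": 2048}} checks for 2048 MiB of
  free space in volume group "xenvg" on every node in C{nodenames}.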
7153 for vg, req_size in req_sizes.items():
7154 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
7157 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7158 """Checks if nodes have enough free disk space in the specified VG.
7160 This function checks if all given nodes have the needed amount of
7161 free disk. In case any node has less disk or we cannot get the
7162 information from the node, this function raises an OpPrereqError
7165 @type lu: C{LogicalUnit}
7166 @param lu: a logical unit from which we get configuration data
7167 @type nodenames: C{list}
7168 @param nodenames: the list of node names to check
7170 @param vg: the volume group to check
7171 @type requested: C{int}
7172 @param requested: the amount of disk in MiB to check for
7173 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7174 or we cannot check the node
7177 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7178 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7179 for node in nodenames:
7180 info = nodeinfo[node]
7181 info.Raise("Cannot get current information from node %s" % node,
7182 prereq=True, ecode=errors.ECODE_ENVIRON)
7183 (_, (vg_info, ), _) = info.payload
7184 vg_free = vg_info.get("vg_free", None)
7185 if not isinstance(vg_free, int):
7186 raise errors.OpPrereqError("Can't compute free disk space on node"
7187 " %s for vg %s, result was '%s'" %
7188 (node, vg, vg_free), errors.ECODE_ENVIRON)
7189 if requested > vg_free:
7190 raise errors.OpPrereqError("Not enough disk space on target node %s"
7191 " vg %s: required %d MiB, available %d MiB" %
7192 (node, vg, requested, vg_free),
7196 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7197 """Checks if nodes have enough physical CPUs
7199 This function checks if all given nodes have the needed number of
7200 physical CPUs. In case any node has less CPUs or we cannot get the
7201 information from the node, this function raises an OpPrereqError
7204 @type lu: C{LogicalUnit}
7205 @param lu: a logical unit from which we get configuration data
7206 @type nodenames: C{list}
7207 @param nodenames: the list of node names to check
7208 @type requested: C{int}
7209 @param requested: the minimum acceptable number of physical CPUs
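  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to query for CPU information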
7210 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7211 or we cannot check the node
7214 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7215 for node in nodenames:
7216 info = nodeinfo[node]
7217 info.Raise("Cannot get current information from node %s" % node,
7218 prereq=True, ecode=errors.ECODE_ENVIRON)
7219 (_, _, (hv_info, )) = info.payload
7220 num_cpus = hv_info.get("cpu_total", None)
7221 if not isinstance(num_cpus, int):
7222 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7223 " on node %s, result was '%s'" %
7224 (node, num_cpus), errors.ECODE_ENVIRON)
7225 if requested > num_cpus:
7226 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7227 "required" % (node, num_cpus, requested),
7231 class LUInstanceStartup(LogicalUnit):
7232 """Starts an instance.
7235 HPATH = "instance-start"
7236 HTYPE = constants.HTYPE_INSTANCE
7239 def CheckArguments(self):
7241 if self.op.beparams:
7242 # fill the beparams dict
7243 objects.UpgradeBeParams(self.op.beparams)
7244 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7246 def ExpandNames(self):
7247 self._ExpandAndLockInstance()
7248 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7250 def DeclareLocks(self, level):
7251 if level == locking.LEVEL_NODE_RES:
7252 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7254 def BuildHooksEnv(self):
7257 This runs on master, primary and secondary nodes of the instance.
7261 "FORCE": self.op.force,
7264 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7268 def BuildHooksNodes(self):
7269 """Build hooks nodes.
7272 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7275 def CheckPrereq(self):
7276 """Check prerequisites.
7278 This checks that the instance is in the cluster.
7281 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7282 assert self.instance is not None, \
7283 "Cannot retrieve locked instance %s" % self.op.instance_name
7286 if self.op.hvparams:
7287 # check hypervisor parameter syntax (locally)
7288 cluster = self.cfg.GetClusterInfo()
7289 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7290 filled_hvp = cluster.FillHV(instance)
7291 filled_hvp.update(self.op.hvparams)
7292 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7293 hv_type.CheckParameterSyntax(filled_hvp)
7294 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7296 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7298 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7300 if self.primary_offline and self.op.ignore_offline_nodes:
7301 self.LogWarning("Ignoring offline primary node")
7303 if self.op.hvparams or self.op.beparams:
7304 self.LogWarning("Overridden parameters are ignored")
7306 _CheckNodeOnline(self, instance.primary_node)
7308 bep = self.cfg.GetClusterInfo().FillBE(instance)
7309 bep.update(self.op.beparams)
7311     # check that the instance's network bridges exist
7312 _CheckInstanceBridgesExist(self, instance)
7314 remote_info = self.rpc.call_instance_info(instance.primary_node,
7316 instance.hypervisor)
7317 remote_info.Raise("Error checking node %s" % instance.primary_node,
7318 prereq=True, ecode=errors.ECODE_ENVIRON)
7319 if not remote_info.payload: # not running already
7320 _CheckNodeFreeMemory(self, instance.primary_node,
7321 "starting instance %s" % instance.name,
7322 bep[constants.BE_MINMEM], instance.hypervisor)
7324 def Exec(self, feedback_fn):
7325 """Start the instance.
7328 instance = self.instance
7329 force = self.op.force
7331 if not self.op.no_remember:
7332 self.cfg.MarkInstanceUp(instance.name)
7334 if self.primary_offline:
7335 assert self.op.ignore_offline_nodes
7336 self.LogInfo("Primary node offline, marked instance as started")
7338 node_current = instance.primary_node
7340 _StartInstanceDisks(self, instance, force)
7343 self.rpc.call_instance_start(node_current,
7344 (instance, self.op.hvparams,
7346 self.op.startup_paused)
7347 msg = result.fail_msg
7349 _ShutdownInstanceDisks(self, instance)
7350 raise errors.OpExecError("Could not start instance: %s" % msg)
7353 class LUInstanceReboot(LogicalUnit):
7354 """Reboot an instance.
7357 HPATH = "instance-reboot"
7358 HTYPE = constants.HTYPE_INSTANCE
7361 def ExpandNames(self):
7362 self._ExpandAndLockInstance()
7364 def BuildHooksEnv(self):
7367 This runs on master, primary and secondary nodes of the instance.
7371 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7372 "REBOOT_TYPE": self.op.reboot_type,
7373 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7376 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7380 def BuildHooksNodes(self):
7381 """Build hooks nodes.
7384 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7387 def CheckPrereq(self):
7388 """Check prerequisites.
7390 This checks that the instance is in the cluster.
7393 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7394 assert self.instance is not None, \
7395 "Cannot retrieve locked instance %s" % self.op.instance_name
7396 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7397 _CheckNodeOnline(self, instance.primary_node)
7399     # check that the instance's network bridges exist
7400 _CheckInstanceBridgesExist(self, instance)
7402 def Exec(self, feedback_fn):
7403 """Reboot the instance.
7406 instance = self.instance
7407 ignore_secondaries = self.op.ignore_secondaries
7408 reboot_type = self.op.reboot_type
7409 reason = self.op.reason
7411 remote_info = self.rpc.call_instance_info(instance.primary_node,
7413 instance.hypervisor)
7414 remote_info.Raise("Error checking node %s" % instance.primary_node)
7415 instance_running = bool(remote_info.payload)
7417 node_current = instance.primary_node
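    # soft/hard reboots are delegated to the hypervisor on the primary node;
    # a full reboot (or rebooting a stopped instance) is implemented as an
    # explicit shutdown followed by a fresh start of the disks and the instance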
7419 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7420 constants.INSTANCE_REBOOT_HARD]:
7421 for disk in instance.disks:
7422 self.cfg.SetDiskID(disk, node_current)
7423 result = self.rpc.call_instance_reboot(node_current, instance,
7425 self.op.shutdown_timeout,
7427 result.Raise("Could not reboot instance")
7429 if instance_running:
7430 result = self.rpc.call_instance_shutdown(node_current, instance,
7431 self.op.shutdown_timeout)
7432 result.Raise("Could not shutdown instance for full reboot")
7433 _ShutdownInstanceDisks(self, instance)
7435 self.LogInfo("Instance %s was already stopped, starting now",
7437 _StartInstanceDisks(self, instance, ignore_secondaries)
7438 result = self.rpc.call_instance_start(node_current,
7439 (instance, None, None), False)
7440 msg = result.fail_msg
7442 _ShutdownInstanceDisks(self, instance)
7443 raise errors.OpExecError("Could not start instance for"
7444 " full reboot: %s" % msg)
7446 self.cfg.MarkInstanceUp(instance.name)
7449 class LUInstanceShutdown(LogicalUnit):
7450 """Shutdown an instance.
7453 HPATH = "instance-stop"
7454 HTYPE = constants.HTYPE_INSTANCE
7457 def ExpandNames(self):
7458 self._ExpandAndLockInstance()
7460 def BuildHooksEnv(self):
7463 This runs on master, primary and secondary nodes of the instance.
7466 env = _BuildInstanceHookEnvByObject(self, self.instance)
7467 env["TIMEOUT"] = self.op.timeout
7470 def BuildHooksNodes(self):
7471 """Build hooks nodes.
7474 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7477 def CheckPrereq(self):
7478 """Check prerequisites.
7480 This checks that the instance is in the cluster.
7483 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7484 assert self.instance is not None, \
7485 "Cannot retrieve locked instance %s" % self.op.instance_name
7487 if not self.op.force:
7488 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7490 self.LogWarning("Ignoring offline instance check")
7492 self.primary_offline = \
7493 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7495 if self.primary_offline and self.op.ignore_offline_nodes:
7496 self.LogWarning("Ignoring offline primary node")
7498 _CheckNodeOnline(self, self.instance.primary_node)
7500 def Exec(self, feedback_fn):
7501 """Shutdown the instance.
7504 instance = self.instance
7505 node_current = instance.primary_node
7506 timeout = self.op.timeout
7508 # If the instance is offline we shouldn't mark it as down, as that
7509 # resets the offline flag.
7510 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7511 self.cfg.MarkInstanceDown(instance.name)
7513 if self.primary_offline:
7514 assert self.op.ignore_offline_nodes
7515 self.LogInfo("Primary node offline, marked instance as stopped")
7517 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7518 msg = result.fail_msg
7520 self.LogWarning("Could not shutdown instance: %s", msg)
7522 _ShutdownInstanceDisks(self, instance)
7525 class LUInstanceReinstall(LogicalUnit):
7526 """Reinstall an instance.
7529 HPATH = "instance-reinstall"
7530 HTYPE = constants.HTYPE_INSTANCE
7533 def ExpandNames(self):
7534 self._ExpandAndLockInstance()
7536 def BuildHooksEnv(self):
7539 This runs on master, primary and secondary nodes of the instance.
7542 return _BuildInstanceHookEnvByObject(self, self.instance)
7544 def BuildHooksNodes(self):
7545 """Build hooks nodes.
7548 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7551 def CheckPrereq(self):
7552 """Check prerequisites.
7554 This checks that the instance is in the cluster and is not running.
7557 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7558 assert instance is not None, \
7559 "Cannot retrieve locked instance %s" % self.op.instance_name
7560 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7561 " offline, cannot reinstall")
7563 if instance.disk_template == constants.DT_DISKLESS:
7564 raise errors.OpPrereqError("Instance '%s' has no disks" %
7565 self.op.instance_name,
7567 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7569 if self.op.os_type is not None:
7571 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7572 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7573 instance_os = self.op.os_type
7575 instance_os = instance.os
7577 nodelist = list(instance.all_nodes)
7579 if self.op.osparams:
7580 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7581 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7582 self.os_inst = i_osdict # the new dict (without defaults)
7586 self.instance = instance
7588 def Exec(self, feedback_fn):
7589 """Reinstall the instance.
7592 inst = self.instance
7594 if self.op.os_type is not None:
7595 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7596 inst.os = self.op.os_type
7597 # Write to configuration
7598 self.cfg.Update(inst, feedback_fn)
7600 _StartInstanceDisks(self, inst, None)
7602 feedback_fn("Running the instance OS create scripts...")
7603 # FIXME: pass debug option from opcode to backend
7604 result = self.rpc.call_instance_os_add(inst.primary_node,
7605 (inst, self.os_inst), True,
7606 self.op.debug_level)
7607 result.Raise("Could not install OS for instance %s on node %s" %
7608 (inst.name, inst.primary_node))
7610 _ShutdownInstanceDisks(self, inst)
7613 class LUInstanceRecreateDisks(LogicalUnit):
7614 """Recreate an instance's missing disks.
7617 HPATH = "instance-recreate-disks"
7618 HTYPE = constants.HTYPE_INSTANCE
7621 _MODIFYABLE = compat.UniqueFrozenset([
7622 constants.IDISK_SIZE,
7623 constants.IDISK_MODE,
7626 # New or changed disk parameters may have different semantics
7627 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7628 constants.IDISK_ADOPT,
7630 # TODO: Implement support changing VG while recreating
7632 constants.IDISK_METAVG,
7633 constants.IDISK_PROVIDER,
7636 def _RunAllocator(self):
7637 """Run the allocator based on input opcode.
7640 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7643 # The allocator should actually run in "relocate" mode, but current
7644 # allocators don't support relocating all the nodes of an instance at
7645 # the same time. As a workaround we use "allocate" mode, but this is
7646 # suboptimal for two reasons:
7647 # - The instance name passed to the allocator is present in the list of
7648 # existing instances, so there could be a conflict within the
7649 # internal structures of the allocator. This doesn't happen with the
7650 # current allocators, but it's a liability.
7651 # - The allocator counts the resources used by the instance twice: once
7652 # because the instance exists already, and once because it tries to
7653 # allocate a new instance.
7654 # The allocator could choose some of the nodes on which the instance is
7655 # running, but that's not a problem. If the instance nodes are broken,
7656 # they should already be marked as drained or offline, and hence
7657 # skipped by the allocator. If instance disks have been lost for other
7658 # reasons, then recreating the disks on the same nodes should be fine.
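# (Illustrative note, derived from the request built below: the "allocate"
# query simply describes an instance of the same shape -- same OS, tags,
# BE memory/vcpu values and a [{size, mode}, ...] disk list -- and the
# allocator must answer with exactly as many nodes as the instance currently
# has, which the assert on RequiredNodes() further down verifies.)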
7659 disk_template = self.instance.disk_template
7660 spindle_use = be_full[constants.BE_SPINDLE_USE]
7661 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7662 disk_template=disk_template,
7663 tags=list(self.instance.GetTags()),
7664 os=self.instance.os,
7666 vcpus=be_full[constants.BE_VCPUS],
7667 memory=be_full[constants.BE_MAXMEM],
7668 spindle_use=spindle_use,
7669 disks=[{constants.IDISK_SIZE: d.size,
7670 constants.IDISK_MODE: d.mode}
7671 for d in self.instance.disks],
7672 hypervisor=self.instance.hypervisor,
7673 node_whitelist=None)
7674 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7676 ial.Run(self.op.iallocator)
7678 assert req.RequiredNodes() == len(self.instance.all_nodes)
7681 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7682 " %s" % (self.op.iallocator, ial.info),
7685 self.op.nodes = ial.result
7686 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7687 self.op.instance_name, self.op.iallocator,
7688 utils.CommaJoin(ial.result))
7690 def CheckArguments(self):
7691 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7692 # Normalize and convert deprecated list of disk indices
7693 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
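# For example, the deprecated form [2, 0] is normalized to [(0, {}), (2, {})].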
7695 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7697 raise errors.OpPrereqError("Some disks have been specified more than"
7698 " once: %s" % utils.CommaJoin(duplicates),
7701 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7702 # when neither iallocator nor nodes are specified
7703 if self.op.iallocator or self.op.nodes:
7704 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7706 for (idx, params) in self.op.disks:
7707 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7708 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7710 raise errors.OpPrereqError("Parameters for disk %s try to change"
7711 " unmodifyable parameter(s): %s" %
7712 (idx, utils.CommaJoin(unsupported)),
7715 def ExpandNames(self):
7716 self._ExpandAndLockInstance()
7717 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7720 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7721 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7723 self.needed_locks[locking.LEVEL_NODE] = []
7724 if self.op.iallocator:
7725 # iallocator will select a new node in the same group
7726 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7727 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7729 self.needed_locks[locking.LEVEL_NODE_RES] = []
7731 def DeclareLocks(self, level):
7732 if level == locking.LEVEL_NODEGROUP:
7733 assert self.op.iallocator is not None
7734 assert not self.op.nodes
7735 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7736 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7737 # Lock the primary group used by the instance optimistically; this
7738 # requires going via the node before it's locked, requiring
7739 # verification later on
7740 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7741 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
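# The optimistically looked-up group is re-checked in CheckPrereq via
# _CheckInstanceNodeGroups, so a group change between this lookup and the
# actual lock acquisition is caught there.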
7743 elif level == locking.LEVEL_NODE:
7744 # If an allocator is used, then we lock all the nodes in the current
7745 # instance group, as we don't know yet which ones will be selected;
7746 # if we replace the nodes without using an allocator, locks are
7747 # already declared in ExpandNames; otherwise, we need to lock all the
7748 # instance nodes for disk re-creation
7749 if self.op.iallocator:
7750 assert not self.op.nodes
7751 assert not self.needed_locks[locking.LEVEL_NODE]
7752 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7754 # Lock member nodes of the group of the primary node
7755 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7756 self.needed_locks[locking.LEVEL_NODE].extend(
7757 self.cfg.GetNodeGroup(group_uuid).members)
7759 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7760 elif not self.op.nodes:
7761 self._LockInstancesNodes(primary_only=False)
7762 elif level == locking.LEVEL_NODE_RES:
7764 self.needed_locks[locking.LEVEL_NODE_RES] = \
7765 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7767 def BuildHooksEnv(self):
7770 This runs on master, primary and secondary nodes of the instance.
7773 return _BuildInstanceHookEnvByObject(self, self.instance)
7775 def BuildHooksNodes(self):
7776 """Build hooks nodes.
7779 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7782 def CheckPrereq(self):
7783 """Check prerequisites.
7785 This checks that the instance is in the cluster and is not running.
7788 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7789 assert instance is not None, \
7790 "Cannot retrieve locked instance %s" % self.op.instance_name
7792 if len(self.op.nodes) != len(instance.all_nodes):
7793 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7794 " %d replacement nodes were specified" %
7795 (instance.name, len(instance.all_nodes),
7796 len(self.op.nodes)),
7798 assert instance.disk_template != constants.DT_DRBD8 or \
7799 len(self.op.nodes) == 2
7800 assert instance.disk_template != constants.DT_PLAIN or \
7801 len(self.op.nodes) == 1
7802 primary_node = self.op.nodes[0]
7804 primary_node = instance.primary_node
7805 if not self.op.iallocator:
7806 _CheckNodeOnline(self, primary_node)
7808 if instance.disk_template == constants.DT_DISKLESS:
7809 raise errors.OpPrereqError("Instance '%s' has no disks" %
7810 self.op.instance_name, errors.ECODE_INVAL)
7812 # Verify if node group locks are still correct
7813 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7815 # Node group locks are acquired only for the primary node (and only
7816 # when the allocator is used)
7817 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7820 # if we replace nodes *and* the old primary is offline, we don't
7821 # check the instance state
7822 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7823 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7824 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7825 msg="cannot recreate disks")
7828 self.disks = dict(self.op.disks)
7830 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7832 maxidx = max(self.disks.keys())
7833 if maxidx >= len(instance.disks):
7834 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7837 if ((self.op.nodes or self.op.iallocator) and
7838 sorted(self.disks.keys()) != range(len(instance.disks))):
7839 raise errors.OpPrereqError("Can't recreate disks partially and"
7840 " change the nodes at the same time",
7843 self.instance = instance
7845 if self.op.iallocator:
7846 self._RunAllocator()
7847 # Release unneeded node and node resource locks
7848 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7849 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7850 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7852 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7854 def Exec(self, feedback_fn):
7855 """Recreate the disks.
7858 instance = self.instance
7860 assert (self.owned_locks(locking.LEVEL_NODE) ==
7861 self.owned_locks(locking.LEVEL_NODE_RES))
7864 mods = [] # keeps track of needed changes
7866 for idx, disk in enumerate(instance.disks):
7868 changes = self.disks[idx]
7870 # Disk should not be recreated
7874 # update secondaries for disks, if needed
7875 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7876 # need to update the nodes and minors
7877 assert len(self.op.nodes) == 2
7878 assert len(disk.logical_id) == 6 # otherwise disk internals
7880 (_, _, old_port, _, _, old_secret) = disk.logical_id
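# A DRBD8 logical_id is (node_a, node_b, port, minor_a, minor_b, secret):
# the port and the shared secret are reused, while fresh minors are
# allocated on the replacement nodes just below.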
7881 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7882 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7883 new_minors[0], new_minors[1], old_secret)
7884 assert len(disk.logical_id) == len(new_id)
7888 mods.append((idx, new_id, changes))
7890 # now that we have passed all asserts above, we can apply the mods
7891 # in a single run (to avoid partial changes)
7892 for idx, new_id, changes in mods:
7893 disk = instance.disks[idx]
7894 if new_id is not None:
7895 assert disk.dev_type == constants.LD_DRBD8
7896 disk.logical_id = new_id
7898 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7899 mode=changes.get(constants.IDISK_MODE, None))
7901 # change primary node, if needed
7903 instance.primary_node = self.op.nodes[0]
7904 self.LogWarning("Changing the instance's nodes, you will have to"
7905 " remove any disks left on the older nodes manually")
7908 self.cfg.Update(instance, feedback_fn)
7910 # All touched nodes must be locked
7911 mylocks = self.owned_locks(locking.LEVEL_NODE)
7912 assert mylocks.issuperset(frozenset(instance.all_nodes))
7913 _CreateDisks(self, instance, to_skip=to_skip)
7916 class LUInstanceRename(LogicalUnit):
7917 """Rename an instance.
7920 HPATH = "instance-rename"
7921 HTYPE = constants.HTYPE_INSTANCE
7923 def CheckArguments(self):
7927 if self.op.ip_check and not self.op.name_check:
7928 # TODO: make the ip check more flexible and not depend on the name check
7929 raise errors.OpPrereqError("IP address check requires a name check",
7932 def BuildHooksEnv(self):
7935 This runs on master, primary and secondary nodes of the instance.
7938 env = _BuildInstanceHookEnvByObject(self, self.instance)
7939 env["INSTANCE_NEW_NAME"] = self.op.new_name
7942 def BuildHooksNodes(self):
7943 """Build hooks nodes.
7946 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7949 def CheckPrereq(self):
7950 """Check prerequisites.
7952 This checks that the instance is in the cluster and is not running.
7955 self.op.instance_name = _ExpandInstanceName(self.cfg,
7956 self.op.instance_name)
7957 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7958 assert instance is not None
7959 _CheckNodeOnline(self, instance.primary_node)
7960 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7961 msg="cannot rename")
7962 self.instance = instance
7964 new_name = self.op.new_name
7965 if self.op.name_check:
7966 hostname = _CheckHostnameSane(self, new_name)
7967 new_name = self.op.new_name = hostname.name
7968 if (self.op.ip_check and
7969 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7970 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7971 (hostname.ip, new_name),
7972 errors.ECODE_NOTUNIQUE)
7974 instance_list = self.cfg.GetInstanceList()
7975 if new_name in instance_list and new_name != instance.name:
7976 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7977 new_name, errors.ECODE_EXISTS)
7979 def Exec(self, feedback_fn):
7980 """Rename the instance.
7983 inst = self.instance
7984 old_name = inst.name
7986 rename_file_storage = False
7987 if (inst.disk_template in constants.DTS_FILEBASED and
7988 self.op.new_name != inst.name):
7989 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7990 rename_file_storage = True
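# (Assuming the usual (file_driver, file_path) logical_id layout for
# file-based disks: the parent directory of the first disk's path is the
# instance's storage directory, which is renamed on the primary node further
# down, after the configuration rename.)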
7992 self.cfg.RenameInstance(inst.name, self.op.new_name)
7993 # Change the instance lock. This is definitely safe while we hold the BGL.
7994 # Otherwise the new lock would have to be added in acquired mode.
7996 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7997 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7998 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
8000 # re-read the instance from the configuration after rename
8001 inst = self.cfg.GetInstanceInfo(self.op.new_name)
8003 if rename_file_storage:
8004 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
8005 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
8006 old_file_storage_dir,
8007 new_file_storage_dir)
8008 result.Raise("Could not rename on node %s directory '%s' to '%s'"
8009 " (but the instance has been renamed in Ganeti)" %
8010 (inst.primary_node, old_file_storage_dir,
8011 new_file_storage_dir))
8013 _StartInstanceDisks(self, inst, None)
8014 # update info on disks
8015 info = _GetInstanceInfoText(inst)
8016 for (idx, disk) in enumerate(inst.disks):
8017 for node in inst.all_nodes:
8018 self.cfg.SetDiskID(disk, node)
8019 result = self.rpc.call_blockdev_setinfo(node, disk, info)
8021 self.LogWarning("Error setting info on node %s for disk %s: %s",
8022 node, idx, result.fail_msg)
8024 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
8025 old_name, self.op.debug_level)
8026 msg = result.fail_msg
8028 msg = ("Could not run OS rename script for instance %s on node %s"
8029 " (but the instance has been renamed in Ganeti): %s" %
8030 (inst.name, inst.primary_node, msg))
8031 self.LogWarning(msg)
8033 _ShutdownInstanceDisks(self, inst)
8038 class LUInstanceRemove(LogicalUnit):
8039 """Remove an instance.
8042 HPATH = "instance-remove"
8043 HTYPE = constants.HTYPE_INSTANCE
8046 def ExpandNames(self):
8047 self._ExpandAndLockInstance()
8048 self.needed_locks[locking.LEVEL_NODE] = []
8049 self.needed_locks[locking.LEVEL_NODE_RES] = []
8050 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8052 def DeclareLocks(self, level):
8053 if level == locking.LEVEL_NODE:
8054 self._LockInstancesNodes()
8055 elif level == locking.LEVEL_NODE_RES:
8057 self.needed_locks[locking.LEVEL_NODE_RES] = \
8058 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8060 def BuildHooksEnv(self):
8063 This runs on master, primary and secondary nodes of the instance.
8066 env = _BuildInstanceHookEnvByObject(self, self.instance)
8067 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8070 def BuildHooksNodes(self):
8071 """Build hooks nodes.
8074 nl = [self.cfg.GetMasterNode()]
8075 nl_post = list(self.instance.all_nodes) + nl
8076 return (nl, nl_post)
8078 def CheckPrereq(self):
8079 """Check prerequisites.
8081 This checks that the instance is in the cluster.
8084 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8085 assert self.instance is not None, \
8086 "Cannot retrieve locked instance %s" % self.op.instance_name
8088 def Exec(self, feedback_fn):
8089 """Remove the instance.
8092 instance = self.instance
8093 logging.info("Shutting down instance %s on node %s",
8094 instance.name, instance.primary_node)
8096 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8097 self.op.shutdown_timeout)
8098 msg = result.fail_msg
8100 if self.op.ignore_failures:
8101 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8103 raise errors.OpExecError("Could not shutdown instance %s on"
8105 (instance.name, instance.primary_node, msg))
8107 assert (self.owned_locks(locking.LEVEL_NODE) ==
8108 self.owned_locks(locking.LEVEL_NODE_RES))
8109 assert not (set(instance.all_nodes) -
8110 self.owned_locks(locking.LEVEL_NODE)), \
8111 "Not owning correct locks"
8113 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8116 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8117 """Utility function to remove an instance.
8120 logging.info("Removing block devices for instance %s", instance.name)
8122 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8123 if not ignore_failures:
8124 raise errors.OpExecError("Can't remove instance's disks")
8125 feedback_fn("Warning: can't remove instance's disks")
8127 logging.info("Removing instance %s out of cluster config", instance.name)
8129 lu.cfg.RemoveInstance(instance.name)
8131 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8132 "Instance lock removal conflict"
8134 # Remove lock for the instance
8135 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8138 class LUInstanceQuery(NoHooksLU):
8139 """Logical unit for querying instances.
8142 # pylint: disable=W0142
8145 def CheckArguments(self):
8146 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8147 self.op.output_fields, self.op.use_locking)
8149 def ExpandNames(self):
8150 self.iq.ExpandNames(self)
8152 def DeclareLocks(self, level):
8153 self.iq.DeclareLocks(self, level)
8155 def Exec(self, feedback_fn):
8156 return self.iq.OldStyleQuery(self)
8159 def _ExpandNamesForMigration(lu):
8160 """Expands names for use with L{TLMigrateInstance}.
8162 @type lu: L{LogicalUnit}
8165 if lu.op.target_node is not None:
8166 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8168 lu.needed_locks[locking.LEVEL_NODE] = []
8169 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8171 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8172 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8174 # The node allocation lock is actually only needed for externally replicated
8175 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
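# (DTS_EXT_MIRROR covers the externally mirrored disk templates; for those
# the target node can be chosen freely, which is why _DeclareLocksForMigration
# below may have to lock all nodes when no target node is given.)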
8176 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8179 def _DeclareLocksForMigration(lu, level):
8180 """Declares locks for L{TLMigrateInstance}.
8182 @type lu: L{LogicalUnit}
8183 @param level: Lock level
8186 if level == locking.LEVEL_NODE_ALLOC:
8187 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8189 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8191 # Node locks are already declared here rather than at LEVEL_NODE as we need
8192 # the instance object anyway to declare the node allocation lock.
8193 if instance.disk_template in constants.DTS_EXT_MIRROR:
8194 if lu.op.target_node is None:
8195 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8196 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8198 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8200 del lu.recalculate_locks[locking.LEVEL_NODE]
8202 lu._LockInstancesNodes() # pylint: disable=W0212
8204 elif level == locking.LEVEL_NODE:
8205 # Node locks are declared together with the node allocation lock
8206 assert (lu.needed_locks[locking.LEVEL_NODE] or
8207 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8209 elif level == locking.LEVEL_NODE_RES:
8211 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8212 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8215 class LUInstanceFailover(LogicalUnit):
8216 """Failover an instance.
8219 HPATH = "instance-failover"
8220 HTYPE = constants.HTYPE_INSTANCE
8223 def CheckArguments(self):
8224 """Check the arguments.
8227 self.iallocator = getattr(self.op, "iallocator", None)
8228 self.target_node = getattr(self.op, "target_node", None)
8230 def ExpandNames(self):
8231 self._ExpandAndLockInstance()
8232 _ExpandNamesForMigration(self)
8235 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8236 self.op.ignore_consistency, True,
8237 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8239 self.tasklets = [self._migrater]
8241 def DeclareLocks(self, level):
8242 _DeclareLocksForMigration(self, level)
8244 def BuildHooksEnv(self):
8247 This runs on master, primary and secondary nodes of the instance.
8250 instance = self._migrater.instance
8251 source_node = instance.primary_node
8252 target_node = self.op.target_node
8254 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8255 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8256 "OLD_PRIMARY": source_node,
8257 "NEW_PRIMARY": target_node,
8260 if instance.disk_template in constants.DTS_INT_MIRROR:
8261 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8262 env["NEW_SECONDARY"] = source_node
8264 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8266 env.update(_BuildInstanceHookEnvByObject(self, instance))
8270 def BuildHooksNodes(self):
8271 """Build hooks nodes.
8274 instance = self._migrater.instance
8275 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8276 return (nl, nl + [instance.primary_node])
8279 class LUInstanceMigrate(LogicalUnit):
8280 """Migrate an instance.
8282 This is migration without shutting down, compared to the failover,
8283 which is done with shutdown.
8286 HPATH = "instance-migrate"
8287 HTYPE = constants.HTYPE_INSTANCE
8290 def ExpandNames(self):
8291 self._ExpandAndLockInstance()
8292 _ExpandNamesForMigration(self)
8295 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8296 False, self.op.allow_failover, False,
8297 self.op.allow_runtime_changes,
8298 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8299 self.op.ignore_ipolicy)
8301 self.tasklets = [self._migrater]
8303 def DeclareLocks(self, level):
8304 _DeclareLocksForMigration(self, level)
8306 def BuildHooksEnv(self):
8309 This runs on master, primary and secondary nodes of the instance.
8312 instance = self._migrater.instance
8313 source_node = instance.primary_node
8314 target_node = self.op.target_node
8315 env = _BuildInstanceHookEnvByObject(self, instance)
8317 "MIGRATE_LIVE": self._migrater.live,
8318 "MIGRATE_CLEANUP": self.op.cleanup,
8319 "OLD_PRIMARY": source_node,
8320 "NEW_PRIMARY": target_node,
8321 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8324 if instance.disk_template in constants.DTS_INT_MIRROR:
8325 env["OLD_SECONDARY"] = target_node
8326 env["NEW_SECONDARY"] = source_node
8328 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8332 def BuildHooksNodes(self):
8333 """Build hooks nodes.
8336 instance = self._migrater.instance
8337 snodes = list(instance.secondary_nodes)
8338 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8342 class LUInstanceMove(LogicalUnit):
8343 """Move an instance by data-copying.
8346 HPATH = "instance-move"
8347 HTYPE = constants.HTYPE_INSTANCE
8350 def ExpandNames(self):
8351 self._ExpandAndLockInstance()
8352 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8353 self.op.target_node = target_node
8354 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8355 self.needed_locks[locking.LEVEL_NODE_RES] = []
8356 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8358 def DeclareLocks(self, level):
8359 if level == locking.LEVEL_NODE:
8360 self._LockInstancesNodes(primary_only=True)
8361 elif level == locking.LEVEL_NODE_RES:
8363 self.needed_locks[locking.LEVEL_NODE_RES] = \
8364 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8366 def BuildHooksEnv(self):
8369 This runs on master, primary and secondary nodes of the instance.
8373 "TARGET_NODE": self.op.target_node,
8374 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8376 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8379 def BuildHooksNodes(self):
8380 """Build hooks nodes.
8384 self.cfg.GetMasterNode(),
8385 self.instance.primary_node,
8386 self.op.target_node,
8390 def CheckPrereq(self):
8391 """Check prerequisites.
8393 This checks that the instance is in the cluster.
8396 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8397 assert self.instance is not None, \
8398 "Cannot retrieve locked instance %s" % self.op.instance_name
8400 node = self.cfg.GetNodeInfo(self.op.target_node)
8401 assert node is not None, \
8402 "Cannot retrieve locked node %s" % self.op.target_node
8404 self.target_node = target_node = node.name
8406 if target_node == instance.primary_node:
8407 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8408 (instance.name, target_node),
8411 bep = self.cfg.GetClusterInfo().FillBE(instance)
8413 for idx, dsk in enumerate(instance.disks):
8414 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8415 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8416 " cannot copy" % idx, errors.ECODE_STATE)
8418 _CheckNodeOnline(self, target_node)
8419 _CheckNodeNotDrained(self, target_node)
8420 _CheckNodeVmCapable(self, target_node)
8421 cluster = self.cfg.GetClusterInfo()
8422 group_info = self.cfg.GetNodeGroup(node.group)
8423 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8424 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
8425 ignore=self.op.ignore_ipolicy)
8427 if instance.admin_state == constants.ADMINST_UP:
8428 # check memory requirements on the secondary node
8429 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8430 instance.name, bep[constants.BE_MAXMEM],
8431 instance.hypervisor)
8433 self.LogInfo("Not checking memory on the secondary node as"
8434 " instance will not be started")
8436 # check bridge existence
8437 _CheckInstanceBridgesExist(self, instance, node=target_node)
8439 def Exec(self, feedback_fn):
8440 """Move an instance.
8442 The move is done by shutting it down on its present node, copying
8443 the data over (slow) and starting it on the new node.
8446 instance = self.instance
8448 source_node = instance.primary_node
8449 target_node = self.target_node
8451 self.LogInfo("Shutting down instance %s on source node %s",
8452 instance.name, source_node)
8454 assert (self.owned_locks(locking.LEVEL_NODE) ==
8455 self.owned_locks(locking.LEVEL_NODE_RES))
8457 result = self.rpc.call_instance_shutdown(source_node, instance,
8458 self.op.shutdown_timeout)
8459 msg = result.fail_msg
8461 if self.op.ignore_consistency:
8462 self.LogWarning("Could not shutdown instance %s on node %s."
8463 " Proceeding anyway. Please make sure node"
8464 " %s is down. Error details: %s",
8465 instance.name, source_node, source_node, msg)
8467 raise errors.OpExecError("Could not shutdown instance %s on"
8469 (instance.name, source_node, msg))
8471 # create the target disks
8473 _CreateDisks(self, instance, target_node=target_node)
8474 except errors.OpExecError:
8475 self.LogWarning("Device creation failed, reverting...")
8477 _RemoveDisks(self, instance, target_node=target_node)
8479 self.cfg.ReleaseDRBDMinors(instance.name)
8482 cluster_name = self.cfg.GetClusterInfo().cluster_name
8485 # activate, get path, copy the data over
8486 for idx, disk in enumerate(instance.disks):
8487 self.LogInfo("Copying data for disk %d", idx)
8488 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8489 instance.name, True, idx)
8491 self.LogWarning("Can't assemble newly created disk %d: %s",
8492 idx, result.fail_msg)
8493 errs.append(result.fail_msg)
8495 dev_path = result.payload
8496 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8497 target_node, dev_path,
8500 self.LogWarning("Can't copy data over for disk %d: %s",
8501 idx, result.fail_msg)
8502 errs.append(result.fail_msg)
8506 self.LogWarning("Some disks failed to copy, aborting")
8508 _RemoveDisks(self, instance, target_node=target_node)
8510 self.cfg.ReleaseDRBDMinors(instance.name)
8511 raise errors.OpExecError("Errors during disk copy: %s" %
8514 instance.primary_node = target_node
8515 self.cfg.Update(instance, feedback_fn)
8517 self.LogInfo("Removing the disks on the original node")
8518 _RemoveDisks(self, instance, target_node=source_node)
8520 # Only start the instance if it's marked as up
8521 if instance.admin_state == constants.ADMINST_UP:
8522 self.LogInfo("Starting instance %s on node %s",
8523 instance.name, target_node)
8525 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8526 ignore_secondaries=True)
8528 _ShutdownInstanceDisks(self, instance)
8529 raise errors.OpExecError("Can't activate the instance's disks")
8531 result = self.rpc.call_instance_start(target_node,
8532 (instance, None, None), False)
8533 msg = result.fail_msg
8535 _ShutdownInstanceDisks(self, instance)
8536 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8537 (instance.name, target_node, msg))
8540 class LUNodeMigrate(LogicalUnit):
8541 """Migrate all instances from a node.
8544 HPATH = "node-migrate"
8545 HTYPE = constants.HTYPE_NODE
8548 def CheckArguments(self):
8551 def ExpandNames(self):
8552 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8554 self.share_locks = _ShareAll()
8555 self.needed_locks = {
8556 locking.LEVEL_NODE: [self.op.node_name],
8559 def BuildHooksEnv(self):
8562 This runs on the master, the primary and all the secondaries.
8566 "NODE_NAME": self.op.node_name,
8567 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8570 def BuildHooksNodes(self):
8571 """Build hooks nodes.
8574 nl = [self.cfg.GetMasterNode()]
8577 def CheckPrereq(self):
8580 def Exec(self, feedback_fn):
8581 # Prepare jobs for migration instances
8582 allow_runtime_changes = self.op.allow_runtime_changes
8584 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8587 iallocator=self.op.iallocator,
8588 target_node=self.op.target_node,
8589 allow_runtime_changes=allow_runtime_changes,
8590 ignore_ipolicy=self.op.ignore_ipolicy)]
8591 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
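# One OpInstanceMigrate job is created per primary instance of the node;
# the jobs are handed back via ResultWithJobs below and submitted separately.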
8593 # TODO: Run iallocator in this opcode and pass correct placement options to
8594 # OpInstanceMigrate. Since other jobs can modify the cluster between
8595 # running the iallocator and the actual migration, a good consistency model
8596 # will have to be found.
8598 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8599 frozenset([self.op.node_name]))
8601 return ResultWithJobs(jobs)
8604 class TLMigrateInstance(Tasklet):
8605 """Tasklet class for instance migration.
8608 @ivar live: whether the migration will be done live or non-live;
8609 this variable is initialized only after CheckPrereq has run
8610 @type cleanup: boolean
8611 @ivar cleanup: Whether we clean up from a failed migration
8612 @type iallocator: string
8613 @ivar iallocator: The iallocator used to determine target_node
8614 @type target_node: string
8615 @ivar target_node: If given, the target_node to reallocate the instance to
8616 @type failover: boolean
8617 @ivar failover: Whether operation results in failover or migration
8618 @type fallback: boolean
8619 @ivar fallback: Whether fallback to failover is allowed if migration not
8621 @type ignore_consistency: boolean
8622 @ivar ignore_consistency: Whether we should ignore consistency between source
8624 @type shutdown_timeout: int
8625 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
8626 @type ignore_ipolicy: bool
8627 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8632 _MIGRATION_POLL_INTERVAL = 1 # seconds
8633 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
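# These two constants drive the memory-transfer loop in _ExecMigration: the
# migration status is polled every _MIGRATION_POLL_INTERVAL seconds and a
# progress message is emitted at most every _MIGRATION_FEEDBACK_INTERVAL
# seconds.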
8635 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8636 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8638 """Initializes this class.
8641 Tasklet.__init__(self, lu)
8644 self.instance_name = instance_name
8645 self.cleanup = cleanup
8646 self.live = False # will be overridden later
8647 self.failover = failover
8648 self.fallback = fallback
8649 self.ignore_consistency = ignore_consistency
8650 self.shutdown_timeout = shutdown_timeout
8651 self.ignore_ipolicy = ignore_ipolicy
8652 self.allow_runtime_changes = allow_runtime_changes
8654 def CheckPrereq(self):
8655 """Check prerequisites.
8657 This checks that the instance is in the cluster.
8660 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8661 instance = self.cfg.GetInstanceInfo(instance_name)
8662 assert instance is not None
8663 self.instance = instance
8664 cluster = self.cfg.GetClusterInfo()
8666 if (not self.cleanup and
8667 not instance.admin_state == constants.ADMINST_UP and
8668 not self.failover and self.fallback):
8669 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8670 " switching to failover")
8671 self.failover = True
8673 if instance.disk_template not in constants.DTS_MIRRORED:
8678 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8679 " %s" % (instance.disk_template, text),
8682 if instance.disk_template in constants.DTS_EXT_MIRROR:
8683 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8685 if self.lu.op.iallocator:
8686 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8687 self._RunAllocator()
8689 # We set self.target_node as it is required by
8691 self.target_node = self.lu.op.target_node
8693 # Check that the target node is correct in terms of instance policy
8694 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8695 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8696 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8698 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8699 ignore=self.ignore_ipolicy)
8701 # self.target_node is already populated, either directly or by the
8703 target_node = self.target_node
8704 if self.target_node == instance.primary_node:
8705 raise errors.OpPrereqError("Cannot migrate instance %s"
8706 " to its primary (%s)" %
8707 (instance.name, instance.primary_node),
8710 if len(self.lu.tasklets) == 1:
8711 # It is safe to release locks only when we're the only tasklet
8713 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8714 keep=[instance.primary_node, self.target_node])
8715 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8718 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8720 secondary_nodes = instance.secondary_nodes
8721 if not secondary_nodes:
8722 raise errors.ConfigurationError("No secondary node but using"
8723 " %s disk template" %
8724 instance.disk_template)
8725 target_node = secondary_nodes[0]
8726 if self.lu.op.iallocator or (self.lu.op.target_node and
8727 self.lu.op.target_node != target_node):
8729 text = "failed over"
8732 raise errors.OpPrereqError("Instances with disk template %s cannot"
8733 " be %s to arbitrary nodes"
8734 " (neither an iallocator nor a target"
8735 " node can be passed)" %
8736 (instance.disk_template, text),
8738 nodeinfo = self.cfg.GetNodeInfo(target_node)
8739 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8740 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8742 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8743 ignore=self.ignore_ipolicy)
8745 i_be = cluster.FillBE(instance)
8747 # check memory requirements on the secondary node
8748 if (not self.cleanup and
8749 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8750 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8751 "migrating instance %s" %
8753 i_be[constants.BE_MINMEM],
8754 instance.hypervisor)
8756 self.lu.LogInfo("Not checking memory on the secondary node as"
8757 " instance will not be started")
8759 # check if failover must be forced instead of migration
8760 if (not self.cleanup and not self.failover and
8761 i_be[constants.BE_ALWAYS_FAILOVER]):
8762 self.lu.LogInfo("Instance configured to always failover; fallback"
8764 self.failover = True
8766 # check bridge existence
8767 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8769 if not self.cleanup:
8770 _CheckNodeNotDrained(self.lu, target_node)
8771 if not self.failover:
8772 result = self.rpc.call_instance_migratable(instance.primary_node,
8774 if result.fail_msg and self.fallback:
8775 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8777 self.failover = True
8779 result.Raise("Can't migrate, please use failover",
8780 prereq=True, ecode=errors.ECODE_STATE)
8782 assert not (self.failover and self.cleanup)
8784 if not self.failover:
8785 if self.lu.op.live is not None and self.lu.op.mode is not None:
8786 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8787 " parameters are accepted",
8789 if self.lu.op.live is not None:
8791 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8793 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8794 # reset the 'live' parameter to None so that repeated
8795 # invocations of CheckPrereq do not raise an exception
8796 self.lu.op.live = None
8797 elif self.lu.op.mode is None:
8798 # read the default value from the hypervisor
8799 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8800 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8802 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8804 # Failover is never live
8807 if not (self.failover or self.cleanup):
8808 remote_info = self.rpc.call_instance_info(instance.primary_node,
8810 instance.hypervisor)
8811 remote_info.Raise("Error checking instance on node %s" %
8812 instance.primary_node)
8813 instance_running = bool(remote_info.payload)
8814 if instance_running:
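# Remember the instance's current runtime memory; _ExecMigration compares it
# with the free memory on the target node and, if runtime changes are
# allowed, balloons the instance down before transferring it.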
8815 self.current_mem = int(remote_info.payload["memory"])
8817 def _RunAllocator(self):
8818 """Run the allocator based on input opcode.
8821 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8823 # FIXME: add a self.ignore_ipolicy option
8824 req = iallocator.IAReqRelocate(name=self.instance_name,
8825 relocate_from=[self.instance.primary_node])
8826 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8828 ial.Run(self.lu.op.iallocator)
8831 raise errors.OpPrereqError("Can't compute nodes using"
8832 " iallocator '%s': %s" %
8833 (self.lu.op.iallocator, ial.info),
8835 self.target_node = ial.result[0]
8836 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8837 self.instance_name, self.lu.op.iallocator,
8838 utils.CommaJoin(ial.result))
8840 def _WaitUntilSync(self):
8841 """Poll with custom rpc for disk sync.
8843 This uses our own step-based rpc call.
8846 self.feedback_fn("* wait until resync is done")
8850 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8852 (self.instance.disks,
8855 for node, nres in result.items():
8856 nres.Raise("Cannot resync disks on node %s" % node)
8857 node_done, node_percent = nres.payload
8858 all_done = all_done and node_done
8859 if node_percent is not None:
8860 min_percent = min(min_percent, node_percent)
8862 if min_percent < 100:
8863 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8866 def _EnsureSecondary(self, node):
8867 """Demote a node to secondary.
8870 self.feedback_fn("* switching node %s to secondary mode" % node)
8872 for dev in self.instance.disks:
8873 self.cfg.SetDiskID(dev, node)
8875 result = self.rpc.call_blockdev_close(node, self.instance.name,
8876 self.instance.disks)
8877 result.Raise("Cannot change disk to secondary on node %s" % node)
8879 def _GoStandalone(self):
8880 """Disconnect from the network.
8883 self.feedback_fn("* changing into standalone mode")
8884 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8885 self.instance.disks)
8886 for node, nres in result.items():
8887 nres.Raise("Cannot disconnect disks on node %s" % node)
8889 def _GoReconnect(self, multimaster):
8890 """Reconnect to the network.
8896 msg = "single-master"
8897 self.feedback_fn("* changing disks into %s mode" % msg)
8898 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8899 (self.instance.disks, self.instance),
8900 self.instance.name, multimaster)
8901 for node, nres in result.items():
8902 nres.Raise("Cannot change disks config on node %s" % node)
8904 def _ExecCleanup(self):
8905 """Try to cleanup after a failed migration.
8907 The cleanup is done by:
8908 - check that the instance is running only on one node
8909 (and update the config if needed)
8910 - change disks on its secondary node to secondary
8911 - wait until disks are fully synchronized
8912 - disconnect from the network
8913 - change disks into single-master mode
8914 - wait again until disks are fully synchronized
8917 instance = self.instance
8918 target_node = self.target_node
8919 source_node = self.source_node
8921 # check running on only one node
8922 self.feedback_fn("* checking where the instance actually runs"
8923 " (if this hangs, the hypervisor might be in"
8925 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
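# ins_l maps each node name to an RPC result whose payload is the list of
# instance names the hypervisor reports as running on that node.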
8926 for node, result in ins_l.items():
8927 result.Raise("Can't contact node %s" % node)
8929 runningon_source = instance.name in ins_l[source_node].payload
8930 runningon_target = instance.name in ins_l[target_node].payload
8932 if runningon_source and runningon_target:
8933 raise errors.OpExecError("Instance seems to be running on two nodes,"
8934 " or the hypervisor is confused; you will have"
8935 " to ensure manually that it runs only on one"
8936 " and restart this operation")
8938 if not (runningon_source or runningon_target):
8939 raise errors.OpExecError("Instance does not seem to be running at all;"
8940 " in this case it's safer to repair by"
8941 " running 'gnt-instance stop' to ensure disk"
8942 " shutdown, and then restarting it")
8944 if runningon_target:
8945 # the migration has actually succeeded, we need to update the config
8946 self.feedback_fn("* instance running on secondary node (%s),"
8947 " updating config" % target_node)
8948 instance.primary_node = target_node
8949 self.cfg.Update(instance, self.feedback_fn)
8950 demoted_node = source_node
8952 self.feedback_fn("* instance confirmed to be running on its"
8953 " primary node (%s)" % source_node)
8954 demoted_node = target_node
8956 if instance.disk_template in constants.DTS_INT_MIRROR:
8957 self._EnsureSecondary(demoted_node)
8959 self._WaitUntilSync()
8960 except errors.OpExecError:
8961 # we ignore here errors, since if the device is standalone, it
8962 # won't be able to sync
8964 self._GoStandalone()
8965 self._GoReconnect(False)
8966 self._WaitUntilSync()
8968 self.feedback_fn("* done")
8970 def _RevertDiskStatus(self):
8971 """Try to revert the disk status after a failed migration.
8974 target_node = self.target_node
8975 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8979 self._EnsureSecondary(target_node)
8980 self._GoStandalone()
8981 self._GoReconnect(False)
8982 self._WaitUntilSync()
8983 except errors.OpExecError, err:
8984 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8985 " please try to recover the instance manually;"
8986 " error '%s'" % str(err))
8988 def _AbortMigration(self):
8989 """Call the hypervisor code to abort a started migration.
8992 instance = self.instance
8993 target_node = self.target_node
8994 source_node = self.source_node
8995 migration_info = self.migration_info
8997 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
9001 abort_msg = abort_result.fail_msg
9003 logging.error("Aborting migration failed on target node %s: %s",
9004 target_node, abort_msg)
9005 # Don't raise an exception here, as we still have to try to revert the
9006 # disk status, even if this step failed.
9008 abort_result = self.rpc.call_instance_finalize_migration_src(
9009 source_node, instance, False, self.live)
9010 abort_msg = abort_result.fail_msg
9012 logging.error("Aborting migration failed on source node %s: %s",
9013 source_node, abort_msg)
9015 def _ExecMigration(self):
9016 """Migrate an instance.
9018 The migrate is done by:
9019 - change the disks into dual-master mode
9020 - wait until disks are fully synchronized again
9021 - migrate the instance
9022 - change disks on the new secondary node (the old primary) to secondary
9023 - wait until disks are fully synchronized
9024 - change disks into single-master mode
9027 instance = self.instance
9028 target_node = self.target_node
9029 source_node = self.source_node
9031 # Check for hypervisor version mismatch and warn the user.
9032 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9033 None, [self.instance.hypervisor], False)
9034 for ninfo in nodeinfo.values():
9035 ninfo.Raise("Unable to retrieve node information from node '%s'" %
9037 (_, _, (src_info, )) = nodeinfo[source_node].payload
9038 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9040 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9041 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9042 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9043 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9044 if src_version != dst_version:
9045 self.feedback_fn("* warning: hypervisor version mismatch between"
9046 " source (%s) and target (%s) node" %
9047 (src_version, dst_version))
9049 self.feedback_fn("* checking disk consistency between source and target")
9050 for (idx, dev) in enumerate(instance.disks):
9051 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9052 raise errors.OpExecError("Disk %s is degraded or not fully"
9053 " synchronized on target node,"
9054 " aborting migration" % idx)
9056 if self.current_mem > self.tgt_free_mem:
9057 if not self.allow_runtime_changes:
9058 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9059 " free memory to fit instance %s on target"
9060 " node %s (have %dMB, need %dMB)" %
9061 (instance.name, target_node,
9062 self.tgt_free_mem, self.current_mem))
9063 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9064 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9067 rpcres.Raise("Cannot modify instance runtime memory")
9069 # First get the migration information from the remote node
9070 result = self.rpc.call_migration_info(source_node, instance)
9071 msg = result.fail_msg
9073 log_err = ("Failed fetching source migration information from %s: %s" %
9075 logging.error(log_err)
9076 raise errors.OpExecError(log_err)
9078 self.migration_info = migration_info = result.payload
9080 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9081 # Then switch the disks to master/master mode
9082 self._EnsureSecondary(target_node)
9083 self._GoStandalone()
9084 self._GoReconnect(True)
9085 self._WaitUntilSync()
9087 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9088 result = self.rpc.call_accept_instance(target_node,
9091 self.nodes_ip[target_node])
9093 msg = result.fail_msg
9095 logging.error("Instance pre-migration failed, trying to revert"
9096 " disk status: %s", msg)
9097 self.feedback_fn("Pre-migration failed, aborting")
9098 self._AbortMigration()
9099 self._RevertDiskStatus()
9100 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9101 (instance.name, msg))
9103 self.feedback_fn("* migrating instance to %s" % target_node)
9104 result = self.rpc.call_instance_migrate(source_node, instance,
9105 self.nodes_ip[target_node],
9107 msg = result.fail_msg
9109 logging.error("Instance migration failed, trying to revert"
9110 " disk status: %s", msg)
9111 self.feedback_fn("Migration failed, aborting")
9112 self._AbortMigration()
9113 self._RevertDiskStatus()
9114 raise errors.OpExecError("Could not migrate instance %s: %s" %
9115 (instance.name, msg))
9117 self.feedback_fn("* starting memory transfer")
9118 last_feedback = time.time()
9120 result = self.rpc.call_instance_get_migration_status(source_node,
9122 msg = result.fail_msg
9123 ms = result.payload # MigrationStatus instance
9124 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9125 logging.error("Instance migration failed, trying to revert"
9126 " disk status: %s", msg)
9127 self.feedback_fn("Migration failed, aborting")
9128 self._AbortMigration()
9129 self._RevertDiskStatus()
9131 msg = "hypervisor returned failure"
9132 raise errors.OpExecError("Could not migrate instance %s: %s" %
9133 (instance.name, msg))
9135 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9136 self.feedback_fn("* memory transfer complete")
9139 if (utils.TimeoutExpired(last_feedback,
9140 self._MIGRATION_FEEDBACK_INTERVAL) and
9141 ms.transferred_ram is not None):
9142 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9143 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9144 last_feedback = time.time()
9146 time.sleep(self._MIGRATION_POLL_INTERVAL)
9148 result = self.rpc.call_instance_finalize_migration_src(source_node,
9152 msg = result.fail_msg
9154 logging.error("Instance migration succeeded, but finalization failed"
9155 " on the source node: %s", msg)
9156 raise errors.OpExecError("Could not finalize instance migration: %s" %
9159 instance.primary_node = target_node
9161 # distribute new instance config to the other nodes
9162 self.cfg.Update(instance, self.feedback_fn)
9164 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9168 msg = result.fail_msg
9170 logging.error("Instance migration succeeded, but finalization failed"
9171 " on the target node: %s", msg)
9172 raise errors.OpExecError("Could not finalize instance migration: %s" %
9175 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9176 self._EnsureSecondary(source_node)
9177 self._WaitUntilSync()
9178 self._GoStandalone()
9179 self._GoReconnect(False)
9180 self._WaitUntilSync()
9182 # If the instance's disk template is `rbd' or `ext' and there was a
9183 # successful migration, unmap the device from the source node.
9184 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9185 disks = _ExpandCheckDisks(instance, instance.disks)
9186 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9188 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9189 msg = result.fail_msg
9191 logging.error("Migration was successful, but couldn't unmap the"
9192 " block device %s on source node %s: %s",
9193 disk.iv_name, source_node, msg)
9194 logging.error("You need to unmap the device %s manually on %s",
9195 disk.iv_name, source_node)
9197 self.feedback_fn("* done")
9199 def _ExecFailover(self):
9200 """Failover an instance.
9202 The failover is done by shutting it down on its present node and
9203 starting it on the secondary.
9206 instance = self.instance
9207 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9209 source_node = instance.primary_node
9210 target_node = self.target_node
9212 if instance.admin_state == constants.ADMINST_UP:
9213 self.feedback_fn("* checking disk consistency between source and target")
9214 for (idx, dev) in enumerate(instance.disks):
9215 # for drbd, these are drbd over lvm
9216 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9218 if primary_node.offline:
9219 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9221 (primary_node.name, idx, target_node))
9222 elif not self.ignore_consistency:
9223 raise errors.OpExecError("Disk %s is degraded on target node,"
9224 " aborting failover" % idx)
9226 self.feedback_fn("* not checking disk consistency as instance is not"
9229 self.feedback_fn("* shutting down instance on source node")
9230 logging.info("Shutting down instance %s on node %s",
9231 instance.name, source_node)
9233 result = self.rpc.call_instance_shutdown(source_node, instance,
9234 self.shutdown_timeout)
9235 msg = result.fail_msg
9237 if self.ignore_consistency or primary_node.offline:
9238 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9239 " proceeding anyway; please make sure node"
9240 " %s is down; error details: %s",
9241 instance.name, source_node, source_node, msg)
9243 raise errors.OpExecError("Could not shutdown instance %s on"
9245 (instance.name, source_node, msg))
9247 self.feedback_fn("* deactivating the instance's disks on source node")
9248 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9249 raise errors.OpExecError("Can't shut down the instance's disks")
9251 instance.primary_node = target_node
9252 # distribute new instance config to the other nodes
9253 self.cfg.Update(instance, self.feedback_fn)
9255 # Only start the instance if it's marked as up
9256 if instance.admin_state == constants.ADMINST_UP:
9257 self.feedback_fn("* activating the instance's disks on target node %s" %
9259 logging.info("Starting instance %s on node %s",
9260 instance.name, target_node)
9262 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9263 ignore_secondaries=True)
9265 _ShutdownInstanceDisks(self.lu, instance)
9266 raise errors.OpExecError("Can't activate the instance's disks")
9268 self.feedback_fn("* starting the instance on the target node %s" %
9270 result = self.rpc.call_instance_start(target_node, (instance, None, None),
9272 msg = result.fail_msg
9274 _ShutdownInstanceDisks(self.lu, instance)
9275 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9276 (instance.name, target_node, msg))
9278 def Exec(self, feedback_fn):
9279 """Perform the migration.
9282 self.feedback_fn = feedback_fn
9283 self.source_node = self.instance.primary_node
9285 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9286 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9287 self.target_node = self.instance.secondary_nodes[0]
9288 # Otherwise self.target_node has been populated either
9289 # directly, or through an iallocator.
9291 self.all_nodes = [self.source_node, self.target_node]
9292 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9293 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9296 feedback_fn("Failover instance %s" % self.instance.name)
9297 self._ExecFailover()
9299 feedback_fn("Migrating instance %s" % self.instance.name)
9302 return self._ExecCleanup()
9304 return self._ExecMigration()
9307 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9309 """Wrapper around L{_CreateBlockDevInner}.
9311 This method annotates the root device first.
9314 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9315 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9316 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9317 force_open, excl_stor)
9320 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9321 info, force_open, excl_stor):
9322 """Create a tree of block devices on a given node.
9324 If this device type has to be created on secondaries, create it and all its children.
9327 If not, just recurse to children keeping the same 'force' value.
9329 @attention: The device has to be annotated already.
9331 @param lu: the lu on whose behalf we execute
9332 @param node: the node on which to create the device
9333 @type instance: L{objects.Instance}
9334 @param instance: the instance which owns the device
9335 @type device: L{objects.Disk}
9336 @param device: the device to create
9337 @type force_create: boolean
9338 @param force_create: whether to force creation of this device; this
9339 will be changed to True whenever we find a device which has the
9340 CreateOnSecondary() attribute
9341 @param info: the extra 'metadata' we should attach to the device
9342 (this will be represented as a LVM tag)
9343 @type force_open: boolean
9344 @param force_open: this parameter will be passed to the
9345 L{backend.BlockdevCreate} function where it specifies
9346 whether we run on primary or not, and it affects both
9347 the child assembly and the device's own Open() execution
9348 @type excl_stor: boolean
9349 @param excl_stor: Whether exclusive_storage is active for the node
9352 if device.CreateOnSecondary():
9356 for child in device.children:
9357 _CreateBlockDevInner(lu, node, instance, child, force_create,
9358 info, force_open, excl_stor)
9360 if not force_create:
9363 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9367 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
9369 """Create a single block device on a given node.
9371 This will not recurse over children of the device, so they must be created in advance.
9374 @param lu: the lu on whose behalf we execute
9375 @param node: the node on which to create the device
9376 @type instance: L{objects.Instance}
9377 @param instance: the instance which owns the device
9378 @type device: L{objects.Disk}
9379 @param device: the device to create
9380 @param info: the extra 'metadata' we should attach to the device
9381 (this will be represented as a LVM tag)
9382 @type force_open: boolean
9383 @param force_open: this parameter will be passed to the
9384 L{backend.BlockdevCreate} function where it specifies
9385 whether we run on primary or not, and it affects both
9386 the child assembly and the device's own Open() execution
9387 @type excl_stor: boolean
9388 @param excl_stor: Whether exclusive_storage is active for the node
9391 lu.cfg.SetDiskID(device, node)
9392 result = lu.rpc.call_blockdev_create(node, device, device.size,
9393 instance.name, force_open, info,
9395 result.Raise("Can't create block device %s on"
9396 " node %s for instance %s" % (device, node, instance.name))
9397 if device.physical_id is None:
9398 device.physical_id = result.payload
9401 def _GenerateUniqueNames(lu, exts):
9402 """Generate a suitable LV name.
9404 This will generate a logical volume name for the given instance.
9409 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9410 results.append("%s%s" % (new_id, val))
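# Illustrative sketch (editor's note, not part of the original code): for an
# instance needing two plain disks, the helper above would be called roughly
# as
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# and return names of the form ["<uuid>.disk0", "<uuid>.disk1"], one fresh
# unique ID per requested extension, reserved against the LU's execution
# context ID.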
9414 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9415 iv_name, p_minor, s_minor):
9416 """Generate a drbd8 device complete with its children.
9419 assert len(vgnames) == len(names) == 2
9420 port = lu.cfg.AllocatePort()
9421 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9423 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9424 logical_id=(vgnames[0], names[0]),
9426 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9427 size=constants.DRBD_META_SIZE,
9428 logical_id=(vgnames[1], names[1]),
9430 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9431 logical_id=(primary, secondary, port,
9434 children=[dev_data, dev_meta],
9435 iv_name=iv_name, params={})
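# Illustrative sketch (hypothetical arguments, not part of the original code):
# the helper above builds a DRBD8 disk whose two children are the data LV and
# a constants.DRBD_META_SIZE metadata LV, e.g.
#   drbd = _GenerateDRBD8Branch(lu, "node1", "node2", 1024,
#                               ["xenvg", "xenvg"],
#                               ["<uuid>.disk0_data", "<uuid>.disk0_meta"],
#                               "disk/0", 0, 1)
# yields a disk with dev_type=constants.LD_DRBD8, size=1024 and
# children=[<data LV of 1024>, <meta LV of constants.DRBD_META_SIZE>].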
9439 _DISK_TEMPLATE_NAME_PREFIX = {
9440 constants.DT_PLAIN: "",
9441 constants.DT_RBD: ".rbd",
9442 constants.DT_EXT: ".ext",
9446 _DISK_TEMPLATE_DEVICE_TYPE = {
9447 constants.DT_PLAIN: constants.LD_LV,
9448 constants.DT_FILE: constants.LD_FILE,
9449 constants.DT_SHARED_FILE: constants.LD_FILE,
9450 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9451 constants.DT_RBD: constants.LD_RBD,
9452 constants.DT_EXT: constants.LD_EXT,
9456 def _GenerateDiskTemplate(
9457 lu, template_name, instance_name, primary_node, secondary_nodes,
9458 disk_info, file_storage_dir, file_driver, base_index,
9459 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9460 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9461 """Generate the entire disk layout for a given template type.
9464 vgname = lu.cfg.GetVGName()
9465 disk_count = len(disk_info)
9468 if template_name == constants.DT_DISKLESS:
9470 elif template_name == constants.DT_DRBD8:
9471 if len(secondary_nodes) != 1:
9472 raise errors.ProgrammerError("Wrong template configuration")
9473 remote_node = secondary_nodes[0]
9474 minors = lu.cfg.AllocateDRBDMinor(
9475 [primary_node, remote_node] * len(disk_info), instance_name)
9477 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9479 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9482 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9483 for i in range(disk_count)]):
9484 names.append(lv_prefix + "_data")
9485 names.append(lv_prefix + "_meta")
9486 for idx, disk in enumerate(disk_info):
9487 disk_index = idx + base_index
9488 data_vg = disk.get(constants.IDISK_VG, vgname)
9489 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9490 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9491 disk[constants.IDISK_SIZE],
9493 names[idx * 2:idx * 2 + 2],
9494 "disk/%d" % disk_index,
9495 minors[idx * 2], minors[idx * 2 + 1])
9496 disk_dev.mode = disk[constants.IDISK_MODE]
9497 disks.append(disk_dev)
9500 raise errors.ProgrammerError("Wrong template configuration")
9502 if template_name == constants.DT_FILE:
9504 elif template_name == constants.DT_SHARED_FILE:
9505 _req_shr_file_storage()
9507 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9508 if name_prefix is None:
9511 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9512 (name_prefix, base_index + i)
9513 for i in range(disk_count)])
9515 if template_name == constants.DT_PLAIN:
9517 def logical_id_fn(idx, _, disk):
9518 vg = disk.get(constants.IDISK_VG, vgname)
9519 return (vg, names[idx])
9521 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9523 lambda _, disk_index, disk: (file_driver,
9524 "%s/disk%d" % (file_storage_dir,
9526 elif template_name == constants.DT_BLOCK:
9528 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9529 disk[constants.IDISK_ADOPT])
9530 elif template_name == constants.DT_RBD:
9531 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9532 elif template_name == constants.DT_EXT:
9533 def logical_id_fn(idx, _, disk):
9534 provider = disk.get(constants.IDISK_PROVIDER, None)
9535 if provider is None:
9536 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9537 " not found", constants.DT_EXT,
9538 constants.IDISK_PROVIDER)
9539 return (provider, names[idx])
9541 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9543 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9545 for idx, disk in enumerate(disk_info):
9547 # Only for the Ext template add disk_info to params
9548 if template_name == constants.DT_EXT:
9549 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9551 if key not in constants.IDISK_PARAMS:
9552 params[key] = disk[key]
9553 disk_index = idx + base_index
9554 size = disk[constants.IDISK_SIZE]
9555 feedback_fn("* disk %s, size %s" %
9556 (disk_index, utils.FormatUnit(size, "h")))
9557 disks.append(objects.Disk(dev_type=dev_type, size=size,
9558 logical_id=logical_id_fn(idx, disk_index, disk),
9559 iv_name="disk/%d" % disk_index,
9560 mode=disk[constants.IDISK_MODE],
9566 def _GetInstanceInfoText(instance):
9567 """Compute that text that should be added to the disk's metadata.
9570 return "originstname+%s" % instance.name
9573 def _CalcEta(time_taken, written, total_size):
9574 """Calculates the ETA based on size written and total size.
9576 @param time_taken: The time taken so far
9577 @param written: amount written so far
9578 @param total_size: The total size of data to be written
9579 @return: The remaining time in seconds
9582 avg_time = time_taken / float(written)
9583 return (total_size - written) * avg_time
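# Illustrative sketch (hypothetical numbers, not part of the original code):
# _CalcEta is a plain linear extrapolation, e.g.
#   >>> _CalcEta(30.0, 512, 2048)
#   90.0
# i.e. 512 units written in 30 seconds averages 30/512 s per unit, so the
# remaining 1536 units are estimated at 90 seconds.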
9586 def _WipeDisks(lu, instance, disks=None):
9587 """Wipes instance disks.
9589 @type lu: L{LogicalUnit}
9590 @param lu: the logical unit on whose behalf we execute
9591 @type instance: L{objects.Instance}
9592 @param instance: the instance whose disks we should create
9593 @type disks: None or list of tuple of (number, L{objects.Disk}, number)
9594 @param disks: Disk details; tuple contains disk index, disk object and the start offset
9598 node = instance.primary_node
9601 disks = [(idx, disk, 0)
9602 for (idx, disk) in enumerate(instance.disks)]
9604 for (_, device, _) in disks:
9605 lu.cfg.SetDiskID(device, node)
9607 logging.info("Pausing synchronization of disks of instance '%s'",
9609 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9610 (map(compat.snd, disks),
9613 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9615 for idx, success in enumerate(result.payload):
9617 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9618 " failed", idx, instance.name)
9621 for (idx, device, offset) in disks:
9622 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9623 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9625 int(min(constants.MAX_WIPE_CHUNK,
9626 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
9630 start_time = time.time()
9635 info_text = (" (from %s to %s)" %
9636 (utils.FormatUnit(offset, "h"),
9637 utils.FormatUnit(size, "h")))
9639 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9641 logging.info("Wiping disk %d for instance %s on node %s using"
9642 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9644 while offset < size:
9645 wipe_size = min(wipe_chunk_size, size - offset)
9647 logging.debug("Wiping disk %d, offset %s, chunk %s",
9648 idx, offset, wipe_size)
9650 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9652 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9653 (idx, offset, wipe_size))
9657 if now - last_output >= 60:
9658 eta = _CalcEta(now - start_time, offset, size)
9659 lu.LogInfo(" - done: %.1f%% ETA: %s",
9660 offset / float(size) * 100, utils.FormatSeconds(eta))
9663 logging.info("Resuming synchronization of disks for instance '%s'",
9666 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9667 (map(compat.snd, disks),
9672 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9673 node, result.fail_msg)
9675 for idx, success in enumerate(result.payload):
9677 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9678 " failed", idx, instance.name)
9681 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9682 """Create all disks for an instance.
9684 This abstracts away some work from AddInstance.
9686 @type lu: L{LogicalUnit}
9687 @param lu: the logical unit on whose behalf we execute
9688 @type instance: L{objects.Instance}
9689 @param instance: the instance whose disks we should create
9691 @param to_skip: list of indices to skip
9692 @type target_node: string
9693 @param target_node: if passed, overrides the target node for creation
9695 @return: the success of the creation
9698 info = _GetInstanceInfoText(instance)
9699 if target_node is None:
9700 pnode = instance.primary_node
9701 all_nodes = instance.all_nodes
9706 if instance.disk_template in constants.DTS_FILEBASED:
9707 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9708 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9710 result.Raise("Failed to create directory '%s' on"
9711 " node %s" % (file_storage_dir, pnode))
9713 # Note: this needs to be kept in sync with adding of disks in
9714 # LUInstanceSetParams
9715 for idx, device in enumerate(instance.disks):
9716 if to_skip and idx in to_skip:
9718 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9720 for node in all_nodes:
9721 f_create = node == pnode
9722 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9725 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9726 """Remove all disks for an instance.
9728 This abstracts away some work from `AddInstance()` and
9729 `RemoveInstance()`. Note that in case some of the devices couldn't
9730 be removed, the removal will continue with the other ones (compare
9731 with `_CreateDisks()`).
9733 @type lu: L{LogicalUnit}
9734 @param lu: the logical unit on whose behalf we execute
9735 @type instance: L{objects.Instance}
9736 @param instance: the instance whose disks we should remove
9737 @type target_node: string
9738 @param target_node: used to override the node on which to remove the disks
9740 @return: the success of the removal
9743 logging.info("Removing block devices for instance %s", instance.name)
9746 ports_to_release = set()
9747 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9748 for (idx, device) in enumerate(anno_disks):
9750 edata = [(target_node, device)]
9752 edata = device.ComputeNodeTree(instance.primary_node)
9753 for node, disk in edata:
9754 lu.cfg.SetDiskID(disk, node)
9755 result = lu.rpc.call_blockdev_remove(node, disk)
9757 lu.LogWarning("Could not remove disk %s on node %s,"
9758 " continuing anyway: %s", idx, node, result.fail_msg)
9759 if not (result.offline and node != instance.primary_node):
9762 # if this is a DRBD disk, return its port to the pool
9763 if device.dev_type in constants.LDS_DRBD:
9764 ports_to_release.add(device.logical_id[2])
9766 if all_result or ignore_failures:
9767 for port in ports_to_release:
9768 lu.cfg.AddTcpUdpPort(port)
9770 if instance.disk_template in constants.DTS_FILEBASED:
9771 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9775 tgt = instance.primary_node
9776 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9778 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9779 file_storage_dir, instance.primary_node, result.fail_msg)
9785 def _ComputeDiskSizePerVG(disk_template, disks):
9786 """Compute disk size requirements in the volume group
9789 def _compute(disks, payload):
9790 """Universal algorithm.
9795 vgs[disk[constants.IDISK_VG]] = \
9796 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9800 # Required free disk space as a function of disk and swap space
9802 constants.DT_DISKLESS: {},
9803 constants.DT_PLAIN: _compute(disks, 0),
9804 # 128 MB are added for drbd metadata for each disk
9805 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9806 constants.DT_FILE: {},
9807 constants.DT_SHARED_FILE: {},
9810 if disk_template not in req_size_dict:
9811 raise errors.ProgrammerError("Disk template '%s' size requirement"
9812 " is unknown" % disk_template)
9814 return req_size_dict[disk_template]
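# Illustrative sketch (hypothetical values, not part of the original code):
# for two disks in the same volume group,
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048}]
# the helper above returns {"xenvg": 3072} for constants.DT_PLAIN, while for
# constants.DT_DRBD8 each disk additionally accounts for
# constants.DRBD_META_SIZE (128 MB per disk, per the comment above).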
9817 def _FilterVmNodes(lu, nodenames):
9818 """Filters out non-vm_capable nodes from a list.
9820 @type lu: L{LogicalUnit}
9821 @param lu: the logical unit for which we check
9822 @type nodenames: list
9823 @param nodenames: the list of nodes on which we should check
9825 @return: the list of vm-capable nodes
9828 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9829 return [name for name in nodenames if name not in vm_nodes]
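# Illustrative sketch (hypothetical node names): if the cluster's
# non-vm_capable nodes are ["node3"], then
#   _FilterVmNodes(lu, ["node1", "node3", "node4"])
# returns ["node1", "node4"].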
9832 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9833 """Hypervisor parameter validation.
9835 This function abstracts the hypervisor parameter validation to be
9836 used in both instance create and instance modify.
9838 @type lu: L{LogicalUnit}
9839 @param lu: the logical unit for which we check
9840 @type nodenames: list
9841 @param nodenames: the list of nodes on which we should check
9842 @type hvname: string
9843 @param hvname: the name of the hypervisor we should use
9844 @type hvparams: dict
9845 @param hvparams: the parameters which we need to check
9846 @raise errors.OpPrereqError: if the parameters are not valid
9849 nodenames = _FilterVmNodes(lu, nodenames)
9851 cluster = lu.cfg.GetClusterInfo()
9852 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9854 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9855 for node in nodenames:
9859 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9862 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9863 """OS parameters validation.
9865 @type lu: L{LogicalUnit}
9866 @param lu: the logical unit for which we check
9867 @type required: boolean
9868 @param required: whether the validation should fail if the OS is not found
9870 @type nodenames: list
9871 @param nodenames: the list of nodes on which we should check
9872 @type osname: string
9873 @param osname: the name of the OS we should use
9874 @type osparams: dict
9875 @param osparams: the parameters which we need to check
9876 @raise errors.OpPrereqError: if the parameters are not valid
9879 nodenames = _FilterVmNodes(lu, nodenames)
9880 result = lu.rpc.call_os_validate(nodenames, required, osname,
9881 [constants.OS_VALIDATE_PARAMETERS],
9883 for node, nres in result.items():
9884 # we don't check for offline cases since this should be run only
9885 # against the master node and/or an instance's nodes
9886 nres.Raise("OS Parameters validation failed on node %s" % node)
9887 if not nres.payload:
9888 lu.LogInfo("OS %s not found on node %s, validation skipped",
9892 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9893 """Wrapper around IAReqInstanceAlloc.
9895 @param op: The instance opcode
9896 @param disks: The computed disks
9897 @param nics: The computed nics
9898 @param beparams: The fully filled beparams
9899 @param node_whitelist: List of nodes which should appear as online to the
9900 allocator (unless the node is already marked offline)
9902 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9905 spindle_use = beparams[constants.BE_SPINDLE_USE]
9906 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9907 disk_template=op.disk_template,
9910 vcpus=beparams[constants.BE_VCPUS],
9911 memory=beparams[constants.BE_MAXMEM],
9912 spindle_use=spindle_use,
9914 nics=[n.ToDict() for n in nics],
9915 hypervisor=op.hypervisor,
9916 node_whitelist=node_whitelist)
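# Illustrative sketch (hypothetical values, not part of the original code):
# for an instance asking for one 1024 MiB disk and one NIC, the wrapper above
# produces roughly
#   iallocator.IAReqInstanceAlloc(name="inst1.example.com",
#                                 disk_template=constants.DT_PLAIN,
#                                 vcpus=1, memory=512, spindle_use=1,
#                                 nics=[nic.ToDict()],
#                                 hypervisor="kvm", node_whitelist=None)
# plus the computed disk dicts, ready to be fed to iallocator.IAllocator.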
9919 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9920 """Computes the nics.
9922 @param op: The instance opcode
9923 @param cluster: Cluster configuration object
9924 @param default_ip: The default ip to assign
9925 @param cfg: An instance of the configuration object
9926 @param ec_id: Execution context ID
9928 @returns: The built up nics
9933 nic_mode_req = nic.get(constants.INIC_MODE, None)
9934 nic_mode = nic_mode_req
9935 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9936 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9938 net = nic.get(constants.INIC_NETWORK, None)
9939 link = nic.get(constants.NIC_LINK, None)
9940 ip = nic.get(constants.INIC_IP, None)
9942 if net is None or net.lower() == constants.VALUE_NONE:
9945 if nic_mode_req is not None or link is not None:
9946 raise errors.OpPrereqError("If network is given, no mode or link"
9947 " is allowed to be passed",
9950 # ip validity checks
9951 if ip is None or ip.lower() == constants.VALUE_NONE:
9953 elif ip.lower() == constants.VALUE_AUTO:
9954 if not op.name_check:
9955 raise errors.OpPrereqError("IP address set to auto but name checks"
9956 " have been skipped",
9960 # We defer pool operations until later, so that the iallocator has
9961 # filled in the instance's node(s)
9962 if ip.lower() == constants.NIC_IP_POOL:
9964 raise errors.OpPrereqError("if ip=pool, parameter network"
9965 " must be passed too",
9968 elif not netutils.IPAddress.IsValid(ip):
9969 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9974 # TODO: check the ip address for uniqueness
9975 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9976 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9979 # MAC address verification
9980 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9981 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9982 mac = utils.NormalizeAndValidateMac(mac)
9985 # TODO: We need to factor this out
9986 cfg.ReserveMAC(mac, ec_id)
9987 except errors.ReservationError:
9988 raise errors.OpPrereqError("MAC address %s already in use"
9989 " in cluster" % mac,
9990 errors.ECODE_NOTUNIQUE)
9992 # Build nic parameters
9995 nicparams[constants.NIC_MODE] = nic_mode
9997 nicparams[constants.NIC_LINK] = link
9999 check_params = cluster.SimpleFillNIC(nicparams)
10000 objects.NIC.CheckParameterSyntax(check_params)
10001 net_uuid = cfg.LookupNetwork(net)
10002 nics.append(objects.NIC(mac=mac, ip=nic_ip,
10003 network=net_uuid, nicparams=nicparams))
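# Illustrative sketch (hypothetical values, not part of the original code): a
# NIC spec of {constants.INIC_IP: "192.0.2.10"} with no network given becomes
# roughly
#   objects.NIC(mac=constants.VALUE_AUTO, ip="192.0.2.10", network=None,
#               nicparams={constants.NIC_MODE: <cluster default mode>})
# with the MAC left as "auto" here and generated later by cfg.GenerateMAC()
# (see LUInstanceCreate.CheckPrereq).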
10008 def _ComputeDisks(op, default_vg):
10009 """Computes the instance disks.
10011 @param op: The instance opcode
10012 @param default_vg: The default volume group to assume
10014 @return: The computed disks
10018 for disk in op.disks:
10019 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
10020 if mode not in constants.DISK_ACCESS_SET:
10021 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
10022 mode, errors.ECODE_INVAL)
10023 size = disk.get(constants.IDISK_SIZE, None)
10025 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
10028 except (TypeError, ValueError):
10029 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
10030 errors.ECODE_INVAL)
10032 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10033 if ext_provider and op.disk_template != constants.DT_EXT:
10034 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10035 " disk template, not %s" %
10036 (constants.IDISK_PROVIDER, constants.DT_EXT,
10037 op.disk_template), errors.ECODE_INVAL)
10039 data_vg = disk.get(constants.IDISK_VG, default_vg)
10041 constants.IDISK_SIZE: size,
10042 constants.IDISK_MODE: mode,
10043 constants.IDISK_VG: data_vg,
10046 if constants.IDISK_METAVG in disk:
10047 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10048 if constants.IDISK_ADOPT in disk:
10049 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10051 # For extstorage, demand the `provider' option and add any
10052 # additional parameters (ext-params) to the dict
10053 if op.disk_template == constants.DT_EXT:
10055 new_disk[constants.IDISK_PROVIDER] = ext_provider
10057 if key not in constants.IDISK_PARAMS:
10058 new_disk[key] = disk[key]
10060 raise errors.OpPrereqError("Missing provider for template '%s'" %
10061 constants.DT_EXT, errors.ECODE_INVAL)
10063 disks.append(new_disk)
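# Illustrative sketch (hypothetical input, not part of the original code):
# with default_vg="xenvg", an opcode disk spec of {constants.IDISK_SIZE: 1024}
# is normalized by the helper above into roughly
#   {constants.IDISK_SIZE: 1024,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
# with metavg/adopt/provider keys only added when the spec (or the disk
# template) requires them.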
10068 def _ComputeFullBeParams(op, cluster):
10069 """Computes the full beparams.
10071 @param op: The instance opcode
10072 @param cluster: The cluster config object
10074 @return: The fully filled beparams
10077 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10078 for param, value in op.beparams.iteritems():
10079 if value == constants.VALUE_AUTO:
10080 op.beparams[param] = default_beparams[param]
10081 objects.UpgradeBeParams(op.beparams)
10082 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10083 return cluster.SimpleFillBE(op.beparams)
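# Illustrative sketch (hypothetical values, not part of the original code):
# with op.beparams = {constants.BE_VCPUS: constants.VALUE_AUTO,
#                     constants.BE_MAXMEM: 512}
# the helper above first replaces the "auto" VCPU count with the cluster
# default, upgrades any legacy "memory" setting to minmem/maxmem, enforces the
# expected value types, and returns the dict completed with all remaining
# cluster defaults via cluster.SimpleFillBE().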
10086 def _CheckOpportunisticLocking(op):
10087 """Generate error if opportunistic locking is not possible.
10090 if op.opportunistic_locking and not op.iallocator:
10091 raise errors.OpPrereqError("Opportunistic locking is only available in"
10092 " combination with an instance allocator",
10093 errors.ECODE_INVAL)
10096 class LUInstanceCreate(LogicalUnit):
10097 """Create an instance.
10100 HPATH = "instance-add"
10101 HTYPE = constants.HTYPE_INSTANCE
10104 def CheckArguments(self):
10105 """Check arguments.
10108 # do not require name_check to ease forward/backward compatibility
10110 if self.op.no_install and self.op.start:
10111 self.LogInfo("No-installation mode selected, disabling startup")
10112 self.op.start = False
10113 # validate/normalize the instance name
10114 self.op.instance_name = \
10115 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10117 if self.op.ip_check and not self.op.name_check:
10118 # TODO: make the ip check more flexible and not depend on the name check
10119 raise errors.OpPrereqError("Cannot do IP address check without a name"
10120 " check", errors.ECODE_INVAL)
10122 # check nics' parameter names
10123 for nic in self.op.nics:
10124 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10126 # check disks: parameter names and consistent adopt/no-adopt strategy
10127 has_adopt = has_no_adopt = False
10128 for disk in self.op.disks:
10129 if self.op.disk_template != constants.DT_EXT:
10130 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10131 if constants.IDISK_ADOPT in disk:
10134 has_no_adopt = True
10135 if has_adopt and has_no_adopt:
10136 raise errors.OpPrereqError("Either all disks are adopted or none is",
10137 errors.ECODE_INVAL)
10139 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10140 raise errors.OpPrereqError("Disk adoption is not supported for the"
10141 " '%s' disk template" %
10142 self.op.disk_template,
10143 errors.ECODE_INVAL)
10144 if self.op.iallocator is not None:
10145 raise errors.OpPrereqError("Disk adoption not allowed with an"
10146 " iallocator script", errors.ECODE_INVAL)
10147 if self.op.mode == constants.INSTANCE_IMPORT:
10148 raise errors.OpPrereqError("Disk adoption not allowed for"
10149 " instance import", errors.ECODE_INVAL)
10151 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10152 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10153 " but no 'adopt' parameter given" %
10154 self.op.disk_template,
10155 errors.ECODE_INVAL)
10157 self.adopt_disks = has_adopt
10159 # instance name verification
10160 if self.op.name_check:
10161 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10162 self.op.instance_name = self.hostname1.name
10163 # used in CheckPrereq for ip ping check
10164 self.check_ip = self.hostname1.ip
10166 self.check_ip = None
10168 # file storage checks
10169 if (self.op.file_driver and
10170 not self.op.file_driver in constants.FILE_DRIVER):
10171 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10172 self.op.file_driver, errors.ECODE_INVAL)
10174 if self.op.disk_template == constants.DT_FILE:
10175 opcodes.RequireFileStorage()
10176 elif self.op.disk_template == constants.DT_SHARED_FILE:
10177 opcodes.RequireSharedFileStorage()
10179 ### Node/iallocator related checks
10180 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10182 if self.op.pnode is not None:
10183 if self.op.disk_template in constants.DTS_INT_MIRROR:
10184 if self.op.snode is None:
10185 raise errors.OpPrereqError("The networked disk templates need"
10186 " a mirror node", errors.ECODE_INVAL)
10187 elif self.op.snode:
10188 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10190 self.op.snode = None
10192 _CheckOpportunisticLocking(self.op)
10194 self._cds = _GetClusterDomainSecret()
10196 if self.op.mode == constants.INSTANCE_IMPORT:
10197 # On import force_variant must be True, because if we forced it at
10198 # initial install, our only chance when importing it back is that it works again
10200 self.op.force_variant = True
10202 if self.op.no_install:
10203 self.LogInfo("No-installation mode has no effect during import")
10205 elif self.op.mode == constants.INSTANCE_CREATE:
10206 if self.op.os_type is None:
10207 raise errors.OpPrereqError("No guest OS specified",
10208 errors.ECODE_INVAL)
10209 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10210 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10211 " installation" % self.op.os_type,
10212 errors.ECODE_STATE)
10213 if self.op.disk_template is None:
10214 raise errors.OpPrereqError("No disk template specified",
10215 errors.ECODE_INVAL)
10217 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10218 # Check handshake to ensure both clusters have the same domain secret
10219 src_handshake = self.op.source_handshake
10220 if not src_handshake:
10221 raise errors.OpPrereqError("Missing source handshake",
10222 errors.ECODE_INVAL)
10224 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
10227 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10228 errors.ECODE_INVAL)
10230 # Load and check source CA
10231 self.source_x509_ca_pem = self.op.source_x509_ca
10232 if not self.source_x509_ca_pem:
10233 raise errors.OpPrereqError("Missing source X509 CA",
10234 errors.ECODE_INVAL)
10237 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
10239 except OpenSSL.crypto.Error, err:
10240 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10241 (err, ), errors.ECODE_INVAL)
10243 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10244 if errcode is not None:
10245 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10246 errors.ECODE_INVAL)
10248 self.source_x509_ca = cert
10250 src_instance_name = self.op.source_instance_name
10251 if not src_instance_name:
10252 raise errors.OpPrereqError("Missing source instance name",
10253 errors.ECODE_INVAL)
10255 self.source_instance_name = \
10256 netutils.GetHostname(name=src_instance_name).name
10259 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10260 self.op.mode, errors.ECODE_INVAL)
10262 def ExpandNames(self):
10263 """ExpandNames for CreateInstance.
10265 Figure out the right locks for instance creation.
10268 self.needed_locks = {}
10270 instance_name = self.op.instance_name
10271 # this is just a preventive check, but someone might still add this
10272 # instance in the meantime, and creation will fail at lock-add time
10273 if instance_name in self.cfg.GetInstanceList():
10274 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10275 instance_name, errors.ECODE_EXISTS)
10277 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10279 if self.op.iallocator:
10280 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10281 # specifying a group on instance creation and then selecting nodes from
10283 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10284 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10286 if self.op.opportunistic_locking:
10287 self.opportunistic_locks[locking.LEVEL_NODE] = True
10288 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10290 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10291 nodelist = [self.op.pnode]
10292 if self.op.snode is not None:
10293 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10294 nodelist.append(self.op.snode)
10295 self.needed_locks[locking.LEVEL_NODE] = nodelist
10297 # in case of import lock the source node too
10298 if self.op.mode == constants.INSTANCE_IMPORT:
10299 src_node = self.op.src_node
10300 src_path = self.op.src_path
10302 if src_path is None:
10303 self.op.src_path = src_path = self.op.instance_name
10305 if src_node is None:
10306 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10307 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10308 self.op.src_node = None
10309 if os.path.isabs(src_path):
10310 raise errors.OpPrereqError("Importing an instance from a path"
10311 " requires a source node option",
10312 errors.ECODE_INVAL)
10314 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10315 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10316 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10317 if not os.path.isabs(src_path):
10318 self.op.src_path = src_path = \
10319 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10321 self.needed_locks[locking.LEVEL_NODE_RES] = \
10322 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10324 def _RunAllocator(self):
10325 """Run the allocator based on input opcode.
10328 if self.op.opportunistic_locking:
10329 # Only consider nodes for which a lock is held
10330 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10332 node_whitelist = None
10334 #TODO Export network to iallocator so that it chooses a pnode
10335 # in a nodegroup that has the desired network connected to it
10336 req = _CreateInstanceAllocRequest(self.op, self.disks,
10337 self.nics, self.be_full,
10339 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10341 ial.Run(self.op.iallocator)
10343 if not ial.success:
10344 # When opportunistic locks are used only a temporary failure is generated
10345 if self.op.opportunistic_locking:
10346 ecode = errors.ECODE_TEMP_NORES
10348 ecode = errors.ECODE_NORES
10350 raise errors.OpPrereqError("Can't compute nodes using"
10351 " iallocator '%s': %s" %
10352 (self.op.iallocator, ial.info),
10355 self.op.pnode = ial.result[0]
10356 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10357 self.op.instance_name, self.op.iallocator,
10358 utils.CommaJoin(ial.result))
10360 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10362 if req.RequiredNodes() == 2:
10363 self.op.snode = ial.result[1]
10365 def BuildHooksEnv(self):
10366 """Build hooks env.
10368 This runs on master, primary and secondary nodes of the instance.
10372 "ADD_MODE": self.op.mode,
10374 if self.op.mode == constants.INSTANCE_IMPORT:
10375 env["SRC_NODE"] = self.op.src_node
10376 env["SRC_PATH"] = self.op.src_path
10377 env["SRC_IMAGES"] = self.src_images
10379 env.update(_BuildInstanceHookEnv(
10380 name=self.op.instance_name,
10381 primary_node=self.op.pnode,
10382 secondary_nodes=self.secondaries,
10383 status=self.op.start,
10384 os_type=self.op.os_type,
10385 minmem=self.be_full[constants.BE_MINMEM],
10386 maxmem=self.be_full[constants.BE_MAXMEM],
10387 vcpus=self.be_full[constants.BE_VCPUS],
10388 nics=_NICListToTuple(self, self.nics),
10389 disk_template=self.op.disk_template,
10390 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10391 for d in self.disks],
10394 hypervisor_name=self.op.hypervisor,
10400 def BuildHooksNodes(self):
10401 """Build hooks nodes.
10404 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10407 def _ReadExportInfo(self):
10408 """Reads the export information from disk.
10410 It will override the opcode source node and path with the actual
10411 information, if these two were not specified before.
10413 @return: the export information
10416 assert self.op.mode == constants.INSTANCE_IMPORT
10418 src_node = self.op.src_node
10419 src_path = self.op.src_path
10421 if src_node is None:
10422 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10423 exp_list = self.rpc.call_export_list(locked_nodes)
10425 for node in exp_list:
10426 if exp_list[node].fail_msg:
10428 if src_path in exp_list[node].payload:
10430 self.op.src_node = src_node = node
10431 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
10435 raise errors.OpPrereqError("No export found for relative path %s" %
10436 src_path, errors.ECODE_INVAL)
10438 _CheckNodeOnline(self, src_node)
10439 result = self.rpc.call_export_info(src_node, src_path)
10440 result.Raise("No export or invalid export found in dir %s" % src_path)
10442 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10443 if not export_info.has_section(constants.INISECT_EXP):
10444 raise errors.ProgrammerError("Corrupted export config",
10445 errors.ECODE_ENVIRON)
10447 ei_version = export_info.get(constants.INISECT_EXP, "version")
10448 if (int(ei_version) != constants.EXPORT_VERSION):
10449 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10450 (ei_version, constants.EXPORT_VERSION),
10451 errors.ECODE_ENVIRON)
10454 def _ReadExportParams(self, einfo):
10455 """Use export parameters as defaults.
10457 If the opcode doesn't specify (i.e. override) some instance
10458 parameters, try to take them from the export information, if it
10459 declares them.
10462 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10464 if self.op.disk_template is None:
10465 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10466 self.op.disk_template = einfo.get(constants.INISECT_INS,
10468 if self.op.disk_template not in constants.DISK_TEMPLATES:
10469 raise errors.OpPrereqError("Disk template specified in configuration"
10470 " file is not one of the allowed values:"
10472 " ".join(constants.DISK_TEMPLATES),
10473 errors.ECODE_INVAL)
10475 raise errors.OpPrereqError("No disk template specified and the export"
10476 " is missing the disk_template information",
10477 errors.ECODE_INVAL)
10479 if not self.op.disks:
10481 # TODO: import the disk iv_name too
10482 for idx in range(constants.MAX_DISKS):
10483 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10484 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10485 disks.append({constants.IDISK_SIZE: disk_sz})
10486 self.op.disks = disks
10487 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10488 raise errors.OpPrereqError("No disk info specified and the export"
10489 " is missing the disk information",
10490 errors.ECODE_INVAL)
10492 if not self.op.nics:
10494 for idx in range(constants.MAX_NICS):
10495 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10497 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10498 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10503 self.op.nics = nics
10505 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10506 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10508 if (self.op.hypervisor is None and
10509 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10510 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10512 if einfo.has_section(constants.INISECT_HYP):
10513 # use the export parameters but do not override the ones
10514 # specified by the user
10515 for name, value in einfo.items(constants.INISECT_HYP):
10516 if name not in self.op.hvparams:
10517 self.op.hvparams[name] = value
10519 if einfo.has_section(constants.INISECT_BEP):
10520 # use the parameters, without overriding
10521 for name, value in einfo.items(constants.INISECT_BEP):
10522 if name not in self.op.beparams:
10523 self.op.beparams[name] = value
10524 # Compatibility for the old "memory" be param
10525 if name == constants.BE_MEMORY:
10526 if constants.BE_MAXMEM not in self.op.beparams:
10527 self.op.beparams[constants.BE_MAXMEM] = value
10528 if constants.BE_MINMEM not in self.op.beparams:
10529 self.op.beparams[constants.BE_MINMEM] = value
10531 # try to read the parameters old style, from the main section
10532 for name in constants.BES_PARAMETERS:
10533 if (name not in self.op.beparams and
10534 einfo.has_option(constants.INISECT_INS, name)):
10535 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10537 if einfo.has_section(constants.INISECT_OSP):
10538 # use the parameters, without overriding
10539 for name, value in einfo.items(constants.INISECT_OSP):
10540 if name not in self.op.osparams:
10541 self.op.osparams[name] = value
10543 def _RevertToDefaults(self, cluster):
10544 """Revert the instance parameters to the default values.
10548 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10549 for name in self.op.hvparams.keys():
10550 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10551 del self.op.hvparams[name]
10553 be_defs = cluster.SimpleFillBE({})
10554 for name in self.op.beparams.keys():
10555 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10556 del self.op.beparams[name]
10558 nic_defs = cluster.SimpleFillNIC({})
10559 for nic in self.op.nics:
10560 for name in constants.NICS_PARAMETERS:
10561 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10564 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10565 for name in self.op.osparams.keys():
10566 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10567 del self.op.osparams[name]
10569 def _CalculateFileStorageDir(self):
10570 """Calculate final instance file storage dir.
10573 # file storage dir calculation/check
10574 self.instance_file_storage_dir = None
10575 if self.op.disk_template in constants.DTS_FILEBASED:
10576 # build the full file storage dir path
10579 if self.op.disk_template == constants.DT_SHARED_FILE:
10580 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10582 get_fsd_fn = self.cfg.GetFileStorageDir
10584 cfg_storagedir = get_fsd_fn()
10585 if not cfg_storagedir:
10586 raise errors.OpPrereqError("Cluster file storage dir not defined",
10587 errors.ECODE_STATE)
10588 joinargs.append(cfg_storagedir)
10590 if self.op.file_storage_dir is not None:
10591 joinargs.append(self.op.file_storage_dir)
10593 joinargs.append(self.op.instance_name)
10595 # pylint: disable=W0142
10596 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
10598 def CheckPrereq(self): # pylint: disable=R0914
10599 """Check prerequisites.
10602 self._CalculateFileStorageDir()
10604 if self.op.mode == constants.INSTANCE_IMPORT:
10605 export_info = self._ReadExportInfo()
10606 self._ReadExportParams(export_info)
10607 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10609 self._old_instance_name = None
10611 if (not self.cfg.GetVGName() and
10612 self.op.disk_template not in constants.DTS_NOT_LVM):
10613 raise errors.OpPrereqError("Cluster does not support lvm-based"
10614 " instances", errors.ECODE_STATE)
10616 if (self.op.hypervisor is None or
10617 self.op.hypervisor == constants.VALUE_AUTO):
10618 self.op.hypervisor = self.cfg.GetHypervisorType()
10620 cluster = self.cfg.GetClusterInfo()
10621 enabled_hvs = cluster.enabled_hypervisors
10622 if self.op.hypervisor not in enabled_hvs:
10623 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10625 (self.op.hypervisor, ",".join(enabled_hvs)),
10626 errors.ECODE_STATE)
10628 # Check tag validity
10629 for tag in self.op.tags:
10630 objects.TaggableObject.ValidateTag(tag)
10632 # check hypervisor parameter syntax (locally)
10633 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10634 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10636 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10637 hv_type.CheckParameterSyntax(filled_hvp)
10638 self.hv_full = filled_hvp
10639 # check that we don't specify global parameters on an instance
10640 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10641 "instance", "cluster")
10643 # fill and remember the beparams dict
10644 self.be_full = _ComputeFullBeParams(self.op, cluster)
10646 # build os parameters
10647 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10649 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
10651 if self.op.identify_defaults:
10652 self._RevertToDefaults(cluster)
10655 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10656 self.proc.GetECId())
10658 # disk checks/pre-build
10659 default_vg = self.cfg.GetVGName()
10660 self.disks = _ComputeDisks(self.op, default_vg)
10662 if self.op.mode == constants.INSTANCE_IMPORT:
10664 for idx in range(len(self.disks)):
10665 option = "disk%d_dump" % idx
10666 if export_info.has_option(constants.INISECT_INS, option):
10667 # FIXME: are the old os-es, disk sizes, etc. useful?
10668 export_name = export_info.get(constants.INISECT_INS, option)
10669 image = utils.PathJoin(self.op.src_path, export_name)
10670 disk_images.append(image)
10672 disk_images.append(False)
10674 self.src_images = disk_images
10676 if self.op.instance_name == self._old_instance_name:
10677 for idx, nic in enumerate(self.nics):
10678 if nic.mac == constants.VALUE_AUTO:
10679 nic_mac_ini = "nic%d_mac" % idx
10680 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10682 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10684 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10685 if self.op.ip_check:
10686 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10687 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10688 (self.check_ip, self.op.instance_name),
10689 errors.ECODE_NOTUNIQUE)
10691 #### mac address generation
10692 # By generating here the mac address both the allocator and the hooks get
10693 # the real final mac address rather than the 'auto' or 'generate' value.
10694 # There is a race condition between the generation and the instance object
10695 # creation, which means that we know the mac is valid now, but we're not
10696 # sure it will be when we actually add the instance. If things go bad
10697 # adding the instance will abort because of a duplicate mac, and the
10698 # creation job will fail.
10699 for nic in self.nics:
10700 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10701 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10705 if self.op.iallocator is not None:
10706 self._RunAllocator()
10708 # Release all unneeded node locks
10709 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10710 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10711 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10712 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10714 assert (self.owned_locks(locking.LEVEL_NODE) ==
10715 self.owned_locks(locking.LEVEL_NODE_RES)), \
10716 "Node locks differ from node resource locks"
10718 #### node related checks
10720 # check primary node
10721 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10722 assert self.pnode is not None, \
10723 "Cannot retrieve locked node %s" % self.op.pnode
10725 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10726 pnode.name, errors.ECODE_STATE)
10728 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10729 pnode.name, errors.ECODE_STATE)
10730 if not pnode.vm_capable:
10731 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10732 " '%s'" % pnode.name, errors.ECODE_STATE)
10734 self.secondaries = []
10736 # Fill in any IPs from IP pools. This must happen here, because we need to
10737 # know the nic's primary node, as specified by the iallocator
10738 for idx, nic in enumerate(self.nics):
10739 net_uuid = nic.network
10740 if net_uuid is not None:
10741 nobj = self.cfg.GetNetwork(net_uuid)
10742 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10743 if netparams is None:
10744 raise errors.OpPrereqError("No netparams found for network"
10745 " %s. Propably not connected to"
10746 " node's %s nodegroup" %
10747 (nobj.name, self.pnode.name),
10748 errors.ECODE_INVAL)
10749 self.LogInfo("NIC/%d inherits netparams %s" %
10750 (idx, netparams.values()))
10751 nic.nicparams = dict(netparams)
10752 if nic.ip is not None:
10753 if nic.ip.lower() == constants.NIC_IP_POOL:
10755 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10756 except errors.ReservationError:
10757 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10758 " from the address pool" % idx,
10759 errors.ECODE_STATE)
10760 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10763 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10764 except errors.ReservationError:
10765 raise errors.OpPrereqError("IP address %s already in use"
10766 " or does not belong to network %s" %
10767 (nic.ip, nobj.name),
10768 errors.ECODE_NOTUNIQUE)
10770 # net is None, ip None or given
10771 elif self.op.conflicts_check:
10772 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10774 # mirror node verification
10775 if self.op.disk_template in constants.DTS_INT_MIRROR:
10776 if self.op.snode == pnode.name:
10777 raise errors.OpPrereqError("The secondary node cannot be the"
10778 " primary node", errors.ECODE_INVAL)
10779 _CheckNodeOnline(self, self.op.snode)
10780 _CheckNodeNotDrained(self, self.op.snode)
10781 _CheckNodeVmCapable(self, self.op.snode)
10782 self.secondaries.append(self.op.snode)
10784 snode = self.cfg.GetNodeInfo(self.op.snode)
10785 if pnode.group != snode.group:
10786 self.LogWarning("The primary and secondary nodes are in two"
10787 " different node groups; the disk parameters"
10788 " from the first disk's node group will be"
10791 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10793 if self.op.disk_template in constants.DTS_INT_MIRROR:
10794 nodes.append(snode)
10795 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10796 if compat.any(map(has_es, nodes)):
10797 raise errors.OpPrereqError("Disk template %s not supported with"
10798 " exclusive storage" % self.op.disk_template,
10799 errors.ECODE_STATE)
10801 nodenames = [pnode.name] + self.secondaries
10803 if not self.adopt_disks:
10804 if self.op.disk_template == constants.DT_RBD:
10805 # _CheckRADOSFreeSpace() is just a placeholder.
10806 # Any function that checks prerequisites can be placed here.
10807 # Check if there is enough space on the RADOS cluster.
10808 _CheckRADOSFreeSpace()
10809 elif self.op.disk_template == constants.DT_EXT:
10810 # FIXME: Function that checks prereqs if needed
10813 # Check lv size requirements, if not adopting
10814 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10815 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10817 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10818 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10819 disk[constants.IDISK_ADOPT])
10820 for disk in self.disks])
10821 if len(all_lvs) != len(self.disks):
10822 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10823 errors.ECODE_INVAL)
10824 for lv_name in all_lvs:
10826 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
10827 # to ReserveLV use the same syntax
10828 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10829 except errors.ReservationError:
10830 raise errors.OpPrereqError("LV named %s used by another instance" %
10831 lv_name, errors.ECODE_NOTUNIQUE)
10833 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10834 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10836 node_lvs = self.rpc.call_lv_list([pnode.name],
10837 vg_names.payload.keys())[pnode.name]
10838 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10839 node_lvs = node_lvs.payload
10841 delta = all_lvs.difference(node_lvs.keys())
10843 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10844 utils.CommaJoin(delta),
10845 errors.ECODE_INVAL)
10846 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10848 raise errors.OpPrereqError("Online logical volumes found, cannot"
10849 " adopt: %s" % utils.CommaJoin(online_lvs),
10850 errors.ECODE_STATE)
10851 # update the size of each disk based on what is found
10852 for dsk in self.disks:
10853 dsk[constants.IDISK_SIZE] = \
10854 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10855 dsk[constants.IDISK_ADOPT])][0]))
10857 elif self.op.disk_template == constants.DT_BLOCK:
10858 # Normalize and de-duplicate device paths
10859 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10860 for disk in self.disks])
10861 if len(all_disks) != len(self.disks):
10862 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10863 errors.ECODE_INVAL)
10864 baddisks = [d for d in all_disks
10865 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10867 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10868 " cannot be adopted" %
10869 (utils.CommaJoin(baddisks),
10870 constants.ADOPTABLE_BLOCKDEV_ROOT),
10871 errors.ECODE_INVAL)
10873 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10874 list(all_disks))[pnode.name]
10875 node_disks.Raise("Cannot get block device information from node %s" %
10877 node_disks = node_disks.payload
10878 delta = all_disks.difference(node_disks.keys())
10880 raise errors.OpPrereqError("Missing block device(s): %s" %
10881 utils.CommaJoin(delta),
10882 errors.ECODE_INVAL)
10883 for dsk in self.disks:
10884 dsk[constants.IDISK_SIZE] = \
10885 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10887 # Verify instance specs
10888 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10890 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10891 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10892 constants.ISPEC_DISK_COUNT: len(self.disks),
10893 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10894 for disk in self.disks],
10895 constants.ISPEC_NIC_COUNT: len(self.nics),
10896 constants.ISPEC_SPINDLE_USE: spindle_use,
10899 group_info = self.cfg.GetNodeGroup(pnode.group)
10900 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10901 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
10902 self.op.disk_template)
10903 if not self.op.ignore_ipolicy and res:
10904 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10905 (pnode.group, group_info.name, utils.CommaJoin(res)))
10906 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10908 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10910 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10911 # check OS parameters (remotely)
10912 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10914 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10916 #TODO: _CheckExtParams (remotely)
10917 # Check parameters for extstorage
10919 # memory check on primary node
10920 #TODO(dynmem): use MINMEM for checking
10922 _CheckNodeFreeMemory(self, self.pnode.name,
10923 "creating instance %s" % self.op.instance_name,
10924 self.be_full[constants.BE_MAXMEM],
10925 self.op.hypervisor)
10927 self.dry_run_result = list(nodenames)
10929 def Exec(self, feedback_fn):
10930 """Create and add the instance to the cluster.
10933 instance = self.op.instance_name
10934 pnode_name = self.pnode.name
10936 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10937 self.owned_locks(locking.LEVEL_NODE)), \
10938 "Node locks differ from node resource locks"
10939 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10941 ht_kind = self.op.hypervisor
10942 if ht_kind in constants.HTS_REQ_PORT:
10943 network_port = self.cfg.AllocatePort()
10945 network_port = None
10947 # This is ugly, but we have a chicken-and-egg problem here:
10948 # We can only take the group disk parameters, as the instance
10949 # has no disks yet (we are generating them right here).
10950 node = self.cfg.GetNodeInfo(pnode_name)
10951 nodegroup = self.cfg.GetNodeGroup(node.group)
10952 disks = _GenerateDiskTemplate(self,
10953 self.op.disk_template,
10954 instance, pnode_name,
10957 self.instance_file_storage_dir,
10958 self.op.file_driver,
10961 self.cfg.GetGroupDiskParams(nodegroup))
10963 iobj = objects.Instance(name=instance, os=self.op.os_type,
10964 primary_node=pnode_name,
10965 nics=self.nics, disks=disks,
10966 disk_template=self.op.disk_template,
10967 admin_state=constants.ADMINST_DOWN,
10968 network_port=network_port,
10969 beparams=self.op.beparams,
10970 hvparams=self.op.hvparams,
10971 hypervisor=self.op.hypervisor,
10972 osparams=self.op.osparams,
10973 )
10976 for tag in self.op.tags:
10977 iobj.AddTag(tag)
10979 if self.adopt_disks:
10980 if self.op.disk_template == constants.DT_PLAIN:
10981 # rename LVs to the newly-generated names; we need to construct
10982 # 'fake' LV disks with the old data, plus the new unique_id
10983 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10985 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10986 rename_to.append(t_dsk.logical_id)
10987 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10988 self.cfg.SetDiskID(t_dsk, pnode_name)
10989 result = self.rpc.call_blockdev_rename(pnode_name,
10990 zip(tmp_disks, rename_to))
10991 result.Raise("Failed to rename adopted LVs")
10993 feedback_fn("* creating instance disks...")
10995 _CreateDisks(self, iobj)
10996 except errors.OpExecError:
10997 self.LogWarning("Device creation failed, reverting...")
10999 _RemoveDisks(self, iobj)
11001 self.cfg.ReleaseDRBDMinors(instance)
11004 feedback_fn("adding instance %s to cluster config" % instance)
11006 self.cfg.AddInstance(iobj, self.proc.GetECId())
11008 # Declare that we don't want to remove the instance lock anymore, as we've
11009 # added the instance to the config
11010 del self.remove_locks[locking.LEVEL_INSTANCE]
11012 if self.op.mode == constants.INSTANCE_IMPORT:
11013 # Release unused nodes
11014 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
11016 # Release all nodes
11017 _ReleaseLocks(self, locking.LEVEL_NODE)
11020 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
11021 feedback_fn("* wiping instance disks...")
11023 _WipeDisks(self, iobj)
11024 except errors.OpExecError, err:
11025 logging.exception("Wiping disks failed")
11026 self.LogWarning("Wiping instance disks failed (%s)", err)
11027 disk_abort = True
11029 if disk_abort:
11030 # Something is already wrong with the disks, don't do anything else
11031 pass
11032 elif self.op.wait_for_sync:
11033 disk_abort = not _WaitForSync(self, iobj)
11034 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11035 # make sure the disks are not degraded (still sync-ing is ok)
11036 feedback_fn("* checking mirrors status")
11037 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11038 else:
11039 disk_abort = False
11041 if disk_abort:
11042 _RemoveDisks(self, iobj)
11043 self.cfg.RemoveInstance(iobj.name)
11044 # Make sure the instance lock gets removed
11045 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11046 raise errors.OpExecError("There are some degraded disks for"
11047 " this instance")
11049 # Release all node resource locks
11050 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11052 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11053 # we need to set the disks ID to the primary node, since the
11054 # preceding code might or might not have done it, depending on
11055 # disk template and other options
11056 for disk in iobj.disks:
11057 self.cfg.SetDiskID(disk, pnode_name)
11058 if self.op.mode == constants.INSTANCE_CREATE:
11059 if not self.op.no_install:
11060 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11061 not self.op.wait_for_sync)
11062 if pause_sync:
11063 feedback_fn("* pausing disk sync to install instance OS")
11064 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11067 for idx, success in enumerate(result.payload):
11068 if not success:
11069 logging.warn("pause-sync of instance %s for disk %d failed",
11070 instance, idx)
11072 feedback_fn("* running the instance OS create scripts...")
11073 # FIXME: pass debug option from opcode to backend
11074 os_add_result = \
11075 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11076 self.op.debug_level)
11078 feedback_fn("* resuming disk sync")
11079 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11082 for idx, success in enumerate(result.payload):
11083 if not success:
11084 logging.warn("resume-sync of instance %s for disk %d failed",
11085 instance, idx)
11087 os_add_result.Raise("Could not add os for instance %s"
11088 " on node %s" % (instance, pnode_name))
11091 if self.op.mode == constants.INSTANCE_IMPORT:
11092 feedback_fn("* running the instance OS import scripts...")
11094 transfers = []
11096 for idx, image in enumerate(self.src_images):
11097 if not image:
11098 continue
11100 # FIXME: pass debug option from opcode to backend
11101 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11102 constants.IEIO_FILE, (image, ),
11103 constants.IEIO_SCRIPT,
11104 (iobj.disks[idx], idx),
11106 transfers.append(dt)
11108 import_result = \
11109 masterd.instance.TransferInstanceData(self, feedback_fn,
11110 self.op.src_node, pnode_name,
11111 self.pnode.secondary_ip,
11113 if not compat.all(import_result):
11114 self.LogWarning("Some disks for instance %s on node %s were not"
11115 " imported successfully" % (instance, pnode_name))
11117 rename_from = self._old_instance_name
11119 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11120 feedback_fn("* preparing remote import...")
11121 # The source cluster will stop the instance before attempting to make
11122 # a connection. In some cases stopping an instance can take a long
11123 # time, hence the shutdown timeout is added to the connection
11125 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11126 self.op.source_shutdown_timeout)
11127 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11129 assert iobj.primary_node == self.pnode.name
11131 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11132 self.source_x509_ca,
11133 self._cds, timeouts)
11134 if not compat.all(disk_results):
11135 # TODO: Should the instance still be started, even if some disks
11136 # failed to import (valid for local imports, too)?
11137 self.LogWarning("Some disks for instance %s on node %s were not"
11138 " imported successfully" % (instance, pnode_name))
11140 rename_from = self.source_instance_name
11143 # also checked in the prereq part
11144 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11147 # Run rename script on newly imported instance
11148 assert iobj.name == instance
11149 feedback_fn("Running rename script for %s" % instance)
11150 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11152 self.op.debug_level)
11153 if result.fail_msg:
11154 self.LogWarning("Failed to run rename script for %s on node"
11155 " %s: %s" % (instance, pnode_name, result.fail_msg))
11157 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11160 iobj.admin_state = constants.ADMINST_UP
11161 self.cfg.Update(iobj, feedback_fn)
11162 logging.info("Starting instance %s on node %s", instance, pnode_name)
11163 feedback_fn("* starting instance...")
11164 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11166 result.Raise("Could not start instance")
11168 return list(iobj.all_nodes)
11171 class LUInstanceMultiAlloc(NoHooksLU):
11172 """Allocates multiple instances at the same time.
11177 def CheckArguments(self):
11178 """Check arguments.
11181 nodes = []
11182 for inst in self.op.instances:
11183 if inst.iallocator is not None:
11184 raise errors.OpPrereqError("iallocator must not be set on"
11185 " instance objects", errors.ECODE_INVAL)
11186 nodes.append(bool(inst.pnode))
11187 if inst.disk_template in constants.DTS_INT_MIRROR:
11188 nodes.append(bool(inst.snode))
11190 has_nodes = compat.any(nodes)
11191 if compat.all(nodes) ^ has_nodes:
11192 raise errors.OpPrereqError("Some instance objects provide"
11193 " pnode/snode while others do not",
11194 errors.ECODE_INVAL)
11196 if self.op.iallocator is None:
11197 default_iallocator = self.cfg.GetDefaultIAllocator()
11198 if default_iallocator and has_nodes:
11199 self.op.iallocator = default_iallocator
11201 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11202 " given and no cluster-wide default"
11203 " iallocator found; please specify either"
11204 " an iallocator or nodes on the instances"
11205 " or set a cluster-wide default iallocator",
11206 errors.ECODE_INVAL)
11208 _CheckOpportunisticLocking(self.op)
11210 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11211 if dups:
11212 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11213 utils.CommaJoin(dups), errors.ECODE_INVAL)
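# Rough example of the duplicate check above, assuming utils.FindDuplicates
# reports each repeated value once: instance names ["a", "b", "a"] would give
# dups == ["a"] and abort the whole multi-allocation with ECODE_INVAL.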
11215 def ExpandNames(self):
11216 """Calculate the locks.
11219 self.share_locks = _ShareAll()
11220 self.needed_locks = {
11221 # iallocator will select nodes and even if no iallocator is used,
11222 # collisions with LUInstanceCreate should be avoided
11223 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11224 }
11226 if self.op.iallocator:
11227 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11228 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11230 if self.op.opportunistic_locking:
11231 self.opportunistic_locks[locking.LEVEL_NODE] = True
11232 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11233 else:
11234 nodeslist = []
11235 for inst in self.op.instances:
11236 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11237 nodeslist.append(inst.pnode)
11238 if inst.snode is not None:
11239 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11240 nodeslist.append(inst.snode)
11242 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11243 # Lock resources of instance's primary and secondary nodes (copy to
11244 # prevent accidential modification)
11245 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11247 def CheckPrereq(self):
11248 """Check prerequisite.
11251 cluster = self.cfg.GetClusterInfo()
11252 default_vg = self.cfg.GetVGName()
11253 ec_id = self.proc.GetECId()
11255 if self.op.opportunistic_locking:
11256 # Only consider nodes for which a lock is held
11257 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11258 else:
11259 node_whitelist = None
11261 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11262 _ComputeNics(op, cluster, None,
11264 _ComputeFullBeParams(op, cluster),
11265 node_whitelist)
11266 for op in self.op.instances]
11268 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11269 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11271 ial.Run(self.op.iallocator)
11273 if not ial.success:
11274 raise errors.OpPrereqError("Can't compute nodes using"
11275 " iallocator '%s': %s" %
11276 (self.op.iallocator, ial.info),
11277 errors.ECODE_NORES)
11279 self.ia_result = ial.result
11281 if self.op.dry_run:
11282 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11283 constants.JOB_IDS_KEY: [],
11284 })
11286 def _ConstructPartialResult(self):
11287 """Constructs the partial result.
11290 (allocatable, failed) = self.ia_result
11291 return {
11292 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11293 map(compat.fst, allocatable),
11294 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11295 }
11297 def Exec(self, feedback_fn):
11298 """Executes the opcode.
11301 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11302 (allocatable, failed) = self.ia_result
11304 jobs = []
11305 for (name, nodes) in allocatable:
11306 op = op2inst.pop(name)
11308 if len(nodes) > 1:
11309 (op.pnode, op.snode) = nodes
11310 else:
11311 (op.pnode,) = nodes
11313 jobs.append([op])
11315 missing = set(op2inst.keys()) - set(failed)
11316 assert not missing, \
11317 "Iallocator returned an incomplete result: %s" % utils.CommaJoin(missing)
11319 return ResultWithJobs(jobs, **self._ConstructPartialResult())
11322 def _CheckRADOSFreeSpace():
11323 """Compute disk size requirements inside the RADOS cluster.
11326 # For the RADOS cluster we assume there is always enough space.
11327 pass
11330 class LUInstanceConsole(NoHooksLU):
11331 """Connect to an instance's console.
11333 This is somewhat special in that it returns the command line that
11334 you need to run on the master node in order to connect to the
11335 console.
11340 def ExpandNames(self):
11341 self.share_locks = _ShareAll()
11342 self._ExpandAndLockInstance()
11344 def CheckPrereq(self):
11345 """Check prerequisites.
11347 This checks that the instance is in the cluster.
11350 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11351 assert self.instance is not None, \
11352 "Cannot retrieve locked instance %s" % self.op.instance_name
11353 _CheckNodeOnline(self, self.instance.primary_node)
11355 def Exec(self, feedback_fn):
11356 """Connect to the console of an instance
11359 instance = self.instance
11360 node = instance.primary_node
11362 node_insts = self.rpc.call_instance_list([node],
11363 [instance.hypervisor])[node]
11364 node_insts.Raise("Can't get node information from %s" % node)
11366 if instance.name not in node_insts.payload:
11367 if instance.admin_state == constants.ADMINST_UP:
11368 state = constants.INSTST_ERRORDOWN
11369 elif instance.admin_state == constants.ADMINST_DOWN:
11370 state = constants.INSTST_ADMINDOWN
11371 else:
11372 state = constants.INSTST_ADMINOFFLINE
11373 raise errors.OpExecError("Instance %s is not running (state %s)" %
11374 (instance.name, state))
11376 logging.debug("Connecting to console of %s on %s", instance.name, node)
11378 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11381 def _GetInstanceConsole(cluster, instance):
11382 """Returns console information for an instance.
11384 @type cluster: L{objects.Cluster}
11385 @type instance: L{objects.Instance}
11389 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11390 # beparams and hvparams are passed separately, to avoid editing the
11391 # instance and then saving the defaults in the instance itself.
11392 hvparams = cluster.FillHV(instance)
11393 beparams = cluster.FillBE(instance)
11394 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11396 assert console.instance == instance.name
11397 assert console.Validate()
11399 return console.ToDict()
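# Hedged usage sketch: only this plain dict crosses the RPC/LUXI boundary; the
# caller (e.g. the gnt-instance console client code) is assumed to rebuild a
# console object from it, roughly objects.InstanceConsole.FromDict(result), and
# then execute the command it describes on the master node.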
11402 class LUInstanceReplaceDisks(LogicalUnit):
11403 """Replace the disks of an instance.
11406 HPATH = "mirrors-replace"
11407 HTYPE = constants.HTYPE_INSTANCE
11410 def CheckArguments(self):
11411 """Check arguments.
11414 remote_node = self.op.remote_node
11415 ialloc = self.op.iallocator
11416 if self.op.mode == constants.REPLACE_DISK_CHG:
11417 if remote_node is None and ialloc is None:
11418 raise errors.OpPrereqError("When changing the secondary either an"
11419 " iallocator script must be used or the"
11420 " new node given", errors.ECODE_INVAL)
11422 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11424 elif remote_node is not None or ialloc is not None:
11425 # Not replacing the secondary
11426 raise errors.OpPrereqError("The iallocator and new node options can"
11427 " only be used when changing the"
11428 " secondary node", errors.ECODE_INVAL)
11430 def ExpandNames(self):
11431 self._ExpandAndLockInstance()
11433 assert locking.LEVEL_NODE not in self.needed_locks
11434 assert locking.LEVEL_NODE_RES not in self.needed_locks
11435 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11437 assert self.op.iallocator is None or self.op.remote_node is None, \
11438 "Conflicting options"
11440 if self.op.remote_node is not None:
11441 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11443 # Warning: do not remove the locking of the new secondary here
11444 # unless DRBD8.AddChildren is changed to work in parallel;
11445 # currently it doesn't since parallel invocations of
11446 # FindUnusedMinor will conflict
11447 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11448 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11449 else:
11450 self.needed_locks[locking.LEVEL_NODE] = []
11451 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11453 if self.op.iallocator is not None:
11454 # iallocator will select a new node in the same group
11455 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11456 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11458 self.needed_locks[locking.LEVEL_NODE_RES] = []
11460 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11461 self.op.iallocator, self.op.remote_node,
11462 self.op.disks, self.op.early_release,
11463 self.op.ignore_ipolicy)
11465 self.tasklets = [self.replacer]
11467 def DeclareLocks(self, level):
11468 if level == locking.LEVEL_NODEGROUP:
11469 assert self.op.remote_node is None
11470 assert self.op.iallocator is not None
11471 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11473 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11474 # Lock all groups used by instance optimistically; this requires going
11475 # via the node before it's locked, requiring verification later on
11476 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11477 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11479 elif level == locking.LEVEL_NODE:
11480 if self.op.iallocator is not None:
11481 assert self.op.remote_node is None
11482 assert not self.needed_locks[locking.LEVEL_NODE]
11483 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11485 # Lock member nodes of all locked groups
11486 self.needed_locks[locking.LEVEL_NODE] = \
11487 [node_name
11488 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11489 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11490 else:
11491 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11493 self._LockInstancesNodes()
11495 elif level == locking.LEVEL_NODE_RES:
11497 self.needed_locks[locking.LEVEL_NODE_RES] = \
11498 self.needed_locks[locking.LEVEL_NODE]
11500 def BuildHooksEnv(self):
11501 """Build hooks env.
11503 This runs on the master, the primary and all the secondaries.
11506 instance = self.replacer.instance
11507 env = {
11508 "MODE": self.op.mode,
11509 "NEW_SECONDARY": self.op.remote_node,
11510 "OLD_SECONDARY": instance.secondary_nodes[0],
11511 }
11512 env.update(_BuildInstanceHookEnvByObject(self, instance))
11513 return env
11515 def BuildHooksNodes(self):
11516 """Build hooks nodes.
11519 instance = self.replacer.instance
11520 nl = [
11521 self.cfg.GetMasterNode(),
11522 instance.primary_node,
11523 ]
11524 if self.op.remote_node is not None:
11525 nl.append(self.op.remote_node)
11526 return nl, nl
11528 def CheckPrereq(self):
11529 """Check prerequisites.
11532 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11533 self.op.iallocator is None)
11535 # Verify if node group locks are still correct
11536 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11537 if owned_groups:
11538 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11540 return LogicalUnit.CheckPrereq(self)
11543 class TLReplaceDisks(Tasklet):
11544 """Replaces disks for an instance.
11546 Note: Locking is not within the scope of this class.
11549 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11550 disks, early_release, ignore_ipolicy):
11551 """Initializes this class.
11554 Tasklet.__init__(self, lu)
11557 self.instance_name = instance_name
11559 self.iallocator_name = iallocator_name
11560 self.remote_node = remote_node
11562 self.early_release = early_release
11563 self.ignore_ipolicy = ignore_ipolicy
11566 self.instance = None
11567 self.new_node = None
11568 self.target_node = None
11569 self.other_node = None
11570 self.remote_node_info = None
11571 self.node_secondary_ip = None
11574 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11575 """Compute a new secondary node using an IAllocator.
11578 req = iallocator.IAReqRelocate(name=instance_name,
11579 relocate_from=list(relocate_from))
11580 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11582 ial.Run(iallocator_name)
11584 if not ial.success:
11585 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11586 " %s" % (iallocator_name, ial.info),
11587 errors.ECODE_NORES)
11589 remote_node_name = ial.result[0]
11591 lu.LogInfo("Selected new secondary for instance '%s': %s",
11592 instance_name, remote_node_name)
11594 return remote_node_name
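# Hedged note: IAReqRelocate asks the configured iallocator script for a single
# replacement node for this instance (kept in the same node group, as the
# locking code above assumes), and ial.result is treated as a list whose first
# element is the new secondary's name.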
11596 def _FindFaultyDisks(self, node_name):
11597 """Wrapper for L{_FindFaultyInstanceDisks}.
11600 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11603 def _CheckDisksActivated(self, instance):
11604 """Checks if the instance disks are activated.
11606 @param instance: The instance to check disks
11607 @return: True if they are activated, False otherwise
11610 nodes = instance.all_nodes
11612 for idx, dev in enumerate(instance.disks):
11613 for node in nodes:
11614 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11615 self.cfg.SetDiskID(dev, node)
11617 result = _BlockdevFind(self, node, dev, instance)
11619 if result.offline:
11620 continue
11621 elif result.fail_msg or not result.payload:
11622 return False
11624 return True
11626 def CheckPrereq(self):
11627 """Check prerequisites.
11629 This checks that the instance is in the cluster.
11632 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11633 assert instance is not None, \
11634 "Cannot retrieve locked instance %s" % self.instance_name
11636 if instance.disk_template != constants.DT_DRBD8:
11637 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11638 " instances", errors.ECODE_INVAL)
11640 if len(instance.secondary_nodes) != 1:
11641 raise errors.OpPrereqError("The instance has a strange layout,"
11642 " expected one secondary but found %d" %
11643 len(instance.secondary_nodes),
11644 errors.ECODE_FAULT)
11646 instance = self.instance
11647 secondary_node = instance.secondary_nodes[0]
11649 if self.iallocator_name is None:
11650 remote_node = self.remote_node
11652 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11653 instance.name, instance.secondary_nodes)
11655 if remote_node is None:
11656 self.remote_node_info = None
11658 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11659 "Remote node '%s' is not locked" % remote_node
11661 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11662 assert self.remote_node_info is not None, \
11663 "Cannot retrieve locked node %s" % remote_node
11665 if remote_node == self.instance.primary_node:
11666 raise errors.OpPrereqError("The specified node is the primary node of"
11667 " the instance", errors.ECODE_INVAL)
11669 if remote_node == secondary_node:
11670 raise errors.OpPrereqError("The specified node is already the"
11671 " secondary node of the instance",
11672 errors.ECODE_INVAL)
11674 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11675 constants.REPLACE_DISK_CHG):
11676 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11677 errors.ECODE_INVAL)
11679 if self.mode == constants.REPLACE_DISK_AUTO:
11680 if not self._CheckDisksActivated(instance):
11681 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11682 " first" % self.instance_name,
11683 errors.ECODE_STATE)
11684 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11685 faulty_secondary = self._FindFaultyDisks(secondary_node)
11687 if faulty_primary and faulty_secondary:
11688 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11689 " one node and can not be repaired"
11690 " automatically" % self.instance_name,
11691 errors.ECODE_STATE)
11693 if faulty_primary:
11694 self.disks = faulty_primary
11695 self.target_node = instance.primary_node
11696 self.other_node = secondary_node
11697 check_nodes = [self.target_node, self.other_node]
11698 elif faulty_secondary:
11699 self.disks = faulty_secondary
11700 self.target_node = secondary_node
11701 self.other_node = instance.primary_node
11702 check_nodes = [self.target_node, self.other_node]
11703 else:
11704 self.disks = []
11705 check_nodes = []
11707 else:
11708 # Non-automatic modes
11709 if self.mode == constants.REPLACE_DISK_PRI:
11710 self.target_node = instance.primary_node
11711 self.other_node = secondary_node
11712 check_nodes = [self.target_node, self.other_node]
11714 elif self.mode == constants.REPLACE_DISK_SEC:
11715 self.target_node = secondary_node
11716 self.other_node = instance.primary_node
11717 check_nodes = [self.target_node, self.other_node]
11719 elif self.mode == constants.REPLACE_DISK_CHG:
11720 self.new_node = remote_node
11721 self.other_node = instance.primary_node
11722 self.target_node = secondary_node
11723 check_nodes = [self.new_node, self.other_node]
11725 _CheckNodeNotDrained(self.lu, remote_node)
11726 _CheckNodeVmCapable(self.lu, remote_node)
11728 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11729 assert old_node_info is not None
11730 if old_node_info.offline and not self.early_release:
11731 # doesn't make sense to delay the release
11732 self.early_release = True
11733 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11734 " early-release mode", secondary_node)
11736 else:
11737 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11738 self.mode)
11740 # If not specified, all disks should be replaced
11741 if not self.disks:
11742 self.disks = range(len(self.instance.disks))
11744 # TODO: This is ugly, but right now we can't distinguish between internal
11745 # submitted opcode and external one. We should fix that.
11746 if self.remote_node_info:
11747 # We change the node, lets verify it still meets instance policy
11748 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11749 cluster = self.cfg.GetClusterInfo()
11750 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11751 new_group_info)
11752 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11753 self.cfg, ignore=self.ignore_ipolicy)
11755 for node in check_nodes:
11756 _CheckNodeOnline(self.lu, node)
11758 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11759 self.other_node,
11760 self.target_node]
11761 if node_name is not None)
11763 # Release unneeded node and node resource locks
11764 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11765 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11766 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11768 # Release any owned node group
11769 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11771 # Check whether disks are valid
11772 for disk_idx in self.disks:
11773 instance.FindDisk(disk_idx)
11775 # Get secondary node IP addresses
11776 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11777 in self.cfg.GetMultiNodeInfo(touched_nodes))
11779 def Exec(self, feedback_fn):
11780 """Execute disk replacement.
11782 This dispatches the disk replacement to the appropriate handler.
11786 # Verify owned locks before starting operation
11787 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11788 assert set(owned_nodes) == set(self.node_secondary_ip), \
11789 ("Incorrect node locks, owning %s, expected %s" %
11790 (owned_nodes, self.node_secondary_ip.keys()))
11791 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11792 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11793 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11795 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11796 assert list(owned_instances) == [self.instance_name], \
11797 "Instance '%s' not locked" % self.instance_name
11799 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11800 "Should not own any node group lock at this point"
11802 if not self.disks:
11803 feedback_fn("No disks need replacement for instance '%s'" %
11804 self.instance.name)
11805 return
11807 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11808 (utils.CommaJoin(self.disks), self.instance.name))
11809 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11810 feedback_fn("Current secondary node: %s" %
11811 utils.CommaJoin(self.instance.secondary_nodes))
11813 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11815 # Activate the instance disks if we're replacing them on a down instance
11816 if activate_disks:
11817 _StartInstanceDisks(self.lu, self.instance, True)
11819 try:
11820 # Should we replace the secondary node?
11821 if self.new_node is not None:
11822 fn = self._ExecDrbd8Secondary
11823 else:
11824 fn = self._ExecDrbd8DiskOnly
11826 result = fn(feedback_fn)
11827 finally:
11828 # Deactivate the instance disks if we're replacing them on a
11829 # down instance
11830 if activate_disks:
11831 _SafeShutdownInstanceDisks(self.lu, self.instance)
11833 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11835 if __debug__:
11836 # Verify owned locks
11837 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11838 nodes = frozenset(self.node_secondary_ip)
11839 assert ((self.early_release and not owned_nodes) or
11840 (not self.early_release and not (set(owned_nodes) - nodes))), \
11841 ("Not owning the correct locks, early_release=%s, owned=%r,"
11842 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11844 return result
11846 def _CheckVolumeGroup(self, nodes):
11847 self.lu.LogInfo("Checking volume groups")
11849 vgname = self.cfg.GetVGName()
11851 # Make sure volume group exists on all involved nodes
11852 results = self.rpc.call_vg_list(nodes)
11853 if not results:
11854 raise errors.OpExecError("Can't list volume groups on the nodes")
11856 for node in nodes:
11857 res = results[node]
11858 res.Raise("Error checking node %s" % node)
11859 if vgname not in res.payload:
11860 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11861 (vgname, node))
11863 def _CheckDisksExistence(self, nodes):
11864 # Check disk existence
11865 for idx, dev in enumerate(self.instance.disks):
11866 if idx not in self.disks:
11867 continue
11869 for node in nodes:
11870 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11871 self.cfg.SetDiskID(dev, node)
11873 result = _BlockdevFind(self, node, dev, self.instance)
11875 msg = result.fail_msg
11876 if msg or not result.payload:
11877 if not msg:
11878 msg = "disk not found"
11879 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11880 (idx, node, msg))
11882 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11883 for idx, dev in enumerate(self.instance.disks):
11884 if idx not in self.disks:
11885 continue
11887 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11888 (idx, node_name))
11890 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11891 on_primary, ldisk=ldisk):
11892 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11893 " replace disks for instance %s" %
11894 (node_name, self.instance.name))
11896 def _CreateNewStorage(self, node_name):
11897 """Create new storage on the primary or secondary node.
11899 This is only used for same-node replaces, not for changing the
11900 secondary node, hence we don't want to modify the existing disk.
11904 iv_names = {}
11905 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11906 for idx, dev in enumerate(disks):
11907 if idx not in self.disks:
11908 continue
11910 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11912 self.cfg.SetDiskID(dev, node_name)
11914 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11915 names = _GenerateUniqueNames(self.lu, lv_names)
11917 (data_disk, meta_disk) = dev.children
11918 vg_data = data_disk.logical_id[0]
11919 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11920 logical_id=(vg_data, names[0]),
11921 params=data_disk.params)
11922 vg_meta = meta_disk.logical_id[0]
11923 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11924 size=constants.DRBD_META_SIZE,
11925 logical_id=(vg_meta, names[1]),
11926 params=meta_disk.params)
11928 new_lvs = [lv_data, lv_meta]
11929 old_lvs = [child.Copy() for child in dev.children]
11930 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11931 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11933 # we pass force_create=True to force the LVM creation
11934 for new_lv in new_lvs:
11935 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11936 _GetInstanceInfoText(self.instance), False,
11937 excl_stor)
11939 return iv_names
11941 def _CheckDevices(self, node_name, iv_names):
11942 for name, (dev, _, _) in iv_names.iteritems():
11943 self.cfg.SetDiskID(dev, node_name)
11945 result = _BlockdevFind(self, node_name, dev, self.instance)
11947 msg = result.fail_msg
11948 if msg or not result.payload:
11949 if not msg:
11950 msg = "disk not found"
11951 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11952 (name, msg))
11954 if result.payload.is_degraded:
11955 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11957 def _RemoveOldStorage(self, node_name, iv_names):
11958 for name, (_, old_lvs, _) in iv_names.iteritems():
11959 self.lu.LogInfo("Remove logical volumes for %s", name)
11961 for lv in old_lvs:
11962 self.cfg.SetDiskID(lv, node_name)
11964 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11965 if msg:
11966 self.lu.LogWarning("Can't remove old LV: %s", msg,
11967 hint="remove unused LVs manually")
11969 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11970 """Replace a disk on the primary or secondary for DRBD 8.
11972 The algorithm for replace is quite complicated:
11974 1. for each disk to be replaced:
11976 1. create new LVs on the target node with unique names
11977 1. detach old LVs from the drbd device
11978 1. rename old LVs to name_replaced.<time_t>
11979 1. rename new LVs to old LVs
11980 1. attach the new LVs (with the old names now) to the drbd device
11982 1. wait for sync across all devices
11984 1. for each modified disk:
11986 1. remove old LVs (which have the name name_replaced.<time_t>)
11988 Failures are not very well handled.
11990 """
11991 steps_total = 6
11993 # Step: check device activation
11994 self.lu.LogStep(1, steps_total, "Check device existence")
11995 self._CheckDisksExistence([self.other_node, self.target_node])
11996 self._CheckVolumeGroup([self.target_node, self.other_node])
11998 # Step: check other node consistency
11999 self.lu.LogStep(2, steps_total, "Check peer consistency")
12000 self._CheckDisksConsistency(self.other_node,
12001 self.other_node == self.instance.primary_node,
12004 # Step: create new storage
12005 self.lu.LogStep(3, steps_total, "Allocate new storage")
12006 iv_names = self._CreateNewStorage(self.target_node)
12008 # Step: for each lv, detach+rename*2+attach
12009 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12010 for dev, old_lvs, new_lvs in iv_names.itervalues():
12011 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
12013 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
12014 old_lvs)
12015 result.Raise("Can't detach drbd from local storage on node"
12016 " %s for device %s" % (self.target_node, dev.iv_name))
12018 #cfg.Update(instance)
12020 # ok, we created the new LVs, so now we know we have the needed
12021 # storage; as such, we proceed on the target node to rename
12022 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12023 # using the assumption that logical_id == physical_id (which in
12024 # turn is the unique_id on that node)
12026 # FIXME(iustin): use a better name for the replaced LVs
12027 temp_suffix = int(time.time())
12028 ren_fn = lambda d, suff: (d.physical_id[0],
12029 d.physical_id[1] + "_replaced-%s" % suff)
12031 # Build the rename list based on what LVs exist on the node
12032 rename_old_to_new = []
12033 for to_ren in old_lvs:
12034 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12035 if not result.fail_msg and result.payload:
12037 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12039 self.lu.LogInfo("Renaming the old LVs on the target node")
12040 result = self.rpc.call_blockdev_rename(self.target_node,
12041 rename_old_to_new)
12042 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12044 # Now we rename the new LVs to the old LVs
12045 self.lu.LogInfo("Renaming the new LVs on the target node")
12046 rename_new_to_old = [(new, old.physical_id)
12047 for old, new in zip(old_lvs, new_lvs)]
12048 result = self.rpc.call_blockdev_rename(self.target_node,
12049 rename_new_to_old)
12050 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12052 # Intermediate steps of in memory modifications
12053 for old, new in zip(old_lvs, new_lvs):
12054 new.logical_id = old.logical_id
12055 self.cfg.SetDiskID(new, self.target_node)
12057 # We need to modify old_lvs so that removal later removes the
12058 # right LVs, not the newly added ones; note that old_lvs is a
12059 # copy here
12060 for disk in old_lvs:
12061 disk.logical_id = ren_fn(disk, temp_suffix)
12062 self.cfg.SetDiskID(disk, self.target_node)
12064 # Now that the new lvs have the old name, we can add them to the device
12065 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12066 result = self.rpc.call_blockdev_addchildren(self.target_node,
12067 (dev, self.instance), new_lvs)
12068 msg = result.fail_msg
12069 if msg:
12070 for new_lv in new_lvs:
12071 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12072 new_lv).fail_msg
12073 if msg2:
12074 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12075 hint=("cleanup manually the unused logical"
12076 " volumes"))
12077 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12079 cstep = itertools.count(5)
12081 if self.early_release:
12082 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12083 self._RemoveOldStorage(self.target_node, iv_names)
12084 # TODO: Check if releasing locks early still makes sense
12085 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12086 else:
12087 # Release all resource locks except those used by the instance
12088 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12089 keep=self.node_secondary_ip.keys())
12091 # Release all node locks while waiting for sync
12092 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12094 # TODO: Can the instance lock be downgraded here? Take the optional disk
12095 # shutdown in the caller into consideration.
12098 # This can fail as the old devices are degraded and _WaitForSync
12099 # does a combined result over all disks, so we don't check its return value
12100 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12101 _WaitForSync(self.lu, self.instance)
12103 # Check all devices manually
12104 self._CheckDevices(self.instance.primary_node, iv_names)
12106 # Step: remove old storage
12107 if not self.early_release:
12108 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12109 self._RemoveOldStorage(self.target_node, iv_names)
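# Editorial summary (derived from the docstrings of the two methods):
# _ExecDrbd8DiskOnly above keeps both DRBD peers and swaps the backing LVs
# underneath one of them via the detach/rename/attach dance, while
# _ExecDrbd8Secondary below keeps the primary's LVs and rebuilds the mirror
# against a freshly prepared node, reattaching the DRBD network once the new
# secondary is ready.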
12111 def _ExecDrbd8Secondary(self, feedback_fn):
12112 """Replace the secondary node for DRBD 8.
12114 The algorithm for replace is quite complicated:
12115 - for all disks of the instance:
12116 - create new LVs on the new node with same names
12117 - shutdown the drbd device on the old secondary
12118 - disconnect the drbd network on the primary
12119 - create the drbd device on the new secondary
12120 - network attach the drbd on the primary, using an artifice:
12121 the drbd code for Attach() will connect to the network if it
12122 finds a device which is connected to the good local disks but
12123 not network enabled
12124 - wait for sync across all devices
12125 - remove all disks from the old secondary
12127 Failures are not very well handled.
12129 """
12130 steps_total = 6
12132 pnode = self.instance.primary_node
12134 # Step: check device activation
12135 self.lu.LogStep(1, steps_total, "Check device existence")
12136 self._CheckDisksExistence([self.instance.primary_node])
12137 self._CheckVolumeGroup([self.instance.primary_node])
12139 # Step: check other node consistency
12140 self.lu.LogStep(2, steps_total, "Check peer consistency")
12141 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12143 # Step: create new storage
12144 self.lu.LogStep(3, steps_total, "Allocate new storage")
12145 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12146 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12147 for idx, dev in enumerate(disks):
12148 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12149 (self.new_node, idx))
12150 # we pass force_create=True to force LVM creation
12151 for new_lv in dev.children:
12152 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12153 True, _GetInstanceInfoText(self.instance), False,
12156 # Step 4: drbd minors and drbd setup changes
12157 # after this, we must manually remove the drbd minors on both the
12158 # error and the success paths
12159 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12160 minors = self.cfg.AllocateDRBDMinor([self.new_node
12161 for dev in self.instance.disks],
12162 self.instance.name)
12163 logging.debug("Allocated minors %r", minors)
12165 iv_names = {}
12166 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12167 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12168 (self.new_node, idx))
12169 # create new devices on new_node; note that we create two IDs:
12170 # one without port, so the drbd will be activated without
12171 # networking information on the new node at this stage, and one
12172 # with network, for the latter activation in step 4
12173 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12174 if self.instance.primary_node == o_node1:
12175 p_minor = o_minor1
12176 else:
12177 assert self.instance.primary_node == o_node2, "Three-node instance?"
12178 p_minor = o_minor2
12180 new_alone_id = (self.instance.primary_node, self.new_node, None,
12181 p_minor, new_minor, o_secret)
12182 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12183 p_minor, new_minor, o_secret)
12185 iv_names[idx] = (dev, dev.children, new_net_id)
12186 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12187 new_net_id)
12188 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12189 logical_id=new_alone_id,
12190 children=dev.children,
12191 size=dev.size,
12192 params={})
12193 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12194 self.cfg)
12195 try:
12196 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12197 anno_new_drbd,
12198 _GetInstanceInfoText(self.instance), False,
12199 excl_stor)
12200 except errors.GenericError:
12201 self.cfg.ReleaseDRBDMinors(self.instance.name)
12202 raise
12204 # We have new devices, shutdown the drbd on the old secondary
12205 for idx, dev in enumerate(self.instance.disks):
12206 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12207 self.cfg.SetDiskID(dev, self.target_node)
12208 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12209 (dev, self.instance)).fail_msg
12210 if msg:
12211 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12212 " node: %s" % (idx, msg),
12213 hint=("Please cleanup this device manually as"
12214 " soon as possible"))
12216 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12217 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12218 self.instance.disks)[pnode]
12220 msg = result.fail_msg
12221 if msg:
12222 # detaches didn't succeed (unlikely)
12223 self.cfg.ReleaseDRBDMinors(self.instance.name)
12224 raise errors.OpExecError("Can't detach the disks from the network on"
12225 " old node: %s" % (msg,))
12227 # if we managed to detach at least one, we update all the disks of
12228 # the instance to point to the new secondary
12229 self.lu.LogInfo("Updating instance configuration")
12230 for dev, _, new_logical_id in iv_names.itervalues():
12231 dev.logical_id = new_logical_id
12232 self.cfg.SetDiskID(dev, self.instance.primary_node)
12234 self.cfg.Update(self.instance, feedback_fn)
12236 # Release all node locks (the configuration has been updated)
12237 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12239 # and now perform the drbd attach
12240 self.lu.LogInfo("Attaching primary drbds to new secondary"
12241 " (standalone => connected)")
12242 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12243 self.new_node],
12244 self.node_secondary_ip,
12245 (self.instance.disks, self.instance),
12246 self.instance.name,
12248 for to_node, to_result in result.items():
12249 msg = to_result.fail_msg
12250 if msg:
12251 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12252 to_node, msg,
12253 hint=("please do a gnt-instance info to see the"
12254 " status of disks"))
12256 cstep = itertools.count(5)
12258 if self.early_release:
12259 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12260 self._RemoveOldStorage(self.target_node, iv_names)
12261 # TODO: Check if releasing locks early still makes sense
12262 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12263 else:
12264 # Release all resource locks except those used by the instance
12265 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12266 keep=self.node_secondary_ip.keys())
12268 # TODO: Can the instance lock be downgraded here? Take the optional disk
12269 # shutdown in the caller into consideration.
12272 # This can fail as the old devices are degraded and _WaitForSync
12273 # does a combined result over all disks, so we don't check its return value
12274 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12275 _WaitForSync(self.lu, self.instance)
12277 # Check all devices manually
12278 self._CheckDevices(self.instance.primary_node, iv_names)
12280 # Step: remove old storage
12281 if not self.early_release:
12282 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12283 self._RemoveOldStorage(self.target_node, iv_names)
12286 class LURepairNodeStorage(NoHooksLU):
12287 """Repairs the volume group on a node.
12292 def CheckArguments(self):
12293 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12295 storage_type = self.op.storage_type
12297 if (constants.SO_FIX_CONSISTENCY not in
12298 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12299 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12300 " repaired" % storage_type,
12301 errors.ECODE_INVAL)
12303 def ExpandNames(self):
12304 self.needed_locks = {
12305 locking.LEVEL_NODE: [self.op.node_name],
12308 def _CheckFaultyDisks(self, instance, node_name):
12309 """Ensure faulty disks abort the opcode or at least warn."""
12310 try:
12311 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12313 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12314 " node '%s'" % (instance.name, node_name),
12315 errors.ECODE_STATE)
12316 except errors.OpPrereqError, err:
12317 if self.op.ignore_consistency:
12318 self.LogWarning(str(err.args[0]))
12319 else:
12320 raise
12322 def CheckPrereq(self):
12323 """Check prerequisites.
12326 # Check whether any instance on this node has faulty disks
12327 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12328 if inst.admin_state != constants.ADMINST_UP:
12329 continue
12330 check_nodes = set(inst.all_nodes)
12331 check_nodes.discard(self.op.node_name)
12332 for inst_node_name in check_nodes:
12333 self._CheckFaultyDisks(inst, inst_node_name)
12335 def Exec(self, feedback_fn):
12336 feedback_fn("Repairing storage unit '%s' on %s ..." %
12337 (self.op.name, self.op.node_name))
12339 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12340 result = self.rpc.call_storage_execute(self.op.node_name,
12341 self.op.storage_type, st_args,
12342 self.op.name,
12343 constants.SO_FIX_CONSISTENCY)
12344 result.Raise("Failed to repair storage unit '%s' on %s" %
12345 (self.op.name, self.op.node_name))
12348 class LUNodeEvacuate(NoHooksLU):
12349 """Evacuates instances off a list of nodes.
12354 _MODE2IALLOCATOR = {
12355 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12356 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12357 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12359 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12360 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12361 constants.IALLOCATOR_NEVAC_MODES)
12363 def CheckArguments(self):
12364 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12366 def ExpandNames(self):
12367 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12369 if self.op.remote_node is not None:
12370 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12371 assert self.op.remote_node
12373 if self.op.remote_node == self.op.node_name:
12374 raise errors.OpPrereqError("Can not use evacuated node as a new"
12375 " secondary node", errors.ECODE_INVAL)
12377 if self.op.mode != constants.NODE_EVAC_SEC:
12378 raise errors.OpPrereqError("Without the use of an iallocator only"
12379 " secondary instances can be evacuated",
12380 errors.ECODE_INVAL)
12383 self.share_locks = _ShareAll()
12384 self.needed_locks = {
12385 locking.LEVEL_INSTANCE: [],
12386 locking.LEVEL_NODEGROUP: [],
12387 locking.LEVEL_NODE: [],
12390 # Determine nodes (via group) optimistically, needs verification once locks
12391 # have been acquired
12392 self.lock_nodes = self._DetermineNodes()
12394 def _DetermineNodes(self):
12395 """Gets the list of nodes to operate on.
12398 if self.op.remote_node is None:
12399 # Iallocator will choose any node(s) in the same group
12400 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12401 else:
12402 group_nodes = frozenset([self.op.remote_node])
12404 # Determine nodes to be locked
12405 return set([self.op.node_name]) | group_nodes
12407 def _DetermineInstances(self):
12408 """Builds list of instances to operate on.
12411 assert self.op.mode in constants.NODE_EVAC_MODES
12413 if self.op.mode == constants.NODE_EVAC_PRI:
12414 # Primary instances only
12415 inst_fn = _GetNodePrimaryInstances
12416 assert self.op.remote_node is None, \
12417 "Evacuating primary instances requires iallocator"
12418 elif self.op.mode == constants.NODE_EVAC_SEC:
12419 # Secondary instances only
12420 inst_fn = _GetNodeSecondaryInstances
12421 else:
12422 # All instances
12423 assert self.op.mode == constants.NODE_EVAC_ALL
12424 inst_fn = _GetNodeInstances
12425 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12427 raise errors.OpPrereqError("Due to an issue with the iallocator"
12428 " interface it is not possible to evacuate"
12429 " all instances at once; specify explicitly"
12430 " whether to evacuate primary or secondary"
12432 errors.ECODE_INVAL)
12434 return inst_fn(self.cfg, self.op.node_name)
12436 def DeclareLocks(self, level):
12437 if level == locking.LEVEL_INSTANCE:
12438 # Lock instances optimistically, needs verification once node and group
12439 # locks have been acquired
12440 self.needed_locks[locking.LEVEL_INSTANCE] = \
12441 set(i.name for i in self._DetermineInstances())
12443 elif level == locking.LEVEL_NODEGROUP:
12444 # Lock node groups for all potential target nodes optimistically, needs
12445 # verification once nodes have been acquired
12446 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12447 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12449 elif level == locking.LEVEL_NODE:
12450 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12452 def CheckPrereq(self):
12454 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12455 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12456 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12458 need_nodes = self._DetermineNodes()
12460 if not owned_nodes.issuperset(need_nodes):
12461 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12462 " locks were acquired, current nodes are"
12463 " '%s', used to be '%s'; retry the"
12464 " operation" %
12465 (self.op.node_name,
12466 utils.CommaJoin(need_nodes),
12467 utils.CommaJoin(owned_nodes)),
12468 errors.ECODE_STATE)
12470 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12471 if owned_groups != wanted_groups:
12472 raise errors.OpExecError("Node groups changed since locks were acquired,"
12473 " current groups are '%s', used to be '%s';"
12474 " retry the operation" %
12475 (utils.CommaJoin(wanted_groups),
12476 utils.CommaJoin(owned_groups)))
12478 # Determine affected instances
12479 self.instances = self._DetermineInstances()
12480 self.instance_names = [i.name for i in self.instances]
12482 if set(self.instance_names) != owned_instances:
12483 raise errors.OpExecError("Instances on node '%s' changed since locks"
12484 " were acquired, current instances are '%s',"
12485 " used to be '%s'; retry the operation" %
12486 (self.op.node_name,
12487 utils.CommaJoin(self.instance_names),
12488 utils.CommaJoin(owned_instances)))
12490 if self.instance_names:
12491 self.LogInfo("Evacuating instances from node '%s': %s",
12492 self.op.node_name,
12493 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12494 else:
12495 self.LogInfo("No instances to evacuate from node '%s'",
12496 self.op.node_name)
12498 if self.op.remote_node is not None:
12499 for i in self.instances:
12500 if i.primary_node == self.op.remote_node:
12501 raise errors.OpPrereqError("Node %s is the primary node of"
12502 " instance %s, cannot use it as"
12504 (self.op.remote_node, i.name),
12505 errors.ECODE_INVAL)
12507 def Exec(self, feedback_fn):
12508 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12510 if not self.instance_names:
12511 # No instances to evacuate
12512 jobs = []
12514 elif self.op.iallocator is not None:
12515 # TODO: Implement relocation to other group
12516 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12517 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12518 instances=list(self.instance_names))
12519 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12521 ial.Run(self.op.iallocator)
12523 if not ial.success:
12524 raise errors.OpPrereqError("Can't compute node evacuation using"
12525 " iallocator '%s': %s" %
12526 (self.op.iallocator, ial.info),
12527 errors.ECODE_NORES)
12529 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12531 elif self.op.remote_node is not None:
12532 assert self.op.mode == constants.NODE_EVAC_SEC
12533 jobs = [
12534 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12535 remote_node=self.op.remote_node,
12536 disks=[],
12537 mode=constants.REPLACE_DISK_CHG,
12538 early_release=self.op.early_release)]
12539 for instance_name in self.instance_names]
12541 else:
12542 raise errors.ProgrammerError("No iallocator or remote node")
12544 return ResultWithJobs(jobs)
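# Note on LUNodeEvacuate.Exec above: whichever branch ran, "jobs" is a list of
# job definitions, one per evacuated instance (each itself a list of opcodes).
# With an explicit remote node every job is a single OpInstanceReplaceDisks in
# REPLACE_DISK_CHG mode; with an iallocator the jobs are whatever
# _LoadNodeEvacResult decoded from the allocator's answer.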
12547 def _SetOpEarlyRelease(early_release, op):
12548 """Sets C{early_release} flag on opcodes if available.
12550 """
12551 try:
12552 op.early_release = early_release
12553 except AttributeError:
12554 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12556 return op
12559 def _NodeEvacDest(use_nodes, group, nodes):
12560 """Returns group or nodes depending on caller's choice.
12562 """
12563 if use_nodes:
12564 return utils.CommaJoin(nodes)
12565 else:
12566 return group
12569 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12570 """Unpacks the result of change-group and node-evacuate iallocator requests.
12572 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12573 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12575 @type lu: L{LogicalUnit}
12576 @param lu: Logical unit instance
12577 @type alloc_result: tuple/list
12578 @param alloc_result: Result from iallocator
12579 @type early_release: bool
12580 @param early_release: Whether to release locks early if possible
12581 @type use_nodes: bool
12582 @param use_nodes: Whether to display node names instead of groups
12585 (moved, failed, jobs) = alloc_result
12587 if failed:
12588 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12589 for (name, reason) in failed)
12590 lu.LogWarning("Unable to evacuate instances %s", failreason)
12591 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12593 if moved:
12594 lu.LogInfo("Instances to be moved: %s",
12595 utils.CommaJoin("%s (to %s)" %
12596 (name, _NodeEvacDest(use_nodes, group, nodes))
12597 for (name, group, nodes) in moved))
12599 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12600 map(opcodes.OpCode.LoadOpCode, ops))
12601 for ops in jobs]
12604 def _DiskSizeInBytesToMebibytes(lu, size):
12605 """Converts a disk size in bytes to mebibytes.
12607 Warns and rounds up if the size isn't an even multiple of 1 MiB.
12610 (mib, remainder) = divmod(size, 1024 * 1024)
12612 if remainder != 0:
12613 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12614 " to not overwrite existing data (%s bytes will not be"
12615 " wiped)", (1024 * 1024) - remainder)
12616 mib += 1
12618 return mib
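# Worked example for _DiskSizeInBytesToMebibytes (illustrative): 1073741824
# bytes (exactly 1 GiB) divides evenly and yields 1024 MiB, while 1073741825
# bytes leaves a 1-byte remainder, triggers the warning above ("1048575 bytes
# will not be wiped") and is rounded up to 1025 MiB.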
12621 class LUInstanceGrowDisk(LogicalUnit):
12622 """Grow a disk of an instance.
12625 HPATH = "disk-grow"
12626 HTYPE = constants.HTYPE_INSTANCE
12629 def ExpandNames(self):
12630 self._ExpandAndLockInstance()
12631 self.needed_locks[locking.LEVEL_NODE] = []
12632 self.needed_locks[locking.LEVEL_NODE_RES] = []
12633 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12634 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12636 def DeclareLocks(self, level):
12637 if level == locking.LEVEL_NODE:
12638 self._LockInstancesNodes()
12639 elif level == locking.LEVEL_NODE_RES:
12641 self.needed_locks[locking.LEVEL_NODE_RES] = \
12642 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12644 def BuildHooksEnv(self):
12645 """Build hooks env.
12647 This runs on the master, the primary and all the secondaries.
12649 """
12650 env = {
12651 "DISK": self.op.disk,
12652 "AMOUNT": self.op.amount,
12653 "ABSOLUTE": self.op.absolute,
12654 }
12655 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12656 return env
12658 def BuildHooksNodes(self):
12659 """Build hooks nodes.
12662 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12663 return (nl, nl)
12665 def CheckPrereq(self):
12666 """Check prerequisites.
12668 This checks that the instance is in the cluster.
12671 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12672 assert instance is not None, \
12673 "Cannot retrieve locked instance %s" % self.op.instance_name
12674 nodenames = list(instance.all_nodes)
12675 for node in nodenames:
12676 _CheckNodeOnline(self, node)
12678 self.instance = instance
12680 if instance.disk_template not in constants.DTS_GROWABLE:
12681 raise errors.OpPrereqError("Instance's disk layout does not support"
12682 " growing", errors.ECODE_INVAL)
12684 self.disk = instance.FindDisk(self.op.disk)
12686 if self.op.absolute:
12687 self.target = self.op.amount
12688 self.delta = self.target - self.disk.size
12689 if self.delta < 0:
12690 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12691 "current disk size (%s)" %
12692 (utils.FormatUnit(self.target, "h"),
12693 utils.FormatUnit(self.disk.size, "h")),
12694 errors.ECODE_STATE)
12696 self.delta = self.op.amount
12697 self.target = self.disk.size + self.delta
12699 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12700 utils.FormatUnit(self.delta, "h"),
12701 errors.ECODE_INVAL)
12703 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12705 def _CheckDiskSpace(self, nodenames, req_vgspace):
12706 template = self.instance.disk_template
12707 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12708 # TODO: check the free disk space for file, when that feature will be
12710 nodes = map(self.cfg.GetNodeInfo, nodenames)
12711 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12714 # With exclusive storage we need to do something smarter than just looking
12715 # at free space; for now, let's simply abort the operation.
12716 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12717 " is enabled", errors.ECODE_STATE)
12718 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12720 def Exec(self, feedback_fn):
12721 """Execute disk grow.
12724 instance = self.instance
12727 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12728 assert (self.owned_locks(locking.LEVEL_NODE) ==
12729 self.owned_locks(locking.LEVEL_NODE_RES))
12731 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12733 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12735 raise errors.OpExecError("Cannot activate block device to grow")
12737 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12738 (self.op.disk, instance.name,
12739 utils.FormatUnit(self.delta, "h"),
12740 utils.FormatUnit(self.target, "h")))
12742 # First run all grow ops in dry-run mode
12743 for node in instance.all_nodes:
12744 self.cfg.SetDiskID(disk, node)
12745 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12747 result.Raise("Dry-run grow request failed to node %s" % node)
12750 # Get disk size from primary node for wiping
12751 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12752 result.Raise("Failed to retrieve disk size from node '%s'" %
12753 instance.primary_node)
12755 (disk_size_in_bytes, ) = result.payload
12757 if disk_size_in_bytes is None:
12758 raise errors.OpExecError("Failed to retrieve disk size from primary"
12759 " node '%s'" % instance.primary_node)
12761 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12763 assert old_disk_size >= disk.size, \
12764 ("Retrieved disk size too small (got %s, should be at least %s)" %
12765 (old_disk_size, disk.size))
12767 old_disk_size = None
12769 # We know that (as far as we can test) operations across different
12770 # nodes will succeed; time to run it for real on the backing storage
12771 for node in instance.all_nodes:
12772 self.cfg.SetDiskID(disk, node)
12773 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12775 result.Raise("Grow request failed to node %s" % node)
12777 # And now execute it for logical storage, on the primary node
12778 node = instance.primary_node
12779 self.cfg.SetDiskID(disk, node)
12780 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12782 result.Raise("Grow request failed to node %s" % node)
12784 disk.RecordGrow(self.delta)
12785 self.cfg.Update(instance, feedback_fn)
12787 # Changes have been recorded, release node lock
12788 _ReleaseLocks(self, locking.LEVEL_NODE)
12790 # Downgrade lock while waiting for sync
12791 self.glm.downgrade(locking.LEVEL_INSTANCE)
12793 assert wipe_disks ^ (old_disk_size is None)
12796 assert instance.disks[self.op.disk] == disk
12798 # Wipe newly added disk space
12799 _WipeDisks(self, instance,
12800 disks=[(self.op.disk, disk, old_disk_size)])
12802 if self.op.wait_for_sync:
12803 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12805 self.LogWarning("Disk syncing has not returned a good status; check"
12807 if instance.admin_state != constants.ADMINST_UP:
12808 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12809 elif instance.admin_state != constants.ADMINST_UP:
12810 self.LogWarning("Not shutting down the disk even though the instance"
12811 " is not supposed to be running, because wait"
12812 " for sync was not requested")
12814 assert self.owned_locks(locking.LEVEL_NODE_RES)
12815 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
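# Editor's note (hedged usage sketch, not in the original module): a typical
# opcode for this LU. Field names follow opcodes.OpInstanceGrowDisk as the
# editor understands them; verify against the opcodes module before use.
#
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                                   disk=0,            # disk index
#                                   amount=2048,       # MiB
#                                   absolute=False,    # grow by, not grow to
#                                   wait_for_sync=True)
#
# With absolute=False the amount is added to the current size; with
# absolute=True it is the requested final size and must not be smaller than
# the current one (see CheckPrereq above).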
12818 class LUInstanceQueryData(NoHooksLU):
12819 """Query runtime instance data.
12824 def ExpandNames(self):
12825 self.needed_locks = {}
12827 # Use locking if requested or when non-static information is wanted
12828 if not (self.op.static or self.op.use_locking):
12829 self.LogWarning("Non-static data requested, locks need to be acquired")
12830 self.op.use_locking = True
12832 if self.op.instances or not self.op.use_locking:
12833 # Expand instance names right here
12834 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12836 # Will use acquired locks
12837 self.wanted_names = None
12839 if self.op.use_locking:
12840 self.share_locks = _ShareAll()
12842 if self.wanted_names is None:
12843 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12845 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12847 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12848 self.needed_locks[locking.LEVEL_NODE] = []
12849 self.needed_locks[locking.LEVEL_NETWORK] = []
12850 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12852 def DeclareLocks(self, level):
12853 if self.op.use_locking:
12854 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12855 if level == locking.LEVEL_NODEGROUP:
12857 # Lock all groups used by instances optimistically; this requires going
12858 # via the node before it's locked, requiring verification later on
12859 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12860 frozenset(group_uuid
12861 for instance_name in owned_instances
12863 self.cfg.GetInstanceNodeGroups(instance_name))
12865 elif level == locking.LEVEL_NODE:
12866 self._LockInstancesNodes()
12868 elif level == locking.LEVEL_NETWORK:
12869 self.needed_locks[locking.LEVEL_NETWORK] = \
12871 for instance_name in owned_instances
12873 self.cfg.GetInstanceNetworks(instance_name))
12875 def CheckPrereq(self):
12876 """Check prerequisites.
12878 This only checks the optional instance list against the existing names.
12881 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12882 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12883 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12884 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12886 if self.wanted_names is None:
12887 assert self.op.use_locking, "Locking was not used"
12888 self.wanted_names = owned_instances
12890 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12892 if self.op.use_locking:
12893 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12896 assert not (owned_instances or owned_groups or
12897 owned_nodes or owned_networks)
12899 self.wanted_instances = instances.values()
12901 def _ComputeBlockdevStatus(self, node, instance, dev):
12902 """Returns the status of a block device
12905 if self.op.static or not node:
12908 self.cfg.SetDiskID(dev, node)
12910 result = self.rpc.call_blockdev_find(node, dev)
12914 result.Raise("Can't compute disk status for %s" % instance.name)
12916 status = result.payload
12920 return (status.dev_path, status.major, status.minor,
12921 status.sync_percent, status.estimated_time,
12922 status.is_degraded, status.ldisk_status)
12924 def _ComputeDiskStatus(self, instance, snode, dev):
12925 """Compute block device status.
12928 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12930 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12932 def _ComputeDiskStatusInner(self, instance, snode, dev):
12933 """Compute block device status.
12935 @attention: The device has to be annotated already.
12938 if dev.dev_type in constants.LDS_DRBD:
12939 # we change the snode then (otherwise we use the one passed in)
12940 if dev.logical_id[0] == instance.primary_node:
12941 snode = dev.logical_id[1]
12943 snode = dev.logical_id[0]
12945 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12947 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12950 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12957 "iv_name": dev.iv_name,
12958 "dev_type": dev.dev_type,
12959 "logical_id": dev.logical_id,
12960 "physical_id": dev.physical_id,
12961 "pstatus": dev_pstatus,
12962 "sstatus": dev_sstatus,
12963 "children": dev_children,
12968 def Exec(self, feedback_fn):
12969 """Gather and return data"""
12972 cluster = self.cfg.GetClusterInfo()
12974 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12975 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12977 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12978 for node in nodes.values()))
12980 group2name_fn = lambda uuid: groups[uuid].name
12981 for instance in self.wanted_instances:
12982 pnode = nodes[instance.primary_node]
12984 if self.op.static or pnode.offline:
12985 remote_state = None
12987 self.LogWarning("Primary node %s is marked offline, returning static"
12988 " information only for instance %s" %
12989 (pnode.name, instance.name))
12991 remote_info = self.rpc.call_instance_info(instance.primary_node,
12993 instance.hypervisor)
12994 remote_info.Raise("Error checking node %s" % instance.primary_node)
12995 remote_info = remote_info.payload
12996 if remote_info and "state" in remote_info:
12997 remote_state = "up"
12999 if instance.admin_state == constants.ADMINST_UP:
13000 remote_state = "down"
13002 remote_state = instance.admin_state
13004 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
13007 snodes_group_uuids = [nodes[snode_name].group
13008 for snode_name in instance.secondary_nodes]
13010 result[instance.name] = {
13011 "name": instance.name,
13012 "config_state": instance.admin_state,
13013 "run_state": remote_state,
13014 "pnode": instance.primary_node,
13015 "pnode_group_uuid": pnode.group,
13016 "pnode_group_name": group2name_fn(pnode.group),
13017 "snodes": instance.secondary_nodes,
13018 "snodes_group_uuids": snodes_group_uuids,
13019 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
13021 # this happens to be the same format used for hooks
13022 "nics": _NICListToTuple(self, instance.nics),
13023 "disk_template": instance.disk_template,
13025 "hypervisor": instance.hypervisor,
13026 "network_port": instance.network_port,
13027 "hv_instance": instance.hvparams,
13028 "hv_actual": cluster.FillHV(instance, skip_globals=True),
13029 "be_instance": instance.beparams,
13030 "be_actual": cluster.FillBE(instance),
13031 "os_instance": instance.osparams,
13032 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
13033 "serial_no": instance.serial_no,
13034 "mtime": instance.mtime,
13035 "ctime": instance.ctime,
13036 "uuid": instance.uuid,
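# Editor's note (abbreviated sketch, not in the original module): the shape
# of one entry in the mapping returned by Exec; values are illustrative and
# only a subset of the keys assembled above is repeated here.
#
#   result["inst1.example.com"] = {
#     "name": "inst1.example.com",
#     "config_state": "up",            # admin state from the configuration
#     "run_state": "down",             # live state, None in static mode
#     "pnode": "node1.example.com",
#     "snodes": ["node2.example.com"],
#     "disk_template": "drbd",
#     # ... hypervisor, backend and OS parameters as listed above ...
#   }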
13042 def PrepareContainerMods(mods, private_fn):
13043 """Prepares a list of container modifications by adding a private data field.
13045 @type mods: list of tuples; (operation, index, parameters)
13046 @param mods: List of modifications
13047 @type private_fn: callable or None
13048 @param private_fn: Callable for constructing a private data field for a
13053 if private_fn is None:
13058 return [(op, idx, params, fn()) for (op, idx, params) in mods]
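# Editor's note (illustrative example, not in the original module): the
# transformation performed by PrepareContainerMods.
#
#   mods = [(constants.DDM_ADD, -1, {"size": 1024})]
#   PrepareContainerMods(mods, _InstNicModPrivate)
#   # -> [(constants.DDM_ADD, -1, {"size": 1024}, <_InstNicModPrivate>)]
#   PrepareContainerMods(mods, None)
#   # -> [(constants.DDM_ADD, -1, {"size": 1024}, None)]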
13061 #: Type description for changes as returned by L{ApplyContainerMods}'s
13063 _TApplyContModsCbChanges = \
13064 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13065 ht.TNonEmptyString,
13070 def ApplyContainerMods(kind, container, chgdesc, mods,
13071 create_fn, modify_fn, remove_fn):
13072 """Applies descriptions in C{mods} to C{container}.
13075 @param kind: One-word item description
13076 @type container: list
13077 @param container: Container to modify
13078 @type chgdesc: None or list
13079 @param chgdesc: List of applied changes
13081 @param mods: Modifications as returned by L{PrepareContainerMods}
13082 @type create_fn: callable
13083 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13084 receives absolute item index, parameters and private data object as added
13085 by L{PrepareContainerMods}, returns tuple containing new item and changes
13087 @type modify_fn: callable
13088 @param modify_fn: Callback for modifying an existing item
13089 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13090 and private data object as added by L{PrepareContainerMods}, returns
13092 @type remove_fn: callable
13093 @param remove_fn: Callback on removing item; receives absolute item index,
13094 item and private data object as added by L{PrepareContainerMods}
13097 for (op, idx, params, private) in mods:
13100 absidx = len(container) - 1
13102 raise IndexError("Not accepting negative indices other than -1")
13103 elif idx > len(container):
13104 raise IndexError("Got %s index %s, but there are only %s" %
13105 (kind, idx, len(container)))
13111 if op == constants.DDM_ADD:
13112 # Calculate where item will be added
13114 addidx = len(container)
13118 if create_fn is None:
13121 (item, changes) = create_fn(addidx, params, private)
13124 container.append(item)
13127 assert idx <= len(container)
13128 # list.insert does so before the specified index
13129 container.insert(idx, item)
13131 # Retrieve existing item
13133 item = container[absidx]
13135 raise IndexError("Invalid %s index %s" % (kind, idx))
13137 if op == constants.DDM_REMOVE:
13140 if remove_fn is not None:
13141 remove_fn(absidx, item, private)
13143 changes = [("%s/%s" % (kind, absidx), "remove")]
13145 assert container[absidx] == item
13146 del container[absidx]
13147 elif op == constants.DDM_MODIFY:
13148 if modify_fn is not None:
13149 changes = modify_fn(absidx, item, params, private)
13151 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13153 assert _TApplyContModsCbChanges(changes)
13155 if not (chgdesc is None or changes is None):
13156 chgdesc.extend(changes)
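# Editor's note (hypothetical usage sketch, not in the original module):
# applying modifications to a plain list of strings with trivial callbacks.
#
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"value": "c"}),
#                                (constants.DDM_REMOVE, 0, {})], None)
#   ApplyContainerMods("item", container, chgdesc, mods,
#                      lambda idx, params, private: (params["value"], []),
#                      None, None)
#   # container is now ["b", "c"]; chgdesc records the removal of item 0.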
13159 def _UpdateIvNames(base_index, disks):
13160 """Updates the C{iv_name} attribute of disks.
13162 @type disks: list of L{objects.Disk}
13165 for (idx, disk) in enumerate(disks):
13166 disk.iv_name = "disk/%s" % (base_index + idx, )
13169 class _InstNicModPrivate:
13170 """Data structure for network interface modifications.
13172 Used by L{LUInstanceSetParams}.
13175 def __init__(self):
13180 class LUInstanceSetParams(LogicalUnit):
13181 """Modifies an instance's parameters.
13184 HPATH = "instance-modify"
13185 HTYPE = constants.HTYPE_INSTANCE
13189 def _UpgradeDiskNicMods(kind, mods, verify_fn):
13190 assert ht.TList(mods)
13191 assert not mods or len(mods[0]) in (2, 3)
13193 if mods and len(mods[0]) == 2:
13197 for op, params in mods:
13198 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13199 result.append((op, -1, params))
13203 raise errors.OpPrereqError("Only one %s add or remove operation is"
13204 " supported at a time" % kind,
13205 errors.ECODE_INVAL)
13207 result.append((constants.DDM_MODIFY, op, params))
13209 assert verify_fn(result)
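# Editor's note (illustrative example, not in the original module): the
# legacy two-element modifications are upgraded to three-element ones.
#
#   [(constants.DDM_ADD, {"size": 1024})]
#     -> [(constants.DDM_ADD, -1, {"size": 1024})]
#   [(0, {"mode": "rw"})]                  # old-style "modify item 0"
#     -> [(constants.DDM_MODIFY, 0, {"mode": "rw"})]
#
# Only a single add or remove operation is accepted per legacy list, as the
# error message above states.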
13216 def _CheckMods(kind, mods, key_types, item_fn):
13217 """Ensures requested disk/NIC modifications are valid.
13220 for (op, _, params) in mods:
13221 assert ht.TDict(params)
13223 # If 'key_types' is an empty dict, we assume we have an
13224 # 'ext' template and thus do not ForceDictType
13226 utils.ForceDictType(params, key_types)
13228 if op == constants.DDM_REMOVE:
13230 raise errors.OpPrereqError("No settings should be passed when"
13231 " removing a %s" % kind,
13232 errors.ECODE_INVAL)
13233 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13234 item_fn(op, params)
13236 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13239 def _VerifyDiskModification(op, params):
13240 """Verifies a disk modification.
13243 if op == constants.DDM_ADD:
13244 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13245 if mode not in constants.DISK_ACCESS_SET:
13246 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13247 errors.ECODE_INVAL)
13249 size = params.get(constants.IDISK_SIZE, None)
13251 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13252 constants.IDISK_SIZE, errors.ECODE_INVAL)
13256 except (TypeError, ValueError), err:
13257 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13258 errors.ECODE_INVAL)
13260 params[constants.IDISK_SIZE] = size
13262 elif op == constants.DDM_MODIFY:
13263 if constants.IDISK_SIZE in params:
13264 raise errors.OpPrereqError("Disk size change not possible, use"
13265 " grow-disk", errors.ECODE_INVAL)
13266 if constants.IDISK_MODE not in params:
13267 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13268 " modification supported, but missing",
13269 errors.ECODE_NOENT)
13270 if len(params) > 1:
13271 raise errors.OpPrereqError("Disk modification doesn't support"
13272 " additional arbitrary parameters",
13273 errors.ECODE_INVAL)
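# Editor's note (illustrative examples, not in the original module): disk
# modifications that satisfy the checks above.
#
#   (constants.DDM_ADD, -1, {constants.IDISK_SIZE: 2048,
#                            constants.IDISK_MODE: constants.DISK_RDWR})
#   (constants.DDM_MODIFY, 0, {constants.IDISK_MODE: constants.DISK_RDONLY})
#
# A DDM_MODIFY entry may only change the access mode; size changes have to
# go through the grow-disk operation instead.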
13276 def _VerifyNicModification(op, params):
13277 """Verifies a network interface modification.
13280 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13281 ip = params.get(constants.INIC_IP, None)
13282 req_net = params.get(constants.INIC_NETWORK, None)
13283 link = params.get(constants.NIC_LINK, None)
13284 mode = params.get(constants.NIC_MODE, None)
13285 if req_net is not None:
13286 if req_net.lower() == constants.VALUE_NONE:
13287 params[constants.INIC_NETWORK] = None
13289 elif link is not None or mode is not None:
13290 raise errors.OpPrereqError("If a network is given, mode or link"
13291 " should not be set",
13292 errors.ECODE_INVAL)
13294 if op == constants.DDM_ADD:
13295 macaddr = params.get(constants.INIC_MAC, None)
13296 if macaddr is None:
13297 params[constants.INIC_MAC] = constants.VALUE_AUTO
13300 if ip.lower() == constants.VALUE_NONE:
13301 params[constants.INIC_IP] = None
13303 if ip.lower() == constants.NIC_IP_POOL:
13304 if op == constants.DDM_ADD and req_net is None:
13305 raise errors.OpPrereqError("If ip=pool, parameter network"
13307 errors.ECODE_INVAL)
13309 if not netutils.IPAddress.IsValid(ip):
13310 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13311 errors.ECODE_INVAL)
13313 if constants.INIC_MAC in params:
13314 macaddr = params[constants.INIC_MAC]
13315 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13316 macaddr = utils.NormalizeAndValidateMac(macaddr)
13318 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13319 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13320 " modifying an existing NIC",
13321 errors.ECODE_INVAL)
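# Editor's note (illustrative examples, not in the original module): NIC
# modifications that satisfy the checks above; "pool" and "none" are the
# special string values handled in the code.
#
#   (constants.DDM_ADD, -1, {constants.INIC_IP: "pool",
#                            constants.INIC_NETWORK: "net1"})
#   # ip=pool requires a network; the MAC defaults to constants.VALUE_AUTO.
#
#   (constants.DDM_MODIFY, 0, {constants.INIC_IP: "none"})
#   # "none" clears the IP; "auto" is rejected as a MAC when modifying an
#   # existing NIC.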
13323 def CheckArguments(self):
13324 if not (self.op.nics or self.op.disks or self.op.disk_template or
13325 self.op.hvparams or self.op.beparams or self.op.os_name or
13326 self.op.offline is not None or self.op.runtime_mem):
13327 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13329 if self.op.hvparams:
13330 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13331 "hypervisor", "instance", "cluster")
13333 self.op.disks = self._UpgradeDiskNicMods(
13334 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13335 self.op.nics = self._UpgradeDiskNicMods(
13336 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13338 if self.op.disks and self.op.disk_template is not None:
13339 raise errors.OpPrereqError("Disk template conversion and other disk"
13340 " changes not supported at the same time",
13341 errors.ECODE_INVAL)
13343 if (self.op.disk_template and
13344 self.op.disk_template in constants.DTS_INT_MIRROR and
13345 self.op.remote_node is None):
13346 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13347 " one requires specifying a secondary node",
13348 errors.ECODE_INVAL)
13350 # Check NIC modifications
13351 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13352 self._VerifyNicModification)
13354 def ExpandNames(self):
13355 self._ExpandAndLockInstance()
13356 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13357 # Can't even acquire node locks in shared mode as upcoming changes in
13358 # Ganeti 2.6 will start to modify the node object on disk conversion
13359 self.needed_locks[locking.LEVEL_NODE] = []
13360 self.needed_locks[locking.LEVEL_NODE_RES] = []
13361 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13362 # Lock the node group to look up the ipolicy
13363 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13365 def DeclareLocks(self, level):
13366 if level == locking.LEVEL_NODEGROUP:
13367 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13368 # Acquire locks for the instance's nodegroups optimistically. Needs
13369 # to be verified in CheckPrereq
13370 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13371 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13372 elif level == locking.LEVEL_NODE:
13373 self._LockInstancesNodes()
13374 if self.op.disk_template and self.op.remote_node:
13375 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13376 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13377 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13379 self.needed_locks[locking.LEVEL_NODE_RES] = \
13380 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13382 def BuildHooksEnv(self):
13383 """Build hooks env.
13385 This runs on the master, primary and secondaries.
13389 if constants.BE_MINMEM in self.be_new:
13390 args["minmem"] = self.be_new[constants.BE_MINMEM]
13391 if constants.BE_MAXMEM in self.be_new:
13392 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13393 if constants.BE_VCPUS in self.be_new:
13394 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13395 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13396 # information at all.
13398 if self._new_nics is not None:
13401 for nic in self._new_nics:
13402 n = copy.deepcopy(nic)
13403 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13404 n.nicparams = nicparams
13405 nics.append(_NICToTuple(self, n))
13407 args["nics"] = nics
13409 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13410 if self.op.disk_template:
13411 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13412 if self.op.runtime_mem:
13413 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13417 def BuildHooksNodes(self):
13418 """Build hooks nodes.
13421 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13424 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13425 old_params, cluster, pnode):
13427 update_params_dict = dict([(key, params[key])
13428 for key in constants.NICS_PARAMETERS
13431 req_link = update_params_dict.get(constants.NIC_LINK, None)
13432 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13434 new_net_uuid = None
13435 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13436 if new_net_uuid_or_name:
13437 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13438 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13441 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
13444 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13446 raise errors.OpPrereqError("No netparams found for the network"
13447 " %s, probably not connected" %
13448 new_net_obj.name, errors.ECODE_INVAL)
13449 new_params = dict(netparams)
13451 new_params = _GetUpdatedParams(old_params, update_params_dict)
13453 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13455 new_filled_params = cluster.SimpleFillNIC(new_params)
13456 objects.NIC.CheckParameterSyntax(new_filled_params)
13458 new_mode = new_filled_params[constants.NIC_MODE]
13459 if new_mode == constants.NIC_MODE_BRIDGED:
13460 bridge = new_filled_params[constants.NIC_LINK]
13461 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13463 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13465 self.warn.append(msg)
13467 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13469 elif new_mode == constants.NIC_MODE_ROUTED:
13470 ip = params.get(constants.INIC_IP, old_ip)
13472 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13473 " on a routed NIC", errors.ECODE_INVAL)
13475 elif new_mode == constants.NIC_MODE_OVS:
13476 # TODO: check OVS link
13477 self.LogInfo("OVS links are currently not checked for correctness")
13479 if constants.INIC_MAC in params:
13480 mac = params[constants.INIC_MAC]
13482 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13483 errors.ECODE_INVAL)
13484 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13485 # otherwise generate the MAC address
13486 params[constants.INIC_MAC] = \
13487 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13489 # or validate/reserve the current one
13491 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13492 except errors.ReservationError:
13493 raise errors.OpPrereqError("MAC address '%s' already in use"
13494 " in cluster" % mac,
13495 errors.ECODE_NOTUNIQUE)
13496 elif new_net_uuid != old_net_uuid:
13498 def get_net_prefix(net_uuid):
13501 nobj = self.cfg.GetNetwork(net_uuid)
13502 mac_prefix = nobj.mac_prefix
13506 new_prefix = get_net_prefix(new_net_uuid)
13507 old_prefix = get_net_prefix(old_net_uuid)
13508 if old_prefix != new_prefix:
13509 params[constants.INIC_MAC] = \
13510 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13512 # if there is a change in (ip, network) tuple
13513 new_ip = params.get(constants.INIC_IP, old_ip)
13514 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13516 # if IP is pool then require a network and generate one IP
13517 if new_ip.lower() == constants.NIC_IP_POOL:
13520 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13521 except errors.ReservationError:
13522 raise errors.OpPrereqError("Unable to get a free IP"
13523 " from the address pool",
13524 errors.ECODE_STATE)
13525 self.LogInfo("Chose IP %s from network %s",
13528 params[constants.INIC_IP] = new_ip
13530 raise errors.OpPrereqError("ip=pool, but no network found",
13531 errors.ECODE_INVAL)
13532 # Reserve the new IP in the new network, if there is one
13535 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13536 self.LogInfo("Reserving IP %s in network %s",
13537 new_ip, new_net_obj.name)
13538 except errors.ReservationError:
13539 raise errors.OpPrereqError("IP %s not available in network %s" %
13540 (new_ip, new_net_obj.name),
13541 errors.ECODE_NOTUNIQUE)
13542 # new network is None so check if new IP is a conflicting IP
13543 elif self.op.conflicts_check:
13544 _CheckForConflictingIp(self, new_ip, pnode)
13546 # release old IP if old network is not None
13547 if old_ip and old_net_uuid:
13549 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13550 except errors.AddressPoolError:
13551 logging.warning("Could not release IP %s: not contained in network %s",
13552 old_ip, old_net_obj.name)
13554 # there are no changes in (ip, network) tuple and old network is not None
13555 elif (old_net_uuid is not None and
13556 (req_link is not None or req_mode is not None)):
13557 raise errors.OpPrereqError("Not allowed to change link or mode of"
13558 " a NIC that is connected to a network",
13559 errors.ECODE_INVAL)
13561 private.params = new_params
13562 private.filled = new_filled_params
13564 def _PreCheckDiskTemplate(self, pnode_info):
13565 """CheckPrereq checks related to a new disk template."""
13566 # Arguments are passed to avoid configuration lookups
13567 instance = self.instance
13568 pnode = instance.primary_node
13569 cluster = self.cluster
13570 if instance.disk_template == self.op.disk_template:
13571 raise errors.OpPrereqError("Instance already has disk template %s" %
13572 instance.disk_template, errors.ECODE_INVAL)
13574 if (instance.disk_template,
13575 self.op.disk_template) not in self._DISK_CONVERSIONS:
13576 raise errors.OpPrereqError("Unsupported disk template conversion from"
13577 " %s to %s" % (instance.disk_template,
13578 self.op.disk_template),
13579 errors.ECODE_INVAL)
13580 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13581 msg="cannot change disk template")
13582 if self.op.disk_template in constants.DTS_INT_MIRROR:
13583 if self.op.remote_node == pnode:
13584 raise errors.OpPrereqError("Given new secondary node %s is the same"
13585 " as the primary node of the instance" %
13586 self.op.remote_node, errors.ECODE_STATE)
13587 _CheckNodeOnline(self, self.op.remote_node)
13588 _CheckNodeNotDrained(self, self.op.remote_node)
13589 # FIXME: here we assume that the old instance type is DT_PLAIN
13590 assert instance.disk_template == constants.DT_PLAIN
13591 disks = [{constants.IDISK_SIZE: d.size,
13592 constants.IDISK_VG: d.logical_id[0]}
13593 for d in instance.disks]
13594 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13595 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13597 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13598 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13599 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13601 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
13602 ignore=self.op.ignore_ipolicy)
13603 if pnode_info.group != snode_info.group:
13604 self.LogWarning("The primary and secondary nodes are in two"
13605 " different node groups; the disk parameters"
13606 " from the first disk's node group will be"
13609 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
13610 # Make sure none of the nodes require exclusive storage
13611 nodes = [pnode_info]
13612 if self.op.disk_template in constants.DTS_INT_MIRROR:
13614 nodes.append(snode_info)
13615 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13616 if compat.any(map(has_es, nodes)):
13617 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13618 " storage is enabled" % (instance.disk_template,
13619 self.op.disk_template))
13620 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
13622 def CheckPrereq(self):
13623 """Check prerequisites.
13625 This only checks the instance list against the existing names.
13628 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13629 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13631 cluster = self.cluster = self.cfg.GetClusterInfo()
13632 assert self.instance is not None, \
13633 "Cannot retrieve locked instance %s" % self.op.instance_name
13635 pnode = instance.primary_node
13636 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13637 nodelist = list(instance.all_nodes)
13638 pnode_info = self.cfg.GetNodeInfo(pnode)
13639 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13641 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13642 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13643 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13645 # dictionary with instance information after the modification
13648 # Check disk modifications. This is done here and not in CheckArguments
13649 # (as with NICs), because we need to know the instance's disk template
13650 if instance.disk_template == constants.DT_EXT:
13651 self._CheckMods("disk", self.op.disks, {},
13652 self._VerifyDiskModification)
13654 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13655 self._VerifyDiskModification)
13657 # Prepare disk/NIC modifications
13658 self.diskmod = PrepareContainerMods(self.op.disks, None)
13659 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13661 # Check the validity of the `provider' parameter
13662 if instance.disk_template in constants.DT_EXT:
13663 for mod in self.diskmod:
13664 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13665 if mod[0] == constants.DDM_ADD:
13666 if ext_provider is None:
13667 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13668 " '%s' is missing during disk add" %
13670 constants.IDISK_PROVIDER),
13671 errors.ECODE_NOENT)
13672 elif mod[0] == constants.DDM_MODIFY:
13674 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13676 constants.IDISK_PROVIDER,
13677 errors.ECODE_INVAL)
13679 for mod in self.diskmod:
13680 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13681 if ext_provider is not None:
13682 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13683 " instances of type '%s'" %
13684 (constants.IDISK_PROVIDER,
13686 errors.ECODE_INVAL)
13689 if self.op.os_name and not self.op.force:
13690 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13691 self.op.force_variant)
13692 instance_os = self.op.os_name
13694 instance_os = instance.os
13696 assert not (self.op.disk_template and self.op.disks), \
13697 "Can't modify disk template and apply disk changes at the same time"
13699 if self.op.disk_template:
13700 self._PreCheckDiskTemplate(pnode_info)
13702 # hvparams processing
13703 if self.op.hvparams:
13704 hv_type = instance.hypervisor
13705 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13706 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13707 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13710 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13711 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13712 self.hv_proposed = self.hv_new = hv_new # the new actual values
13713 self.hv_inst = i_hvdict # the new dict (without defaults)
13715 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13717 self.hv_new = self.hv_inst = {}
13719 # beparams processing
13720 if self.op.beparams:
13721 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13723 objects.UpgradeBeParams(i_bedict)
13724 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13725 be_new = cluster.SimpleFillBE(i_bedict)
13726 self.be_proposed = self.be_new = be_new # the new actual values
13727 self.be_inst = i_bedict # the new dict (without defaults)
13729 self.be_new = self.be_inst = {}
13730 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13731 be_old = cluster.FillBE(instance)
13733 # CPU param validation -- checking every time a parameter is
13734 # changed to cover all cases where either CPU mask or vcpus have
13736 if (constants.BE_VCPUS in self.be_proposed and
13737 constants.HV_CPU_MASK in self.hv_proposed):
13739 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13740 # Verify mask is consistent with number of vCPUs. Can skip this
13741 # test if only 1 entry in the CPU mask, which means same mask
13742 # is applied to all vCPUs.
13743 if (len(cpu_list) > 1 and
13744 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13745 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13747 (self.be_proposed[constants.BE_VCPUS],
13748 self.hv_proposed[constants.HV_CPU_MASK]),
13749 errors.ECODE_INVAL)
13751 # Only perform this test if a new CPU mask is given
13752 if constants.HV_CPU_MASK in self.hv_new:
13753 # Calculate the largest CPU number requested
13754 max_requested_cpu = max(map(max, cpu_list))
13755 # Check that all of the instance's nodes have enough physical CPUs to
13756 # satisfy the requested CPU mask
13757 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13758 max_requested_cpu + 1, instance.hypervisor)
13760 # osparams processing
13761 if self.op.osparams:
13762 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13763 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13764 self.os_inst = i_osdict # the new dict (without defaults)
13770 #TODO(dynmem): do the appropriate check involving MINMEM
13771 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13772 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13773 mem_check_list = [pnode]
13774 if be_new[constants.BE_AUTO_BALANCE]:
13775 # either we changed auto_balance to yes or it was from before
13776 mem_check_list.extend(instance.secondary_nodes)
13777 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13778 instance.hypervisor)
13779 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13780 [instance.hypervisor], False)
13781 pninfo = nodeinfo[pnode]
13782 msg = pninfo.fail_msg
13784 # Assume the primary node is unreachable and go ahead
13785 self.warn.append("Can't get info from primary node %s: %s" %
13788 (_, _, (pnhvinfo, )) = pninfo.payload
13789 if not isinstance(pnhvinfo.get("memory_free", None), int):
13790 self.warn.append("Node data from primary node %s doesn't contain"
13791 " free memory information" % pnode)
13792 elif instance_info.fail_msg:
13793 self.warn.append("Can't get instance runtime information: %s" %
13794 instance_info.fail_msg)
13796 if instance_info.payload:
13797 current_mem = int(instance_info.payload["memory"])
13799 # Assume instance not running
13800 # (there is a slight race condition here, but it's not very
13801 # probable, and we have no other way to check)
13802 # TODO: Describe race condition
13804 #TODO(dynmem): do the appropriate check involving MINMEM
13805 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13806 pnhvinfo["memory_free"])
13808 raise errors.OpPrereqError("This change will prevent the instance"
13809 " from starting, due to %d MB of memory"
13810 " missing on its primary node" %
13811 miss_mem, errors.ECODE_NORES)
13813 if be_new[constants.BE_AUTO_BALANCE]:
13814 for node, nres in nodeinfo.items():
13815 if node not in instance.secondary_nodes:
13817 nres.Raise("Can't get info from secondary node %s" % node,
13818 prereq=True, ecode=errors.ECODE_STATE)
13819 (_, _, (nhvinfo, )) = nres.payload
13820 if not isinstance(nhvinfo.get("memory_free", None), int):
13821 raise errors.OpPrereqError("Secondary node %s didn't return free"
13822 " memory information" % node,
13823 errors.ECODE_STATE)
13824 #TODO(dynmem): do the appropriate check involving MINMEM
13825 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13826 raise errors.OpPrereqError("This change will prevent the instance"
13827 " from failover to its secondary node"
13828 " %s, due to not enough memory" % node,
13829 errors.ECODE_STATE)
13831 if self.op.runtime_mem:
13832 remote_info = self.rpc.call_instance_info(instance.primary_node,
13834 instance.hypervisor)
13835 remote_info.Raise("Error checking node %s" % instance.primary_node)
13836 if not remote_info.payload: # not running already
13837 raise errors.OpPrereqError("Instance %s is not running" %
13838 instance.name, errors.ECODE_STATE)
13840 current_memory = remote_info.payload["memory"]
13841 if (not self.op.force and
13842 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13843 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13844 raise errors.OpPrereqError("Instance %s must have between %d"
13845 " and %d MB of memory unless --force is"
13848 self.be_proposed[constants.BE_MINMEM],
13849 self.be_proposed[constants.BE_MAXMEM]),
13850 errors.ECODE_INVAL)
13852 delta = self.op.runtime_mem - current_memory
13854 _CheckNodeFreeMemory(self, instance.primary_node,
13855 "ballooning memory for instance %s" %
13856 instance.name, delta, instance.hypervisor)
13858 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13859 raise errors.OpPrereqError("Disk operations not supported for"
13860 " diskless instances", errors.ECODE_INVAL)
13862 def _PrepareNicCreate(_, params, private):
13863 self._PrepareNicModification(params, private, None, None,
13864 {}, cluster, pnode)
13865 return (None, None)
13867 def _PrepareNicMod(_, nic, params, private):
13868 self._PrepareNicModification(params, private, nic.ip, nic.network,
13869 nic.nicparams, cluster, pnode)
13872 def _PrepareNicRemove(_, params, __):
13874 net = params.network
13875 if net is not None and ip is not None:
13876 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13878 # Verify NIC changes (operating on copy)
13879 nics = instance.nics[:]
13880 ApplyContainerMods("NIC", nics, None, self.nicmod,
13881 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13882 if len(nics) > constants.MAX_NICS:
13883 raise errors.OpPrereqError("Instance has too many network interfaces"
13884 " (%d), cannot add more" % constants.MAX_NICS,
13885 errors.ECODE_STATE)
13887 # Verify disk changes (operating on a copy)
13888 disks = instance.disks[:]
13889 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13890 if len(disks) > constants.MAX_DISKS:
13891 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13892 " more" % constants.MAX_DISKS,
13893 errors.ECODE_STATE)
13894 disk_sizes = [disk.size for disk in instance.disks]
13895 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13896 self.diskmod if op == constants.DDM_ADD)
13897 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13898 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13900 if self.op.offline is not None and self.op.offline:
13901 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13902 msg="can't change to offline")
13904 # Pre-compute NIC changes (necessary to use result in hooks)
13905 self._nic_chgdesc = []
13907 # Operate on copies as this is still in prereq
13908 nics = [nic.Copy() for nic in instance.nics]
13909 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13910 self._CreateNewNic, self._ApplyNicMods, None)
13911 self._new_nics = nics
13912 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13914 self._new_nics = None
13915 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13917 if not self.op.ignore_ipolicy:
13918 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13921 # Fill ispec with backend parameters
13922 ispec[constants.ISPEC_SPINDLE_USE] = \
13923 self.be_new.get(constants.BE_SPINDLE_USE, None)
13924 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13927 # Copy ispec to verify parameters with min/max values separately
13928 if self.op.disk_template:
13929 new_disk_template = self.op.disk_template
13931 new_disk_template = instance.disk_template
13932 ispec_max = ispec.copy()
13933 ispec_max[constants.ISPEC_MEM_SIZE] = \
13934 self.be_new.get(constants.BE_MAXMEM, None)
13935 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
13937 ispec_min = ispec.copy()
13938 ispec_min[constants.ISPEC_MEM_SIZE] = \
13939 self.be_new.get(constants.BE_MINMEM, None)
13940 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
13943 if (res_max or res_min):
13944 # FIXME: Improve error message by including information about whether
13945 # the upper or lower limit of the parameter fails the ipolicy.
13946 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13947 (group_info, group_info.name,
13948 utils.CommaJoin(set(res_max + res_min))))
13949 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13951 def _ConvertPlainToDrbd(self, feedback_fn):
13952 """Converts an instance from plain to drbd.
13955 feedback_fn("Converting template to drbd")
13956 instance = self.instance
13957 pnode = instance.primary_node
13958 snode = self.op.remote_node
13960 assert instance.disk_template == constants.DT_PLAIN
13962 # create a fake disk info for _GenerateDiskTemplate
13963 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13964 constants.IDISK_VG: d.logical_id[0]}
13965 for d in instance.disks]
13966 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13967 instance.name, pnode, [snode],
13968 disk_info, None, None, 0, feedback_fn,
13970 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13972 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13973 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13974 info = _GetInstanceInfoText(instance)
13975 feedback_fn("Creating additional volumes...")
13976 # first, create the missing data and meta devices
13977 for disk in anno_disks:
13978 # unfortunately this is... not too nice
13979 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13980 info, True, p_excl_stor)
13981 for child in disk.children:
13982 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13984 # at this stage, all new LVs have been created, we can rename the
13986 feedback_fn("Renaming original volumes...")
13987 rename_list = [(o, n.children[0].logical_id)
13988 for (o, n) in zip(instance.disks, new_disks)]
13989 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13990 result.Raise("Failed to rename original LVs")
13992 feedback_fn("Initializing DRBD devices...")
13993 # all child devices are in place, we can now create the DRBD devices
13994 for disk in anno_disks:
13995 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13996 f_create = node == pnode
13997 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
14000 # at this point, the instance has been modified
14001 instance.disk_template = constants.DT_DRBD8
14002 instance.disks = new_disks
14003 self.cfg.Update(instance, feedback_fn)
14005 # Release node locks while waiting for sync
14006 _ReleaseLocks(self, locking.LEVEL_NODE)
14008 # disks are created, waiting for sync
14009 disk_abort = not _WaitForSync(self, instance,
14010 oneshot=not self.op.wait_for_sync)
14012 raise errors.OpExecError("There are some degraded disks for"
14013 " this instance, please cleanup manually")
14015 # Node resource locks will be released by caller
14017 def _ConvertDrbdToPlain(self, feedback_fn):
14018 """Converts an instance from drbd to plain.
14021 instance = self.instance
14023 assert len(instance.secondary_nodes) == 1
14024 assert instance.disk_template == constants.DT_DRBD8
14026 pnode = instance.primary_node
14027 snode = instance.secondary_nodes[0]
14028 feedback_fn("Converting template to plain")
14030 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
14031 new_disks = [d.children[0] for d in instance.disks]
14033 # copy over size and mode
14034 for parent, child in zip(old_disks, new_disks):
14035 child.size = parent.size
14036 child.mode = parent.mode
14038 # this is a DRBD disk, return its port to the pool
14039 # NOTE: this must be done right before the call to cfg.Update!
14040 for disk in old_disks:
14041 tcp_port = disk.logical_id[2]
14042 self.cfg.AddTcpUdpPort(tcp_port)
14044 # update instance structure
14045 instance.disks = new_disks
14046 instance.disk_template = constants.DT_PLAIN
14047 self.cfg.Update(instance, feedback_fn)
14049 # Release locks in case removing disks takes a while
14050 _ReleaseLocks(self, locking.LEVEL_NODE)
14052 feedback_fn("Removing volumes on the secondary node...")
14053 for disk in old_disks:
14054 self.cfg.SetDiskID(disk, snode)
14055 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14057 self.LogWarning("Could not remove block device %s on node %s,"
14058 " continuing anyway: %s", disk.iv_name, snode, msg)
14060 feedback_fn("Removing unneeded volumes on the primary node...")
14061 for idx, disk in enumerate(old_disks):
14062 meta = disk.children[1]
14063 self.cfg.SetDiskID(meta, pnode)
14064 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14066 self.LogWarning("Could not remove metadata for disk %d on node %s,"
14067 " continuing anyway: %s", idx, pnode, msg)
14069 def _CreateNewDisk(self, idx, params, _):
14070 """Creates a new disk.
14073 instance = self.instance
14076 if instance.disk_template in constants.DTS_FILEBASED:
14077 (file_driver, file_path) = instance.disks[0].logical_id
14078 file_path = os.path.dirname(file_path)
14080 file_driver = file_path = None
14083 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14084 instance.primary_node, instance.secondary_nodes,
14085 [params], file_path, file_driver, idx,
14086 self.Log, self.diskparams)[0]
14088 info = _GetInstanceInfoText(instance)
14090 logging.info("Creating volume %s for instance %s",
14091 disk.iv_name, instance.name)
14092 # Note: this needs to be kept in sync with _CreateDisks
14094 for node in instance.all_nodes:
14095 f_create = (node == instance.primary_node)
14097 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14098 except errors.OpExecError, err:
14099 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14100 disk.iv_name, disk, node, err)
14102 if self.cluster.prealloc_wipe_disks:
14104 _WipeDisks(self, instance,
14105 disks=[(idx, disk, 0)])
14108 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14112 def _ModifyDisk(idx, disk, params, _):
14113 """Modifies a disk.
14116 disk.mode = params[constants.IDISK_MODE]
14119 ("disk.mode/%d" % idx, disk.mode),
14122 def _RemoveDisk(self, idx, root, _):
14126 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14127 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14128 self.cfg.SetDiskID(disk, node)
14129 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14131 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14132 " continuing anyway", idx, node, msg)
14134 # if this is a DRBD disk, return its port to the pool
14135 if root.dev_type in constants.LDS_DRBD:
14136 self.cfg.AddTcpUdpPort(root.logical_id[2])
14138 def _CreateNewNic(self, idx, params, private):
14139 """Creates data structure for a new network interface.
14142 mac = params[constants.INIC_MAC]
14143 ip = params.get(constants.INIC_IP, None)
14144 net = params.get(constants.INIC_NETWORK, None)
14145 net_uuid = self.cfg.LookupNetwork(net)
14146 #TODO: not private.filled?? can a nic have no nicparams??
14147 nicparams = private.filled
14148 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams)
14152 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14153 (mac, ip, private.filled[constants.NIC_MODE],
14154 private.filled[constants.NIC_LINK],
14158 def _ApplyNicMods(self, idx, nic, params, private):
14159 """Modifies a network interface.
14164 for key in [constants.INIC_MAC, constants.INIC_IP]:
14166 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14167 setattr(nic, key, params[key])
14169 new_net = params.get(constants.INIC_NETWORK, nic.network)
14170 new_net_uuid = self.cfg.LookupNetwork(new_net)
14171 if new_net_uuid != nic.network:
14172 changes.append(("nic.network/%d" % idx, new_net))
14173 nic.network = new_net_uuid
14176 nic.nicparams = private.filled
14178 for (key, val) in nic.nicparams.items():
14179 changes.append(("nic.%s/%d" % (key, idx), val))
14183 def Exec(self, feedback_fn):
14184 """Modifies an instance.
14186 All parameters take effect only at the next restart of the instance.
14189 # Process here the warnings from CheckPrereq, as we don't have a
14190 # feedback_fn there.
14191 # TODO: Replace with self.LogWarning
14192 for warn in self.warn:
14193 feedback_fn("WARNING: %s" % warn)
14195 assert ((self.op.disk_template is None) ^
14196 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14197 "Not owning any node resource locks"
14200 instance = self.instance
14203 if self.op.runtime_mem:
14204 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14206 self.op.runtime_mem)
14207 rpcres.Raise("Cannot modify instance runtime memory")
14208 result.append(("runtime_memory", self.op.runtime_mem))
14210 # Apply disk changes
14211 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14212 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14213 _UpdateIvNames(0, instance.disks)
14215 if self.op.disk_template:
14217 check_nodes = set(instance.all_nodes)
14218 if self.op.remote_node:
14219 check_nodes.add(self.op.remote_node)
14220 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14221 owned = self.owned_locks(level)
14222 assert not (check_nodes - owned), \
14223 ("Not owning the correct locks, owning %r, expected at least %r" %
14224 (owned, check_nodes))
14226 r_shut = _ShutdownInstanceDisks(self, instance)
14228 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14229 " proceed with disk template conversion")
14230 mode = (instance.disk_template, self.op.disk_template)
14232 self._DISK_CONVERSIONS[mode](self, feedback_fn)
14234 self.cfg.ReleaseDRBDMinors(instance.name)
14236 result.append(("disk_template", self.op.disk_template))
14238 assert instance.disk_template == self.op.disk_template, \
14239 ("Expected disk template '%s', found '%s'" %
14240 (self.op.disk_template, instance.disk_template))
14242 # Release node and resource locks if there are any (they might already have
14243 # been released during disk conversion)
14244 _ReleaseLocks(self, locking.LEVEL_NODE)
14245 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14247 # Apply NIC changes
14248 if self._new_nics is not None:
14249 instance.nics = self._new_nics
14250 result.extend(self._nic_chgdesc)
14253 if self.op.hvparams:
14254 instance.hvparams = self.hv_inst
14255 for key, val in self.op.hvparams.iteritems():
14256 result.append(("hv/%s" % key, val))
14259 if self.op.beparams:
14260 instance.beparams = self.be_inst
14261 for key, val in self.op.beparams.iteritems():
14262 result.append(("be/%s" % key, val))
14265 if self.op.os_name:
14266 instance.os = self.op.os_name
14269 if self.op.osparams:
14270 instance.osparams = self.os_inst
14271 for key, val in self.op.osparams.iteritems():
14272 result.append(("os/%s" % key, val))
14274 if self.op.offline is None:
14277 elif self.op.offline:
14278 # Mark instance as offline
14279 self.cfg.MarkInstanceOffline(instance.name)
14280 result.append(("admin_state", constants.ADMINST_OFFLINE))
14282 # Mark instance as online, but stopped
14283 self.cfg.MarkInstanceDown(instance.name)
14284 result.append(("admin_state", constants.ADMINST_DOWN))
14286 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14288 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14289 self.owned_locks(locking.LEVEL_NODE)), \
14290 "All node locks should have been released by now"
14294 _DISK_CONVERSIONS = {
14295 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14296 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
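# Editor's note (sketch, not in the original module): Exec looks the handler
# up in this table by (old, new) disk template pair after shutting down the
# instance's disks:
#
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)  # _ConvertPlainToDrbd
#
# Pairs not present in the table are rejected earlier, in
# _PreCheckDiskTemplate, as unsupported conversions.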
14300 class LUInstanceChangeGroup(LogicalUnit):
14301 HPATH = "instance-change-group"
14302 HTYPE = constants.HTYPE_INSTANCE
14305 def ExpandNames(self):
14306 self.share_locks = _ShareAll()
14308 self.needed_locks = {
14309 locking.LEVEL_NODEGROUP: [],
14310 locking.LEVEL_NODE: [],
14311 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14314 self._ExpandAndLockInstance()
14316 if self.op.target_groups:
14317 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14318 self.op.target_groups)
14320 self.req_target_uuids = None
14322 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14324 def DeclareLocks(self, level):
14325 if level == locking.LEVEL_NODEGROUP:
14326 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14328 if self.req_target_uuids:
14329 lock_groups = set(self.req_target_uuids)
14331 # Lock all groups used by instance optimistically; this requires going
14332 # via the node before it's locked, requiring verification later on
14333 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14334 lock_groups.update(instance_groups)
14336 # No target groups, need to lock all of them
14337 lock_groups = locking.ALL_SET
14339 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14341 elif level == locking.LEVEL_NODE:
14342 if self.req_target_uuids:
14343 # Lock all nodes used by instances
14344 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14345 self._LockInstancesNodes()
14347 # Lock all nodes in all potential target groups
14348 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14349 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14350 member_nodes = [node_name
14351 for group in lock_groups
14352 for node_name in self.cfg.GetNodeGroup(group).members]
14353 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14355 # Lock all nodes as all groups are potential targets
14356 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14358 def CheckPrereq(self):
14359 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14360 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14361 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14363 assert (self.req_target_uuids is None or
14364 owned_groups.issuperset(self.req_target_uuids))
14365 assert owned_instances == set([self.op.instance_name])
14367 # Get instance information
14368 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14370 # Check if node groups for locked instance are still correct
14371 assert owned_nodes.issuperset(self.instance.all_nodes), \
14372 ("Instance %s's nodes changed while we kept the lock" %
14373 self.op.instance_name)
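# Descriptive note (added for clarity, not in the original code): the
# node-group locks were acquired optimistically in DeclareLocks, so the
# assertions and the group check in this method re-verify that the instance's
# nodes and groups did not change between lock acquisition and this point.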
14375 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14378 if self.req_target_uuids:
14379 # User requested specific target groups
14380 self.target_uuids = frozenset(self.req_target_uuids)
14382 # All groups except those used by the instance are potential targets
14383 self.target_uuids = owned_groups - inst_groups
14385 conflicting_groups = self.target_uuids & inst_groups
14386 if conflicting_groups:
14387 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14388 " used by the instance '%s'" %
14389 (utils.CommaJoin(conflicting_groups),
14390 self.op.instance_name),
14391 errors.ECODE_INVAL)
14393 if not self.target_uuids:
14394 raise errors.OpPrereqError("There are no possible target groups",
14395 errors.ECODE_INVAL)
14397 def BuildHooksEnv(self):
14398 """Build hooks env.
14401 assert self.target_uuids
14404 "TARGET_GROUPS": " ".join(self.target_uuids),
14407 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14411 def BuildHooksNodes(self):
14412 """Build hooks nodes.
14415 mn = self.cfg.GetMasterNode()
14416 return ([mn], [mn])
14418 def Exec(self, feedback_fn):
14419 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14421 assert instances == [self.op.instance_name], "Instance not locked"
14423 req = iallocator.IAReqGroupChange(instances=instances,
14424 target_groups=list(self.target_uuids))
14425 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14427 ial.Run(self.op.iallocator)
14429 if not ial.success:
14430 raise errors.OpPrereqError("Can't compute solution for changing group of"
14431 " instance '%s' using iallocator '%s': %s" %
14432 (self.op.instance_name, self.op.iallocator,
14433 ial.info), errors.ECODE_NORES)
14435 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14437 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14438 " instance '%s'", len(jobs), self.op.instance_name)
14440 return ResultWithJobs(jobs)
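# Illustrative note (hypothetical values, not in the original code): the jobs
# loaded from the iallocator result are lists of opcode lists, e.g. a single
# migration could come back as
#   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1")]]
# which is then wrapped in ResultWithJobs for further processing.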
14443 class LUBackupQuery(NoHooksLU):
14444 """Query the exports list
14449 def CheckArguments(self):
14450 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14451 ["node", "export"], self.op.use_locking)
14453 def ExpandNames(self):
14454 self.expq.ExpandNames(self)
14456 def DeclareLocks(self, level):
14457 self.expq.DeclareLocks(self, level)
14459 def Exec(self, feedback_fn):
14462 for (node, expname) in self.expq.OldStyleQuery(self):
14463 if expname is None:
14464 result[node] = False
14466 result.setdefault(node, []).append(expname)
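# Illustrative result shape (hypothetical node/export names): nodes that could
# not be queried map to False, all others map to a list of export names, e.g.
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}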
14471 class _ExportQuery(_QueryBase):
14472 FIELDS = query.EXPORT_FIELDS
14474 #: The node name is not a unique key for this query
14475 SORT_FIELD = "node"
14477 def ExpandNames(self, lu):
14478 lu.needed_locks = {}
14480 # The following variables interact with _QueryBase._GetNames
14482 self.wanted = _GetWantedNodes(lu, self.names)
14484 self.wanted = locking.ALL_SET
14486 self.do_locking = self.use_locking
14488 if self.do_locking:
14489 lu.share_locks = _ShareAll()
14490 lu.needed_locks = {
14491 locking.LEVEL_NODE: self.wanted,
14495 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14497 def DeclareLocks(self, lu, level):
14500 def _GetQueryData(self, lu):
14501 """Computes the list of nodes and their attributes.
14504 # Locking is not used
14506 assert not (compat.any(lu.glm.is_owned(level)
14507 for level in locking.LEVELS
14508 if level != locking.LEVEL_CLUSTER) or
14509 self.do_locking or self.use_locking)
14511 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14515 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14517 result.append((node, None))
14519 result.extend((node, expname) for expname in nres.payload)
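# Sketch of the data returned above (hypothetical names): a flat list of
# (node, export) pairs, with (node, None) marking nodes whose export list
# could not be retrieved, e.g. [("node1", "inst1"), ("node2", None)].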
14524 class LUBackupPrepare(NoHooksLU):
14525 """Prepares an instance for an export and returns useful information.
14530 def ExpandNames(self):
14531 self._ExpandAndLockInstance()
14533 def CheckPrereq(self):
14534 """Check prerequisites.
14537 instance_name = self.op.instance_name
14539 self.instance = self.cfg.GetInstanceInfo(instance_name)
14540 assert self.instance is not None, \
14541 "Cannot retrieve locked instance %s" % self.op.instance_name
14542 _CheckNodeOnline(self, self.instance.primary_node)
14544 self._cds = _GetClusterDomainSecret()
14546 def Exec(self, feedback_fn):
14547 """Prepares an instance for an export.
14550 instance = self.instance
14552 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14553 salt = utils.GenerateSecret(8)
14555 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14556 result = self.rpc.call_x509_cert_create(instance.primary_node,
14557 constants.RIE_CERT_VALIDITY)
14558 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14560 (name, cert_pem) = result.payload
14562 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14566 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14567 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14569 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14575 class LUBackupExport(LogicalUnit):
14576 """Export an instance to an image in the cluster.
14579 HPATH = "instance-export"
14580 HTYPE = constants.HTYPE_INSTANCE
14583 def CheckArguments(self):
14584 """Check the arguments.
14587 self.x509_key_name = self.op.x509_key_name
14588 self.dest_x509_ca_pem = self.op.destination_x509_ca
14590 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14591 if not self.x509_key_name:
14592 raise errors.OpPrereqError("Missing X509 key name for encryption",
14593 errors.ECODE_INVAL)
14595 if not self.dest_x509_ca_pem:
14596 raise errors.OpPrereqError("Missing destination X509 CA",
14597 errors.ECODE_INVAL)
14599 def ExpandNames(self):
14600 self._ExpandAndLockInstance()
14602 # Lock all nodes for local exports
14603 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14604 # FIXME: lock only instance primary and destination node
14606 # Sad but true, for now we have to lock all nodes, as we don't know where
14607 # the previous export might be, and in this LU we search for it and
14608 # remove it from its current node. In the future we could fix this by:
14609 # - making a tasklet to search (share-lock all), then create the
14610 # new one, then one to remove it afterwards
14611 # - removing the removal operation altogether
14612 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14614 # Allocations should be stopped while this LU runs with node locks, but
14615 # it doesn't have to be exclusive
14616 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14617 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14619 def DeclareLocks(self, level):
14620 """Last minute lock declaration."""
14621 # All nodes are locked anyway, so nothing to do here.
14623 def BuildHooksEnv(self):
14624 """Build hooks env.
14626 This will run on the master, primary node and target node.
14630 "EXPORT_MODE": self.op.mode,
14631 "EXPORT_NODE": self.op.target_node,
14632 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14633 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14634 # TODO: Generic function for boolean env variables
14635 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14638 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14642 def BuildHooksNodes(self):
14643 """Build hooks nodes.
14646 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14648 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14649 nl.append(self.op.target_node)
14653 def CheckPrereq(self):
14654 """Check prerequisites.
14656 This checks that the instance and node names are valid.
14659 instance_name = self.op.instance_name
14661 self.instance = self.cfg.GetInstanceInfo(instance_name)
14662 assert self.instance is not None, \
14663 "Cannot retrieve locked instance %s" % self.op.instance_name
14664 _CheckNodeOnline(self, self.instance.primary_node)
14666 if (self.op.remove_instance and
14667 self.instance.admin_state == constants.ADMINST_UP and
14668 not self.op.shutdown):
14669 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14670 " down first", errors.ECODE_STATE)
14672 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14673 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14674 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14675 assert self.dst_node is not None
14677 _CheckNodeOnline(self, self.dst_node.name)
14678 _CheckNodeNotDrained(self, self.dst_node.name)
14681 self.dest_disk_info = None
14682 self.dest_x509_ca = None
14684 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14685 self.dst_node = None
14687 if len(self.op.target_node) != len(self.instance.disks):
14688 raise errors.OpPrereqError(("Received destination information for %s"
14689 " disks, but instance %s has %s disks") %
14690 (len(self.op.target_node), instance_name,
14691 len(self.instance.disks)),
14692 errors.ECODE_INVAL)
14694 cds = _GetClusterDomainSecret()
14696 # Check X509 key name
14698 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14699 except (TypeError, ValueError), err:
14700 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14701 errors.ECODE_INVAL)
14703 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14704 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14705 errors.ECODE_INVAL)
14707 # Load and verify CA
14709 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14710 except OpenSSL.crypto.Error, err:
14711 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14712 (err, ), errors.ECODE_INVAL)
14714 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14715 if errcode is not None:
14716 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14717 (msg, ), errors.ECODE_INVAL)
14719 self.dest_x509_ca = cert
14721 # Verify target information
14723 for idx, disk_data in enumerate(self.op.target_node):
14725 (host, port, magic) = \
14726 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14727 except errors.GenericError, err:
14728 raise errors.OpPrereqError("Target info for disk %s: %s" %
14729 (idx, err), errors.ECODE_INVAL)
14731 disk_info.append((host, port, magic))
14733 assert len(disk_info) == len(self.op.target_node)
14734 self.dest_disk_info = disk_info
14737 raise errors.ProgrammerError("Unhandled export mode %r" %
14740 # instance disk type verification
14741 # TODO: Implement export support for file-based disks
14742 for disk in self.instance.disks:
14743 if disk.dev_type == constants.LD_FILE:
14744 raise errors.OpPrereqError("Export not supported for instances with"
14745 " file-based disks", errors.ECODE_INVAL)
14747 def _CleanupExports(self, feedback_fn):
14748 """Removes exports of current instance from all other nodes.
14750 If an instance in a cluster with nodes A..D was exported to node C, its
14751 exports will be removed from the nodes A, B and D.
14754 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14756 nodelist = self.cfg.GetNodeList()
14757 nodelist.remove(self.dst_node.name)
14759 # on one-node clusters nodelist will be empty after the removal;
14760 # if we proceeded, the backup would be removed because OpBackupQuery
14761 # substitutes an empty list with the full cluster node list.
14762 iname = self.instance.name
14764 feedback_fn("Removing old exports for instance %s" % iname)
14765 exportlist = self.rpc.call_export_list(nodelist)
14766 for node in exportlist:
14767 if exportlist[node].fail_msg:
14769 if iname in exportlist[node].payload:
14770 msg = self.rpc.call_export_remove(node, iname).fail_msg
14772 self.LogWarning("Could not remove older export for instance %s"
14773 " on node %s: %s", iname, node, msg)
14775 def Exec(self, feedback_fn):
14776 """Export an instance to an image in the cluster.
14779 assert self.op.mode in constants.EXPORT_MODES
14781 instance = self.instance
14782 src_node = instance.primary_node
14784 if self.op.shutdown:
14785 # shutdown the instance, but not the disks
14786 feedback_fn("Shutting down instance %s" % instance.name)
14787 result = self.rpc.call_instance_shutdown(src_node, instance,
14788 self.op.shutdown_timeout)
14789 # TODO: Maybe ignore failures if ignore_remove_failures is set
14790 result.Raise("Could not shutdown instance %s on"
14791 " node %s" % (instance.name, src_node))
14793 # set the disks ID correctly since call_instance_start needs the
14794 # correct drbd minor to create the symlinks
14795 for disk in instance.disks:
14796 self.cfg.SetDiskID(disk, src_node)
14798 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14801 # Activate the instance disks if we're exporting a stopped instance
14802 feedback_fn("Activating disks for %s" % instance.name)
14803 _StartInstanceDisks(self, instance, None)
14806 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14809 helper.CreateSnapshots()
14811 if (self.op.shutdown and
14812 instance.admin_state == constants.ADMINST_UP and
14813 not self.op.remove_instance):
14814 assert not activate_disks
14815 feedback_fn("Starting instance %s" % instance.name)
14816 result = self.rpc.call_instance_start(src_node,
14817 (instance, None, None), False)
14818 msg = result.fail_msg
14820 feedback_fn("Failed to start instance: %s" % msg)
14821 _ShutdownInstanceDisks(self, instance)
14822 raise errors.OpExecError("Could not start instance: %s" % msg)
14824 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14825 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14826 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14827 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14828 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14830 (key_name, _, _) = self.x509_key_name
14833 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14836 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14837 key_name, dest_ca_pem,
14842 # Check for backwards compatibility
14843 assert len(dresults) == len(instance.disks)
14844 assert compat.all(isinstance(i, bool) for i in dresults), \
14845 "Not all results are boolean: %r" % dresults
14849 feedback_fn("Deactivating disks for %s" % instance.name)
14850 _ShutdownInstanceDisks(self, instance)
14852 if not (compat.all(dresults) and fin_resu):
14855 failures.append("export finalization")
14856 if not compat.all(dresults):
14857 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14859 failures.append("disk export: disk(s) %s" % fdsk)
14861 raise errors.OpExecError("Export failed, errors in %s" %
14862 utils.CommaJoin(failures))
14864 # At this point, the export was successful, we can cleanup/finish
14866 # Remove instance if requested
14867 if self.op.remove_instance:
14868 feedback_fn("Removing instance %s" % instance.name)
14869 _RemoveInstance(self, feedback_fn, instance,
14870 self.op.ignore_remove_failures)
14872 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14873 self._CleanupExports(feedback_fn)
14875 return fin_resu, dresults
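# Illustrative return value (hypothetical): fin_resu is the overall
# finalization status and dresults holds one boolean per disk, so a fully
# successful export of a two-disk instance would return (True, [True, True]).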
14878 class LUBackupRemove(NoHooksLU):
14879 """Remove exports related to the named instance.
14884 def ExpandNames(self):
14885 self.needed_locks = {
14886 # We need all nodes to be locked in order for RemoveExport to work, but
14887 # we don't need to lock the instance itself, as nothing will happen to it
14888 # (and we can remove exports also for a removed instance)
14889 locking.LEVEL_NODE: locking.ALL_SET,
14891 # Removing backups is quick, so blocking allocations is justified
14892 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14895 # Allocations should be stopped while this LU runs with node locks, but it
14896 # doesn't have to be exclusive
14897 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14899 def Exec(self, feedback_fn):
14900 """Remove any export.
14903 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14904 # If the instance was not found we'll try with the name that was passed in.
14905 # This will only work if it was an FQDN, though.
14907 if not instance_name:
14909 instance_name = self.op.instance_name
14911 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14912 exportlist = self.rpc.call_export_list(locked_nodes)
14914 for node in exportlist:
14915 msg = exportlist[node].fail_msg
14917 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14919 if instance_name in exportlist[node].payload:
14921 result = self.rpc.call_export_remove(node, instance_name)
14922 msg = result.fail_msg
14924 logging.error("Could not remove export for instance %s"
14925 " on node %s: %s", instance_name, node, msg)
14927 if fqdn_warn and not found:
14928 feedback_fn("Export not found. If trying to remove an export belonging"
14929 " to a deleted instance please use its Fully Qualified"
14933 class LUGroupAdd(LogicalUnit):
14934 """Logical unit for creating node groups.
14937 HPATH = "group-add"
14938 HTYPE = constants.HTYPE_GROUP
14941 def ExpandNames(self):
14942 # We need the new group's UUID here so that we can create and acquire the
14943 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14944 # that it should not check whether the UUID exists in the configuration.
14945 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14946 self.needed_locks = {}
14947 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14949 def CheckPrereq(self):
14950 """Check prerequisites.
14952 This checks that the given group name is not an existing node group
14957 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14958 except errors.OpPrereqError:
14961 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14962 " node group (UUID: %s)" %
14963 (self.op.group_name, existing_uuid),
14964 errors.ECODE_EXISTS)
14966 if self.op.ndparams:
14967 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14969 if self.op.hv_state:
14970 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14972 self.new_hv_state = None
14974 if self.op.disk_state:
14975 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14977 self.new_disk_state = None
14979 if self.op.diskparams:
14980 for templ in constants.DISK_TEMPLATES:
14981 if templ in self.op.diskparams:
14982 utils.ForceDictType(self.op.diskparams[templ],
14983 constants.DISK_DT_TYPES)
14984 self.new_diskparams = self.op.diskparams
14986 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14987 except errors.OpPrereqError, err:
14988 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14989 errors.ECODE_INVAL)
14991 self.new_diskparams = {}
14993 if self.op.ipolicy:
14994 cluster = self.cfg.GetClusterInfo()
14995 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14997 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14998 except errors.ConfigurationError, err:
14999 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
15000 errors.ECODE_INVAL)
15002 def BuildHooksEnv(self):
15003 """Build hooks env.
15007 "GROUP_NAME": self.op.group_name,
15010 def BuildHooksNodes(self):
15011 """Build hooks nodes.
15014 mn = self.cfg.GetMasterNode()
15015 return ([mn], [mn])
15017 def Exec(self, feedback_fn):
15018 """Add the node group to the cluster.
15021 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
15022 uuid=self.group_uuid,
15023 alloc_policy=self.op.alloc_policy,
15024 ndparams=self.op.ndparams,
15025 diskparams=self.new_diskparams,
15026 ipolicy=self.op.ipolicy,
15027 hv_state_static=self.new_hv_state,
15028 disk_state_static=self.new_disk_state)
15030 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
15031 del self.remove_locks[locking.LEVEL_NODEGROUP]
15034 class LUGroupAssignNodes(NoHooksLU):
15035 """Logical unit for assigning nodes to groups.
15040 def ExpandNames(self):
15041 # These raise errors.OpPrereqError on their own:
15042 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15043 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15045 # We want to lock all the affected nodes and groups. We have readily
15046 # available the list of nodes, and the *destination* group. To gather the
15047 # list of "source" groups, we need to fetch node information later on.
15048 self.needed_locks = {
15049 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
15050 locking.LEVEL_NODE: self.op.nodes,
15053 def DeclareLocks(self, level):
15054 if level == locking.LEVEL_NODEGROUP:
15055 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15057 # Try to get all affected nodes' groups without having the group or node
15058 # lock yet. Needs verification later in the code flow.
15059 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15061 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15063 def CheckPrereq(self):
15064 """Check prerequisites.
15067 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15068 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15069 frozenset(self.op.nodes))
15071 expected_locks = (set([self.group_uuid]) |
15072 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15073 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15074 if actual_locks != expected_locks:
15075 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15076 " current groups are '%s', used to be '%s'" %
15077 (utils.CommaJoin(expected_locks),
15078 utils.CommaJoin(actual_locks)))
15080 self.node_data = self.cfg.GetAllNodesInfo()
15081 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15082 instance_data = self.cfg.GetAllInstancesInfo()
15084 if self.group is None:
15085 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15086 (self.op.group_name, self.group_uuid))
15088 (new_splits, previous_splits) = \
15089 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15090 for node in self.op.nodes],
15091 self.node_data, instance_data)
15094 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15096 if not self.op.force:
15097 raise errors.OpExecError("The following instances get split by this"
15098 " change and --force was not given: %s" %
15101 self.LogWarning("This operation will split the following instances: %s",
15104 if previous_splits:
15105 self.LogWarning("In addition, these already-split instances continue"
15106 " to be split across groups: %s",
15107 utils.CommaJoin(utils.NiceSort(previous_splits)))
15109 def Exec(self, feedback_fn):
15110 """Assign nodes to a new group.
15113 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15115 self.cfg.AssignGroupNodes(mods)
15118 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15119 """Check for split instances after a node assignment.
15121 This method considers a series of node assignments as an atomic operation,
15122 and returns information about split instances after applying the set of
15125 In particular, it returns information about newly split instances, and
15126 instances that were already split, and remain so after the change.
15128 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15131 @type changes: list of (node_name, new_group_uuid) pairs.
15132 @param changes: list of node assignments to consider.
15133 @param node_data: a dict with data for all nodes
15134 @param instance_data: a dict with all instances to consider
15135 @rtype: a two-tuple
15136 @return: a list of instances that were previously okay and become split as a
15137 consequence of this change, and a list of instances that were previously
15138 split and that this change does not fix.
15141 changed_nodes = dict((node, group) for node, group in changes
15142 if node_data[node].group != group)
15144 all_split_instances = set()
15145 previously_split_instances = set()
15147 def InstanceNodes(instance):
15148 return [instance.primary_node] + list(instance.secondary_nodes)
15150 for inst in instance_data.values():
15151 if inst.disk_template not in constants.DTS_INT_MIRROR:
15154 instance_nodes = InstanceNodes(inst)
15156 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15157 previously_split_instances.add(inst.name)
15159 if len(set(changed_nodes.get(node, node_data[node].group)
15160 for node in instance_nodes)) > 1:
15161 all_split_instances.add(inst.name)
15163 return (list(all_split_instances - previously_split_instances),
15164 list(previously_split_instances & all_split_instances))
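# Worked example (hypothetical names, added for illustration): assume a DRBD
# instance with primary node "node1" and secondary node "node2", both in
# group "grp-A". Moving only "node2" to another group, i.e.
#   changes = [("node2", "grp-B-uuid")]
# makes the instance newly split (first returned list); had its nodes already
# been in different groups before the change, it would instead appear in the
# second list of still-split instances.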
15167 class _GroupQuery(_QueryBase):
15168 FIELDS = query.GROUP_FIELDS
15170 def ExpandNames(self, lu):
15171 lu.needed_locks = {}
15173 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15174 self._cluster = lu.cfg.GetClusterInfo()
15175 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15178 self.wanted = [name_to_uuid[name]
15179 for name in utils.NiceSort(name_to_uuid.keys())]
15181 # Accept names to be either names or UUIDs.
15184 all_uuid = frozenset(self._all_groups.keys())
15186 for name in self.names:
15187 if name in all_uuid:
15188 self.wanted.append(name)
15189 elif name in name_to_uuid:
15190 self.wanted.append(name_to_uuid[name])
15192 missing.append(name)
15195 raise errors.OpPrereqError("Some groups do not exist: %s" %
15196 utils.CommaJoin(missing),
15197 errors.ECODE_NOENT)
15199 def DeclareLocks(self, lu, level):
15202 def _GetQueryData(self, lu):
15203 """Computes the list of node groups and their attributes.
15206 do_nodes = query.GQ_NODE in self.requested_data
15207 do_instances = query.GQ_INST in self.requested_data
15209 group_to_nodes = None
15210 group_to_instances = None
15212 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15213 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15214 # latter GetAllInstancesInfo() is not enough, for we have to go through
15215 # instance->node. Hence, we will need to process nodes even if we only need
15216 # instance information.
15217 if do_nodes or do_instances:
15218 all_nodes = lu.cfg.GetAllNodesInfo()
15219 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15222 for node in all_nodes.values():
15223 if node.group in group_to_nodes:
15224 group_to_nodes[node.group].append(node.name)
15225 node_to_group[node.name] = node.group
15228 all_instances = lu.cfg.GetAllInstancesInfo()
15229 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15231 for instance in all_instances.values():
15232 node = instance.primary_node
15233 if node in node_to_group:
15234 group_to_instances[node_to_group[node]].append(instance.name)
15237 # Do not pass on node information if it was not requested.
15238 group_to_nodes = None
15240 return query.GroupQueryData(self._cluster,
15241 [self._all_groups[uuid]
15242 for uuid in self.wanted],
15243 group_to_nodes, group_to_instances,
15244 query.GQ_DISKPARAMS in self.requested_data)
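# Sketch of the mappings built above (hypothetical UUIDs/names):
#   group_to_nodes     = {"grp-uuid-1": ["node1", "node2"]}
#   group_to_instances = {"grp-uuid-1": ["inst1"]}
# Either mapping stays None when the corresponding data was not requested.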
15247 class LUGroupQuery(NoHooksLU):
15248 """Logical unit for querying node groups.
15253 def CheckArguments(self):
15254 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15255 self.op.output_fields, False)
15257 def ExpandNames(self):
15258 self.gq.ExpandNames(self)
15260 def DeclareLocks(self, level):
15261 self.gq.DeclareLocks(self, level)
15263 def Exec(self, feedback_fn):
15264 return self.gq.OldStyleQuery(self)
15267 class LUGroupSetParams(LogicalUnit):
15268 """Modifies the parameters of a node group.
15271 HPATH = "group-modify"
15272 HTYPE = constants.HTYPE_GROUP
15275 def CheckArguments(self):
15278 self.op.diskparams,
15279 self.op.alloc_policy,
15281 self.op.disk_state,
15285 if all_changes.count(None) == len(all_changes):
15286 raise errors.OpPrereqError("Please pass at least one modification",
15287 errors.ECODE_INVAL)
15289 def ExpandNames(self):
15290 # This raises errors.OpPrereqError on its own:
15291 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15293 self.needed_locks = {
15294 locking.LEVEL_INSTANCE: [],
15295 locking.LEVEL_NODEGROUP: [self.group_uuid],
15298 self.share_locks[locking.LEVEL_INSTANCE] = 1
15300 def DeclareLocks(self, level):
15301 if level == locking.LEVEL_INSTANCE:
15302 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15304 # Lock instances optimistically, needs verification once group lock has
15306 self.needed_locks[locking.LEVEL_INSTANCE] = \
15307 self.cfg.GetNodeGroupInstances(self.group_uuid)
15310 def _UpdateAndVerifyDiskParams(old, new):
15311 """Updates and verifies disk parameters.
15314 new_params = _GetUpdatedParams(old, new)
15315 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15318 def CheckPrereq(self):
15319 """Check prerequisites.
15322 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15324 # Check if locked instances are still correct
15325 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15327 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15328 cluster = self.cfg.GetClusterInfo()
15330 if self.group is None:
15331 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15332 (self.op.group_name, self.group_uuid))
15334 if self.op.ndparams:
15335 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15336 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15337 self.new_ndparams = new_ndparams
15339 if self.op.diskparams:
15340 diskparams = self.group.diskparams
15341 uavdp = self._UpdateAndVerifyDiskParams
15342 # For each disktemplate subdict update and verify the values
15343 new_diskparams = dict((dt,
15344 uavdp(diskparams.get(dt, {}),
15345 self.op.diskparams[dt]))
15346 for dt in constants.DISK_TEMPLATES
15347 if dt in self.op.diskparams)
15348 # Now that we have all subdicts of diskparams ready, let's merge the
15349 # actual dict with all updated subdicts
15350 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15352 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15353 except errors.OpPrereqError, err:
15354 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15355 errors.ECODE_INVAL)
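# Illustrative merge (hypothetical values): with existing group diskparams
#   {"drbd": {"metavg": "xenvg"}}
# and self.op.diskparams = {"drbd": {"resync-rate": 2048}}, the per-template
# update above yields {"drbd": {"metavg": "xenvg", "resync-rate": 2048}}
# before the result is verified against constants.DISK_DT_DEFAULTS.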
15357 if self.op.hv_state:
15358 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15359 self.group.hv_state_static)
15361 if self.op.disk_state:
15362 self.new_disk_state = \
15363 _MergeAndVerifyDiskState(self.op.disk_state,
15364 self.group.disk_state_static)
15366 if self.op.ipolicy:
15367 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15371 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15372 inst_filter = lambda inst: inst.name in owned_instances
15373 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15374 gmi = ganeti.masterd.instance
15376 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15378 new_ipolicy, instances, self.cfg)
15381 self.LogWarning("After the ipolicy change the following instances"
15382 " violate them: %s",
15383 utils.CommaJoin(violations))
15385 def BuildHooksEnv(self):
15386 """Build hooks env.
15390 "GROUP_NAME": self.op.group_name,
15391 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15394 def BuildHooksNodes(self):
15395 """Build hooks nodes.
15398 mn = self.cfg.GetMasterNode()
15399 return ([mn], [mn])
15401 def Exec(self, feedback_fn):
15402 """Modifies the node group.
15407 if self.op.ndparams:
15408 self.group.ndparams = self.new_ndparams
15409 result.append(("ndparams", str(self.group.ndparams)))
15411 if self.op.diskparams:
15412 self.group.diskparams = self.new_diskparams
15413 result.append(("diskparams", str(self.group.diskparams)))
15415 if self.op.alloc_policy:
15416 self.group.alloc_policy = self.op.alloc_policy
15418 if self.op.hv_state:
15419 self.group.hv_state_static = self.new_hv_state
15421 if self.op.disk_state:
15422 self.group.disk_state_static = self.new_disk_state
15424 if self.op.ipolicy:
15425 self.group.ipolicy = self.new_ipolicy
15427 self.cfg.Update(self.group, feedback_fn)
15431 class LUGroupRemove(LogicalUnit):
15432 HPATH = "group-remove"
15433 HTYPE = constants.HTYPE_GROUP
15436 def ExpandNames(self):
15437 # This will raise errors.OpPrereqError on its own:
15438 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15439 self.needed_locks = {
15440 locking.LEVEL_NODEGROUP: [self.group_uuid],
15443 def CheckPrereq(self):
15444 """Check prerequisites.
15446 This checks that the given group name exists as a node group, that it is
15447 empty (i.e., contains no nodes), and that it is not the last group of the
15451 # Verify that the group is empty.
15452 group_nodes = [node.name
15453 for node in self.cfg.GetAllNodesInfo().values()
15454 if node.group == self.group_uuid]
15457 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15459 (self.op.group_name,
15460 utils.CommaJoin(utils.NiceSort(group_nodes))),
15461 errors.ECODE_STATE)
15463 # Verify the cluster would not be left group-less.
15464 if len(self.cfg.GetNodeGroupList()) == 1:
15465 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15466 " removed" % self.op.group_name,
15467 errors.ECODE_STATE)
15469 def BuildHooksEnv(self):
15470 """Build hooks env.
15474 "GROUP_NAME": self.op.group_name,
15477 def BuildHooksNodes(self):
15478 """Build hooks nodes.
15481 mn = self.cfg.GetMasterNode()
15482 return ([mn], [mn])
15484 def Exec(self, feedback_fn):
15485 """Remove the node group.
15489 self.cfg.RemoveNodeGroup(self.group_uuid)
15490 except errors.ConfigurationError:
15491 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15492 (self.op.group_name, self.group_uuid))
15494 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15497 class LUGroupRename(LogicalUnit):
15498 HPATH = "group-rename"
15499 HTYPE = constants.HTYPE_GROUP
15502 def ExpandNames(self):
15503 # This raises errors.OpPrereqError on its own:
15504 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15506 self.needed_locks = {
15507 locking.LEVEL_NODEGROUP: [self.group_uuid],
15510 def CheckPrereq(self):
15511 """Check prerequisites.
15513 Ensures the requested new name is not yet used.
15517 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15518 except errors.OpPrereqError:
15521 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15522 " node group (UUID: %s)" %
15523 (self.op.new_name, new_name_uuid),
15524 errors.ECODE_EXISTS)
15526 def BuildHooksEnv(self):
15527 """Build hooks env.
15531 "OLD_NAME": self.op.group_name,
15532 "NEW_NAME": self.op.new_name,
15535 def BuildHooksNodes(self):
15536 """Build hooks nodes.
15539 mn = self.cfg.GetMasterNode()
15541 all_nodes = self.cfg.GetAllNodesInfo()
15542 all_nodes.pop(mn, None)
15545 run_nodes.extend(node.name for node in all_nodes.values()
15546 if node.group == self.group_uuid)
15548 return (run_nodes, run_nodes)
15550 def Exec(self, feedback_fn):
15551 """Rename the node group.
15554 group = self.cfg.GetNodeGroup(self.group_uuid)
15557 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15558 (self.op.group_name, self.group_uuid))
15560 group.name = self.op.new_name
15561 self.cfg.Update(group, feedback_fn)
15563 return self.op.new_name
15566 class LUGroupEvacuate(LogicalUnit):
15567 HPATH = "group-evacuate"
15568 HTYPE = constants.HTYPE_GROUP
15571 def ExpandNames(self):
15572 # This raises errors.OpPrereqError on its own:
15573 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15575 if self.op.target_groups:
15576 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15577 self.op.target_groups)
15579 self.req_target_uuids = []
15581 if self.group_uuid in self.req_target_uuids:
15582 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15583 " as a target group (targets are %s)" %
15585 utils.CommaJoin(self.req_target_uuids)),
15586 errors.ECODE_INVAL)
15588 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15590 self.share_locks = _ShareAll()
15591 self.needed_locks = {
15592 locking.LEVEL_INSTANCE: [],
15593 locking.LEVEL_NODEGROUP: [],
15594 locking.LEVEL_NODE: [],
15597 def DeclareLocks(self, level):
15598 if level == locking.LEVEL_INSTANCE:
15599 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15601 # Lock instances optimistically, needs verification once node and group
15602 # locks have been acquired
15603 self.needed_locks[locking.LEVEL_INSTANCE] = \
15604 self.cfg.GetNodeGroupInstances(self.group_uuid)
15606 elif level == locking.LEVEL_NODEGROUP:
15607 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15609 if self.req_target_uuids:
15610 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15612 # Lock all groups used by instances optimistically; this requires going
15613 # via the node before it's locked, requiring verification later on
15614 lock_groups.update(group_uuid
15615 for instance_name in
15616 self.owned_locks(locking.LEVEL_INSTANCE)
15618 self.cfg.GetInstanceNodeGroups(instance_name))
15620 # No target groups, need to lock all of them
15621 lock_groups = locking.ALL_SET
15623 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15625 elif level == locking.LEVEL_NODE:
15626 # This will only lock the nodes in the group to be evacuated which
15627 # contain actual instances
15628 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15629 self._LockInstancesNodes()
15631 # Lock all nodes in group to be evacuated and target groups
15632 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15633 assert self.group_uuid in owned_groups
15634 member_nodes = [node_name
15635 for group in owned_groups
15636 for node_name in self.cfg.GetNodeGroup(group).members]
15637 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15639 def CheckPrereq(self):
15640 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15641 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15642 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15644 assert owned_groups.issuperset(self.req_target_uuids)
15645 assert self.group_uuid in owned_groups
15647 # Check if locked instances are still correct
15648 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15650 # Get instance information
15651 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15653 # Check if node groups for locked instances are still correct
15654 _CheckInstancesNodeGroups(self.cfg, self.instances,
15655 owned_groups, owned_nodes, self.group_uuid)
15657 if self.req_target_uuids:
15658 # User requested specific target groups
15659 self.target_uuids = self.req_target_uuids
15661 # All groups except the one to be evacuated are potential targets
15662 self.target_uuids = [group_uuid for group_uuid in owned_groups
15663 if group_uuid != self.group_uuid]
15665 if not self.target_uuids:
15666 raise errors.OpPrereqError("There are no possible target groups",
15667 errors.ECODE_INVAL)
15669 def BuildHooksEnv(self):
15670 """Build hooks env.
15674 "GROUP_NAME": self.op.group_name,
15675 "TARGET_GROUPS": " ".join(self.target_uuids),
15678 def BuildHooksNodes(self):
15679 """Build hooks nodes.
15682 mn = self.cfg.GetMasterNode()
15684 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15686 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15688 return (run_nodes, run_nodes)
15690 def Exec(self, feedback_fn):
15691 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15693 assert self.group_uuid not in self.target_uuids
15695 req = iallocator.IAReqGroupChange(instances=instances,
15696 target_groups=self.target_uuids)
15697 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15699 ial.Run(self.op.iallocator)
15701 if not ial.success:
15702 raise errors.OpPrereqError("Can't compute group evacuation using"
15703 " iallocator '%s': %s" %
15704 (self.op.iallocator, ial.info),
15705 errors.ECODE_NORES)
15707 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15709 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15710 len(jobs), self.op.group_name)
15712 return ResultWithJobs(jobs)
15715 class TagsLU(NoHooksLU): # pylint: disable=W0223
15716 """Generic tags LU.
15718 This is an abstract class which is the parent of all the other tags LUs.
15721 def ExpandNames(self):
15722 self.group_uuid = None
15723 self.needed_locks = {}
15725 if self.op.kind == constants.TAG_NODE:
15726 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15727 lock_level = locking.LEVEL_NODE
15728 lock_name = self.op.name
15729 elif self.op.kind == constants.TAG_INSTANCE:
15730 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15731 lock_level = locking.LEVEL_INSTANCE
15732 lock_name = self.op.name
15733 elif self.op.kind == constants.TAG_NODEGROUP:
15734 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15735 lock_level = locking.LEVEL_NODEGROUP
15736 lock_name = self.group_uuid
15737 elif self.op.kind == constants.TAG_NETWORK:
15738 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15739 lock_level = locking.LEVEL_NETWORK
15740 lock_name = self.network_uuid
15745 if lock_level and getattr(self.op, "use_locking", True):
15746 self.needed_locks[lock_level] = lock_name
15748 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15749 # not possible to acquire the BGL based on opcode parameters)
15751 def CheckPrereq(self):
15752 """Check prerequisites.
15755 if self.op.kind == constants.TAG_CLUSTER:
15756 self.target = self.cfg.GetClusterInfo()
15757 elif self.op.kind == constants.TAG_NODE:
15758 self.target = self.cfg.GetNodeInfo(self.op.name)
15759 elif self.op.kind == constants.TAG_INSTANCE:
15760 self.target = self.cfg.GetInstanceInfo(self.op.name)
15761 elif self.op.kind == constants.TAG_NODEGROUP:
15762 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15763 elif self.op.kind == constants.TAG_NETWORK:
15764 self.target = self.cfg.GetNetwork(self.network_uuid)
15766 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15767 str(self.op.kind), errors.ECODE_INVAL)
15770 class LUTagsGet(TagsLU):
15771 """Returns the tags of a given object.
15776 def ExpandNames(self):
15777 TagsLU.ExpandNames(self)
15779 # Share locks as this is only a read operation
15780 self.share_locks = _ShareAll()
15782 def Exec(self, feedback_fn):
15783 """Returns the tag list.
15786 return list(self.target.GetTags())
15789 class LUTagsSearch(NoHooksLU):
15790 """Searches the tags for a given pattern.
15795 def ExpandNames(self):
15796 self.needed_locks = {}
15798 def CheckPrereq(self):
15799 """Check prerequisites.
15801 This checks the pattern passed for validity by compiling it.
15805 self.re = re.compile(self.op.pattern)
15806 except re.error, err:
15807 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15808 (self.op.pattern, err), errors.ECODE_INVAL)
15810 def Exec(self, feedback_fn):
15811 """Returns the tag list.
15815 tgts = [("/cluster", cfg.GetClusterInfo())]
15816 ilist = cfg.GetAllInstancesInfo().values()
15817 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15818 nlist = cfg.GetAllNodesInfo().values()
15819 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15820 tgts.extend(("/nodegroup/%s" % n.name, n)
15821 for n in cfg.GetAllNodeGroupsInfo().values())
15823 for path, target in tgts:
15824 for tag in target.GetTags():
15825 if self.re.search(tag):
15826 results.append((path, tag))
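# Example of the search output (hypothetical tags): each match is reported as
# a (path, tag) pair, e.g. [("/cluster", "env:prod"),
# ("/instances/inst1", "env:prod")] for a pattern such as "^env:".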
15830 class LUTagsSet(TagsLU):
15831 """Sets a tag on a given object.
15836 def CheckPrereq(self):
15837 """Check prerequisites.
15839 This checks the type and length of each given tag.
15842 TagsLU.CheckPrereq(self)
15843 for tag in self.op.tags:
15844 objects.TaggableObject.ValidateTag(tag)
15846 def Exec(self, feedback_fn):
15851 for tag in self.op.tags:
15852 self.target.AddTag(tag)
15853 except errors.TagError, err:
15854 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15855 self.cfg.Update(self.target, feedback_fn)
15858 class LUTagsDel(TagsLU):
15859 """Delete a list of tags from a given object.
15864 def CheckPrereq(self):
15865 """Check prerequisites.
15867 This checks that the target object has the given tags.
15870 TagsLU.CheckPrereq(self)
15871 for tag in self.op.tags:
15872 objects.TaggableObject.ValidateTag(tag)
15873 del_tags = frozenset(self.op.tags)
15874 cur_tags = self.target.GetTags()
15876 diff_tags = del_tags - cur_tags
15878 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15879 raise errors.OpPrereqError("Tag(s) %s not found" %
15880 (utils.CommaJoin(diff_names), ),
15881 errors.ECODE_NOENT)
15883 def Exec(self, feedback_fn):
15884 """Remove the tag from the object.
15887 for tag in self.op.tags:
15888 self.target.RemoveTag(tag)
15889 self.cfg.Update(self.target, feedback_fn)
15892 class LUTestDelay(NoHooksLU):
15893 """Sleep for a specified amount of time.
15895 This LU sleeps on the master and/or nodes for a specified amount of
15901 def ExpandNames(self):
15902 """Expand names and set required locks.
15904 This expands the node list, if any.
15907 self.needed_locks = {}
15908 if self.op.on_nodes:
15909 # _GetWantedNodes can be used here, but is not always appropriate to use
15910 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15911 # more information.
15912 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15913 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15915 def _TestDelay(self):
15916 """Do the actual sleep.
15919 if self.op.on_master:
15920 if not utils.TestDelay(self.op.duration):
15921 raise errors.OpExecError("Error during master delay test")
15922 if self.op.on_nodes:
15923 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15924 for node, node_result in result.items():
15925 node_result.Raise("Failure during rpc call to node %s" % node)
15927 def Exec(self, feedback_fn):
15928 """Execute the test delay opcode, with the wanted repetitions.
15931 if self.op.repeat == 0:
15934 top_value = self.op.repeat - 1
15935 for i in range(self.op.repeat):
15936 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15940 class LURestrictedCommand(NoHooksLU):
15941 """Logical unit for executing restricted commands.
15946 def ExpandNames(self):
15948 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15950 self.needed_locks = {
15951 locking.LEVEL_NODE: self.op.nodes,
15953 self.share_locks = {
15954 locking.LEVEL_NODE: not self.op.use_locking,
15957 def CheckPrereq(self):
15958 """Check prerequisites.
15962 def Exec(self, feedback_fn):
15963 """Execute restricted command and return output.
15966 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15968 # Check if correct locks are held
15969 assert set(self.op.nodes).issubset(owned_nodes)
15971 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15975 for node_name in self.op.nodes:
15976 nres = rpcres[node_name]
15978 msg = ("Command '%s' on node '%s' failed: %s" %
15979 (self.op.command, node_name, nres.fail_msg))
15980 result.append((False, msg))
15982 result.append((True, nres.payload))
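# Illustrative per-node result (hypothetical values): a (success, payload)
# tuple is collected for every requested node, e.g.
#   [(True, "command output"),
#    (False, "Command 'x' on node 'node2' failed: some error")]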
15987 class LUTestJqueue(NoHooksLU):
15988 """Utility LU to test some aspects of the job queue.
15993 # Must be lower than default timeout for WaitForJobChange to see whether it
15994 # notices changed jobs
15995 _CLIENT_CONNECT_TIMEOUT = 20.0
15996 _CLIENT_CONFIRM_TIMEOUT = 60.0
15999 def _NotifyUsingSocket(cls, cb, errcls):
16000 """Opens a Unix socket and waits for another program to connect.
16003 @param cb: Callback to send socket name to client
16004 @type errcls: class
16005 @param errcls: Exception class to use for errors
16008 # Using a temporary directory as there's no easy way to create temporary
16009 # sockets without writing a custom loop around tempfile.mktemp and
16011 tmpdir = tempfile.mkdtemp()
16013 tmpsock = utils.PathJoin(tmpdir, "sock")
16015 logging.debug("Creating temporary socket at %s", tmpsock)
16016 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
16021 # Send details to client
16024 # Wait for client to connect before continuing
16025 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
16027 (conn, _) = sock.accept()
16028 except socket.error, err:
16029 raise errcls("Client didn't connect in time (%s)" % err)
16033 # Remove as soon as client is connected
16034 shutil.rmtree(tmpdir)
16036 # Wait for client to close
16039 # pylint: disable=E1101
16040 # Instance of '_socketobject' has no ... member
16041 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
16043 except socket.error, err:
16044 raise errcls("Client failed to confirm notification (%s)" % err)
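# Rough flow of the notification handshake above (descriptive comment only):
# the socket path is first passed to the client via cb, the client then has
# _CLIENT_CONNECT_TIMEOUT seconds to connect and, once connected, up to
# _CLIENT_CONFIRM_TIMEOUT seconds to acknowledge before the notification is
# considered failed.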
16048 def _SendNotification(self, test, arg, sockname):
16049 """Sends a notification to the client.
16052 @param test: Test name
16053 @param arg: Test argument (depends on test)
16054 @type sockname: string
16055 @param sockname: Socket path
16058 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16060 def _Notify(self, prereq, test, arg):
16061 """Notifies the client of a test.
16064 @param prereq: Whether this is a prereq-phase test
16066 @param test: Test name
16067 @param arg: Test argument (depends on test)
16071 errcls = errors.OpPrereqError
16073 errcls = errors.OpExecError
16075 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16079 def CheckArguments(self):
16080 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16081 self.expandnames_calls = 0
16083 def ExpandNames(self):
16084 checkargs_calls = getattr(self, "checkargs_calls", 0)
16085 if checkargs_calls < 1:
16086 raise errors.ProgrammerError("CheckArguments was not called")
16088 self.expandnames_calls += 1
16090 if self.op.notify_waitlock:
16091 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16093 self.LogInfo("Expanding names")
16095 # Get lock on master node (just to get a lock, not for a particular reason)
16096 self.needed_locks = {
16097 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16100 def Exec(self, feedback_fn):
16101 if self.expandnames_calls < 1:
16102 raise errors.ProgrammerError("ExpandNames was not called")
16104 if self.op.notify_exec:
16105 self._Notify(False, constants.JQT_EXEC, None)
16107 self.LogInfo("Executing")
16109 if self.op.log_messages:
16110 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16111 for idx, msg in enumerate(self.op.log_messages):
16112 self.LogInfo("Sending log message %s", idx + 1)
16113 feedback_fn(constants.JQT_MSGPREFIX + msg)
16114 # Report how many test messages have been sent
16115 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16118 raise errors.OpExecError("Opcode failure was requested")
16123 class LUTestAllocator(NoHooksLU):
16124 """Run allocator tests.
16126 This LU runs the allocator tests
16129 def CheckPrereq(self):
16130 """Check prerequisites.
16132 This checks the opcode parameters depending on the direction and mode of the test.
16135 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16136 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16137 for attr in ["memory", "disks", "disk_template",
16138 "os", "tags", "nics", "vcpus"]:
16139 if not hasattr(self.op, attr):
16140 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16141 attr, errors.ECODE_INVAL)
16142 iname = self.cfg.ExpandInstanceName(self.op.name)
16143 if iname is not None:
16144 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16145 iname, errors.ECODE_EXISTS)
16146 if not isinstance(self.op.nics, list):
16147 raise errors.OpPrereqError("Invalid parameter 'nics'",
16148 errors.ECODE_INVAL)
16149 if not isinstance(self.op.disks, list):
16150 raise errors.OpPrereqError("Invalid parameter 'disks'",
16151 errors.ECODE_INVAL)
16152 for row in self.op.disks:
16153 if (not isinstance(row, dict) or
16154 constants.IDISK_SIZE not in row or
16155 not isinstance(row[constants.IDISK_SIZE], int) or
16156 constants.IDISK_MODE not in row or
16157 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16158 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16159 " parameter", errors.ECODE_INVAL)
16160 if self.op.hypervisor is None:
16161 self.op.hypervisor = self.cfg.GetHypervisorType()
16162 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16163 fname = _ExpandInstanceName(self.cfg, self.op.name)
16164 self.op.name = fname
16165 self.relocate_from = \
16166 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16167 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16168 constants.IALLOCATOR_MODE_NODE_EVAC):
16169 if not self.op.instances:
16170 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16171 self.op.instances = _GetWantedInstances(self, self.op.instances)
16173 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16174 self.op.mode, errors.ECODE_INVAL)
16176 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16177 if self.op.iallocator is None:
16178 raise errors.OpPrereqError("Missing allocator name",
16179 errors.ECODE_INVAL)
16180 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16181 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16182 self.op.direction, errors.ECODE_INVAL)
16184 def Exec(self, feedback_fn):
16185 """Run the allocator test.
16188 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16189 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16190 memory=self.op.memory,
16191 disks=self.op.disks,
16192 disk_template=self.op.disk_template,
16196 vcpus=self.op.vcpus,
16197 spindle_use=self.op.spindle_use,
16198 hypervisor=self.op.hypervisor,
16199 node_whitelist=None)
16200 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16201 req = iallocator.IAReqRelocate(name=self.op.name,
16202 relocate_from=list(self.relocate_from))
16203 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16204 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16205 target_groups=self.op.target_groups)
16206 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16207 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16208 evac_mode=self.op.evac_mode)
16209 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16210 disk_template = self.op.disk_template
16211 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16212 memory=self.op.memory,
16213 disks=self.op.disks,
16214 disk_template=disk_template,
16218 vcpus=self.op.vcpus,
16219 spindle_use=self.op.spindle_use,
16220 hypervisor=self.op.hypervisor)
16221 for idx in range(self.op.count)]
16222 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16224 raise errors.ProgrammerError("Unhandled mode %s in"
16225 " LUTestAllocator.Exec", self.op.mode)
16227 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16228 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16229 result = ial.in_text
16231 ial.Run(self.op.iallocator, validate=False)
16232 result = ial.out_text
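# Note (added for clarity): with direction IALLOCATOR_DIR_IN the LU only
# returns the request text that would be fed to the allocator, while with
# IALLOCATOR_DIR_OUT the named iallocator script is actually run (without
# result validation) and its output text is returned instead.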
16236 class LUNetworkAdd(LogicalUnit):
16237 """Logical unit for creating networks.
16240 HPATH = "network-add"
16241 HTYPE = constants.HTYPE_NETWORK
16244 def BuildHooksNodes(self):
16245 """Build hooks nodes.
16248 mn = self.cfg.GetMasterNode()
16249 return ([mn], [mn])
16251 def CheckArguments(self):
16252 if self.op.mac_prefix:
16253 self.op.mac_prefix = \
16254 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16256 def ExpandNames(self):
16257 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
16259 if self.op.conflicts_check:
16260 self.share_locks[locking.LEVEL_NODE] = 1
16261 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
16262 self.needed_locks = {
16263 locking.LEVEL_NODE: locking.ALL_SET,
16264 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
16267 self.needed_locks = {}
16269 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
16271 def CheckPrereq(self):
16272 if self.op.network is None:
16273 raise errors.OpPrereqError("Network must be given",
16274 errors.ECODE_INVAL)
16277 existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
16278 except errors.OpPrereqError:
16281 raise errors.OpPrereqError("Desired network name '%s' already exists as a"
16282 " network (UUID: %s)" %
16283 (self.op.network_name, existing_uuid),
16284 errors.ECODE_EXISTS)
16286 # Check tag validity
16287 for tag in self.op.tags:
16288 objects.TaggableObject.ValidateTag(tag)
16290 def BuildHooksEnv(self):
16291 """Build hooks env.
16295 "name": self.op.network_name,
16296 "subnet": self.op.network,
16297 "gateway": self.op.gateway,
16298 "network6": self.op.network6,
16299 "gateway6": self.op.gateway6,
16300 "mac_prefix": self.op.mac_prefix,
16301 "tags": self.op.tags,
16303 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16305 def Exec(self, feedback_fn):
16306 """Add the ip pool to the cluster.
16309 nobj = objects.Network(name=self.op.network_name,
16310 network=self.op.network,
16311 gateway=self.op.gateway,
16312 network6=self.op.network6,
16313 gateway6=self.op.gateway6,
16314 mac_prefix=self.op.mac_prefix,
16315 uuid=self.network_uuid)
16316 # Initialize the associated address pool
16317 try:
16318 pool = network.AddressPool.InitializeNetwork(nobj)
16319 except errors.AddressPoolError, err:
16320 raise errors.OpExecError("Cannot create IP address pool for network"
16321 " '%s': %s" % (self.op.network_name, err))
16323 # Check if we need to reserve the node IP addresses and the cluster master
16324 # IP; these may not be allocated to any instances in routed mode, as they
16325 # wouldn't function anyway.
16326 if self.op.conflicts_check:
16327 for node in self.cfg.GetAllNodesInfo().values():
16328 for ip in [node.primary_ip, node.secondary_ip]:
16329 try:
16330 if pool.Contains(ip):
16331 pool.Reserve(ip, external=True)
16332 self.LogInfo("Reserved IP address of node '%s' (%s)",
16333 node.name, ip)
16334 except errors.AddressPoolError, err:
16335 self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
16336 ip, node.name, err)
16338 master_ip = self.cfg.GetClusterInfo().master_ip
16339 try:
16340 if pool.Contains(master_ip):
16341 pool.Reserve(master_ip)
16342 self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
16343 except errors.AddressPoolError, err:
16344 self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
16347 if self.op.add_reserved_ips:
16348 for ip in self.op.add_reserved_ips:
16349 try:
16350 pool.Reserve(ip, external=True)
16351 except errors.AddressPoolError, err:
16352 raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
16356 for tag in self.op.tags:
16357 nobj.AddTag(tag)
16359 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
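# The network object is now part of the configuration; dropping the pending
# remove_locks entry means the per-network lock created via add_locks is
# kept rather than being removed again when the LU finishes.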
16360 del self.remove_locks[locking.LEVEL_NETWORK]
16363 class LUNetworkRemove(LogicalUnit):
16364 HPATH = "network-remove"
16365 HTYPE = constants.HTYPE_NETWORK
16368 def ExpandNames(self):
16369 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16371 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16372 self.needed_locks = {
16373 locking.LEVEL_NETWORK: [self.network_uuid],
16374 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16375 }
16377 def CheckPrereq(self):
16378 """Check prerequisites.
16380 This checks that the network to be removed exists as a network and is
16381 not connected to any node group.
16385 # Verify that the network is not connected to any node group.
16386 node_groups = [group.name
16387 for group in self.cfg.GetAllNodeGroupsInfo().values()
16388 if self.network_uuid in group.networks]
16391 self.LogWarning("Network '%s' is connected to the following"
16392 " node groups: %s" %
16393 (self.op.network_name,
16394 utils.CommaJoin(utils.NiceSort(node_groups))))
16395 raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)
16397 def BuildHooksEnv(self):
16398 """Build hooks env.
16402 "NETWORK_NAME": self.op.network_name,
16405 def BuildHooksNodes(self):
16406 """Build hooks nodes.
16409 mn = self.cfg.GetMasterNode()
16410 return ([mn], [mn])
16412 def Exec(self, feedback_fn):
16413 """Remove the network.
16417 self.cfg.RemoveNetwork(self.network_uuid)
16418 except errors.ConfigurationError:
16419 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16420 (self.op.network_name, self.network_uuid))
16423 class LUNetworkSetParams(LogicalUnit):
16424 """Modifies the parameters of a network.
16427 HPATH = "network-modify"
16428 HTYPE = constants.HTYPE_NETWORK
16431 def CheckArguments(self):
16432 if (self.op.gateway and
16433 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16434 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16435 " at once", errors.ECODE_INVAL)
16437 def ExpandNames(self):
16438 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16440 self.needed_locks = {
16441 locking.LEVEL_NETWORK: [self.network_uuid],
16442 }
16444 def CheckPrereq(self):
16445 """Check prerequisites.
16448 self.network = self.cfg.GetNetwork(self.network_uuid)
16449 self.gateway = self.network.gateway
16450 self.mac_prefix = self.network.mac_prefix
16451 self.network6 = self.network.network6
16452 self.gateway6 = self.network.gateway6
16453 self.tags = self.network.tags
16455 self.pool = network.AddressPool(self.network)
16457 if self.op.gateway:
16458 if self.op.gateway == constants.VALUE_NONE:
16459 self.gateway = None
16460 else:
16461 self.gateway = self.op.gateway
16462 if self.pool.IsReserved(self.gateway):
16463 raise errors.OpPrereqError("Gateway IP address '%s' is already"
16464 " reserved" % self.gateway,
16465 errors.ECODE_STATE)
16467 if self.op.mac_prefix:
16468 if self.op.mac_prefix == constants.VALUE_NONE:
16469 self.mac_prefix = None
16470 else:
16471 self.mac_prefix = \
16472 utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)
16474 if self.op.gateway6:
16475 if self.op.gateway6 == constants.VALUE_NONE:
16476 self.gateway6 = None
16477 else:
16478 self.gateway6 = self.op.gateway6
16480 if self.op.network6:
16481 if self.op.network6 == constants.VALUE_NONE:
16482 self.network6 = None
16483 else:
16484 self.network6 = self.op.network6
16486 def BuildHooksEnv(self):
16487 """Build hooks env.
16491 "name": self.op.network_name,
16492 "subnet": self.network.network,
16493 "gateway": self.gateway,
16494 "network6": self.network6,
16495 "gateway6": self.gateway6,
16496 "mac_prefix": self.mac_prefix,
16499 return _BuildNetworkHookEnv(**args) # pylint: disable=W0142
16501 def BuildHooksNodes(self):
16502 """Build hooks nodes.
16505 mn = self.cfg.GetMasterNode()
16506 return ([mn], [mn])
16508 def Exec(self, feedback_fn):
16509 """Modifies the network.
16512 # TODO: reserve/release via temporary reservation manager;
16513 # extend cfg.ReserveIp/ReleaseIp with the external flag
16514 if self.op.gateway:
16515 if self.gateway == self.network.gateway:
16516 self.LogWarning("Gateway is already %s", self.gateway)
16517 else:
16518 if self.gateway:
16519 self.pool.Reserve(self.gateway, external=True)
16520 if self.network.gateway:
16521 self.pool.Release(self.network.gateway, external=True)
16522 self.network.gateway = self.gateway
16524 if self.op.add_reserved_ips:
16525 for ip in self.op.add_reserved_ips:
16526 try:
16527 if self.pool.IsReserved(ip):
16528 self.LogWarning("IP address %s is already reserved", ip)
16529 else:
16530 self.pool.Reserve(ip, external=True)
16531 except errors.AddressPoolError, err:
16532 self.LogWarning("Cannot reserve IP address %s: %s", ip, err)
16534 if self.op.remove_reserved_ips:
16535 for ip in self.op.remove_reserved_ips:
16536 if ip == self.network.gateway:
16537 self.LogWarning("Cannot unreserve Gateway's IP")
16540 if not self.pool.IsReserved(ip):
16541 self.LogWarning("IP address %s is already unreserved", ip)
16543 self.pool.Release(ip, external=True)
16544 except errors.AddressPoolError, err:
16545 self.LogWarning("Cannot release IP address %s: %s", ip, err)
16547 if self.op.mac_prefix:
16548 self.network.mac_prefix = self.mac_prefix
16550 if self.op.network6:
16551 self.network.network6 = self.network6
16553 if self.op.gateway6:
16554 self.network.gateway6 = self.gateway6
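# Sanity-check the modified address pool before the network object is
# written back to the configuration below.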
16556 self.pool.Validate()
16558 self.cfg.Update(self.network, feedback_fn)
16561 class _NetworkQuery(_QueryBase):
16562 FIELDS = query.NETWORK_FIELDS
16564 def ExpandNames(self, lu):
16565 lu.needed_locks = {}
16566 lu.share_locks = _ShareAll()
16568 self.do_locking = self.use_locking
16570 all_networks = lu.cfg.GetAllNetworksInfo()
16571 name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())
16573 if self.names:
16574 self.wanted = []
16575 missing = []
16577 for name in self.names:
16578 if name in name_to_uuid:
16579 self.wanted.append(name_to_uuid[name])
16580 else:
16581 missing.append(name)
16583 if missing:
16584 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16585 errors.ECODE_NOENT)
16586 else:
16587 self.wanted = locking.ALL_SET
16589 if self.do_locking:
16590 lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
16591 if query.NETQ_INST in self.requested_data:
16592 lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
16593 if query.NETQ_GROUP in self.requested_data:
16594 lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
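# Instance and node group locks are only acquired when the corresponding
# query fields were requested, since computing the network-to-instance and
# network-to-group mappings below has to read those objects consistently.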
16596 def DeclareLocks(self, lu, level):
16597 pass
16599 def _GetQueryData(self, lu):
16600 """Computes the list of networks and their attributes.
16603 all_networks = lu.cfg.GetAllNetworksInfo()
16605 network_uuids = self._GetNames(lu, all_networks.keys(),
16606 locking.LEVEL_NETWORK)
16608 do_instances = query.NETQ_INST in self.requested_data
16609 do_groups = query.NETQ_GROUP in self.requested_data
16611 network_to_instances = None
16612 network_to_groups = None
16614 # For NETQ_GROUP, we need to map network->[groups]
16615 if do_groups:
16616 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16617 network_to_groups = dict((uuid, []) for uuid in network_uuids)
16618 for _, group in all_groups.iteritems():
16619 for net_uuid in network_uuids:
16620 netparams = group.networks.get(net_uuid, None)
16621 if netparams:
16622 info = (group.name, netparams[constants.NIC_MODE],
16623 netparams[constants.NIC_LINK])
16625 network_to_groups[net_uuid].append(info)
16627 if do_instances:
16628 all_instances = lu.cfg.GetAllInstancesInfo()
16629 network_to_instances = dict((uuid, []) for uuid in network_uuids)
16630 for instance in all_instances.values():
16631 for nic in instance.nics:
16632 if nic.network in network_uuids:
16633 network_to_instances[nic.network].append(instance.name)
16636 if query.NETQ_STATS in self.requested_data:
16637 stats = \
16638 dict((uuid,
16639 self._GetStats(network.AddressPool(all_networks[uuid])))
16640 for uuid in network_uuids)
16641 else:
16642 stats = None
16644 return query.NetworkQueryData([all_networks[uuid]
16645 for uuid in network_uuids],
16646 network_to_groups,
16647 network_to_instances,
16648 stats)
16650 @staticmethod
16651 def _GetStats(pool):
16652 """Returns statistics for a network address pool.
16656 "free_count": pool.GetFreeCount(),
16657 "reserved_count": pool.GetReservedCount(),
16658 "map": pool.GetMap(),
16659 "external_reservations":
16660 utils.CommaJoin(pool.GetExternalReservations()),
16661 }
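# The resulting stats are plain numbers/strings, for example (illustrative
# values only): {"free_count": 5, "reserved_count": 3, "map": "XX.....X",
# "external_reservations": "192.0.2.1"}, assuming the usual AddressPool map
# format where reserved addresses are marked "X" and free ones ".".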
16664 class LUNetworkQuery(NoHooksLU):
16665 """Logical unit for querying networks.
16670 def CheckArguments(self):
16671 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16672 self.op.output_fields, self.op.use_locking)
16674 def ExpandNames(self):
16675 self.nq.ExpandNames(self)
16677 def Exec(self, feedback_fn):
16678 return self.nq.OldStyleQuery(self)
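# LUNetworkQuery is a thin wrapper: CheckArguments builds the _NetworkQuery
# helper from the requested names and fields, ExpandNames delegates the lock
# declarations to it, and Exec returns the old-style (list of rows) result.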
16681 class LUNetworkConnect(LogicalUnit):
16682 """Connect a network to a nodegroup
16685 HPATH = "network-connect"
16686 HTYPE = constants.HTYPE_NETWORK
16689 def ExpandNames(self):
16690 self.network_name = self.op.network_name
16691 self.group_name = self.op.group_name
16692 self.network_mode = self.op.network_mode
16693 self.network_link = self.op.network_link
16695 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16696 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16698 self.needed_locks = {
16699 locking.LEVEL_INSTANCE: [],
16700 locking.LEVEL_NODEGROUP: [self.group_uuid],
16701 }
16702 self.share_locks[locking.LEVEL_INSTANCE] = 1
16704 if self.op.conflicts_check:
16705 self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
16706 self.share_locks[locking.LEVEL_NETWORK] = 1
16708 def DeclareLocks(self, level):
16709 if level == locking.LEVEL_INSTANCE:
16710 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16712 # Lock instances optimistically, needs verification once group lock has
16713 # been acquired
16714 if self.op.conflicts_check:
16715 self.needed_locks[locking.LEVEL_INSTANCE] = \
16716 self.cfg.GetNodeGroupInstances(self.group_uuid)
16718 def BuildHooksEnv(self):
16720 "GROUP_NAME": self.group_name,
16721 "GROUP_NETWORK_MODE": self.network_mode,
16722 "GROUP_NETWORK_LINK": self.network_link,
16726 def BuildHooksNodes(self):
16727 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16728 return (nodes, nodes)
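# Unlike network add/remove, whose hooks run only on the master node, the
# connect (and disconnect) hooks run on every member of the affected node
# group, since that is where the NIC mode/link parameters take effect.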
16730 def CheckPrereq(self):
16731 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16733 assert self.group_uuid in owned_groups
16735 # Check if locked instances are still correct
16736 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16737 if self.op.conflicts_check:
16738 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16740 self.netparams = {
16741 constants.NIC_MODE: self.network_mode,
16742 constants.NIC_LINK: self.network_link,
16743 }
16744 objects.NIC.CheckParameterSyntax(self.netparams)
16746 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16747 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16748 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16749 self.connected = False
16750 if self.network_uuid in self.group.networks:
16751 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16752 (self.network_name, self.group.name))
16753 self.connected = True
16755 # check only if not already connected
16756 elif self.op.conflicts_check:
16757 pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))
16759 _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
16760 "connect to", owned_instances)
16762 def Exec(self, feedback_fn):
16763 # Connect the network and update the group only if not already connected
16764 if not self.connected:
16765 self.group.networks[self.network_uuid] = self.netparams
16766 self.cfg.Update(self.group, feedback_fn)
16769 def _NetworkConflictCheck(lu, check_fn, action, instances):
16770 """Checks for network interface conflicts with a network.
16772 @type lu: L{LogicalUnit}
16773 @type check_fn: callable receiving one parameter (L{objects.NIC}) and
16774 returning a boolean
16775 @param check_fn: Function checking for conflict
16776 @type action: string
16777 @param action: Part of error message (see code)
16778 @raise errors.OpPrereqError: If conflicting IP addresses are found.
16781 conflicts = []
16783 for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
16784 instconflicts = [(idx, nic.ip)
16785 for (idx, nic) in enumerate(instance.nics)
16786 if check_fn(nic)]
16788 if instconflicts:
16789 conflicts.append((instance.name, instconflicts))
16792 lu.LogWarning("IP addresses from network '%s', which is about to %s"
16793 " node group '%s', are in use: %s" %
16794 (lu.network_name, action, lu.group.name,
16795 utils.CommaJoin(("%s: %s" %
16796 (name, _FmtNetworkConflict(details)))
16797 for (name, details) in conflicts)))
16799 raise errors.OpPrereqError("Conflicting IP addresses found; "
16800 " remove/modify the corresponding network"
16801 " interfaces", errors.ECODE_STATE)
16804 def _FmtNetworkConflict(details):
16805 """Utility for L{_NetworkConflictCheck}.
16808 return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
16809 for (idx, ipaddr) in details)
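# Example: details [(0, "192.0.2.10"), (2, "192.0.2.15")] is rendered as
# "nic0/192.0.2.10, nic2/192.0.2.15".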
16812 class LUNetworkDisconnect(LogicalUnit):
16813 """Disconnect a network to a nodegroup
16816 HPATH = "network-disconnect"
16817 HTYPE = constants.HTYPE_NETWORK
16820 def ExpandNames(self):
16821 self.network_name = self.op.network_name
16822 self.group_name = self.op.group_name
16824 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16825 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16827 self.needed_locks = {
16828 locking.LEVEL_INSTANCE: [],
16829 locking.LEVEL_NODEGROUP: [self.group_uuid],
16830 }
16831 self.share_locks[locking.LEVEL_INSTANCE] = 1
16833 def DeclareLocks(self, level):
16834 if level == locking.LEVEL_INSTANCE:
16835 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16837 # Lock instances optimistically, needs verification once group lock has
16838 # been acquired
16839 self.needed_locks[locking.LEVEL_INSTANCE] = \
16840 self.cfg.GetNodeGroupInstances(self.group_uuid)
16842 def BuildHooksEnv(self):
16844 "GROUP_NAME": self.group_name,
16848 def BuildHooksNodes(self):
16849 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16850 return (nodes, nodes)
16852 def CheckPrereq(self):
16853 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
16855 assert self.group_uuid in owned_groups
16857 # Check if locked instances are still correct
16858 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
16859 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
16861 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16862 self.connected = True
16863 if self.network_uuid not in self.group.networks:
16864 self.LogWarning("Network '%s' is not mapped to group '%s'",
16865 self.network_name, self.group.name)
16866 self.connected = False
16868 # We need this check only if network is not already connected
16869 else:
16870 _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
16871 "disconnect from", owned_instances)
16873 def Exec(self, feedback_fn):
16874 # Disconnect the network and update the group only if network is connected
16875 if self.connected:
16876 del self.group.networks[self.network_uuid]
16877 self.cfg.Update(self.group, feedback_fn)
16880 #: Query type implementations
16881 _QUERY_IMPL = {
16882 constants.QR_CLUSTER: _ClusterQuery,
16883 constants.QR_INSTANCE: _InstanceQuery,
16884 constants.QR_NODE: _NodeQuery,
16885 constants.QR_GROUP: _GroupQuery,
16886 constants.QR_NETWORK: _NetworkQuery,
16887 constants.QR_OS: _OsQuery,
16888 constants.QR_EXTSTORAGE: _ExtStorageQuery,
16889 constants.QR_EXPORT: _ExportQuery,
16890 }
16892 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16895 def _GetQueryImplementation(name):
16896 """Returns the implemtnation for a query type.
16898 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16901 try:
16902 return _QUERY_IMPL[name]
16903 except KeyError:
16904 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16905 errors.ECODE_INVAL)
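# Example: _GetQueryImplementation(constants.QR_NETWORK) returns the
# _NetworkQuery class defined above; unknown resource names are reported as
# an OpPrereqError instead of leaking a KeyError to the caller.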
16908 def _CheckForConflictingIp(lu, ip, node):
16909 """In case of conflicting IP address raise error.
16912 @param ip: IP address
16914 @param node: node name
16917 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
16918 if conf_net is not None:
16919 raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
16921 errors.ECODE_STATE)
16923 return (None, None)