4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti import network
65 from ganeti.masterd import iallocator
67 import ganeti.masterd.instance # pylint: disable=W0611
71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
75 #: Instance status in which an instance can be marked as offline/online
76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
77 constants.ADMINST_OFFLINE,
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcodes.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
142 # Dictionaries used to declare locking needs to mcpu
143 self.needed_locks = None
144 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.opportunistic_locks = dict.fromkeys(locking.LEVELS, False)
148 self.remove_locks = {}
150 # Used to force good behavior when calling helper functions
151 self.recalculate_locks = {}
154 self.Log = processor.Log # pylint: disable=C0103
155 self.LogWarning = processor.LogWarning # pylint: disable=C0103
156 self.LogInfo = processor.LogInfo # pylint: disable=C0103
157 self.LogStep = processor.LogStep # pylint: disable=C0103
158 # support for dry-run
159 self.dry_run_result = None
160 # support for generic debug attribute
161 if (not hasattr(self.op, "debug_level") or
162 not isinstance(self.op.debug_level, int)):
163 self.op.debug_level = 0
168 # Validate opcode parameters and set defaults
169 self.op.Validate(True)
171 self.CheckArguments()
173 def CheckArguments(self):
174 """Check syntactic validity for the opcode arguments.
176 This method is for doing a simple syntactic check and ensuring
177 validity of opcode parameters, without any cluster-related
178 checks. While the same can be accomplished in ExpandNames and/or
179 CheckPrereq, doing these separately is better because:
181 - ExpandNames is left as purely a lock-related function
182 - CheckPrereq is run after we have acquired locks (and possible
185 The function is allowed to change the self.op attribute so that
186 later methods no longer need to worry about missing parameters.
191 def ExpandNames(self):
192 """Expand names for this LU.
194 This method is called before starting to execute the opcode, and it should
195 update all the parameters of the opcode to their canonical form (e.g. a
196 short node name must be fully expanded after this method has successfully
197 completed). This way locking, hooks, logging, etc. can work correctly.
199 LUs which implement this method must also populate the self.needed_locks
200 member, as a dict with lock levels as keys, and a list of needed lock names
203 - use an empty dict if you don't need any lock
204 - if you don't need any lock at a particular level omit that
205 level (note that in this case C{DeclareLocks} won't be called
206 at all for that level)
207 - if you need locks at a level, but you can't calculate it in
208 this function, initialise that level with an empty list and do
209 further processing in L{LogicalUnit.DeclareLocks} (see that
210 function's docstring)
211 - don't put anything for the BGL level
212 - if you want all locks at a level use L{locking.ALL_SET} as a value
214 If you need to share locks (rather than acquire them exclusively) at one
215 level you can modify self.share_locks, setting a true value (usually 1) for
216 that level. By default locks are not shared.
218 This function can also define a list of tasklets, which then will be
219 executed in order instead of the usual LU-level CheckPrereq and Exec
220 functions, if those are not defined by the LU.
224 # Acquire all nodes and one instance
225 self.needed_locks = {
226 locking.LEVEL_NODE: locking.ALL_SET,
227 locking.LEVEL_INSTANCE: ['instance1.example.com'],
229 # Acquire just two nodes
230 self.needed_locks = {
231 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
234 self.needed_locks = {} # No, you can't leave it to the default value None
237 # The implementation of this method is mandatory only if the new LU is
238 # concurrent, so that old LUs don't need to be changed all at the same
241 self.needed_locks = {} # Exclusive LUs don't need locks.
243 raise NotImplementedError
245 def DeclareLocks(self, level):
246 """Declare LU locking needs for a level
248 While most LUs can just declare their locking needs at ExpandNames time,
249 sometimes there's the need to calculate some locks after having acquired
250 the ones before. This function is called just before acquiring locks at a
251 particular level, but after acquiring the ones at lower levels, and permits
252 such calculations. It can be used to modify self.needed_locks, and by
253 default it does nothing.
255 This function is only called if you have something already set in
256 self.needed_locks for the level.
258 @param level: Locking level which is going to be locked
259 @type level: member of L{ganeti.locking.LEVELS}
263 def CheckPrereq(self):
264 """Check prerequisites for this LU.
266 This method should check that the prerequisites for the execution
267 of this LU are fulfilled. It can do internode communication, but
268 it should be idempotent - no cluster or system changes are
271 The method should raise errors.OpPrereqError in case something is
272 not fulfilled. Its return value is ignored.
274 This method should also update all the parameters of the opcode to
275 their canonical form if it hasn't been done by ExpandNames before.
278 if self.tasklets is not None:
279 for (idx, tl) in enumerate(self.tasklets):
280 logging.debug("Checking prerequisites for tasklet %s/%s",
281 idx + 1, len(self.tasklets))
286 def Exec(self, feedback_fn):
289 This method should implement the actual work. It should raise
290 errors.OpExecError for failures that are somewhat dealt with in
294 if self.tasklets is not None:
295 for (idx, tl) in enumerate(self.tasklets):
296 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
299 raise NotImplementedError
301 def BuildHooksEnv(self):
302 """Build hooks environment for this LU.
305 @return: Dictionary containing the environment that will be used for
306 running the hooks for this LU. The keys of the dict must not be prefixed
307 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
308 will extend the environment with additional variables. If no environment
309 should be defined, an empty dictionary should be returned (not C{None}).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def BuildHooksNodes(self):
317 """Build list of nodes to run LU's hooks.
319 @rtype: tuple; (list, list)
320 @return: Tuple containing a list of node names on which the hook
321 should run before the execution and a list of node names on which the
322 hook should run after the execution. If there are no nodes to return,
323 an empty list should be used (and not None).
324 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
328 raise NotImplementedError
330 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
331 """Notify the LU about the results of its hooks.
333 This method is called every time a hooks phase is executed, and notifies
334 the Logical Unit about the hooks' result. The LU can then use it to alter
335 its result based on the hooks. By default the method does nothing and the
336 previous result is passed back unchanged but any LU can define it if it
337 wants to use the local cluster hook-scripts somehow.
339 @param phase: one of L{constants.HOOKS_PHASE_POST} or
340 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
341 @param hook_results: the results of the multi-node hooks rpc call
342 @param feedback_fn: function used to send feedback back to the caller
343 @param lu_result: the previous Exec result this LU had, or None
345 @return: the new Exec result, based on the previous result
349 # API must be kept, thus we ignore the unused-argument and
350 # could-be-a-function warnings
351 # pylint: disable=W0613,R0201
354 def _ExpandAndLockInstance(self):
355 """Helper function to expand and lock an instance.
357 Many LUs that work on an instance take its name in self.op.instance_name
358 and need to expand it and then declare the expanded name for locking. This
359 function does it, and then updates self.op.instance_name to the expanded
360 name. It also initializes needed_locks as a dict, if this hasn't been done
364 if self.needed_locks is None:
365 self.needed_locks = {}
367 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
368 "_ExpandAndLockInstance called with instance-level locks set"
369 self.op.instance_name = _ExpandInstanceName(self.cfg,
370 self.op.instance_name)
371 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
373 def _LockInstancesNodes(self, primary_only=False,
374 level=locking.LEVEL_NODE):
375 """Helper function to declare instances' nodes for locking.
377 This function should be called after locking one or more instances to lock
378 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
379 with all primary or secondary nodes for instances already locked and
380 present in self.needed_locks[locking.LEVEL_INSTANCE].
382 It should be called from DeclareLocks, and for safety only works if
383 self.recalculate_locks[locking.LEVEL_NODE] is set.
385 In the future it may grow parameters to just lock some instance's nodes, or
386 to just lock primaries or secondary nodes, if needed.
388 It should be called in DeclareLocks in a way similar to::
390 if level == locking.LEVEL_NODE:
391 self._LockInstancesNodes()
393 @type primary_only: boolean
394 @param primary_only: only lock primary nodes of locked instances
395 @param level: Which lock level to use for locking nodes
398 assert level in self.recalculate_locks, \
399 "_LockInstancesNodes helper function called with no nodes to recalculate"
401 # TODO: check if we've really been called with the instance locks held
403 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
404 # future we might want to have different behaviors depending on the value
405 # of self.recalculate_locks[locking.LEVEL_NODE]
407 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
408 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
409 wanted_nodes.append(instance.primary_node)
411 wanted_nodes.extend(instance.secondary_nodes)
413 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
414 self.needed_locks[level] = wanted_nodes
415 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
416 self.needed_locks[level].extend(wanted_nodes)
418 raise errors.ProgrammerError("Unknown recalculation mode")
420 del self.recalculate_locks[level]
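# Illustrative usage sketch (not from the original module): an LU that wants
# its node locks recalculated from the instance locks it holds would
# typically set, in ExpandNames:
#   self.needed_locks[locking.LEVEL_NODE] = []
#   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# and then call self._LockInstancesNodes() from DeclareLocks, as shown in the
# docstring above.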
423 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
424 """Simple LU which runs no hooks.
426 This LU is intended as a parent for other LogicalUnits which will
427 run no hooks, in order to reduce duplicate code.
433 def BuildHooksEnv(self):
434 """Empty BuildHooksEnv for NoHooksLu.
436 This just raises an error.
439 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
441 def BuildHooksNodes(self):
442 """Empty BuildHooksNodes for NoHooksLU.
445 raise AssertionError("BuildHooksNodes called for NoHooksLU")
449 """Tasklet base class.
451 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
452 they can mix legacy code with tasklets. Locking needs to be done in the LU,
453 tasklets know nothing about locks.
455 Subclasses must follow these rules:
456 - Implement CheckPrereq
460 def __init__(self, lu):
467 def CheckPrereq(self):
468 """Check prerequisites for this tasklets.
470 This method should check whether the prerequisites for the execution of
471 this tasklet are fulfilled. It can do internode communication, but it
472 should be idempotent - no cluster or system changes are allowed.
474 The method should raise errors.OpPrereqError in case something is not
475 fulfilled. Its return value is ignored.
477 This method should also update all parameters to their canonical form if it
478 hasn't been done before.
483 def Exec(self, feedback_fn):
484 """Execute the tasklet.
486 This method should implement the actual work. It should raise
487 errors.OpExecError for failures that are somewhat dealt with in code, or
491 raise NotImplementedError
495 """Base for query utility classes.
498 #: Attribute holding field definitions
504 def __init__(self, qfilter, fields, use_locking):
505 """Initializes this class.
508 self.use_locking = use_locking
510 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
511 namefield=self.SORT_FIELD)
512 self.requested_data = self.query.RequestedData()
513 self.names = self.query.RequestedNames()
515 # Sort only if no names were requested
516 self.sort_by_name = not self.names
518 self.do_locking = None
521 def _GetNames(self, lu, all_names, lock_level):
522 """Helper function to determine names asked for in the query.
526 names = lu.owned_locks(lock_level)
530 if self.wanted == locking.ALL_SET:
531 assert not self.names
532 # caller didn't specify names, so ordering is not important
533 return utils.NiceSort(names)
535 # caller specified names and we must keep the same order
537 assert not self.do_locking or lu.glm.is_owned(lock_level)
539 missing = set(self.wanted).difference(names)
541 raise errors.OpExecError("Some items were removed before retrieving"
542 " their data: %s" % missing)
544 # Return expanded names
547 def ExpandNames(self, lu):
548 """Expand names for this query.
550 See L{LogicalUnit.ExpandNames}.
553 raise NotImplementedError()
555 def DeclareLocks(self, lu, level):
556 """Declare locks for this query.
558 See L{LogicalUnit.DeclareLocks}.
561 raise NotImplementedError()
563 def _GetQueryData(self, lu):
564 """Collects all data for this query.
566 @return: Query data object
569 raise NotImplementedError()
571 def NewStyleQuery(self, lu):
572 """Collect data and execute query.
575 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
576 sort_by_name=self.sort_by_name)
578 def OldStyleQuery(self, lu):
579 """Collect data and execute query.
582 return self.query.OldStyleQuery(self._GetQueryData(lu),
583 sort_by_name=self.sort_by_name)
587 """Returns a dict declaring all lock levels shared.
590 return dict.fromkeys(locking.LEVELS, 1)
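# Illustrative sketch (assumed usage, not from the original module): a
# read-only LU can combine _ShareAll with ALL_SET to acquire every lock in
# shared mode:
#   self.share_locks = _ShareAll()
#   self.needed_locks = {
#     locking.LEVEL_NODE: locking.ALL_SET,
#     locking.LEVEL_INSTANCE: locking.ALL_SET,
#   }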
593 def _AnnotateDiskParams(instance, devs, cfg):
594 """Little helper wrapper to the rpc annotation method.
596 @param instance: The instance object
597 @type devs: List of L{objects.Disk}
598 @param devs: The root devices (not any of its children!)
599 @param cfg: The config object
600 @return: The annotated disk copies
601 @see L{rpc.AnnotateDiskParams}
604 return rpc.AnnotateDiskParams(instance.disk_template, devs,
605 cfg.GetInstanceDiskParams(instance))
608 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
610 """Checks if node groups for locked instances are still correct.
612 @type cfg: L{config.ConfigWriter}
613 @param cfg: Cluster configuration
614 @type instances: dict; string as key, L{objects.Instance} as value
615 @param instances: Dictionary, instance name as key, instance object as value
616 @type owned_groups: iterable of string
617 @param owned_groups: List of owned groups
618 @type owned_nodes: iterable of string
619 @param owned_nodes: List of owned nodes
620 @type cur_group_uuid: string or None
621 @param cur_group_uuid: Optional group UUID to check against instance's groups
624 for (name, inst) in instances.items():
625 assert owned_nodes.issuperset(inst.all_nodes), \
626 "Instance %s's nodes changed while we kept the lock" % name
628 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
630 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
631 "Instance %s has no node in group %s" % (name, cur_group_uuid)
634 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
636 """Checks if the owned node groups are still correct for an instance.
638 @type cfg: L{config.ConfigWriter}
639 @param cfg: The cluster configuration
640 @type instance_name: string
641 @param instance_name: Instance name
642 @type owned_groups: set or frozenset
643 @param owned_groups: List of currently owned node groups
644 @type primary_only: boolean
645 @param primary_only: Whether to check node groups for only the primary node
648 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
650 if not owned_groups.issuperset(inst_groups):
651 raise errors.OpPrereqError("Instance %s's node groups changed since"
652 " locks were acquired, current groups are"
653 " are '%s', owning groups '%s'; retry the"
656 utils.CommaJoin(inst_groups),
657 utils.CommaJoin(owned_groups)),
663 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
664 """Checks if the instances in a node group are still correct.
666 @type cfg: L{config.ConfigWriter}
667 @param cfg: The cluster configuration
668 @type group_uuid: string
669 @param group_uuid: Node group UUID
670 @type owned_instances: set or frozenset
671 @param owned_instances: List of currently owned instances
674 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
675 if owned_instances != wanted_instances:
676 raise errors.OpPrereqError("Instances in node group '%s' changed since"
677 " locks were acquired, wanted '%s', have '%s';"
678 " retry the operation" %
680 utils.CommaJoin(wanted_instances),
681 utils.CommaJoin(owned_instances)),
684 return wanted_instances
687 def _SupportsOob(cfg, node):
688 """Tells if node supports OOB.
690 @type cfg: L{config.ConfigWriter}
691 @param cfg: The cluster configuration
692 @type node: L{objects.Node}
693 @param node: The node
694 @return: The OOB script if supported or an empty string otherwise
697 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
700 def _IsExclusiveStorageEnabledNode(cfg, node):
701 """Whether exclusive_storage is in effect for the given node.
703 @type cfg: L{config.ConfigWriter}
704 @param cfg: The cluster configuration
705 @type node: L{objects.Node}
706 @param node: The node
708 @return: The effective value of exclusive_storage
711 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
714 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
715 """Whether exclusive_storage is in effect for the given node.
717 @type cfg: L{config.ConfigWriter}
718 @param cfg: The cluster configuration
719 @type nodename: string
720 @param nodename: The node name
722 @return: The effective value of exclusive_storage
723 @raise errors.OpPrereqError: if no node exists with the given name
726 ni = cfg.GetNodeInfo(nodename)
728 raise errors.OpPrereqError("Invalid node name %s" % nodename,
730 return _IsExclusiveStorageEnabledNode(cfg, ni)
733 def _CopyLockList(names):
734 """Makes a copy of a list of lock names.
736 Handles L{locking.ALL_SET} correctly.
739 if names == locking.ALL_SET:
740 return locking.ALL_SET
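# Illustrative note (assumed usage): lock name lists obtained from
# owned_locks() may be mutated later, so callers copy them first, e.g.
#   nodes = _CopyLockList(self.owned_locks(locking.LEVEL_NODE))
# which also passes locking.ALL_SET through unchanged.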
745 def _GetWantedNodes(lu, nodes):
746 """Returns list of checked and expanded node names.
748 @type lu: L{LogicalUnit}
749 @param lu: the logical unit on whose behalf we execute
751 @param nodes: list of node names or None for all nodes
753 @return: the list of nodes, sorted
754 @raise errors.ProgrammerError: if the nodes parameter is wrong type
758 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
760 return utils.NiceSort(lu.cfg.GetNodeList())
763 def _GetWantedInstances(lu, instances):
764 """Returns list of checked and expanded instance names.
766 @type lu: L{LogicalUnit}
767 @param lu: the logical unit on whose behalf we execute
768 @type instances: list
769 @param instances: list of instance names or None for all instances
771 @return: the list of instances, sorted
772 @raise errors.OpPrereqError: if the instances parameter is wrong type
773 @raise errors.OpPrereqError: if any of the passed instances is not found
777 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
779 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
783 def _GetUpdatedParams(old_params, update_dict,
784 use_default=True, use_none=False):
785 """Return the new version of a parameter dictionary.
787 @type old_params: dict
788 @param old_params: old parameters
789 @type update_dict: dict
790 @param update_dict: dict containing new parameter values, or
791 constants.VALUE_DEFAULT to reset the parameter to its default
793 @type use_default: boolean
794 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
795 values as 'to be deleted' values
796 @type use_none: boolean
797 @param use_none: whether to recognise C{None} values as 'to be
800 @return: the new parameter dictionary
803 params_copy = copy.deepcopy(old_params)
804 for key, val in update_dict.iteritems():
805 if ((use_default and val == constants.VALUE_DEFAULT) or
806 (use_none and val is None)):
812 params_copy[key] = val
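# Illustrative example (hypothetical values): with use_default=True,
#   _GetUpdatedParams({"vcpus": 2, "memory": 512},
#                     {"vcpus": 4, "memory": constants.VALUE_DEFAULT})
# returns {"vcpus": 4}; "memory" is removed so the cluster default applies
# again, while "vcpus" is overridden.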
816 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
817 """Return the new version of a instance policy.
819 @param group_policy: whether this policy applies to a group and thus
820 we should support removal of policy entries
823 use_none = use_default = group_policy
824 ipolicy = copy.deepcopy(old_ipolicy)
825 for key, value in new_ipolicy.items():
826 if key not in constants.IPOLICY_ALL_KEYS:
827 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
829 if key in constants.IPOLICY_ISPECS:
830 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
832 use_default=use_default)
833 utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
835 if (not value or value == [constants.VALUE_DEFAULT] or
836 value == constants.VALUE_DEFAULT):
840 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
841 " on the cluster'" % key,
844 if key in constants.IPOLICY_PARAMETERS:
845 # FIXME: we assume all such values are float
847 ipolicy[key] = float(value)
848 except (TypeError, ValueError), err:
849 raise errors.OpPrereqError("Invalid value for attribute"
850 " '%s': '%s', error: %s" %
851 (key, value, err), errors.ECODE_INVAL)
853 # FIXME: we assume all others are lists; this should be redone
855 ipolicy[key] = list(value)
857 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
858 except errors.ConfigurationError, err:
859 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
864 def _UpdateAndVerifySubDict(base, updates, type_check):
865 """Updates and verifies a dict with sub dicts of the same type.
867 @param base: The dict with the old data
868 @param updates: The dict with the new data
869 @param type_check: Dict suitable to ForceDictType to verify correct types
870 @returns: A new dict with updated and verified values
874 new = _GetUpdatedParams(old, value)
875 utils.ForceDictType(new, type_check)
878 ret = copy.deepcopy(base)
879 ret.update(dict((key, fn(base.get(key, {}), value))
880 for key, value in updates.items()))
884 def _MergeAndVerifyHvState(op_input, obj_input):
885 """Combines the hv state from an opcode with the one of the object
887 @param op_input: The input dict from the opcode
888 @param obj_input: The input dict from the objects
889 @return: The verified and updated dict
893 invalid_hvs = set(op_input) - constants.HYPER_TYPES
895 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
896 " %s" % utils.CommaJoin(invalid_hvs),
898 if obj_input is None:
900 type_check = constants.HVSTS_PARAMETER_TYPES
901 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
906 def _MergeAndVerifyDiskState(op_input, obj_input):
907 """Combines the disk state from an opcode with the one of the object
909 @param op_input: The input dict from the opcode
910 @param obj_input: The input dict from the objects
911 @return: The verified and updated dict
914 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
916 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
917 utils.CommaJoin(invalid_dst),
919 type_check = constants.DSS_PARAMETER_TYPES
920 if obj_input is None:
922 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
924 for key, value in op_input.items())
929 def _ReleaseLocks(lu, level, names=None, keep=None):
930 """Releases locks owned by an LU.
932 @type lu: L{LogicalUnit}
933 @param level: Lock level
934 @type names: list or None
935 @param names: Names of locks to release
936 @type keep: list or None
937 @param keep: Names of locks to retain
940 assert not (keep is not None and names is not None), \
941 "Only one of the 'names' and the 'keep' parameters can be given"
943 if names is not None:
944 should_release = names.__contains__
946 should_release = lambda name: name not in keep
948 should_release = None
950 owned = lu.owned_locks(level)
952 # Not owning any lock at this level, do nothing
959 # Determine which locks to release
961 if should_release(name):
966 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
968 # Release just some locks
969 lu.glm.release(level, names=release)
971 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
974 lu.glm.release(level)
976 assert not lu.glm.is_owned(level), "No locks should be owned"
979 def _MapInstanceDisksToNodes(instances):
980 """Creates a map from (node, volume) to instance name.
982 @type instances: list of L{objects.Instance}
983 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
986 return dict(((node, vol), inst.name)
987 for inst in instances
988 for (node, vols) in inst.MapLVsByNode().items()
992 def _RunPostHook(lu, node_name):
993 """Runs the post-hook for an opcode on a single node.
996 hm = lu.proc.BuildHooksManager(lu)
998 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
999 except Exception, err: # pylint: disable=W0703
1000 lu.LogWarning("Errors occurred running hooks on %s: %s",
1004 def _CheckOutputFields(static, dynamic, selected):
1005 """Checks whether all selected fields are valid.
1007 @type static: L{utils.FieldSet}
1008 @param static: static fields set
1009 @type dynamic: L{utils.FieldSet}
1010 @param dynamic: dynamic fields set
1013 f = utils.FieldSet()
1017 delta = f.NonMatching(selected)
1019 raise errors.OpPrereqError("Unknown output fields selected: %s"
1020 % ",".join(delta), errors.ECODE_INVAL)
1023 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
1024 """Make sure that none of the given paramters is global.
1026 If a global parameter is found, an L{errors.OpPrereqError} exception is
1027 raised. This is used to avoid setting global parameters for individual nodes.
1029 @type params: dictionary
1030 @param params: Parameters to check
1031 @type glob_pars: dictionary
1032 @param glob_pars: Forbidden parameters
1034 @param kind: Kind of parameters (e.g. "node")
1035 @type bad_levels: string
1036 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
1038 @type good_levels: string
1039 @param good_levels: Level(s) at which the parameters are allowed (e.g.
1043 used_globals = glob_pars.intersection(params)
1045 msg = ("The following %s parameters are global and cannot"
1046 " be customized at %s level, please modify them at"
1048 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
1049 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1052 def _CheckNodeOnline(lu, node, msg=None):
1053 """Ensure that a given node is online.
1055 @param lu: the LU on behalf of which we make the check
1056 @param node: the node to check
1057 @param msg: if passed, should be a message to replace the default one
1058 @raise errors.OpPrereqError: if the node is offline
1062 msg = "Can't use offline node"
1063 if lu.cfg.GetNodeInfo(node).offline:
1064 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1067 def _CheckNodeNotDrained(lu, node):
1068 """Ensure that a given node is not drained.
1070 @param lu: the LU on behalf of which we make the check
1071 @param node: the node to check
1072 @raise errors.OpPrereqError: if the node is drained
1075 if lu.cfg.GetNodeInfo(node).drained:
1076 raise errors.OpPrereqError("Can't use drained node %s" % node,
1080 def _CheckNodeVmCapable(lu, node):
1081 """Ensure that a given node is vm capable.
1083 @param lu: the LU on behalf of which we make the check
1084 @param node: the node to check
1085 @raise errors.OpPrereqError: if the node is not vm capable
1088 if not lu.cfg.GetNodeInfo(node).vm_capable:
1089 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1093 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1094 """Ensure that a node supports a given OS.
1096 @param lu: the LU on behalf of which we make the check
1097 @param node: the node to check
1098 @param os_name: the OS to query about
1099 @param force_variant: whether to ignore variant errors
1100 @raise errors.OpPrereqError: if the node is not supporting the OS
1103 result = lu.rpc.call_os_get(node, os_name)
1104 result.Raise("OS '%s' not in supported OS list for node %s" %
1106 prereq=True, ecode=errors.ECODE_INVAL)
1107 if not force_variant:
1108 _CheckOSVariant(result.payload, os_name)
1111 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1112 """Ensure that a node has the given secondary ip.
1114 @type lu: L{LogicalUnit}
1115 @param lu: the LU on behalf of which we make the check
1117 @param node: the node to check
1118 @type secondary_ip: string
1119 @param secondary_ip: the ip to check
1120 @type prereq: boolean
1121 @param prereq: whether to throw a prerequisite or an execute error
1122 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1123 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1126 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1127 result.Raise("Failure checking secondary ip on node %s" % node,
1128 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1129 if not result.payload:
1130 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1131 " please fix and re-run this command" % secondary_ip)
1133 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1135 raise errors.OpExecError(msg)
1138 def _CheckNodePVs(nresult, exclusive_storage):
1142 pvlist_dict = nresult.get(constants.NV_PVLIST, None)
1143 if pvlist_dict is None:
1144 return (["Can't get PV list from node"], None)
1145 pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
1147 # check that ':' is not present in PV names, since it's a
1148 # special character for lvcreate (denotes the range of PEs to
1152 errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
1153 (pv.name, pv.vg_name))
1155 if exclusive_storage:
1156 (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
1157 errlist.extend(errmsgs)
1158 shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
1160 for (pvname, lvlist) in shared_pvs:
1161 # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
1162 errlist.append("PV %s is shared among unrelated LVs (%s)" %
1163 (pvname, utils.CommaJoin(lvlist)))
1164 return (errlist, es_pvinfo)
1167 def _GetClusterDomainSecret():
1168 """Reads the cluster domain secret.
1171 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
1175 def _CheckInstanceState(lu, instance, req_states, msg=None):
1176 """Ensure that an instance is in one of the required states.
1178 @param lu: the LU on behalf of which we make the check
1179 @param instance: the instance to check
1180 @param msg: if passed, should be a message to replace the default one
1181 @raise errors.OpPrereqError: if the instance is not in the required state
1185 msg = ("can't use instance from outside %s states" %
1186 utils.CommaJoin(req_states))
1187 if instance.admin_state not in req_states:
1188 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1189 (instance.name, instance.admin_state, msg),
1192 if constants.ADMINST_UP not in req_states:
1193 pnode = instance.primary_node
1194 if not lu.cfg.GetNodeInfo(pnode).offline:
1195 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1196 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1197 prereq=True, ecode=errors.ECODE_ENVIRON)
1198 if instance.name in ins_l.payload:
1199 raise errors.OpPrereqError("Instance %s is running, %s" %
1200 (instance.name, msg), errors.ECODE_STATE)
1202 lu.LogWarning("Primary node offline, ignoring check that instance"
1206 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1207 """Computes if value is in the desired range.
1209 @param name: name of the parameter for which we perform the check
1210 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1212 @param ipolicy: dictionary containing min, max and std values
1213 @param value: actual value that we want to use
1214 @return: None or element not meeting the criteria
1218 if value in [None, constants.VALUE_AUTO]:
1220 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1221 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1222 if value > max_v or min_v > value:
1224 fqn = "%s/%s" % (name, qualifier)
1227 return ("%s value %s is not in range [%s, %s]" %
1228 (fqn, value, min_v, max_v))
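# Illustrative worked example (hypothetical policy values): with an ipolicy
# whose memory-size bounds are min=128 and max=4096,
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", ipolicy, 8192)
# returns a "not in range [128, 4096]" message, while a value of 1024 (or
# constants.VALUE_AUTO) returns None.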
1232 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1233 nic_count, disk_sizes, spindle_use,
1235 _compute_fn=_ComputeMinMaxSpec):
1236 """Verifies ipolicy against provided specs.
1239 @param ipolicy: The ipolicy
1241 @param mem_size: The memory size
1242 @type cpu_count: int
1243 @param cpu_count: Used cpu cores
1244 @type disk_count: int
1245 @param disk_count: Number of disks used
1246 @type nic_count: int
1247 @param nic_count: Number of nics used
1248 @type disk_sizes: list of ints
1249 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1250 @type spindle_use: int
1251 @param spindle_use: The number of spindles this instance uses
1252 @type disk_template: string
1253 @param disk_template: The disk template of the instance
1254 @param _compute_fn: The compute function (unittest only)
1255 @return: A list of violations, or an empty list if no violations are found
1258 assert disk_count == len(disk_sizes)
1261 (constants.ISPEC_MEM_SIZE, "", mem_size),
1262 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1263 (constants.ISPEC_NIC_COUNT, "", nic_count),
1264 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1265 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1266 for idx, d in enumerate(disk_sizes)]
1267 if disk_template != constants.DT_DISKLESS:
1268 # This check doesn't make sense for diskless instances
1269 test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
1271 allowed_dts = ipolicy[constants.IPOLICY_DTS]
1272 if disk_template not in allowed_dts:
1273 ret.append("Disk template %s is not allowed (allowed templates: %s)" %
1274 (disk_template, utils.CommaJoin(allowed_dts)))
1276 return ret + filter(None,
1277 (_compute_fn(name, qualifier, ipolicy, value)
1278 for (name, qualifier, value) in test_settings))
1281 def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
1282 _compute_fn=_ComputeIPolicySpecViolation):
1283 """Compute if instance meets the specs of ipolicy.
1286 @param ipolicy: The ipolicy to verify against
1287 @type instance: L{objects.Instance}
1288 @param instance: The instance to verify
1289 @type cfg: L{config.ConfigWriter}
1290 @param cfg: Cluster configuration
1291 @param _compute_fn: The function to verify ipolicy (unittest only)
1292 @see: L{_ComputeIPolicySpecViolation}
1295 be_full = cfg.GetClusterInfo().FillBE(instance)
1296 mem_size = be_full[constants.BE_MAXMEM]
1297 cpu_count = be_full[constants.BE_VCPUS]
1298 spindle_use = be_full[constants.BE_SPINDLE_USE]
1299 disk_count = len(instance.disks)
1300 disk_sizes = [disk.size for disk in instance.disks]
1301 nic_count = len(instance.nics)
1302 disk_template = instance.disk_template
1304 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1305 disk_sizes, spindle_use, disk_template)
1308 def _ComputeIPolicyInstanceSpecViolation(
1309 ipolicy, instance_spec, disk_template,
1310 _compute_fn=_ComputeIPolicySpecViolation):
1311 """Compute if instance specs meets the specs of ipolicy.
1314 @param ipolicy: The ipolicy to verify against
1315 @type instance_spec: dict
1316 @param instance_spec: The instance spec to verify
1317 @type disk_template: string
1318 @param disk_template: the disk template of the instance
1319 @param _compute_fn: The function to verify ipolicy (unittest only)
1320 @see: L{_ComputeIPolicySpecViolation}
1323 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1324 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1325 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1326 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1327 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1328 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1330 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1331 disk_sizes, spindle_use, disk_template)
1334 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1336 _compute_fn=_ComputeIPolicyInstanceViolation):
1337 """Compute if instance meets the specs of the new target group.
1339 @param ipolicy: The ipolicy to verify
1340 @param instance: The instance object to verify
1341 @param current_group: The current group of the instance
1342 @param target_group: The new group of the instance
1343 @type cfg: L{config.ConfigWriter}
1344 @param cfg: Cluster configuration
1345 @param _compute_fn: The function to verify ipolicy (unittest only)
1346 @see: L{_ComputeIPolicySpecViolation}
1349 if current_group == target_group:
1352 return _compute_fn(ipolicy, instance, cfg)
1355 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
1356 _compute_fn=_ComputeIPolicyNodeViolation):
1357 """Checks that the target node is correct in terms of instance policy.
1359 @param ipolicy: The ipolicy to verify
1360 @param instance: The instance object to verify
1361 @param node: The new node to relocate the instance to
1362 @type cfg: L{config.ConfigWriter}
1363 @param cfg: Cluster configuration
1364 @param ignore: Ignore violations of the ipolicy
1365 @param _compute_fn: The function to verify ipolicy (unittest only)
1366 @see: L{_ComputeIPolicySpecViolation}
1369 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1370 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
1373 msg = ("Instance does not meet target node group's (%s) instance"
1374 " policy: %s") % (node.group, utils.CommaJoin(res))
1378 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1381 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
1382 """Computes a set of any instances that would violate the new ipolicy.
1384 @param old_ipolicy: The current (still in-place) ipolicy
1385 @param new_ipolicy: The new (to become) ipolicy
1386 @param instances: List of instances to verify
1387 @type cfg: L{config.ConfigWriter}
1388 @param cfg: Cluster configuration
1389 @return: A list of instances which violate the new ipolicy but
1393 return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
1394 _ComputeViolatingInstances(old_ipolicy, instances, cfg))
1397 def _ExpandItemName(fn, name, kind):
1398 """Expand an item name.
1400 @param fn: the function to use for expansion
1401 @param name: requested item name
1402 @param kind: text description ('Node' or 'Instance')
1403 @return: the resolved (full) name
1404 @raise errors.OpPrereqError: if the item is not found
1407 full_name = fn(name)
1408 if full_name is None:
1409 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1414 def _ExpandNodeName(cfg, name):
1415 """Wrapper over L{_ExpandItemName} for nodes."""
1416 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1419 def _ExpandInstanceName(cfg, name):
1420 """Wrapper over L{_ExpandItemName} for instance."""
1421 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
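# Illustrative sketch (hypothetical names): LUs use these wrappers to turn a
# possibly shortened name from the opcode into its canonical form, e.g.
#   self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# which raises OpPrereqError if the name is not known to the configuration.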
1424 def _BuildNetworkHookEnv(name, subnet, gateway, network6, gateway6,
1426 """Builds network related env variables for hooks
1428 This builds the hook environment from individual variables.
1431 @param name: the name of the network
1432 @type subnet: string
1433 @param subnet: the ipv4 subnet
1434 @type gateway: string
1435 @param gateway: the ipv4 gateway
1436 @type network6: string
1437 @param network6: the ipv6 subnet
1438 @type gateway6: string
1439 @param gateway6: the ipv6 gateway
1440 @type mac_prefix: string
1441 @param mac_prefix: the mac_prefix
1443 @param tags: the tags of the network
1448 env["NETWORK_NAME"] = name
1450 env["NETWORK_SUBNET"] = subnet
1452 env["NETWORK_GATEWAY"] = gateway
1454 env["NETWORK_SUBNET6"] = network6
1456 env["NETWORK_GATEWAY6"] = gateway6
1458 env["NETWORK_MAC_PREFIX"] = mac_prefix
1460 env["NETWORK_TAGS"] = " ".join(tags)
1465 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1466 minmem, maxmem, vcpus, nics, disk_template, disks,
1467 bep, hvp, hypervisor_name, tags):
1468 """Builds instance related env variables for hooks
1470 This builds the hook environment from individual variables.
1473 @param name: the name of the instance
1474 @type primary_node: string
1475 @param primary_node: the name of the instance's primary node
1476 @type secondary_nodes: list
1477 @param secondary_nodes: list of secondary nodes as strings
1478 @type os_type: string
1479 @param os_type: the name of the instance's OS
1480 @type status: string
1481 @param status: the desired status of the instance
1482 @type minmem: string
1483 @param minmem: the minimum memory size of the instance
1484 @type maxmem: string
1485 @param maxmem: the maximum memory size of the instance
1487 @param vcpus: the count of VCPUs the instance has
1489 @param nics: list of tuples (ip, mac, mode, link, net, netinfo) representing
1490 the NICs the instance has
1491 @type disk_template: string
1492 @param disk_template: the disk template of the instance
1494 @param disks: the list of (size, mode) pairs
1496 @param bep: the backend parameters for the instance
1498 @param hvp: the hypervisor parameters for the instance
1499 @type hypervisor_name: string
1500 @param hypervisor_name: the hypervisor for the instance
1502 @param tags: list of instance tags as strings
1504 @return: the hook environment for this instance
1509 "INSTANCE_NAME": name,
1510 "INSTANCE_PRIMARY": primary_node,
1511 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1512 "INSTANCE_OS_TYPE": os_type,
1513 "INSTANCE_STATUS": status,
1514 "INSTANCE_MINMEM": minmem,
1515 "INSTANCE_MAXMEM": maxmem,
1516 # TODO(2.9) remove deprecated "memory" value
1517 "INSTANCE_MEMORY": maxmem,
1518 "INSTANCE_VCPUS": vcpus,
1519 "INSTANCE_DISK_TEMPLATE": disk_template,
1520 "INSTANCE_HYPERVISOR": hypervisor_name,
1523 nic_count = len(nics)
1524 for idx, (ip, mac, mode, link, net, netinfo) in enumerate(nics):
1527 env["INSTANCE_NIC%d_IP" % idx] = ip
1528 env["INSTANCE_NIC%d_MAC" % idx] = mac
1529 env["INSTANCE_NIC%d_MODE" % idx] = mode
1530 env["INSTANCE_NIC%d_LINK" % idx] = link
1532 nobj = objects.Network.FromDict(netinfo)
1533 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
1535 # FIXME: broken network reference: the instance NIC specifies a
1536 # network, but the relevant network entry was not in the config. This
1537 # should be made impossible.
1538 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
1539 if mode == constants.NIC_MODE_BRIDGED:
1540 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1544 env["INSTANCE_NIC_COUNT"] = nic_count
1547 disk_count = len(disks)
1548 for idx, (size, mode) in enumerate(disks):
1549 env["INSTANCE_DISK%d_SIZE" % idx] = size
1550 env["INSTANCE_DISK%d_MODE" % idx] = mode
1554 env["INSTANCE_DISK_COUNT"] = disk_count
1559 env["INSTANCE_TAGS"] = " ".join(tags)
1561 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1562 for key, value in source.items():
1563 env["INSTANCE_%s_%s" % (kind, key)] = value
1568 def _NICToTuple(lu, nic):
1569 """Build a tupple of nic information.
1571 @type lu: L{LogicalUnit}
1572 @param lu: the logical unit on whose behalf we execute
1573 @type nic: L{objects.NIC}
1574 @param nic: nic to convert to hooks tuple
1577 cluster = lu.cfg.GetClusterInfo()
1578 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1579 mode = filled_params[constants.NIC_MODE]
1580 link = filled_params[constants.NIC_LINK]
1583 nobj = lu.cfg.GetNetwork(nic.network)
1584 netinfo = objects.Network.ToDict(nobj)
1585 return (nic.ip, nic.mac, mode, link, nic.network, netinfo)
1588 def _NICListToTuple(lu, nics):
1589 """Build a list of nic information tuples.
1591 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1592 value in LUInstanceQueryData.
1594 @type lu: L{LogicalUnit}
1595 @param lu: the logical unit on whose behalf we execute
1596 @type nics: list of L{objects.NIC}
1597 @param nics: list of nics to convert to hooks tuples
1602 hooks_nics.append(_NICToTuple(lu, nic))
1606 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1607 """Builds instance related env variables for hooks from an object.
1609 @type lu: L{LogicalUnit}
1610 @param lu: the logical unit on whose behalf we execute
1611 @type instance: L{objects.Instance}
1612 @param instance: the instance for which we should build the
1614 @type override: dict
1615 @param override: dictionary with key/values that will override
1618 @return: the hook environment dictionary
1621 cluster = lu.cfg.GetClusterInfo()
1622 bep = cluster.FillBE(instance)
1623 hvp = cluster.FillHV(instance)
1625 "name": instance.name,
1626 "primary_node": instance.primary_node,
1627 "secondary_nodes": instance.secondary_nodes,
1628 "os_type": instance.os,
1629 "status": instance.admin_state,
1630 "maxmem": bep[constants.BE_MAXMEM],
1631 "minmem": bep[constants.BE_MINMEM],
1632 "vcpus": bep[constants.BE_VCPUS],
1633 "nics": _NICListToTuple(lu, instance.nics),
1634 "disk_template": instance.disk_template,
1635 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1638 "hypervisor_name": instance.hypervisor,
1639 "tags": instance.tags,
1642 args.update(override)
1643 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1646 def _AdjustCandidatePool(lu, exceptions):
1647 """Adjust the candidate pool after node operations.
1650 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1652 lu.LogInfo("Promoted nodes to master candidate role: %s",
1653 utils.CommaJoin(node.name for node in mod_list))
1654 for name in mod_list:
1655 lu.context.ReaddNode(name)
1656 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1658 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1662 def _DecideSelfPromotion(lu, exceptions=None):
1663 """Decide whether I should promote myself as a master candidate.
1666 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1667 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1668 # the new node will increase mc_max by one, so:
1669 mc_should = min(mc_should + 1, cp_size)
1670 return mc_now < mc_should
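# Illustrative worked example (hypothetical numbers): with a candidate pool
# size of 10, 3 current master candidates and a computed "should" value of 3,
# adding the new node bumps the target to min(3 + 1, 10) = 4; since 3 < 4,
# the node promotes itself to master candidate.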
1673 def _ComputeViolatingInstances(ipolicy, instances, cfg):
1674 """Computes a set of instances who violates given ipolicy.
1676 @param ipolicy: The ipolicy to verify
1677 @type instances: L{objects.Instance}
1678 @param instances: List of instances to verify
1679 @type cfg: L{config.ConfigWriter}
1680 @param cfg: Cluster configuration
1681 @return: A frozenset of instance names violating the ipolicy
1684 return frozenset([inst.name for inst in instances
1685 if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
1688 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1689 """Check that the brigdes needed by a list of nics exist.
1692 cluster = lu.cfg.GetClusterInfo()
1693 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1694 brlist = [params[constants.NIC_LINK] for params in paramslist
1695 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1697 result = lu.rpc.call_bridges_exist(target_node, brlist)
1698 result.Raise("Error checking bridges on destination node '%s'" %
1699 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1702 def _CheckInstanceBridgesExist(lu, instance, node=None):
1703 """Check that the brigdes needed by an instance exist.
1707 node = instance.primary_node
1708 _CheckNicsBridgesExist(lu, instance.nics, node)
1711 def _CheckOSVariant(os_obj, name):
1712 """Check whether an OS name conforms to the os variants specification.
1714 @type os_obj: L{objects.OS}
1715 @param os_obj: OS object to check
1717 @param name: OS name passed by the user, to check for validity
1720 variant = objects.OS.GetVariant(name)
1721 if not os_obj.supported_variants:
1723 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1724 " passed)" % (os_obj.name, variant),
1728 raise errors.OpPrereqError("OS name must include a variant",
1731 if variant not in os_obj.supported_variants:
1732 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
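# Illustrative examples (hypothetical OS names): for an OS that declares
# supported variants, _CheckOSVariant(os_obj, "debootstrap+default") passes
# if "default" is a supported variant, a bare "debootstrap" raises "OS name
# must include a variant", and an unknown variant is rejected as unsupported.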
1735 def _GetNodeInstancesInner(cfg, fn):
1736 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1739 def _GetNodeInstances(cfg, node_name):
1740 """Returns a list of all primary and secondary instances on a node.
1744 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1747 def _GetNodePrimaryInstances(cfg, node_name):
1748 """Returns primary instances on a node.
1751 return _GetNodeInstancesInner(cfg,
1752 lambda inst: node_name == inst.primary_node)
1755 def _GetNodeSecondaryInstances(cfg, node_name):
1756 """Returns secondary instances on a node.
1759 return _GetNodeInstancesInner(cfg,
1760 lambda inst: node_name in inst.secondary_nodes)
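# Illustrative sketch (hypothetical node name): these helpers filter the full
# instance list by node, e.g.
#   _GetNodePrimaryInstances(self.cfg, "node1.example.com")
# returns the instance objects whose primary node is node1.example.com.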
1763 def _GetStorageTypeArgs(cfg, storage_type):
1764 """Returns the arguments for a storage type.
1767 # Special case for file storage
1768 if storage_type == constants.ST_FILE:
1769 # storage.FileStorage wants a list of storage directories
1770 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1775 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1778 for dev in instance.disks:
1779 cfg.SetDiskID(dev, node_name)
1781 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1783 result.Raise("Failed to get disk status from node %s" % node_name,
1784 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1786 for idx, bdev_status in enumerate(result.payload):
1787 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1793 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1794 """Check the sanity of iallocator and node arguments and use the
1795 cluster-wide iallocator if appropriate.
1797 Check that at most one of (iallocator, node) is specified. If none is
1798 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
1799 then the LU's opcode's iallocator slot is filled with the cluster-wide
1802 @type iallocator_slot: string
1803 @param iallocator_slot: the name of the opcode iallocator slot
1804 @type node_slot: string
1805 @param node_slot: the name of the opcode target node slot
1808 node = getattr(lu.op, node_slot, None)
1809 ialloc = getattr(lu.op, iallocator_slot, None)
1813 if node is not None and ialloc is not None:
1814 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1816 elif ((node is None and ialloc is None) or
1817 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
1818 default_iallocator = lu.cfg.GetDefaultIAllocator()
1819 if default_iallocator:
1820 setattr(lu.op, iallocator_slot, default_iallocator)
1822 raise errors.OpPrereqError("No iallocator or node given and no"
1823 " cluster-wide default iallocator found;"
1824 " please specify either an iallocator or a"
1825 " node, or set a cluster-wide default"
1826 " iallocator", errors.ECODE_INVAL)
1829 def _GetDefaultIAllocator(cfg, ialloc):
1830 """Decides on which iallocator to use.
1832 @type cfg: L{config.ConfigWriter}
1833 @param cfg: Cluster configuration object
1834 @type ialloc: string or None
1835 @param ialloc: Iallocator specified in opcode
1837 @return: Iallocator name
1841 # Use default iallocator
1842 ialloc = cfg.GetDefaultIAllocator()
1845 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1846 " opcode nor as a cluster-wide default",
1852 def _CheckHostnameSane(lu, name):
1853 """Ensures that a given hostname resolves to a 'sane' name.
1855 The given name is required to be a prefix of the resolved hostname,
1856 to prevent accidental mismatches.
1858 @param lu: the logical unit on behalf of which we're checking
1859 @param name: the name we should resolve and check
1860 @return: the resolved hostname object
1863 hostname = netutils.GetHostname(name=name)
1864 if hostname.name != name:
1865 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1866 if not utils.MatchNameComponent(name, [hostname.name]):
1867 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1868 " same as given hostname '%s'") %
1869 (hostname.name, name), errors.ECODE_INVAL)
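# Illustrative example (hypothetical names): if "web1" resolves to
# "web1.example.com" the prefix matches and the hostname object is returned;
# if it resolved to "mail.example.com" instead, OpPrereqError would be
# raised.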
1873 class LUClusterPostInit(LogicalUnit):
1874 """Logical unit for running hooks after cluster initialization.
1877 HPATH = "cluster-init"
1878 HTYPE = constants.HTYPE_CLUSTER
1880 def BuildHooksEnv(self):
1885 "OP_TARGET": self.cfg.GetClusterName(),
1888 def BuildHooksNodes(self):
1889 """Build hooks nodes.
1892 return ([], [self.cfg.GetMasterNode()])
1894 def Exec(self, feedback_fn):
1901 class LUClusterDestroy(LogicalUnit):
1902 """Logical unit for destroying the cluster.
1905 HPATH = "cluster-destroy"
1906 HTYPE = constants.HTYPE_CLUSTER
1908 def BuildHooksEnv(self):
1913 "OP_TARGET": self.cfg.GetClusterName(),
1916 def BuildHooksNodes(self):
1917 """Build hooks nodes.
1922 def CheckPrereq(self):
1923 """Check prerequisites.
1925 This checks whether the cluster is empty.
1927 Any errors are signaled by raising errors.OpPrereqError.
1930 master = self.cfg.GetMasterNode()
1932 nodelist = self.cfg.GetNodeList()
1933 if len(nodelist) != 1 or nodelist[0] != master:
1934 raise errors.OpPrereqError("There are still %d node(s) in"
1935 " this cluster." % (len(nodelist) - 1),
1937 instancelist = self.cfg.GetInstanceList()
1939 raise errors.OpPrereqError("There are still %d instance(s) in"
1940 " this cluster." % len(instancelist),
1943 def Exec(self, feedback_fn):
1944 """Destroys the cluster.
1947 master_params = self.cfg.GetMasterNetworkParameters()
1949 # Run post hooks on master node before it's removed
1950 _RunPostHook(self, master_params.name)
1952 ems = self.cfg.GetUseExternalMipScript()
1953 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1956 self.LogWarning("Error disabling the master IP address: %s",
1959 return master_params.name
1962 def _VerifyCertificate(filename):
1963 """Verifies a certificate for L{LUClusterVerifyConfig}.
1965 @type filename: string
1966 @param filename: Path to PEM file
1970 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1971 utils.ReadFile(filename))
1972 except Exception, err: # pylint: disable=W0703
1973 return (LUClusterVerifyConfig.ETYPE_ERROR,
1974 "Failed to load X509 certificate %s: %s" % (filename, err))
1977 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1978 constants.SSL_CERT_EXPIRATION_ERROR)
1981 fnamemsg = "While verifying %s: %s" % (filename, msg)
1986 return (None, fnamemsg)
1987 elif errcode == utils.CERT_WARNING:
1988 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1989 elif errcode == utils.CERT_ERROR:
1990 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1992 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1995 def _GetAllHypervisorParameters(cluster, instances):
1996 """Compute the set of all hypervisor parameters.
1998 @type cluster: L{objects.Cluster}
1999 @param cluster: the cluster object
2000 @type instances: list of L{objects.Instance}
2001 @param instances: additional instances from which to obtain parameters
2002 @rtype: list of (origin, hypervisor, parameters)
2003 @return: a list with all parameters found, indicating the hypervisor they
2004 apply to, and the origin (can be "cluster", "os X", or "instance Y")
2009 for hv_name in cluster.enabled_hypervisors:
2010 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2012 for os_name, os_hvp in cluster.os_hvp.items():
2013 for hv_name, hv_params in os_hvp.items():
2015 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2016 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2018 # TODO: collapse identical parameter values in a single one
2019 for instance in instances:
2020 if instance.hvparams:
2021 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2022 cluster.FillHV(instance)))
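# Shape of the returned list (illustrative values; the hypervisor and instance
# names are made up for the example):
#
#   [("cluster", "xen-pvm", {...cluster-level defaults...}),
#    ("os debian-image", "xen-pvm", {...defaults merged with OS overrides...}),
#    ("instance inst1.example.com", "xen-pvm", {...fully filled parameters...})]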
2027 class _VerifyErrors(object):
2028 """Mix-in for cluster/group verify LUs.
2030 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
2031 self.op and self._feedback_fn to be available.)
2035 ETYPE_FIELD = "code"
2036 ETYPE_ERROR = "ERROR"
2037 ETYPE_WARNING = "WARNING"
2039 def _Error(self, ecode, item, msg, *args, **kwargs):
2040 """Format an error message.
2042 Based on the opcode's error_codes parameter, either format a
2043 parseable error code, or a simpler error string.
2045 This must be called only from Exec and functions called from Exec.
2048 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
2049 itype, etxt, _ = ecode
2050 # If the error code is in the list of ignored errors, demote the error to a
2052 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2053 ltype = self.ETYPE_WARNING
2054 # first complete the msg
2057 # then format the whole message
2058 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
2059 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
2065 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
2066 # and finally report it via the feedback_fn
2067 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
2068 # do not mark the operation as failed for WARN cases only
2069 if ltype == self.ETYPE_ERROR:
2072 def _ErrorIf(self, cond, *args, **kwargs):
2073 """Log an error message if the passed condition is True.
2077 or self.op.debug_simulate_errors): # pylint: disable=E1101
2078 self._Error(*args, **kwargs)
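# Typical call site in a verify LU (sketch mirroring real uses further down in
# this module); the CV_* constants are the error descriptors unpacked above as
# (item type, error text, _):
#
#   self._ErrorIf(msg, constants.CV_ENODERPC, node,
#                 "while contacting node: %s", msg)
#
# With op.error_codes set, the message is emitted in the machine-parseable
# "LEVEL:error-text:item-type:item:message" form built above; codes listed in
# op.ignore_errors are demoted to warnings and do not mark the LU as failed.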
2081 class LUClusterVerify(NoHooksLU):
2082 """Submits all jobs necessary to verify the cluster.
2087 def ExpandNames(self):
2088 self.needed_locks = {}
2090 def Exec(self, feedback_fn):
2093 if self.op.group_name:
2094 groups = [self.op.group_name]
2095 depends_fn = lambda: None
2097 groups = self.cfg.GetNodeGroupList()
2099 # Verify global configuration
2101 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
2104 # Always depend on global verification
2105 depends_fn = lambda: [(-len(jobs), [])]
2108 [opcodes.OpClusterVerifyGroup(group_name=group,
2109 ignore_errors=self.op.ignore_errors,
2110 depends=depends_fn())]
2111 for group in groups)
2113 # Fix up all parameters
2114 for op in itertools.chain(*jobs): # pylint: disable=W0142
2115 op.debug_simulate_errors = self.op.debug_simulate_errors
2116 op.verbose = self.op.verbose
2117 op.error_codes = self.op.error_codes
2119 op.skip_checks = self.op.skip_checks
2120 except AttributeError:
2121 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2123 return ResultWithJobs(jobs)
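# Resulting job list (sketch; "group-A"/"group-B" are placeholder group
# names): one single-opcode job verifying the global configuration, then one
# job per node group, each with a relative dependency pointing back at the
# global verification job:
#
#   [[opcodes.OpClusterVerifyConfig(...)],
#    [opcodes.OpClusterVerifyGroup(group_name="group-A", depends=[(-1, [])])],
#    [opcodes.OpClusterVerifyGroup(group_name="group-B", depends=[(-2, [])])]]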
2126 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2127 """Verifies the cluster config.
2132 def _VerifyHVP(self, hvp_data):
2133 """Verifies locally the syntax of the hypervisor parameters.
2136 for item, hv_name, hv_params in hvp_data:
2137 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2140 hv_class = hypervisor.GetHypervisorClass(hv_name)
2141 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2142 hv_class.CheckParameterSyntax(hv_params)
2143 except errors.GenericError, err:
2144 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2146 def ExpandNames(self):
2147 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2148 self.share_locks = _ShareAll()
2150 def CheckPrereq(self):
2151 """Check prerequisites.
2154 # Retrieve all information
2155 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2156 self.all_node_info = self.cfg.GetAllNodesInfo()
2157 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2159 def Exec(self, feedback_fn):
2160 """Verify integrity of the cluster, performing various tests on nodes.
2164 self._feedback_fn = feedback_fn
2166 feedback_fn("* Verifying cluster config")
2168 for msg in self.cfg.VerifyConfig():
2169 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2171 feedback_fn("* Verifying cluster certificate files")
2173 for cert_filename in pathutils.ALL_CERT_FILES:
2174 (errcode, msg) = _VerifyCertificate(cert_filename)
2175 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2177 feedback_fn("* Verifying hypervisor parameters")
2179 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2180 self.all_inst_info.values()))
2182 feedback_fn("* Verifying all nodes belong to an existing group")
2184 # We do this verification here because, should this bogus circumstance
2185 # occur, it would never be caught by VerifyGroup, which only acts on
2186 # nodes/instances reachable from existing node groups.
2188 dangling_nodes = set(node.name for node in self.all_node_info.values()
2189 if node.group not in self.all_group_info)
2191 dangling_instances = {}
2192 no_node_instances = []
2194 for inst in self.all_inst_info.values():
2195 if inst.primary_node in dangling_nodes:
2196 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2197 elif inst.primary_node not in self.all_node_info:
2198 no_node_instances.append(inst.name)
2203 utils.CommaJoin(dangling_instances.get(node.name,
2205 for node in dangling_nodes]
2207 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2209 "the following nodes (and their instances) belong to a non"
2210 " existing group: %s", utils.CommaJoin(pretty_dangling))
2212 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2214 "the following instances have a non-existing primary-node:"
2215 " %s", utils.CommaJoin(no_node_instances))
2220 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2221 """Verifies the status of a node group.
2224 HPATH = "cluster-verify"
2225 HTYPE = constants.HTYPE_CLUSTER
2228 _HOOKS_INDENT_RE = re.compile("^", re.M)
2230 class NodeImage(object):
2231 """A class representing the logical and physical status of a node.
2234 @ivar name: the node name to which this object refers
2235 @ivar volumes: a structure as returned from
2236 L{ganeti.backend.GetVolumeList} (runtime)
2237 @ivar instances: a list of running instances (runtime)
2238 @ivar pinst: list of configured primary instances (config)
2239 @ivar sinst: list of configured secondary instances (config)
2240 @ivar sbp: dictionary of {primary-node: list of instances} for all
2241 instances for which this node is secondary (config)
2242 @ivar mfree: free memory, as reported by hypervisor (runtime)
2243 @ivar dfree: free disk, as reported by the node (runtime)
2244 @ivar offline: the offline status (config)
2245 @type rpc_fail: boolean
2246 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2247 not whether the individual keys were correct) (runtime)
2248 @type lvm_fail: boolean
2249 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2250 @type hyp_fail: boolean
2251 @ivar hyp_fail: whether the RPC call didn't return the instance list
2252 @type ghost: boolean
2253 @ivar ghost: whether this is a known node or not (config)
2254 @type os_fail: boolean
2255 @ivar os_fail: whether the RPC call didn't return valid OS data
2257 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2258 @type vm_capable: boolean
2259 @ivar vm_capable: whether the node can host instances
2261 @ivar pv_min: size in MiB of the smallest PVs
2263 @ivar pv_max: size in MiB of the biggest PVs
2266 def __init__(self, offline=False, name=None, vm_capable=True):
2275 self.offline = offline
2276 self.vm_capable = vm_capable
2277 self.rpc_fail = False
2278 self.lvm_fail = False
2279 self.hyp_fail = False
2281 self.os_fail = False
2286 def ExpandNames(self):
2287 # This raises errors.OpPrereqError on its own:
2288 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2290 # Get instances in node group; this is unsafe and needs verification later
2292 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2294 self.needed_locks = {
2295 locking.LEVEL_INSTANCE: inst_names,
2296 locking.LEVEL_NODEGROUP: [self.group_uuid],
2297 locking.LEVEL_NODE: [],
2299 # This opcode is run by watcher every five minutes and acquires all nodes
2300 # for a group. It doesn't run for a long time, so it's better to acquire
2301 # the node allocation lock as well.
2302 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2305 self.share_locks = _ShareAll()
2307 def DeclareLocks(self, level):
2308 if level == locking.LEVEL_NODE:
2309 # Get members of node group; this is unsafe and needs verification later
2310 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2312 all_inst_info = self.cfg.GetAllInstancesInfo()
2314 # In Exec(), we warn about mirrored instances that have primary and
2315 # secondary living in separate node groups. To fully verify that
2316 # volumes for these instances are healthy, we will need to do an
2317 # extra call to their secondaries. We ensure here those nodes will
2319 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2320 # Important: access only the instances whose lock is owned
2321 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2322 nodes.update(all_inst_info[inst].secondary_nodes)
2324 self.needed_locks[locking.LEVEL_NODE] = nodes
2326 def CheckPrereq(self):
2327 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2328 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2330 group_nodes = set(self.group_info.members)
2332 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2335 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2337 unlocked_instances = \
2338 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2341 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2342 utils.CommaJoin(unlocked_nodes),
2345 if unlocked_instances:
2346 raise errors.OpPrereqError("Missing lock for instances: %s" %
2347 utils.CommaJoin(unlocked_instances),
2350 self.all_node_info = self.cfg.GetAllNodesInfo()
2351 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2353 self.my_node_names = utils.NiceSort(group_nodes)
2354 self.my_inst_names = utils.NiceSort(group_instances)
2356 self.my_node_info = dict((name, self.all_node_info[name])
2357 for name in self.my_node_names)
2359 self.my_inst_info = dict((name, self.all_inst_info[name])
2360 for name in self.my_inst_names)
2362 # We detect here the nodes that will need the extra RPC calls for verifying
2363 # split LV volumes; they should be locked.
2364 extra_lv_nodes = set()
2366 for inst in self.my_inst_info.values():
2367 if inst.disk_template in constants.DTS_INT_MIRROR:
2368 for nname in inst.all_nodes:
2369 if self.all_node_info[nname].group != self.group_uuid:
2370 extra_lv_nodes.add(nname)
2372 unlocked_lv_nodes = \
2373 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2375 if unlocked_lv_nodes:
2376 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2377 utils.CommaJoin(unlocked_lv_nodes),
2379 self.extra_lv_nodes = list(extra_lv_nodes)
2381 def _VerifyNode(self, ninfo, nresult):
2382 """Perform some basic validation on data returned from a node.
2384 - check the result data structure is well formed and has all the
2386 - check ganeti version
2388 @type ninfo: L{objects.Node}
2389 @param ninfo: the node to check
2390 @param nresult: the results from the node
2392 @return: whether overall this call was successful (and we can expect
2393 reasonable values in the response)
2397 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2399 # main result, nresult should be a non-empty dict
2400 test = not nresult or not isinstance(nresult, dict)
2401 _ErrorIf(test, constants.CV_ENODERPC, node,
2402 "unable to verify node: no data returned")
2406 # compares ganeti version
2407 local_version = constants.PROTOCOL_VERSION
2408 remote_version = nresult.get("version", None)
2409 test = not (remote_version and
2410 isinstance(remote_version, (list, tuple)) and
2411 len(remote_version) == 2)
2412 _ErrorIf(test, constants.CV_ENODERPC, node,
2413 "connection to node returned invalid data")
2417 test = local_version != remote_version[0]
2418 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2419 "incompatible protocol versions: master %s,"
2420 " node %s", local_version, remote_version[0])
2424 # node seems compatible, we can actually try to look into its results
2426 # full package version
2427 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2428 constants.CV_ENODEVERSION, node,
2429 "software version mismatch: master %s, node %s",
2430 constants.RELEASE_VERSION, remote_version[1],
2431 code=self.ETYPE_WARNING)
2433 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2434 if ninfo.vm_capable and isinstance(hyp_result, dict):
2435 for hv_name, hv_result in hyp_result.iteritems():
2436 test = hv_result is not None
2437 _ErrorIf(test, constants.CV_ENODEHV, node,
2438 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2440 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2441 if ninfo.vm_capable and isinstance(hvp_result, list):
2442 for item, hv_name, hv_result in hvp_result:
2443 _ErrorIf(True, constants.CV_ENODEHV, node,
2444 "hypervisor %s parameter verify failure (source %s): %s",
2445 hv_name, item, hv_result)
2447 test = nresult.get(constants.NV_NODESETUP,
2448 ["Missing NODESETUP results"])
2449 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2454 def _VerifyNodeTime(self, ninfo, nresult,
2455 nvinfo_starttime, nvinfo_endtime):
2456 """Check the node time.
2458 @type ninfo: L{objects.Node}
2459 @param ninfo: the node to check
2460 @param nresult: the remote results for the node
2461 @param nvinfo_starttime: the start time of the RPC call
2462 @param nvinfo_endtime: the end time of the RPC call
2466 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2468 ntime = nresult.get(constants.NV_TIME, None)
2470 ntime_merged = utils.MergeTime(ntime)
2471 except (ValueError, TypeError):
2472 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2475 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2476 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2477 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2478 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2482 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2483 "Node time diverges by at least %s from master node time",
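# Worked example of the skew check above (illustrative numbers, assuming
# constants.NODE_MAX_CLOCK_SKEW is 150 seconds): for an RPC started at
# t=1000.0 and finished at t=1002.0, a node reporting a merged time of 830.0
# is more than 150s behind the call start, so ntime_diff becomes "170.0s" and
# CV_ENODETIME is reported; any reported time between 850.0 and 1152.0 is
# accepted.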
2486 def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
2487 """Check the node LVM results and update info for cross-node checks.
2489 @type ninfo: L{objects.Node}
2490 @param ninfo: the node to check
2491 @param nresult: the remote results for the node
2492 @param vg_name: the configured VG name
2493 @type nimg: L{NodeImage}
2494 @param nimg: node image
2501 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2503 # checks vg existence and size > 20G
2504 vglist = nresult.get(constants.NV_VGLIST, None)
2506 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2508 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2509 constants.MIN_VG_SIZE)
2510 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2513 (errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
2515 self._Error(constants.CV_ENODELVM, node, em)
2516 if pvminmax is not None:
2517 (nimg.pv_min, nimg.pv_max) = pvminmax
2519 def _VerifyGroupLVM(self, node_image, vg_name):
2520 """Check cross-node consistency in LVM.
2522 @type node_image: dict
2523 @param node_image: info about nodes, mapping from node to names to
2524 L{NodeImage} objects
2525 @param vg_name: the configured VG name
2531 # Only exclusive storage needs this kind of check
2532 if not self._exclusive_storage:
2535 # exclusive_storage wants all PVs to have the same size (approximately),
2536 # if the smallest and the biggest ones are okay, everything is fine.
2537 # pv_min is None iff pv_max is None
2538 vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
2541 (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
2542 (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
2543 bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
2544 self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
2545 "PV sizes differ too much in the group; smallest (%s MB) is"
2546 " on %s, biggest (%s MB) is on %s",
2547 pvmin, minnode, pvmax, maxnode)
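# Example of the cross-node check above (illustrative sizes): if node A
# reports pv_min=10240 / pv_max=10240 MiB and node B reports pv_min=10240 /
# pv_max=20480 MiB, the group-wide extremes are (10240, A) and (20480, B);
# utils.LvmExclusiveTestBadPvSizes() then decides whether that spread is too
# large, in which case CV_EGROUPDIFFERENTPVSIZE is reported against the group,
# naming the nodes holding the smallest and biggest PVs.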
2549 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2550 """Check the node bridges.
2552 @type ninfo: L{objects.Node}
2553 @param ninfo: the node to check
2554 @param nresult: the remote results for the node
2555 @param bridges: the expected list of bridges
2562 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2564 missing = nresult.get(constants.NV_BRIDGES, None)
2565 test = not isinstance(missing, list)
2566 _ErrorIf(test, constants.CV_ENODENET, node,
2567 "did not return valid bridge information")
2569 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2570 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2572 def _VerifyNodeUserScripts(self, ninfo, nresult):
2573 """Check the results of user script presence and executability on the node
2575 @type ninfo: L{objects.Node}
2576 @param ninfo: the node to check
2577 @param nresult: the remote results for the node
2582 test = constants.NV_USERSCRIPTS not in nresult
2583 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2584 "did not return user scripts information")
2586 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2588 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2589 "user scripts not present or not executable: %s" %
2590 utils.CommaJoin(sorted(broken_scripts)))
2592 def _VerifyNodeNetwork(self, ninfo, nresult):
2593 """Check the node network connectivity results.
2595 @type ninfo: L{objects.Node}
2596 @param ninfo: the node to check
2597 @param nresult: the remote results for the node
2601 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2603 test = constants.NV_NODELIST not in nresult
2604 _ErrorIf(test, constants.CV_ENODESSH, node,
2605 "node hasn't returned node ssh connectivity data")
2607 if nresult[constants.NV_NODELIST]:
2608 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2609 _ErrorIf(True, constants.CV_ENODESSH, node,
2610 "ssh communication with node '%s': %s", a_node, a_msg)
2612 test = constants.NV_NODENETTEST not in nresult
2613 _ErrorIf(test, constants.CV_ENODENET, node,
2614 "node hasn't returned node tcp connectivity data")
2616 if nresult[constants.NV_NODENETTEST]:
2617 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2619 _ErrorIf(True, constants.CV_ENODENET, node,
2620 "tcp communication with node '%s': %s",
2621 anode, nresult[constants.NV_NODENETTEST][anode])
2623 test = constants.NV_MASTERIP not in nresult
2624 _ErrorIf(test, constants.CV_ENODENET, node,
2625 "node hasn't returned node master IP reachability data")
2627 if not nresult[constants.NV_MASTERIP]:
2628 if node == self.master_node:
2629 msg = "the master node cannot reach the master IP (not configured?)"
2631 msg = "cannot reach the master IP"
2632 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2634 def _VerifyInstance(self, instance, inst_config, node_image,
2636 """Verify an instance.
2638 This function checks to see if the required block devices are
2639 available on the instance's node, and that the nodes are in the correct
2643 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2644 pnode = inst_config.primary_node
2645 pnode_img = node_image[pnode]
2646 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2648 node_vol_should = {}
2649 inst_config.MapLVsByNode(node_vol_should)
2651 cluster = self.cfg.GetClusterInfo()
2652 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2654 err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
2655 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
2656 code=self.ETYPE_WARNING)
2658 for node in node_vol_should:
2659 n_img = node_image[node]
2660 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2661 # ignore missing volumes on offline or broken nodes
2663 for volume in node_vol_should[node]:
2664 test = volume not in n_img.volumes
2665 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2666 "volume %s missing on node %s", volume, node)
2668 if inst_config.admin_state == constants.ADMINST_UP:
2669 test = instance not in pnode_img.instances and not pnode_img.offline
2670 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2671 "instance not running on its primary node %s",
2673 _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
2674 "instance is marked as running and lives on offline node %s",
2677 diskdata = [(nname, success, status, idx)
2678 for (nname, disks) in diskstatus.items()
2679 for idx, (success, status) in enumerate(disks)]
2681 for nname, success, bdev_status, idx in diskdata:
2682 # the 'ghost node' construction in Exec() ensures that we have a
2684 snode = node_image[nname]
2685 bad_snode = snode.ghost or snode.offline
2686 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2687 not success and not bad_snode,
2688 constants.CV_EINSTANCEFAULTYDISK, instance,
2689 "couldn't retrieve status for disk/%s on %s: %s",
2690 idx, nname, bdev_status)
2691 _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
2692 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2693 constants.CV_EINSTANCEFAULTYDISK, instance,
2694 "disk/%s on %s is faulty", idx, nname)
2696 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2697 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2698 " primary node failed", instance)
2700 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2701 constants.CV_EINSTANCELAYOUT,
2702 instance, "instance has multiple secondary nodes: %s",
2703 utils.CommaJoin(inst_config.secondary_nodes),
2704 code=self.ETYPE_WARNING)
2706 if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
2707 # Disk template not compatible with exclusive_storage: no instance
2708 # node should have the flag set
2709 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
2710 inst_config.all_nodes)
2711 es_nodes = [n for (n, es) in es_flags.items()
2713 _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
2714 "instance has template %s, which is not supported on nodes"
2715 " that have exclusive storage set: %s",
2716 inst_config.disk_template, utils.CommaJoin(es_nodes))
2718 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2719 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2720 instance_groups = {}
2722 for node in instance_nodes:
2723 instance_groups.setdefault(self.all_node_info[node].group,
2727 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2728 # Sort so that we always list the primary node first.
2729 for group, nodes in sorted(instance_groups.items(),
2730 key=lambda (_, nodes): pnode in nodes,
2733 self._ErrorIf(len(instance_groups) > 1,
2734 constants.CV_EINSTANCESPLITGROUPS,
2735 instance, "instance has primary and secondary nodes in"
2736 " different groups: %s", utils.CommaJoin(pretty_list),
2737 code=self.ETYPE_WARNING)
2739 inst_nodes_offline = []
2740 for snode in inst_config.secondary_nodes:
2741 s_img = node_image[snode]
2742 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2743 snode, "instance %s, connection to secondary node failed",
2747 inst_nodes_offline.append(snode)
2749 # warn that the instance lives on offline nodes
2750 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2751 "instance has offline secondary node(s) %s",
2752 utils.CommaJoin(inst_nodes_offline))
2753 # ... or ghost/non-vm_capable nodes
2754 for node in inst_config.all_nodes:
2755 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2756 instance, "instance lives on ghost node %s", node)
2757 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2758 instance, "instance lives on non-vm_capable node %s", node)
2760 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2761 """Verify if there are any unknown volumes in the cluster.
2763 The .os, .swap and backup volumes are ignored. All other volumes are
2764 reported as unknown.
2766 @type reserved: L{ganeti.utils.FieldSet}
2767 @param reserved: a FieldSet of reserved volume names
2770 for node, n_img in node_image.items():
2771 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2772 self.all_node_info[node].group != self.group_uuid):
2773 # skip non-healthy nodes
2775 for volume in n_img.volumes:
2776 test = ((node not in node_vol_should or
2777 volume not in node_vol_should[node]) and
2778 not reserved.Matches(volume))
2779 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2780 "volume %s is unknown", volume)
2782 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2783 """Verify N+1 Memory Resilience.
2785 Check that if one single node dies we can still start all the
2786 instances it was primary for.
2789 cluster_info = self.cfg.GetClusterInfo()
2790 for node, n_img in node_image.items():
2791 # This code checks that every node which is now listed as
2792 # secondary has enough memory to host all instances it is
2793 # supposed to, should a single other node in the cluster fail.
2794 # FIXME: not ready for failover to an arbitrary node
2795 # FIXME: does not support file-backed instances
2796 # WARNING: we currently take into account down instances as well
2797 # as up ones, considering that even if they're down someone
2798 # might want to start them even in the event of a node failure.
2799 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2800 # we're skipping nodes marked offline and nodes in other groups from
2801 # the N+1 warning, since most likely we don't have good memory
2802 # information from them; we already list instances living on such
2803 # nodes, and that's enough warning
2805 #TODO(dynmem): also consider ballooning out other instances
2806 for prinode, instances in n_img.sbp.items():
2808 for instance in instances:
2809 bep = cluster_info.FillBE(instance_cfg[instance])
2810 if bep[constants.BE_AUTO_BALANCE]:
2811 needed_mem += bep[constants.BE_MINMEM]
2812 test = n_img.mfree < needed_mem
2813 self._ErrorIf(test, constants.CV_ENODEN1, node,
2814 "not enough memory to accommodate instance failovers"
2815 " should node %s fail (%dMiB needed, %dMiB available)",
2816 prinode, needed_mem, n_img.mfree)
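# Worked N+1 example (illustrative): suppose node "nodeC" is secondary for two
# auto-balanced instances whose primary is "nodeA", with BE_MINMEM of 1024 and
# 2048 MiB. needed_mem for the (nodeC, nodeA) pair is then 3072 MiB; if nodeC
# currently reports mfree of 2048 MiB, CV_ENODEN1 is raised because nodeC
# could not start both instances should nodeA fail.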
2819 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2820 (files_all, files_opt, files_mc, files_vm)):
2821 """Verifies file checksums collected from all nodes.
2823 @param errorif: Callback for reporting errors
2824 @param nodeinfo: List of L{objects.Node} objects
2825 @param master_node: Name of master node
2826 @param all_nvinfo: RPC results
2829 # Define functions determining which nodes to consider for a file
2832 (files_mc, lambda node: (node.master_candidate or
2833 node.name == master_node)),
2834 (files_vm, lambda node: node.vm_capable),
2837 # Build mapping from filename to list of nodes which should have the file
2839 for (files, fn) in files2nodefn:
2841 filenodes = nodeinfo
2843 filenodes = filter(fn, nodeinfo)
2844 nodefiles.update((filename,
2845 frozenset(map(operator.attrgetter("name"), filenodes)))
2846 for filename in files)
2848 assert set(nodefiles) == (files_all | files_mc | files_vm)
2850 fileinfo = dict((filename, {}) for filename in nodefiles)
2851 ignore_nodes = set()
2853 for node in nodeinfo:
2855 ignore_nodes.add(node.name)
2858 nresult = all_nvinfo[node.name]
2860 if nresult.fail_msg or not nresult.payload:
2863 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
2864 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2865 for (key, value) in fingerprints.items())
2868 test = not (node_files and isinstance(node_files, dict))
2869 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2870 "Node did not return file checksum data")
2872 ignore_nodes.add(node.name)
2875 # Build per-checksum mapping from filename to nodes having it
2876 for (filename, checksum) in node_files.items():
2877 assert filename in nodefiles
2878 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2880 for (filename, checksums) in fileinfo.items():
2881 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2883 # Nodes having the file
2884 with_file = frozenset(node_name
2885 for nodes in fileinfo[filename].values()
2886 for node_name in nodes) - ignore_nodes
2888 expected_nodes = nodefiles[filename] - ignore_nodes
2890 # Nodes missing file
2891 missing_file = expected_nodes - with_file
2893 if filename in files_opt:
2895 errorif(missing_file and missing_file != expected_nodes,
2896 constants.CV_ECLUSTERFILECHECK, None,
2897 "File %s is optional, but it must exist on all or no"
2898 " nodes (not found on %s)",
2899 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2901 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2902 "File %s is missing from node(s) %s", filename,
2903 utils.CommaJoin(utils.NiceSort(missing_file)))
2905 # Warn if a node has a file it shouldn't
2906 unexpected = with_file - expected_nodes
2908 constants.CV_ECLUSTERFILECHECK, None,
2909 "File %s should not exist on node(s) %s",
2910 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2912 # See if there are multiple versions of the file
2913 test = len(checksums) > 1
2915 variants = ["variant %s on %s" %
2916 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2917 for (idx, (checksum, nodes)) in
2918 enumerate(sorted(checksums.items()))]
2922 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2923 "File %s found with %s different checksums (%s)",
2924 filename, len(checksums), "; ".join(variants))
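# Shape of the bookkeeping built above (illustrative path and checksums):
#
#   nodefiles = {"/var/lib/ganeti/config.data": frozenset(["node1", "node2"])}
#   fileinfo  = {"/var/lib/ganeti/config.data":
#                  {"3d0c91a6...94e1": set(["node1"]),
#                   "77afb02c...0d9b": set(["node2"])}}
#
# A file present with two different checksums, as in this example, triggers
# the "found with N different checksums" error at the end of the loop.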
2926 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2928 """Verifies the node DRBD status.
2930 @type ninfo: L{objects.Node}
2931 @param ninfo: the node to check
2932 @param nresult: the remote results for the node
2933 @param instanceinfo: the dict of instances
2934 @param drbd_helper: the configured DRBD usermode helper
2935 @param drbd_map: the DRBD map as returned by
2936 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2940 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2943 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2944 test = (helper_result is None)
2945 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2946 "no drbd usermode helper returned")
2948 status, payload = helper_result
2950 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2951 "drbd usermode helper check unsuccessful: %s", payload)
2952 test = status and (payload != drbd_helper)
2953 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2954 "wrong drbd usermode helper: %s", payload)
2956 # compute the DRBD minors
2958 for minor, instance in drbd_map[node].items():
2959 test = instance not in instanceinfo
2960 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2961 "ghost instance '%s' in temporary DRBD map", instance)
2962 # ghost instance should not be running, but otherwise we
2963 # don't give double warnings (both ghost instance and
2964 # unallocated minor in use)
2966 node_drbd[minor] = (instance, False)
2968 instance = instanceinfo[instance]
2969 node_drbd[minor] = (instance.name,
2970 instance.admin_state == constants.ADMINST_UP)
2972 # and now check them
2973 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2974 test = not isinstance(used_minors, (tuple, list))
2975 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2976 "cannot parse drbd status file: %s", str(used_minors))
2978 # we cannot check drbd status
2981 for minor, (iname, must_exist) in node_drbd.items():
2982 test = minor not in used_minors and must_exist
2983 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2984 "drbd minor %d of instance %s is not active", minor, iname)
2985 for minor in used_minors:
2986 test = minor not in node_drbd
2987 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2988 "unallocated drbd minor %d is in use", minor)
2990 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2991 """Builds the node OS structures.
2993 @type ninfo: L{objects.Node}
2994 @param ninfo: the node to check
2995 @param nresult: the remote results for the node
2996 @param nimg: the node image object
3000 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3002 remote_os = nresult.get(constants.NV_OSLIST, None)
3003 test = (not isinstance(remote_os, list) or
3004 not compat.all(isinstance(v, list) and len(v) == 7
3005 for v in remote_os))
3007 _ErrorIf(test, constants.CV_ENODEOS, node,
3008 "node hasn't returned valid OS data")
3017 for (name, os_path, status, diagnose,
3018 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
3020 if name not in os_dict:
3023 # parameters is a list of lists instead of list of tuples due to
3024 # JSON lacking a real tuple type, fix it:
3025 parameters = [tuple(v) for v in parameters]
3026 os_dict[name].append((os_path, status, diagnose,
3027 set(variants), set(parameters), set(api_ver)))
3029 nimg.oslist = os_dict
3031 def _VerifyNodeOS(self, ninfo, nimg, base):
3032 """Verifies the node OS list.
3034 @type ninfo: L{objects.Node}
3035 @param ninfo: the node to check
3036 @param nimg: the node image object
3037 @param base: the 'template' node we match against (e.g. from the master)
3041 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3043 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
3045 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
3046 for os_name, os_data in nimg.oslist.items():
3047 assert os_data, "Empty OS status for OS %s?!" % os_name
3048 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
3049 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
3050 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
3051 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
3052 "OS '%s' has multiple entries (first one shadows the rest): %s",
3053 os_name, utils.CommaJoin([v[0] for v in os_data]))
3054 # comparisons with the 'base' image
3055 test = os_name not in base.oslist
3056 _ErrorIf(test, constants.CV_ENODEOS, node,
3057 "Extra OS %s not present on reference node (%s)",
3061 assert base.oslist[os_name], "Base node has empty OS status?"
3062 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
3064 # base OS is invalid, skipping
3066 for kind, a, b in [("API version", f_api, b_api),
3067 ("variants list", f_var, b_var),
3068 ("parameters", beautify_params(f_param),
3069 beautify_params(b_param))]:
3070 _ErrorIf(a != b, constants.CV_ENODEOS, node,
3071 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
3072 kind, os_name, base.name,
3073 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
3075 # check any missing OSes
3076 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
3077 _ErrorIf(missing, constants.CV_ENODEOS, node,
3078 "OSes present on reference node %s but missing on this node: %s",
3079 base.name, utils.CommaJoin(missing))
3081 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
3082 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
3084 @type ninfo: L{objects.Node}
3085 @param ninfo: the node to check
3086 @param nresult: the remote results for the node
3087 @type is_master: bool
3088 @param is_master: Whether node is the master node
3094 (constants.ENABLE_FILE_STORAGE or
3095 constants.ENABLE_SHARED_FILE_STORAGE)):
3097 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
3099 # This should never happen
3100 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
3101 "Node did not return forbidden file storage paths")
3103 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
3104 "Found forbidden file storage paths: %s",
3105 utils.CommaJoin(fspaths))
3107 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
3108 constants.CV_ENODEFILESTORAGEPATHS, node,
3109 "Node should not have returned forbidden file storage"
3112 def _VerifyOob(self, ninfo, nresult):
3113 """Verifies out of band functionality of a node.
3115 @type ninfo: L{objects.Node}
3116 @param ninfo: the node to check
3117 @param nresult: the remote results for the node
3121 # We just have to verify the paths on master and/or master candidates
3122 # as the oob helper is invoked on the master
3123 if ((ninfo.master_candidate or ninfo.master_capable) and
3124 constants.NV_OOB_PATHS in nresult):
3125 for path_result in nresult[constants.NV_OOB_PATHS]:
3126 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
3128 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
3129 """Verifies and updates the node volume data.
3131 This function will update a L{NodeImage}'s internal structures
3132 with data from the remote call.
3134 @type ninfo: L{objects.Node}
3135 @param ninfo: the node to check
3136 @param nresult: the remote results for the node
3137 @param nimg: the node image object
3138 @param vg_name: the configured VG name
3142 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3144 nimg.lvm_fail = True
3145 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
3148 elif isinstance(lvdata, basestring):
3149 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
3150 utils.SafeEncode(lvdata))
3151 elif not isinstance(lvdata, dict):
3152 _ErrorIf(True, constants.CV_ENODELVM, node,
3153 "rpc call to node failed (lvlist)")
3155 nimg.volumes = lvdata
3156 nimg.lvm_fail = False
3158 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
3159 """Verifies and updates the node instance list.
3161 If the listing was successful, then updates this node's instance
3162 list. Otherwise, it marks the RPC call as failed for the instance
3165 @type ninfo: L{objects.Node}
3166 @param ninfo: the node to check
3167 @param nresult: the remote results for the node
3168 @param nimg: the node image object
3171 idata = nresult.get(constants.NV_INSTANCELIST, None)
3172 test = not isinstance(idata, list)
3173 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
3174 "rpc call to node failed (instancelist): %s",
3175 utils.SafeEncode(str(idata)))
3177 nimg.hyp_fail = True
3179 nimg.instances = idata
3181 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
3182 """Verifies and computes a node information map
3184 @type ninfo: L{objects.Node}
3185 @param ninfo: the node to check
3186 @param nresult: the remote results for the node
3187 @param nimg: the node image object
3188 @param vg_name: the configured VG name
3192 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3194 # try to read free memory (from the hypervisor)
3195 hv_info = nresult.get(constants.NV_HVINFO, None)
3196 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
3197 _ErrorIf(test, constants.CV_ENODEHV, node,
3198 "rpc call to node failed (hvinfo)")
3201 nimg.mfree = int(hv_info["memory_free"])
3202 except (ValueError, TypeError):
3203 _ErrorIf(True, constants.CV_ENODERPC, node,
3204 "node returned invalid nodeinfo, check hypervisor")
3206 # FIXME: devise a free space model for file based instances as well
3207 if vg_name is not None:
3208 test = (constants.NV_VGLIST not in nresult or
3209 vg_name not in nresult[constants.NV_VGLIST])
3210 _ErrorIf(test, constants.CV_ENODELVM, node,
3211 "node didn't return data for the volume group '%s'"
3212 " - it is either missing or broken", vg_name)
3215 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3216 except (ValueError, TypeError):
3217 _ErrorIf(True, constants.CV_ENODERPC, node,
3218 "node returned invalid LVM info, check LVM status")
3220 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3221 """Gets per-disk status information for all instances.
3223 @type nodelist: list of strings
3224 @param nodelist: Node names
3225 @type node_image: dict of (name, L{objects.Node})
3226 @param node_image: Node objects
3227 @type instanceinfo: dict of (name, L{objects.Instance})
3228 @param instanceinfo: Instance objects
3229 @rtype: {instance: {node: [(success, payload)]}}
3230 @return: a dictionary of per-instance dictionaries with nodes as
3231 keys and disk information as values; the disk information is a
3232 list of tuples (success, payload)
3235 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3238 node_disks_devonly = {}
3239 diskless_instances = set()
3240 diskless = constants.DT_DISKLESS
3242 for nname in nodelist:
3243 node_instances = list(itertools.chain(node_image[nname].pinst,
3244 node_image[nname].sinst))
3245 diskless_instances.update(inst for inst in node_instances
3246 if instanceinfo[inst].disk_template == diskless)
3247 disks = [(inst, disk)
3248 for inst in node_instances
3249 for disk in instanceinfo[inst].disks]
3252 # No need to collect data
3255 node_disks[nname] = disks
3257 # _AnnotateDiskParams already makes copies of the disks
3259 for (inst, dev) in disks:
3260 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3261 self.cfg.SetDiskID(anno_disk, nname)
3262 devonly.append(anno_disk)
3264 node_disks_devonly[nname] = devonly
3266 assert len(node_disks) == len(node_disks_devonly)
3268 # Collect data from all nodes with disks
3269 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3272 assert len(result) == len(node_disks)
3276 for (nname, nres) in result.items():
3277 disks = node_disks[nname]
3280 # No data from this node
3281 data = len(disks) * [(False, "node offline")]
3284 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3285 "while getting disk information: %s", msg)
3287 # No data from this node
3288 data = len(disks) * [(False, msg)]
3291 for idx, i in enumerate(nres.payload):
3292 if isinstance(i, (tuple, list)) and len(i) == 2:
3295 logging.warning("Invalid result from node %s, entry %d: %s",
3297 data.append((False, "Invalid result from the remote node"))
3299 for ((inst, _), status) in zip(disks, data):
3300 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3302 # Add empty entries for diskless instances.
3303 for inst in diskless_instances:
3304 assert inst not in instdisk
3307 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3308 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3309 compat.all(isinstance(s, (tuple, list)) and
3310 len(s) == 2 for s in statuses)
3311 for inst, nnames in instdisk.items()
3312 for nname, statuses in nnames.items())
3314 instdisk_keys = set(instdisk)
3315 instanceinfo_keys = set(instanceinfo)
3316 assert instdisk_keys == instanceinfo_keys, \
3317 ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
3318 (instdisk_keys, instanceinfo_keys))
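# Shape of the returned mapping (illustrative names): for an instance with two
# disks on primary "node1" and secondary "node2",
#
#   instdisk = {"inst1.example.com":
#                 {"node1": [(True, status_d0), (True, status_d1)],
#                  "node2": [(True, status_d0), (True, status_d1)]}}
#
# where each (success, payload) pair comes from blockdev_getmirrorstatus_multi;
# diskless instances end up with an empty inner dictionary.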
3323 def _SshNodeSelector(group_uuid, all_nodes):
3324 """Create endless iterators for all potential SSH check hosts.
3327 nodes = [node for node in all_nodes
3328 if (node.group != group_uuid and
3330 keyfunc = operator.attrgetter("group")
3332 return map(itertools.cycle,
3333 [sorted(map(operator.attrgetter("name"), names))
3334 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3338 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3339 """Choose which nodes should talk to which other nodes.
3341 We will make nodes contact all nodes in their group, and one node from
3344 @warning: This algorithm has a known issue if one node group is much
3345 smaller than others (e.g. just one node). In such a case all other
3346 nodes will talk to the single node.
3349 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3350 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3352 return (online_nodes,
3353 dict((name, sorted([i.next() for i in sel]))
3354 for name in online_nodes))
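# Example of the returned structure (hypothetical node and group names): for a
# verified group containing node1..node3 in a cluster with two other groups,
# every online node is told to SSH-check one node from each foreign group,
# e.g.
#
#   (["node1", "node2", "node3"],
#    {"node1": ["grp2-a", "grp3-a"],
#     "node2": ["grp2-b", "grp3-b"],
#     "node3": ["grp2-c", "grp3-c"]})
#
# while reachability inside the group is covered by the first element.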
3356 def BuildHooksEnv(self):
3359 Cluster-Verify hooks are only run in the post phase; their failure is
3360 logged in the verify output and makes the verification fail.
3364 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
3367 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3368 for node in self.my_node_info.values())
3372 def BuildHooksNodes(self):
3373 """Build hooks nodes.
3376 return ([], self.my_node_names)
3378 def Exec(self, feedback_fn):
3379 """Verify integrity of the node group, performing various tests on nodes.
3382 # This method has too many local variables. pylint: disable=R0914
3383 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3385 if not self.my_node_names:
3387 feedback_fn("* Empty node group, skipping verification")
3391 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3392 verbose = self.op.verbose
3393 self._feedback_fn = feedback_fn
3395 vg_name = self.cfg.GetVGName()
3396 drbd_helper = self.cfg.GetDRBDHelper()
3397 cluster = self.cfg.GetClusterInfo()
3398 hypervisors = cluster.enabled_hypervisors
3399 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3401 i_non_redundant = [] # Non redundant instances
3402 i_non_a_balanced = [] # Non auto-balanced instances
3403 i_offline = 0 # Count of offline instances
3404 n_offline = 0 # Count of offline nodes
3405 n_drained = 0 # Count of nodes being drained
3406 node_vol_should = {}
3408 # FIXME: verify OS list
3411 filemap = _ComputeAncillaryFiles(cluster, False)
3413 # do local checksums
3414 master_node = self.master_node = self.cfg.GetMasterNode()
3415 master_ip = self.cfg.GetMasterIP()
3417 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3420 if self.cfg.GetUseExternalMipScript():
3421 user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
3423 node_verify_param = {
3424 constants.NV_FILELIST:
3425 map(vcluster.MakeVirtualPath,
3426 utils.UniqueSequence(filename
3427 for files in filemap
3428 for filename in files)),
3429 constants.NV_NODELIST:
3430 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3431 self.all_node_info.values()),
3432 constants.NV_HYPERVISOR: hypervisors,
3433 constants.NV_HVPARAMS:
3434 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3435 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3436 for node in node_data_list
3437 if not node.offline],
3438 constants.NV_INSTANCELIST: hypervisors,
3439 constants.NV_VERSION: None,
3440 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3441 constants.NV_NODESETUP: None,
3442 constants.NV_TIME: None,
3443 constants.NV_MASTERIP: (master_node, master_ip),
3444 constants.NV_OSLIST: None,
3445 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3446 constants.NV_USERSCRIPTS: user_scripts,
3449 if vg_name is not None:
3450 node_verify_param[constants.NV_VGLIST] = None
3451 node_verify_param[constants.NV_LVLIST] = vg_name
3452 node_verify_param[constants.NV_PVLIST] = [vg_name]
3455 node_verify_param[constants.NV_DRBDLIST] = None
3456 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3458 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
3459 # Load file storage paths only from master node
3460 node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
3463 # FIXME: this needs to be changed per node-group, not cluster-wide
3465 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3466 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3467 bridges.add(default_nicpp[constants.NIC_LINK])
3468 for instance in self.my_inst_info.values():
3469 for nic in instance.nics:
3470 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3471 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3472 bridges.add(full_nic[constants.NIC_LINK])
3475 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3477 # Build our expected cluster state
3478 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3480 vm_capable=node.vm_capable))
3481 for node in node_data_list)
3485 for node in self.all_node_info.values():
3486 path = _SupportsOob(self.cfg, node)
3487 if path and path not in oob_paths:
3488 oob_paths.append(path)
3491 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3493 for instance in self.my_inst_names:
3494 inst_config = self.my_inst_info[instance]
3495 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3498 for nname in inst_config.all_nodes:
3499 if nname not in node_image:
3500 gnode = self.NodeImage(name=nname)
3501 gnode.ghost = (nname not in self.all_node_info)
3502 node_image[nname] = gnode
3504 inst_config.MapLVsByNode(node_vol_should)
3506 pnode = inst_config.primary_node
3507 node_image[pnode].pinst.append(instance)
3509 for snode in inst_config.secondary_nodes:
3510 nimg = node_image[snode]
3511 nimg.sinst.append(instance)
3512 if pnode not in nimg.sbp:
3513 nimg.sbp[pnode] = []
3514 nimg.sbp[pnode].append(instance)
3516 es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
3517 # The value of exclusive_storage should be the same across the group, so if
3518 # it's True for at least one node, we act as if it were set for all the nodes
3519 self._exclusive_storage = compat.any(es_flags.values())
3520 if self._exclusive_storage:
3521 node_verify_param[constants.NV_EXCLUSIVEPVS] = True
3523 # At this point, we have the in-memory data structures complete,
3524 # except for the runtime information, which we'll gather next
3526 # Due to the way our RPC system works, exact response times cannot be
3527 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3528 # time before and after executing the request, we can at least have a time
3530 nvinfo_starttime = time.time()
3531 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3533 self.cfg.GetClusterName())
3534 nvinfo_endtime = time.time()
3536 if self.extra_lv_nodes and vg_name is not None:
3538 self.rpc.call_node_verify(self.extra_lv_nodes,
3539 {constants.NV_LVLIST: vg_name},
3540 self.cfg.GetClusterName())
3542 extra_lv_nvinfo = {}
3544 all_drbd_map = self.cfg.ComputeDRBDMap()
3546 feedback_fn("* Gathering disk information (%s nodes)" %
3547 len(self.my_node_names))
3548 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3551 feedback_fn("* Verifying configuration file consistency")
3553 # If not all nodes are being checked, we need to make sure the master node
3554 # and a non-checked vm_capable node are in the list.
3555 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3557 vf_nvinfo = all_nvinfo.copy()
3558 vf_node_info = list(self.my_node_info.values())
3559 additional_nodes = []
3560 if master_node not in self.my_node_info:
3561 additional_nodes.append(master_node)
3562 vf_node_info.append(self.all_node_info[master_node])
3563 # Add the first vm_capable node we find which is not included,
3564 # excluding the master node (which we already have)
3565 for node in absent_nodes:
3566 nodeinfo = self.all_node_info[node]
3567 if (nodeinfo.vm_capable and not nodeinfo.offline and
3568 node != master_node):
3569 additional_nodes.append(node)
3570 vf_node_info.append(self.all_node_info[node])
3572 key = constants.NV_FILELIST
3573 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3574 {key: node_verify_param[key]},
3575 self.cfg.GetClusterName()))
3577 vf_nvinfo = all_nvinfo
3578 vf_node_info = self.my_node_info.values()
3580 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3582 feedback_fn("* Verifying node status")
3586 for node_i in node_data_list:
3588 nimg = node_image[node]
3592 feedback_fn("* Skipping offline node %s" % (node,))
3596 if node == master_node:
3598 elif node_i.master_candidate:
3599 ntype = "master candidate"
3600 elif node_i.drained:
3606 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3608 msg = all_nvinfo[node].fail_msg
3609 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3612 nimg.rpc_fail = True
3615 nresult = all_nvinfo[node].payload
3617 nimg.call_ok = self._VerifyNode(node_i, nresult)
3618 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3619 self._VerifyNodeNetwork(node_i, nresult)
3620 self._VerifyNodeUserScripts(node_i, nresult)
3621 self._VerifyOob(node_i, nresult)
3622 self._VerifyFileStoragePaths(node_i, nresult,
3623 node == master_node)
3626 self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
3627 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3630 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3631 self._UpdateNodeInstances(node_i, nresult, nimg)
3632 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3633 self._UpdateNodeOS(node_i, nresult, nimg)
3635 if not nimg.os_fail:
3636 if refos_img is None:
3638 self._VerifyNodeOS(node_i, nimg, refos_img)
3639 self._VerifyNodeBridges(node_i, nresult, bridges)
3641 # Check whether all running instances are primary for the node. (This
3642 # can no longer be done from _VerifyInstance below, since some of the
3643 # wrong instances could be from other node groups.)
3644 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3646 for inst in non_primary_inst:
3647 test = inst in self.all_inst_info
3648 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3649 "instance should not run on node %s", node_i.name)
3650 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3651 "node is running unknown instance %s", inst)
3653 self._VerifyGroupLVM(node_image, vg_name)
3655 for node, result in extra_lv_nvinfo.items():
3656 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3657 node_image[node], vg_name)
3659 feedback_fn("* Verifying instance status")
3660 for instance in self.my_inst_names:
3662 feedback_fn("* Verifying instance %s" % instance)
3663 inst_config = self.my_inst_info[instance]
3664 self._VerifyInstance(instance, inst_config, node_image,
3667 # If the instance is non-redundant we cannot survive losing its primary
3668 # node, so we are not N+1 compliant.
3669 if inst_config.disk_template not in constants.DTS_MIRRORED:
3670 i_non_redundant.append(instance)
3672 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3673 i_non_a_balanced.append(instance)
3675 feedback_fn("* Verifying orphan volumes")
3676 reserved = utils.FieldSet(*cluster.reserved_lvs)
3678 # We will get spurious "unknown volume" warnings if any node of this group
3679 # is secondary for an instance whose primary is in another group. To avoid
3680 # them, we find these instances and add their volumes to node_vol_should.
3681 for inst in self.all_inst_info.values():
3682 for secondary in inst.secondary_nodes:
3683 if (secondary in self.my_node_info
3684 and inst.name not in self.my_inst_info):
3685 inst.MapLVsByNode(node_vol_should)
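# node_vol_should now also contains the volumes of such cross-group
# secondaries, so they will not be flagged as orphans below.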
3688 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3690 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3691 feedback_fn("* Verifying N+1 Memory redundancy")
3692 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3694 feedback_fn("* Other Notes")
3696 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3697 % len(i_non_redundant))
3699 if i_non_a_balanced:
3700 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3701 % len(i_non_a_balanced))
3704 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3707 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3710 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3714 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3715 """Analyze the post-hooks' result
3717 This method analyses the hook result, handles it, and sends some
3718 nicely-formatted feedback back to the user.
3720 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3721 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3722 @param hooks_results: the results of the multi-node hooks rpc call
3723 @param feedback_fn: function used to send feedback back to the caller
3724 @param lu_result: previous Exec result
3725 @return: the new Exec result, based on the previous result
3729 # We only really run POST phase hooks, only for non-empty groups,
3730 # and are only interested in their results
3731 if not self.my_node_names:
3734 elif phase == constants.HOOKS_PHASE_POST:
3735 # Used to change hooks' output to proper indentation
3736 feedback_fn("* Hooks Results")
3737 assert hooks_results, "invalid result from hooks"
3739 for node_name in hooks_results:
3740 res = hooks_results[node_name]
3742 test = msg and not res.offline
3743 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3744 "Communication failure in hooks execution: %s", msg)
3745 if res.offline or msg:
3746 # No need to investigate payload if node is offline or gave
3749 for script, hkr, output in res.payload:
3750 test = hkr == constants.HKR_FAIL
3751 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3752 "Script %s failed, output:", script)
3754 output = self._HOOKS_INDENT_RE.sub(" ", output)
3755 feedback_fn("%s" % output)
3761 class LUClusterVerifyDisks(NoHooksLU):
3762 """Verifies the cluster disks status.
3767 def ExpandNames(self):
3768 self.share_locks = _ShareAll()
3769 self.needed_locks = {
3770 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3773 def Exec(self, feedback_fn):
3774 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3776 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
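# Illustrative shape of the value built below, assuming two node groups
# named "default" and "rack1":
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="rack1")]])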
3777 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3778 for group in group_names])
3781 class LUGroupVerifyDisks(NoHooksLU):
3782 """Verifies the status of all disks in a node group.
3787 def ExpandNames(self):
3788 # Raises errors.OpPrereqError on its own if group can't be found
3789 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3791 self.share_locks = _ShareAll()
3792 self.needed_locks = {
3793 locking.LEVEL_INSTANCE: [],
3794 locking.LEVEL_NODEGROUP: [],
3795 locking.LEVEL_NODE: [],
3797 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
3798 # starts one instance of this opcode for every group, which means all
3799 # nodes will be locked for a short amount of time, so it's better to
3800 # acquire the node allocation lock as well.
3801 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3804 def DeclareLocks(self, level):
3805 if level == locking.LEVEL_INSTANCE:
3806 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3808 # Lock instances optimistically, needs verification once node and group
3809 # locks have been acquired
3810 self.needed_locks[locking.LEVEL_INSTANCE] = \
3811 self.cfg.GetNodeGroupInstances(self.group_uuid)
3813 elif level == locking.LEVEL_NODEGROUP:
3814 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3816 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3817 set([self.group_uuid] +
3818 # Lock all groups used by instances optimistically; this requires
3819 # going via the node before it's locked, requiring verification
3822 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3823 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3825 elif level == locking.LEVEL_NODE:
3826 # This will only lock the nodes in the group to be verified which contain actual instances
3828 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3829 self._LockInstancesNodes()
3831 # Lock all nodes in group to be verified
3832 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3833 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3834 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3836 def CheckPrereq(self):
3837 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3838 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3839 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3841 assert self.group_uuid in owned_groups
3843 # Check if locked instances are still correct
3844 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3846 # Get instance information
3847 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3849 # Check if node groups for locked instances are still correct
3850 _CheckInstancesNodeGroups(self.cfg, self.instances,
3851 owned_groups, owned_nodes, self.group_uuid)
3853 def Exec(self, feedback_fn):
3854 """Verify integrity of cluster disks.
3856 @rtype: tuple of three items
3857 @return: a tuple of (dict of node-to-node_error, list of instances
3858 which need activate-disks, dict of instance: (node, volume) for
3863 res_instances = set()
3866 nv_dict = _MapInstanceDisksToNodes(
3867 [inst for inst in self.instances.values()
3868 if inst.admin_state == constants.ADMINST_UP])
3871 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3872 set(self.cfg.GetVmCapableNodeList()))
3874 node_lvs = self.rpc.call_lv_list(nodes, [])
3876 for (node, node_res) in node_lvs.items():
3877 if node_res.offline:
3880 msg = node_res.fail_msg
3882 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3883 res_nodes[node] = msg
3886 for lv_name, (_, _, lv_online) in node_res.payload.items():
3887 inst = nv_dict.pop((node, lv_name), None)
3888 if not (lv_online or inst is None):
3889 res_instances.add(inst)
3891 # any leftover items in nv_dict are missing LVs, let's arrange the data
3893 for key, inst in nv_dict.iteritems():
3894 res_missing.setdefault(inst, []).append(list(key))
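# Illustrative return value (hypothetical node, instance and volume names):
#   ({"node2": "<rpc failure message>"},              # per-node errors
#    ["instance3"],                                   # need activate-disks
#    {"instance5": [["node1", "xenvg/disk0_data"]]})  # missing LVs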
3896 return (res_nodes, list(res_instances), res_missing)
3899 class LUClusterRepairDiskSizes(NoHooksLU):
3900 """Verifies the cluster disks sizes.
3905 def ExpandNames(self):
3906 if self.op.instances:
3907 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3908 # Not getting the node allocation lock as only a specific set of
3909 # instances (and their nodes) is going to be acquired
3910 self.needed_locks = {
3911 locking.LEVEL_NODE_RES: [],
3912 locking.LEVEL_INSTANCE: self.wanted_names,
3914 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3916 self.wanted_names = None
3917 self.needed_locks = {
3918 locking.LEVEL_NODE_RES: locking.ALL_SET,
3919 locking.LEVEL_INSTANCE: locking.ALL_SET,
3921 # This opcode acquires the node locks for all instances
3922 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
3925 self.share_locks = {
3926 locking.LEVEL_NODE_RES: 1,
3927 locking.LEVEL_INSTANCE: 0,
3928 locking.LEVEL_NODE_ALLOC: 1,
3931 def DeclareLocks(self, level):
3932 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3933 self._LockInstancesNodes(primary_only=True, level=level)
3935 def CheckPrereq(self):
3936 """Check prerequisites.
3938 This only checks the optional instance list against the existing names.
3941 if self.wanted_names is None:
3942 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3944 self.wanted_instances = \
3945 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3947 def _EnsureChildSizes(self, disk):
3948 """Ensure children of the disk have the needed disk size.
3950 This is valid mainly for DRBD8 and fixes an issue where the
3951 children have a smaller disk size.
3953 @param disk: an L{ganeti.objects.Disk} object
3956 if disk.dev_type == constants.LD_DRBD8:
3957 assert disk.children, "Empty children for DRBD8?"
3958 fchild = disk.children[0]
3959 mismatch = fchild.size < disk.size
3961 self.LogInfo("Child disk has size %d, parent %d, fixing",
3962 fchild.size, disk.size)
3963 fchild.size = disk.size
3965 # and we recurse on this child only, not on the metadev
3966 return self._EnsureChildSizes(fchild) or mismatch
3970 def Exec(self, feedback_fn):
3971 """Verify the size of cluster disks.
3974 # TODO: check child disks too
3975 # TODO: check differences in size between primary/secondary nodes
3977 for instance in self.wanted_instances:
3978 pnode = instance.primary_node
3979 if pnode not in per_node_disks:
3980 per_node_disks[pnode] = []
3981 for idx, disk in enumerate(instance.disks):
3982 per_node_disks[pnode].append((instance, idx, disk))
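# per_node_disks now maps each primary node name to a list of
# (instance, disk index, disk) tuples for the disks to be checked.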
3984 assert not (frozenset(per_node_disks.keys()) -
3985 self.owned_locks(locking.LEVEL_NODE_RES)), \
3986 "Not owning correct locks"
3987 assert not self.owned_locks(locking.LEVEL_NODE)
3990 for node, dskl in per_node_disks.items():
3991 newl = [v[2].Copy() for v in dskl]
3993 self.cfg.SetDiskID(dsk, node)
3994 result = self.rpc.call_blockdev_getsize(node, newl)
3996 self.LogWarning("Failure in blockdev_getsize call to node"
3997 " %s, ignoring", node)
3999 if len(result.payload) != len(dskl):
4000 logging.warning("Invalid result from node %s: len(dskl)=%d,"
4001 " result.payload=%s", node, len(dskl), result.payload)
4002 self.LogWarning("Invalid result from node %s, ignoring node results",
4005 for ((instance, idx, disk), size) in zip(dskl, result.payload):
4007 self.LogWarning("Disk %d of instance %s did not return size"
4008 " information, ignoring", idx, instance.name)
4010 if not isinstance(size, (int, long)):
4011 self.LogWarning("Disk %d of instance %s did not return valid"
4012 " size information, ignoring", idx, instance.name)
4015 if size != disk.size:
4016 self.LogInfo("Disk %d of instance %s has mismatched size,"
4017 " correcting: recorded %d, actual %d", idx,
4018 instance.name, disk.size, size)
4020 self.cfg.Update(instance, feedback_fn)
4021 changed.append((instance.name, idx, size))
4022 if self._EnsureChildSizes(disk):
4023 self.cfg.Update(instance, feedback_fn)
4024 changed.append((instance.name, idx, disk.size))
4028 class LUClusterRename(LogicalUnit):
4029 """Rename the cluster.
4032 HPATH = "cluster-rename"
4033 HTYPE = constants.HTYPE_CLUSTER
4035 def BuildHooksEnv(self):
4040 "OP_TARGET": self.cfg.GetClusterName(),
4041 "NEW_NAME": self.op.name,
4044 def BuildHooksNodes(self):
4045 """Build hooks nodes.
4048 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
4050 def CheckPrereq(self):
4051 """Verify that the passed name is a valid one.
4054 hostname = netutils.GetHostname(name=self.op.name,
4055 family=self.cfg.GetPrimaryIPFamily())
4057 new_name = hostname.name
4058 self.ip = new_ip = hostname.ip
4059 old_name = self.cfg.GetClusterName()
4060 old_ip = self.cfg.GetMasterIP()
4061 if new_name == old_name and new_ip == old_ip:
4062 raise errors.OpPrereqError("Neither the name nor the IP address of the"
4063 " cluster has changed",
4065 if new_ip != old_ip:
4066 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
4067 raise errors.OpPrereqError("The given cluster IP address (%s) is"
4068 " reachable on the network" %
4069 new_ip, errors.ECODE_NOTUNIQUE)
4071 self.op.name = new_name
4073 def Exec(self, feedback_fn):
4074 """Rename the cluster.
4077 clustername = self.op.name
4080 # shutdown the master IP
4081 master_params = self.cfg.GetMasterNetworkParameters()
4082 ems = self.cfg.GetUseExternalMipScript()
4083 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4085 result.Raise("Could not disable the master role")
4088 cluster = self.cfg.GetClusterInfo()
4089 cluster.cluster_name = clustername
4090 cluster.master_ip = new_ip
4091 self.cfg.Update(cluster, feedback_fn)
4093 # update the known hosts file
4094 ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
4095 node_list = self.cfg.GetOnlineNodeList()
4097 node_list.remove(master_params.name)
4100 _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
4102 master_params.ip = new_ip
4103 result = self.rpc.call_node_activate_master_ip(master_params.name,
4105 msg = result.fail_msg
4107 self.LogWarning("Could not re-enable the master role on"
4108 " the master, please restart manually: %s", msg)
4113 def _ValidateNetmask(cfg, netmask):
4114 """Checks if a netmask is valid.
4116 @type cfg: L{config.ConfigWriter}
4117 @param cfg: The cluster configuration
4119 @param netmask: the netmask to be verified
4120 @raise errors.OpPrereqError: if the validation fails
4123 ip_family = cfg.GetPrimaryIPFamily()
4125 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
4126 except errors.ProgrammerError:
4127 raise errors.OpPrereqError("Invalid primary ip family: %s." %
4128 ip_family, errors.ECODE_INVAL)
4129 if not ipcls.ValidateNetmask(netmask):
4130 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
4131 (netmask), errors.ECODE_INVAL)
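# Usage sketch for _ValidateNetmask (hypothetical values), assuming the
# netmask is given as a CIDR prefix length as is done for the master netmask:
#   _ValidateNetmask(self.cfg, 24)   # accepted on an IPv4 cluster
#   _ValidateNetmask(self.cfg, 99)   # raises errors.OpPrereqError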
4134 class LUClusterSetParams(LogicalUnit):
4135 """Change the parameters of the cluster.
4138 HPATH = "cluster-modify"
4139 HTYPE = constants.HTYPE_CLUSTER
4142 def CheckArguments(self):
4146 if self.op.uid_pool:
4147 uidpool.CheckUidPool(self.op.uid_pool)
4149 if self.op.add_uids:
4150 uidpool.CheckUidPool(self.op.add_uids)
4152 if self.op.remove_uids:
4153 uidpool.CheckUidPool(self.op.remove_uids)
4155 if self.op.master_netmask is not None:
4156 _ValidateNetmask(self.cfg, self.op.master_netmask)
4158 if self.op.diskparams:
4159 for dt_params in self.op.diskparams.values():
4160 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
4162 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
4163 except errors.OpPrereqError, err:
4164 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
4167 def ExpandNames(self):
4168 # FIXME: in the future maybe other cluster params won't require checking on
4169 # all nodes to be modified.
4170 # FIXME: This opcode changes cluster-wide settings. Is acquiring all
4171 # resource locks the right thing, shouldn't it be the BGL instead?
4172 self.needed_locks = {
4173 locking.LEVEL_NODE: locking.ALL_SET,
4174 locking.LEVEL_INSTANCE: locking.ALL_SET,
4175 locking.LEVEL_NODEGROUP: locking.ALL_SET,
4176 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4178 self.share_locks = _ShareAll()
4180 def BuildHooksEnv(self):
4185 "OP_TARGET": self.cfg.GetClusterName(),
4186 "NEW_VG_NAME": self.op.vg_name,
4189 def BuildHooksNodes(self):
4190 """Build hooks nodes.
4193 mn = self.cfg.GetMasterNode()
4196 def CheckPrereq(self):
4197 """Check prerequisites.
4199 This checks that the given parameters don't conflict and that
4200 the given volume group is valid.
4203 if self.op.vg_name is not None and not self.op.vg_name:
4204 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4205 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4206 " instances exist", errors.ECODE_INVAL)
4208 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4209 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4210 raise errors.OpPrereqError("Cannot disable drbd helper while"
4211 " drbd-based instances exist",
4214 node_list = self.owned_locks(locking.LEVEL_NODE)
4216 # if vg_name is not None, check the given volume group on all nodes
4218 vglist = self.rpc.call_vg_list(node_list)
4219 for node in node_list:
4220 msg = vglist[node].fail_msg
4222 # ignoring down node
4223 self.LogWarning("Error while gathering data on node %s"
4224 " (ignoring node): %s", node, msg)
4226 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4228 constants.MIN_VG_SIZE)
4230 raise errors.OpPrereqError("Error on node '%s': %s" %
4231 (node, vgstatus), errors.ECODE_ENVIRON)
4233 if self.op.drbd_helper:
4234 # check the given drbd helper on all nodes
4235 helpers = self.rpc.call_drbd_helper(node_list)
4236 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4238 self.LogInfo("Not checking drbd helper on offline node %s", node)
4240 msg = helpers[node].fail_msg
4242 raise errors.OpPrereqError("Error checking drbd helper on node"
4243 " '%s': %s" % (node, msg),
4244 errors.ECODE_ENVIRON)
4245 node_helper = helpers[node].payload
4246 if node_helper != self.op.drbd_helper:
4247 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4248 (node, node_helper), errors.ECODE_ENVIRON)
4250 self.cluster = cluster = self.cfg.GetClusterInfo()
4251 # validate params changes
4252 if self.op.beparams:
4253 objects.UpgradeBeParams(self.op.beparams)
4254 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4255 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4257 if self.op.ndparams:
4258 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4259 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4261 # TODO: we need a more general way to handle resetting
4262 # cluster-level parameters to default values
4263 if self.new_ndparams["oob_program"] == "":
4264 self.new_ndparams["oob_program"] = \
4265 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4267 if self.op.hv_state:
4268 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4269 self.cluster.hv_state_static)
4270 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4271 for hv, values in new_hv_state.items())
4273 if self.op.disk_state:
4274 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4275 self.cluster.disk_state_static)
4276 self.new_disk_state = \
4277 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4278 for name, values in svalues.items()))
4279 for storage, svalues in new_disk_state.items())
4282 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4285 all_instances = self.cfg.GetAllInstancesInfo().values()
4287 for group in self.cfg.GetAllNodeGroupsInfo().values():
4288 instances = frozenset([inst for inst in all_instances
4289 if compat.any(node in group.members
4290 for node in inst.all_nodes)])
4291 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4292 ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
4293 new = _ComputeNewInstanceViolations(ipol,
4294 new_ipolicy, instances, self.cfg)
4296 violations.update(new)
4299 self.LogWarning("After the ipolicy change the following instances"
4300 " violate them: %s",
4301 utils.CommaJoin(utils.NiceSort(violations)))
4303 if self.op.nicparams:
4304 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4305 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4306 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4309 # check all instances for consistency
4310 for instance in self.cfg.GetAllInstancesInfo().values():
4311 for nic_idx, nic in enumerate(instance.nics):
4312 params_copy = copy.deepcopy(nic.nicparams)
4313 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4315 # check parameter syntax
4317 objects.NIC.CheckParameterSyntax(params_filled)
4318 except errors.ConfigurationError, err:
4319 nic_errors.append("Instance %s, nic/%d: %s" %
4320 (instance.name, nic_idx, err))
4322 # if we're moving instances to routed, check that they have an ip
4323 target_mode = params_filled[constants.NIC_MODE]
4324 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4325 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4326 " address" % (instance.name, nic_idx))
4328 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4329 "\n".join(nic_errors), errors.ECODE_INVAL)
4331 # hypervisor list/parameters
4332 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4333 if self.op.hvparams:
4334 for hv_name, hv_dict in self.op.hvparams.items():
4335 if hv_name not in self.new_hvparams:
4336 self.new_hvparams[hv_name] = hv_dict
4338 self.new_hvparams[hv_name].update(hv_dict)
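# At this point self.new_hvparams holds the cluster-wide hypervisor
# parameters with the requested overrides merged in: new hypervisors are
# added wholesale, existing ones are updated key by key.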
4340 # disk template parameters
4341 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4342 if self.op.diskparams:
4343 for dt_name, dt_params in self.op.diskparams.items():
4344 if dt_name not in self.new_diskparams:
4345 self.new_diskparams[dt_name] = dt_params
4347 self.new_diskparams[dt_name].update(dt_params)
4349 # os hypervisor parameters
4350 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4352 for os_name, hvs in self.op.os_hvp.items():
4353 if os_name not in self.new_os_hvp:
4354 self.new_os_hvp[os_name] = hvs
4356 for hv_name, hv_dict in hvs.items():
4358 # Delete if it exists
4359 self.new_os_hvp[os_name].pop(hv_name, None)
4360 elif hv_name not in self.new_os_hvp[os_name]:
4361 self.new_os_hvp[os_name][hv_name] = hv_dict
4363 self.new_os_hvp[os_name][hv_name].update(hv_dict)
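# self.new_os_hvp now reflects the requested per-OS hypervisor parameter
# additions, updates and removals applied on top of the current cluster
# values.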
4366 self.new_osp = objects.FillDict(cluster.osparams, {})
4367 if self.op.osparams:
4368 for os_name, osp in self.op.osparams.items():
4369 if os_name not in self.new_osp:
4370 self.new_osp[os_name] = {}
4372 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4375 if not self.new_osp[os_name]:
4376 # we removed all parameters
4377 del self.new_osp[os_name]
4379 # check the parameter validity (remote check)
4380 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4381 os_name, self.new_osp[os_name])
4383 # changes to the hypervisor list
4384 if self.op.enabled_hypervisors is not None:
4385 self.hv_list = self.op.enabled_hypervisors
4386 for hv in self.hv_list:
4387 # if the hypervisor doesn't already exist in the cluster
4388 # hvparams, we initialize it to empty, and then (in both
4389 # cases) we make sure to fill the defaults, as we might not
4390 # have a complete defaults list if the hypervisor wasn't enabled before
4392 if hv not in new_hvp:
4394 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4395 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4397 self.hv_list = cluster.enabled_hypervisors
4399 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4400 # either the enabled list has changed, or the parameters have, validate
4401 for hv_name, hv_params in self.new_hvparams.items():
4402 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4403 (self.op.enabled_hypervisors and
4404 hv_name in self.op.enabled_hypervisors)):
4405 # either this is a new hypervisor, or its parameters have changed
4406 hv_class = hypervisor.GetHypervisorClass(hv_name)
4407 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4408 hv_class.CheckParameterSyntax(hv_params)
4409 _CheckHVParams(self, node_list, hv_name, hv_params)
4412 # no need to check any newly-enabled hypervisors, since the
4413 # defaults have already been checked in the above code-block
4414 for os_name, os_hvp in self.new_os_hvp.items():
4415 for hv_name, hv_params in os_hvp.items():
4416 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4417 # we need to fill in the new os_hvp on top of the actual hv_p
4418 cluster_defaults = self.new_hvparams.get(hv_name, {})
4419 new_osp = objects.FillDict(cluster_defaults, hv_params)
4420 hv_class = hypervisor.GetHypervisorClass(hv_name)
4421 hv_class.CheckParameterSyntax(new_osp)
4422 _CheckHVParams(self, node_list, hv_name, new_osp)
4424 if self.op.default_iallocator:
4425 alloc_script = utils.FindFile(self.op.default_iallocator,
4426 constants.IALLOCATOR_SEARCH_PATH,
4428 if alloc_script is None:
4429 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4430 " specified" % self.op.default_iallocator,
4433 def Exec(self, feedback_fn):
4434 """Change the parameters of the cluster.
4437 if self.op.vg_name is not None:
4438 new_volume = self.op.vg_name
4441 if new_volume != self.cfg.GetVGName():
4442 self.cfg.SetVGName(new_volume)
4444 feedback_fn("Cluster LVM configuration already in desired"
4445 " state, not changing")
4446 if self.op.drbd_helper is not None:
4447 new_helper = self.op.drbd_helper
4450 if new_helper != self.cfg.GetDRBDHelper():
4451 self.cfg.SetDRBDHelper(new_helper)
4453 feedback_fn("Cluster DRBD helper already in desired state,"
4455 if self.op.hvparams:
4456 self.cluster.hvparams = self.new_hvparams
4458 self.cluster.os_hvp = self.new_os_hvp
4459 if self.op.enabled_hypervisors is not None:
4460 self.cluster.hvparams = self.new_hvparams
4461 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4462 if self.op.beparams:
4463 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4464 if self.op.nicparams:
4465 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4467 self.cluster.ipolicy = self.new_ipolicy
4468 if self.op.osparams:
4469 self.cluster.osparams = self.new_osp
4470 if self.op.ndparams:
4471 self.cluster.ndparams = self.new_ndparams
4472 if self.op.diskparams:
4473 self.cluster.diskparams = self.new_diskparams
4474 if self.op.hv_state:
4475 self.cluster.hv_state_static = self.new_hv_state
4476 if self.op.disk_state:
4477 self.cluster.disk_state_static = self.new_disk_state
4479 if self.op.candidate_pool_size is not None:
4480 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4481 # we need to update the pool size here, otherwise the save will fail
4482 _AdjustCandidatePool(self, [])
4484 if self.op.maintain_node_health is not None:
4485 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4486 feedback_fn("Note: CONFD was disabled at build time, node health"
4487 " maintenance is not useful (still enabling it)")
4488 self.cluster.maintain_node_health = self.op.maintain_node_health
4490 if self.op.prealloc_wipe_disks is not None:
4491 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4493 if self.op.add_uids is not None:
4494 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4496 if self.op.remove_uids is not None:
4497 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4499 if self.op.uid_pool is not None:
4500 self.cluster.uid_pool = self.op.uid_pool
4502 if self.op.default_iallocator is not None:
4503 self.cluster.default_iallocator = self.op.default_iallocator
4505 if self.op.reserved_lvs is not None:
4506 self.cluster.reserved_lvs = self.op.reserved_lvs
4508 if self.op.use_external_mip_script is not None:
4509 self.cluster.use_external_mip_script = self.op.use_external_mip_script
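# helper_os() below applies DDM_ADD/DDM_REMOVE modifications to a
# cluster-level OS list attribute (e.g. hidden_os), reporting and ignoring
# values that are already present or missing.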
4511 def helper_os(aname, mods, desc):
4513 lst = getattr(self.cluster, aname)
4514 for key, val in mods:
4515 if key == constants.DDM_ADD:
4517 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4520 elif key == constants.DDM_REMOVE:
4524 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4526 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4528 if self.op.hidden_os:
4529 helper_os("hidden_os", self.op.hidden_os, "hidden")
4531 if self.op.blacklisted_os:
4532 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4534 if self.op.master_netdev:
4535 master_params = self.cfg.GetMasterNetworkParameters()
4536 ems = self.cfg.GetUseExternalMipScript()
4537 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4538 self.cluster.master_netdev)
4539 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4541 result.Raise("Could not disable the master ip")
4542 feedback_fn("Changing master_netdev from %s to %s" %
4543 (master_params.netdev, self.op.master_netdev))
4544 self.cluster.master_netdev = self.op.master_netdev
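# The master IP is only brought up again on the new netdev further below,
# after the updated configuration has been written.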
4546 if self.op.master_netmask:
4547 master_params = self.cfg.GetMasterNetworkParameters()
4548 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4549 result = self.rpc.call_node_change_master_netmask(master_params.name,
4550 master_params.netmask,
4551 self.op.master_netmask,
4553 master_params.netdev)
4555 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4558 self.cluster.master_netmask = self.op.master_netmask
4560 self.cfg.Update(self.cluster, feedback_fn)
4562 if self.op.master_netdev:
4563 master_params = self.cfg.GetMasterNetworkParameters()
4564 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4565 self.op.master_netdev)
4566 ems = self.cfg.GetUseExternalMipScript()
4567 result = self.rpc.call_node_activate_master_ip(master_params.name,
4570 self.LogWarning("Could not re-enable the master ip on"
4571 " the master, please restart manually: %s",
4575 def _UploadHelper(lu, nodes, fname):
4576 """Helper for uploading a file and showing warnings.
4579 if os.path.exists(fname):
4580 result = lu.rpc.call_upload_file(nodes, fname)
4581 for to_node, to_result in result.items():
4582 msg = to_result.fail_msg
4584 msg = ("Copy of file %s to node %s failed: %s" %
4585 (fname, to_node, msg))
4589 def _ComputeAncillaryFiles(cluster, redist):
4590 """Compute files external to Ganeti which need to be consistent.
4592 @type redist: boolean
4593 @param redist: Whether to include files which need to be redistributed
4596 # Compute files for all nodes
4598 pathutils.SSH_KNOWN_HOSTS_FILE,
4599 pathutils.CONFD_HMAC_KEY,
4600 pathutils.CLUSTER_DOMAIN_SECRET_FILE,
4601 pathutils.SPICE_CERT_FILE,
4602 pathutils.SPICE_CACERT_FILE,
4603 pathutils.RAPI_USERS_FILE,
4607 # we need to ship at least the RAPI certificate
4608 files_all.add(pathutils.RAPI_CERT_FILE)
4610 files_all.update(pathutils.ALL_CERT_FILES)
4611 files_all.update(ssconf.SimpleStore().GetFileList())
4613 if cluster.modify_etc_hosts:
4614 files_all.add(pathutils.ETC_HOSTS)
4616 if cluster.use_external_mip_script:
4617 files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
4619 # Files which are optional; these must:
4620 # - be present in one other category as well
4621 # - either exist or not exist on all nodes of that category (mc, vm all)
4623 pathutils.RAPI_USERS_FILE,
4626 # Files which should only be on master candidates
4630 files_mc.add(pathutils.CLUSTER_CONF_FILE)
4634 (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
4635 files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
4636 files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)
4638 # Files which should only be on VM-capable nodes
4641 for hv_name in cluster.enabled_hypervisors
4643 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[0])
4647 for hv_name in cluster.enabled_hypervisors
4649 hypervisor.GetHypervisorClass(hv_name).GetAncillaryFiles()[1])
4651 # Filenames in each category must be unique
4652 all_files_set = files_all | files_mc | files_vm
4653 assert (len(all_files_set) ==
4654 sum(map(len, [files_all, files_mc, files_vm]))), \
4655 "Found file listed in more than one file list"
4657 # Optional files must be present in one other category
4658 assert all_files_set.issuperset(files_opt), \
4659 "Optional file not in a different required list"
4661 # This one file should never ever be re-distributed via RPC
4662 assert not (redist and
4663 pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)
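# files_all, files_mc and files_vm are pairwise disjoint (checked above);
# files_opt only lists entries that also appear in one of those sets.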
4665 return (files_all, files_opt, files_mc, files_vm)
4668 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4669 """Distribute additional files which are part of the cluster configuration.
4671 ConfigWriter takes care of distributing the config and ssconf files, but
4672 there are more files which should be distributed to all nodes. This function
4673 makes sure those are copied.
4675 @param lu: calling logical unit
4676 @param additional_nodes: list of nodes not in the config to distribute to
4677 @type additional_vm: boolean
4678 @param additional_vm: whether the additional nodes are vm-capable or not
4681 # Gather target nodes
4682 cluster = lu.cfg.GetClusterInfo()
4683 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4685 online_nodes = lu.cfg.GetOnlineNodeList()
4686 online_set = frozenset(online_nodes)
4687 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4689 if additional_nodes is not None:
4690 online_nodes.extend(additional_nodes)
4692 vm_nodes.extend(additional_nodes)
4694 # Never distribute to master node
4695 for nodelist in [online_nodes, vm_nodes]:
4696 if master_info.name in nodelist:
4697 nodelist.remove(master_info.name)
4700 (files_all, _, files_mc, files_vm) = \
4701 _ComputeAncillaryFiles(cluster, True)
4703 # Never re-distribute configuration file from here
4704 assert not (pathutils.CLUSTER_CONF_FILE in files_all or
4705 pathutils.CLUSTER_CONF_FILE in files_vm)
4706 assert not files_mc, "Master candidates not handled in this function"
4709 (online_nodes, files_all),
4710 (vm_nodes, files_vm),
4714 for (node_list, files) in filemap:
4716 _UploadHelper(lu, node_list, fname)
4719 class LUClusterRedistConf(NoHooksLU):
4720 """Force the redistribution of cluster configuration.
4722 This is a very simple LU.
4727 def ExpandNames(self):
4728 self.needed_locks = {
4729 locking.LEVEL_NODE: locking.ALL_SET,
4730 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
4732 self.share_locks = _ShareAll()
4734 def Exec(self, feedback_fn):
4735 """Redistribute the configuration.
4738 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4739 _RedistributeAncillaryFiles(self)
4742 class LUClusterActivateMasterIp(NoHooksLU):
4743 """Activate the master IP on the master node.
4746 def Exec(self, feedback_fn):
4747 """Activate the master IP.
4750 master_params = self.cfg.GetMasterNetworkParameters()
4751 ems = self.cfg.GetUseExternalMipScript()
4752 result = self.rpc.call_node_activate_master_ip(master_params.name,
4754 result.Raise("Could not activate the master IP")
4757 class LUClusterDeactivateMasterIp(NoHooksLU):
4758 """Deactivate the master IP on the master node.
4761 def Exec(self, feedback_fn):
4762 """Deactivate the master IP.
4765 master_params = self.cfg.GetMasterNetworkParameters()
4766 ems = self.cfg.GetUseExternalMipScript()
4767 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4769 result.Raise("Could not deactivate the master IP")
4772 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4773 """Sleep and poll for an instance's disk to sync.
4776 if not instance.disks or disks is not None and not disks:
4779 disks = _ExpandCheckDisks(instance, disks)
4782 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
4784 node = instance.primary_node
4787 lu.cfg.SetDiskID(dev, node)
4789 # TODO: Convert to utils.Retry
4792 degr_retries = 10 # in seconds, as we sleep 1 second each time
4796 cumul_degraded = False
4797 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4798 msg = rstats.fail_msg
4800 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4803 raise errors.RemoteError("Can't contact node %s for mirror data,"
4804 " aborting." % node)
4807 rstats = rstats.payload
4809 for i, mstat in enumerate(rstats):
4811 lu.LogWarning("Can't compute data for node %s/%s",
4812 node, disks[i].iv_name)
4815 cumul_degraded = (cumul_degraded or
4816 (mstat.is_degraded and mstat.sync_percent is None))
4817 if mstat.sync_percent is not None:
4819 if mstat.estimated_time is not None:
4820 rem_time = ("%s remaining (estimated)" %
4821 utils.FormatSeconds(mstat.estimated_time))
4822 max_time = mstat.estimated_time
4824 rem_time = "no time estimate"
4825 lu.LogInfo("- device %s: %5.2f%% done, %s",
4826 disks[i].iv_name, mstat.sync_percent, rem_time)
4828 # if we're done but degraded, let's do a few small retries, to
4829 # make sure we see a stable and not transient situation; therefore
4830 # we force a restart of the loop
4831 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4832 logging.info("Degraded disks found, %d retries left", degr_retries)
4840 time.sleep(min(60, max_time))
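# Sleep for at most a minute between polls; max_time is the most recent
# sync-time estimate reported by one of the devices.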
4843 lu.LogInfo("Instance %s's disks are in sync", instance.name)
4845 return not cumul_degraded
4848 def _BlockdevFind(lu, node, dev, instance):
4849 """Wrapper around call_blockdev_find to annotate diskparams.
4851 @param lu: A reference to the lu object
4852 @param node: The node to call out
4853 @param dev: The device to find
4854 @param instance: The instance object the device belongs to
4855 @returns The result of the rpc call
4858 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4859 return lu.rpc.call_blockdev_find(node, disk)
4862 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4863 """Wrapper around L{_CheckDiskConsistencyInner}.
4866 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4867 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4871 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4873 """Check that mirrors are not degraded.
4875 @attention: The device has to be annotated already.
4877 The ldisk parameter, if True, will change the test from the
4878 is_degraded attribute (which represents overall non-ok status for
4879 the device(s)) to the ldisk (representing the local storage status).
4882 lu.cfg.SetDiskID(dev, node)
4886 if on_primary or dev.AssembleOnSecondary():
4887 rstats = lu.rpc.call_blockdev_find(node, dev)
4888 msg = rstats.fail_msg
4890 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4892 elif not rstats.payload:
4893 lu.LogWarning("Can't find disk on node %s", node)
4897 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4899 result = result and not rstats.payload.is_degraded
4902 for child in dev.children:
4903 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4909 class LUOobCommand(NoHooksLU):
4910 """Logical unit for OOB handling.
4914 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4916 def ExpandNames(self):
4917 """Gather locks we need.
4920 if self.op.node_names:
4921 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4922 lock_names = self.op.node_names
4924 lock_names = locking.ALL_SET
4926 self.needed_locks = {
4927 locking.LEVEL_NODE: lock_names,
4930 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
4932 if not self.op.node_names:
4933 # Acquire node allocation lock only if all nodes are affected
4934 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4936 def CheckPrereq(self):
4937 """Check prerequisites.
4940 - the node exists in the configuration
4943 Any errors are signaled by raising errors.OpPrereqError.
4947 self.master_node = self.cfg.GetMasterNode()
4949 assert self.op.power_delay >= 0.0
4951 if self.op.node_names:
4952 if (self.op.command in self._SKIP_MASTER and
4953 self.master_node in self.op.node_names):
4954 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4955 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4957 if master_oob_handler:
4958 additional_text = ("run '%s %s %s' if you want to operate on the"
4959 " master regardless") % (master_oob_handler,
4963 additional_text = "it does not support out-of-band operations"
4965 raise errors.OpPrereqError(("Operating on the master node %s is not"
4966 " allowed for %s; %s") %
4967 (self.master_node, self.op.command,
4968 additional_text), errors.ECODE_INVAL)
4970 self.op.node_names = self.cfg.GetNodeList()
4971 if self.op.command in self._SKIP_MASTER:
4972 self.op.node_names.remove(self.master_node)
4974 if self.op.command in self._SKIP_MASTER:
4975 assert self.master_node not in self.op.node_names
4977 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4979 raise errors.OpPrereqError("Node %s not found" % node_name,
4982 self.nodes.append(node)
4984 if (not self.op.ignore_status and
4985 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4986 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4987 " not marked offline") % node_name,
4990 def Exec(self, feedback_fn):
4991 """Execute OOB and return result if we expect any.
4994 master_node = self.master_node
4997 for idx, node in enumerate(utils.NiceSort(self.nodes,
4998 key=lambda node: node.name)):
4999 node_entry = [(constants.RS_NORMAL, node.name)]
5000 ret.append(node_entry)
5002 oob_program = _SupportsOob(self.cfg, node)
5005 node_entry.append((constants.RS_UNAVAIL, None))
5008 logging.info("Executing out-of-band command '%s' using '%s' on %s",
5009 self.op.command, oob_program, node.name)
5010 result = self.rpc.call_run_oob(master_node, oob_program,
5011 self.op.command, node.name,
5015 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
5016 node.name, result.fail_msg)
5017 node_entry.append((constants.RS_NODATA, None))
5020 self._CheckPayload(result)
5021 except errors.OpExecError, err:
5022 self.LogWarning("Payload returned by node '%s' is not valid: %s",
5024 node_entry.append((constants.RS_NODATA, None))
5026 if self.op.command == constants.OOB_HEALTH:
5027 # For health we should log important events
5028 for item, status in result.payload:
5029 if status in [constants.OOB_STATUS_WARNING,
5030 constants.OOB_STATUS_CRITICAL]:
5031 self.LogWarning("Item '%s' on node '%s' has status '%s'",
5032 item, node.name, status)
5034 if self.op.command == constants.OOB_POWER_ON:
5036 elif self.op.command == constants.OOB_POWER_OFF:
5037 node.powered = False
5038 elif self.op.command == constants.OOB_POWER_STATUS:
5039 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
5040 if powered != node.powered:
5041 logging.warning(("Recorded power state (%s) of node '%s' does not"
5042 " match actual power state (%s)"), node.powered,
5045 # For configuration-changing commands we should update the node
5046 if self.op.command in (constants.OOB_POWER_ON,
5047 constants.OOB_POWER_OFF):
5048 self.cfg.Update(node, feedback_fn)
5050 node_entry.append((constants.RS_NORMAL, result.payload))
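# When powering on several nodes, pause for the configured delay between
# consecutive nodes (no delay after the last one).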
5052 if (self.op.command == constants.OOB_POWER_ON and
5053 idx < len(self.nodes) - 1):
5054 time.sleep(self.op.power_delay)
5058 def _CheckPayload(self, result):
5059 """Checks if the payload is valid.
5061 @param result: RPC result
5062 @raises errors.OpExecError: If payload is not valid
5066 if self.op.command == constants.OOB_HEALTH:
5067 if not isinstance(result.payload, list):
5068 errs.append("command 'health' is expected to return a list but got %s" %
5069 type(result.payload))
5071 for item, status in result.payload:
5072 if status not in constants.OOB_STATUSES:
5073 errs.append("health item '%s' has invalid status '%s'" %
5076 if self.op.command == constants.OOB_POWER_STATUS:
5077 if not isinstance(result.payload, dict):
5078 errs.append("power-status is expected to return a dict but got %s" %
5079 type(result.payload))
5081 if self.op.command in [
5082 constants.OOB_POWER_ON,
5083 constants.OOB_POWER_OFF,
5084 constants.OOB_POWER_CYCLE,
5086 if result.payload is not None:
5087 errs.append("%s is expected to not return payload but got '%s'" %
5088 (self.op.command, result.payload))
5091 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
5092 utils.CommaJoin(errs))
5095 class _OsQuery(_QueryBase):
5096 FIELDS = query.OS_FIELDS
5098 def ExpandNames(self, lu):
5099 # Lock all nodes in shared mode
5100 # Temporary removal of locks, should be reverted later
5101 # TODO: reintroduce locks when they are lighter-weight
5102 lu.needed_locks = {}
5103 #self.share_locks[locking.LEVEL_NODE] = 1
5104 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5106 # The following variables interact with _QueryBase._GetNames
5108 self.wanted = self.names
5110 self.wanted = locking.ALL_SET
5112 self.do_locking = self.use_locking
5114 def DeclareLocks(self, lu, level):
5118 def _DiagnoseByOS(rlist):
5119 Remaps a per-node return list into a per-os per-node dictionary
5121 @param rlist: a map with node names as keys and OS objects as values
5124 @return: a dictionary with osnames as keys and as value another
5125 map, with nodes as keys and tuples of (path, status, diagnose,
5126 variants, parameters, api_versions) as values, eg::
5128 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
5129 (/srv/..., False, "invalid api")],
5130 "node2": [(/srv/..., True, "", [], [])]}
5135 # we build here the list of nodes that didn't fail the RPC (at RPC
5136 # level), so that nodes with a non-responding node daemon don't
5137 # make all OSes invalid
5138 good_nodes = [node_name for node_name in rlist
5139 if not rlist[node_name].fail_msg]
5140 for node_name, nr in rlist.items():
5141 if nr.fail_msg or not nr.payload:
5143 for (name, path, status, diagnose, variants,
5144 params, api_versions) in nr.payload:
5145 if name not in all_os:
5146 # build a list of nodes for this os containing empty lists
5147 # for each node in node_list
5149 for nname in good_nodes:
5150 all_os[name][nname] = []
5151 # convert params from [name, help] to (name, help)
5152 params = [tuple(v) for v in params]
5153 all_os[name][node_name].append((path, status, diagnose,
5154 variants, params, api_versions))
5157 def _GetQueryData(self, lu):
5158 """Computes the list of nodes and their attributes.
5161 # Locking is not used
5162 assert not (compat.any(lu.glm.is_owned(level)
5163 for level in locking.LEVELS
5164 if level != locking.LEVEL_CLUSTER) or
5165 self.do_locking or self.use_locking)
5167 valid_nodes = [node.name
5168 for node in lu.cfg.GetAllNodesInfo().values()
5169 if not node.offline and node.vm_capable]
5170 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
5171 cluster = lu.cfg.GetClusterInfo()
5175 for (os_name, os_data) in pol.items():
5176 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
5177 hidden=(os_name in cluster.hidden_os),
5178 blacklisted=(os_name in cluster.blacklisted_os))
5182 api_versions = set()
5184 for idx, osl in enumerate(os_data.values()):
5185 info.valid = bool(info.valid and osl and osl[0][1])
5189 (node_variants, node_params, node_api) = osl[0][3:6]
5192 variants.update(node_variants)
5193 parameters.update(node_params)
5194 api_versions.update(node_api)
5196 # Filter out inconsistent values
5197 variants.intersection_update(node_variants)
5198 parameters.intersection_update(node_params)
5199 api_versions.intersection_update(node_api)
5201 info.variants = list(variants)
5202 info.parameters = list(parameters)
5203 info.api_versions = list(api_versions)
5205 data[os_name] = info
5207 # Prepare data in requested order
5208 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5212 class LUOsDiagnose(NoHooksLU):
5213 """Logical unit for OS diagnose/query.
5219 def _BuildFilter(fields, names):
5220 """Builds a filter for querying OSes.
5223 name_filter = qlang.MakeSimpleFilter("name", names)
5225 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5226 # respective field is not requested
5227 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5228 for fname in ["hidden", "blacklisted"]
5229 if fname not in fields]
5230 if "valid" not in fields:
5231 status_filter.append([qlang.OP_TRUE, "valid"])
5234 status_filter.insert(0, qlang.OP_AND)
5236 status_filter = None
5238 if name_filter and status_filter:
5239 return [qlang.OP_AND, name_filter, status_filter]
5243 return status_filter
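# Illustrative result of _BuildFilter (assuming only the "name" field is
# requested for a single OS called "debian-9"), roughly:
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-9"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]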
5245 def CheckArguments(self):
5246 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5247 self.op.output_fields, False)
5249 def ExpandNames(self):
5250 self.oq.ExpandNames(self)
5252 def Exec(self, feedback_fn):
5253 return self.oq.OldStyleQuery(self)
5256 class _ExtStorageQuery(_QueryBase):
5257 FIELDS = query.EXTSTORAGE_FIELDS
5259 def ExpandNames(self, lu):
5260 # Lock all nodes in shared mode
5261 # Temporary removal of locks, should be reverted later
5262 # TODO: reintroduce locks when they are lighter-weight
5263 lu.needed_locks = {}
5264 #self.share_locks[locking.LEVEL_NODE] = 1
5265 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5267 # The following variables interact with _QueryBase._GetNames
5269 self.wanted = self.names
5271 self.wanted = locking.ALL_SET
5273 self.do_locking = self.use_locking
5275 def DeclareLocks(self, lu, level):
5279 def _DiagnoseByProvider(rlist):
5280 Remaps a per-node return list into a per-provider per-node dictionary
5282 @param rlist: a map with node names as keys and ExtStorage objects as values
5285 @return: a dictionary with extstorage providers as keys and as
5286 value another map, with nodes as keys and tuples of
5287 (path, status, diagnose, parameters) as values, eg::
5289 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5290 "node2": [(/srv/..., False, "missing file")]
5291 "node3": [(/srv/..., True, "", [])]
5296 # we build here the list of nodes that didn't fail the RPC (at RPC
5297 # level), so that nodes with a non-responding node daemon don't
5298 # make all providers invalid
5299 good_nodes = [node_name for node_name in rlist
5300 if not rlist[node_name].fail_msg]
5301 for node_name, nr in rlist.items():
5302 if nr.fail_msg or not nr.payload:
5304 for (name, path, status, diagnose, params) in nr.payload:
5305 if name not in all_es:
5306 # build a list of nodes for this provider containing empty lists
5307 # for each node in node_list
5309 for nname in good_nodes:
5310 all_es[name][nname] = []
5311 # convert params from [name, help] to (name, help)
5312 params = [tuple(v) for v in params]
5313 all_es[name][node_name].append((path, status, diagnose, params))
5316 def _GetQueryData(self, lu):
5317 """Computes the list of nodes and their attributes.
5320 # Locking is not used
5321 assert not (compat.any(lu.glm.is_owned(level)
5322 for level in locking.LEVELS
5323 if level != locking.LEVEL_CLUSTER) or
5324 self.do_locking or self.use_locking)
5326 valid_nodes = [node.name
5327 for node in lu.cfg.GetAllNodesInfo().values()
5328 if not node.offline and node.vm_capable]
5329 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5333 nodegroup_list = lu.cfg.GetNodeGroupList()
5335 for (es_name, es_data) in pol.items():
5336 # For every provider compute the nodegroup validity.
5337 # To do this we need to check the validity of each node in es_data
5338 # and then construct the corresponding nodegroup dict:
5339 # { nodegroup1: status
5340 # nodegroup2: status
5343 for nodegroup in nodegroup_list:
5344 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5346 nodegroup_nodes = ndgrp.members
5347 nodegroup_name = ndgrp.name
5350 for node in nodegroup_nodes:
5351 if node in valid_nodes:
5352 if es_data[node] != []:
5353 node_status = es_data[node][0][1]
5354 node_statuses.append(node_status)
5356 node_statuses.append(False)
5358 if False in node_statuses:
5359 ndgrp_data[nodegroup_name] = False
5361 ndgrp_data[nodegroup_name] = True
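# The node group is marked valid for this provider only if none of its
# members reported a failing or missing provider status.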
5363 # Compute the provider's parameters
5365 for idx, esl in enumerate(es_data.values()):
5366 valid = bool(esl and esl[0][1])
5370 node_params = esl[0][3]
5373 parameters.update(node_params)
5375 # Filter out inconsistent values
5376 parameters.intersection_update(node_params)
5378 params = list(parameters)
5380 # Now fill all the info for this provider
5381 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5382 nodegroup_status=ndgrp_data,
5385 data[es_name] = info
5387 # Prepare data in requested order
5388 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5392 class LUExtStorageDiagnose(NoHooksLU):
5393 """Logical unit for ExtStorage diagnose/query.
5398 def CheckArguments(self):
5399 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5400 self.op.output_fields, False)
5402 def ExpandNames(self):
5403 self.eq.ExpandNames(self)
5405 def Exec(self, feedback_fn):
5406 return self.eq.OldStyleQuery(self)
5409 class LUNodeRemove(LogicalUnit):
5410 """Logical unit for removing a node.
5413 HPATH = "node-remove"
5414 HTYPE = constants.HTYPE_NODE
5416 def BuildHooksEnv(self):
5421 "OP_TARGET": self.op.node_name,
5422 "NODE_NAME": self.op.node_name,
5425 def BuildHooksNodes(self):
5426 """Build hooks nodes.
5428 This doesn't run on the target node in the pre phase as a failed
5429 node would then be impossible to remove.
5432 all_nodes = self.cfg.GetNodeList()
5434 all_nodes.remove(self.op.node_name)
5437 return (all_nodes, all_nodes)
5439 def CheckPrereq(self):
5440 """Check prerequisites.
5443 - the node exists in the configuration
5444 - it does not have primary or secondary instances
5445 - it's not the master
5447 Any errors are signaled by raising errors.OpPrereqError.
5450 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5451 node = self.cfg.GetNodeInfo(self.op.node_name)
5452 assert node is not None
5454 masternode = self.cfg.GetMasterNode()
5455 if node.name == masternode:
5456 raise errors.OpPrereqError("Node is the master node, failover to another"
5457 " node is required", errors.ECODE_INVAL)
5459 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5460 if node.name in instance.all_nodes:
5461 raise errors.OpPrereqError("Instance %s is still running on the node,"
5462 " please remove first" % instance_name,
5464 self.op.node_name = node.name
5467 def Exec(self, feedback_fn):
5468 """Removes the node from the cluster.
5472 logging.info("Stopping the node daemon and removing configs from node %s",
5475 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5477 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5480 # Promote nodes to master candidate as needed
5481 _AdjustCandidatePool(self, exceptions=[node.name])
5482 self.context.RemoveNode(node.name)
5484 # Run post hooks on the node before it's removed
5485 _RunPostHook(self, node.name)
5487 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5488 msg = result.fail_msg
5490 self.LogWarning("Errors encountered on the remote node while leaving"
5491 " the cluster: %s", msg)
5493 # Remove node from our /etc/hosts
5494 if self.cfg.GetClusterInfo().modify_etc_hosts:
5495 master_node = self.cfg.GetMasterNode()
5496 result = self.rpc.call_etc_hosts_modify(master_node,
5497 constants.ETC_HOSTS_REMOVE,
5499 result.Raise("Can't update hosts file with new host data")
5500 _RedistributeAncillaryFiles(self)
5503 class _NodeQuery(_QueryBase):
5504 FIELDS = query.NODE_FIELDS
5506 def ExpandNames(self, lu):
5507 lu.needed_locks = {}
5508 lu.share_locks = _ShareAll()
5511 self.wanted = _GetWantedNodes(lu, self.names)
5513 self.wanted = locking.ALL_SET
5515 self.do_locking = (self.use_locking and
5516 query.NQ_LIVE in self.requested_data)
5519 # If any non-static field is requested we need to lock the nodes
5520 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5521 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
5523 def DeclareLocks(self, lu, level):
5526 def _GetQueryData(self, lu):
5527 """Computes the list of nodes and their attributes.
5530 all_info = lu.cfg.GetAllNodesInfo()
5532 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5534 # Gather data as requested
5535 if query.NQ_LIVE in self.requested_data:
5536 # filter out non-vm_capable nodes
5537 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5539 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
5540 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5541 [lu.cfg.GetHypervisorType()], es_flags)
5542 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
5543 for (name, nresult) in node_data.items()
5544 if not nresult.fail_msg and nresult.payload)
5548 if query.NQ_INST in self.requested_data:
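# Build reverse mappings from node name to the sets of instance names for
# which the node acts as primary respectively secondary node.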
5549 node_to_primary = dict([(name, set()) for name in nodenames])
5550 node_to_secondary = dict([(name, set()) for name in nodenames])
5552 inst_data = lu.cfg.GetAllInstancesInfo()
5554 for inst in inst_data.values():
5555 if inst.primary_node in node_to_primary:
5556 node_to_primary[inst.primary_node].add(inst.name)
5557 for secnode in inst.secondary_nodes:
5558 if secnode in node_to_secondary:
5559 node_to_secondary[secnode].add(inst.name)
5561 node_to_primary = None
5562 node_to_secondary = None
5564 if query.NQ_OOB in self.requested_data:
5565 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5566 for name, node in all_info.iteritems())
5570 if query.NQ_GROUP in self.requested_data:
5571 groups = lu.cfg.GetAllNodeGroupsInfo()
5575 return query.NodeQueryData([all_info[name] for name in nodenames],
5576 live_data, lu.cfg.GetMasterNode(),
5577 node_to_primary, node_to_secondary, groups,
5578 oob_support, lu.cfg.GetClusterInfo())
5581 class LUNodeQuery(NoHooksLU):
5582 """Logical unit for querying nodes.
5585 # pylint: disable=W0142
5588 def CheckArguments(self):
5589 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5590 self.op.output_fields, self.op.use_locking)
5592 def ExpandNames(self):
5593 self.nq.ExpandNames(self)
5595 def DeclareLocks(self, level):
5596 self.nq.DeclareLocks(self, level)
5598 def Exec(self, feedback_fn):
5599 return self.nq.OldStyleQuery(self)
5602 class LUNodeQueryvols(NoHooksLU):
5603 """Logical unit for getting volumes on node(s).
5607 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5608 _FIELDS_STATIC = utils.FieldSet("node")
5610 def CheckArguments(self):
5611 _CheckOutputFields(static=self._FIELDS_STATIC,
5612 dynamic=self._FIELDS_DYNAMIC,
5613 selected=self.op.output_fields)
5615 def ExpandNames(self):
5616 self.share_locks = _ShareAll()
5619 self.needed_locks = {
5620 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5623 self.needed_locks = {
5624 locking.LEVEL_NODE: locking.ALL_SET,
5625 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5628 def Exec(self, feedback_fn):
5629 """Computes the list of nodes and their attributes.
5632 nodenames = self.owned_locks(locking.LEVEL_NODE)
5633 volumes = self.rpc.call_node_volumes(nodenames)
5635 ilist = self.cfg.GetAllInstancesInfo()
5636 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5639 for node in nodenames:
5640 nresult = volumes[node]
5643 msg = nresult.fail_msg
5645 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5648 node_vols = sorted(nresult.payload,
5649 key=operator.itemgetter("dev"))
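# Each payload entry describes one volume as a dict (the code below relies
# on its "dev", "vg", "name" and "size" keys); one output row is built per
# volume, following the order of the requested output fields.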
5651 for vol in node_vols:
5653 for field in self.op.output_fields:
5656 elif field == "phys":
5660 elif field == "name":
5662 elif field == "size":
5663 val = int(float(vol["size"]))
5664 elif field == "instance":
5665 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5667 raise errors.ParameterError(field)
5668 node_output.append(str(val))
5670 output.append(node_output)
5675 class LUNodeQueryStorage(NoHooksLU):
5676 """Logical unit for getting information on storage units on node(s).
5679 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5682 def CheckArguments(self):
5683 _CheckOutputFields(static=self._FIELDS_STATIC,
5684 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5685 selected=self.op.output_fields)
5687 def ExpandNames(self):
5688 self.share_locks = _ShareAll()
5691 self.needed_locks = {
5692 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
5695 self.needed_locks = {
5696 locking.LEVEL_NODE: locking.ALL_SET,
5697 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
5700 def Exec(self, feedback_fn):
5701 """Computes the list of nodes and their attributes.
5704 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5706 # Always get name to sort by
5707 if constants.SF_NAME in self.op.output_fields:
5708 fields = self.op.output_fields[:]
5710 fields = [constants.SF_NAME] + self.op.output_fields
5712 # Never ask for node or type as it's only known to the LU
5713 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5714 while extra in fields:
5715 fields.remove(extra)
5717 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5718 name_idx = field_idx[constants.SF_NAME]
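# field_idx maps a storage field name to its column index in the rows
# returned by call_storage_list; name_idx is the column the rows are keyed
# and sorted by.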
5720 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5721 data = self.rpc.call_storage_list(self.nodes,
5722 self.op.storage_type, st_args,
5723 self.op.name, fields)
5727 for node in utils.NiceSort(self.nodes):
5728 nresult = data[node]
5732 msg = nresult.fail_msg
5734 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5737 rows = dict([(row[name_idx], row) for row in nresult.payload])
5739 for name in utils.NiceSort(rows.keys()):
5744 for field in self.op.output_fields:
5745 if field == constants.SF_NODE:
5747 elif field == constants.SF_TYPE:
5748 val = self.op.storage_type
5749 elif field in field_idx:
5750 val = row[field_idx[field]]
5752 raise errors.ParameterError(field)
5761 class _InstanceQuery(_QueryBase):
5762 FIELDS = query.INSTANCE_FIELDS
5764 def ExpandNames(self, lu):
5765 lu.needed_locks = {}
5766 lu.share_locks = _ShareAll()
5769 self.wanted = _GetWantedInstances(lu, self.names)
5771 self.wanted = locking.ALL_SET
5773 self.do_locking = (self.use_locking and
5774 query.IQ_LIVE in self.requested_data)
5776 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5777 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5778 lu.needed_locks[locking.LEVEL_NODE] = []
5779 lu.needed_locks[locking.LEVEL_NETWORK] = []
5780 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5782 self.do_grouplocks = (self.do_locking and
5783 query.IQ_NODES in self.requested_data)
5785 def DeclareLocks(self, lu, level):
5787 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5788 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5790 # Lock all groups used by instances optimistically; this requires going
5791 # via the node before it's locked, requiring verification later on
5792 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5794 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5795 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5796 elif level == locking.LEVEL_NODE:
5797 lu._LockInstancesNodes() # pylint: disable=W0212
5799 elif level == locking.LEVEL_NETWORK:
5800 lu.needed_locks[locking.LEVEL_NETWORK] = \
5802 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5803 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
5806 def _CheckGroupLocks(lu):
5807 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5808 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5810 # Check if node groups for locked instances are still correct
5811 for instance_name in owned_instances:
5812 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5814 def _GetQueryData(self, lu):
5815 """Computes the list of instances and their attributes.
5818 if self.do_grouplocks:
5819 self._CheckGroupLocks(lu)
5821 cluster = lu.cfg.GetClusterInfo()
5822 all_info = lu.cfg.GetAllInstancesInfo()
5824 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5826 instance_list = [all_info[name] for name in instance_names]
5827 nodes = frozenset(itertools.chain(*(inst.all_nodes
5828 for inst in instance_list)))
5829 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5832 wrongnode_inst = set()
5834 # Gather data as requested
5835 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5837 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5839 result = node_data[name]
5841 # offline nodes will be in both lists
5842 assert result.fail_msg
5843 offline_nodes.append(name)
5845 bad_nodes.append(name)
5846 elif result.payload:
5847 for inst in result.payload:
5848 if inst in all_info:
5849 if all_info[inst].primary_node == name:
5850 live_data.update(result.payload)
5852 wrongnode_inst.add(inst)
5854 # orphan instance; we don't list it here as we don't
5855 # handle this case yet in the output of instance listing
5856 logging.warning("Orphan instance '%s' found on node %s",
5858 # else no instance is alive
5862 if query.IQ_DISKUSAGE in self.requested_data:
5863 gmi = ganeti.masterd.instance
5864 disk_usage = dict((inst.name,
5865 gmi.ComputeDiskSize(inst.disk_template,
5866 [{constants.IDISK_SIZE: disk.size}
5867 for disk in inst.disks]))
5868 for inst in instance_list)
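# disk_usage: instance name -> estimated disk usage in MiB, as computed by
# ComputeDiskSize from the instance's disk template and disk sizes.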
5872 if query.IQ_CONSOLE in self.requested_data:
5874 for inst in instance_list:
5875 if inst.name in live_data:
5876 # Instance is running
5877 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5879 consinfo[inst.name] = None
5880 assert set(consinfo.keys()) == set(instance_names)
5884 if query.IQ_NODES in self.requested_data:
5885 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5887 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5888 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5889 for uuid in set(map(operator.attrgetter("group"),
5895 if query.IQ_NETWORKS in self.requested_data:
5896 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
5897 for i in instance_list))
5898 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
5902 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5903 disk_usage, offline_nodes, bad_nodes,
5904 live_data, wrongnode_inst, consinfo,
5905 nodes, groups, networks)
5908 class LUQuery(NoHooksLU):
5909 """Query for resources/items of a certain kind.
5912 # pylint: disable=W0142
5915 def CheckArguments(self):
5916 qcls = _GetQueryImplementation(self.op.what)
5918 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5920 def ExpandNames(self):
5921 self.impl.ExpandNames(self)
5923 def DeclareLocks(self, level):
5924 self.impl.DeclareLocks(self, level)
5926 def Exec(self, feedback_fn):
5927 return self.impl.NewStyleQuery(self)
5930 class LUQueryFields(NoHooksLU):
5931 """Query for resources/items of a certain kind.
5934 # pylint: disable=W0142
5937 def CheckArguments(self):
5938 self.qcls = _GetQueryImplementation(self.op.what)
5940 def ExpandNames(self):
5941 self.needed_locks = {}
5943 def Exec(self, feedback_fn):
5944 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
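# Both LUQuery and LUQueryFields are thin wrappers: _GetQueryImplementation
# selects the per-resource query class (for example _NodeQuery, _InstanceQuery
# or _ClusterQuery in this module) and the LU merely delegates to it.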
5947 class LUNodeModifyStorage(NoHooksLU):
5948 """Logical unit for modifying a storage volume on a node.
5953 def CheckArguments(self):
5954 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5956 storage_type = self.op.storage_type
5959 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5961 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5962 " modified" % storage_type,
5965 diff = set(self.op.changes.keys()) - modifiable
5967 raise errors.OpPrereqError("The following fields can not be modified for"
5968 " storage units of type '%s': %r" %
5969 (storage_type, list(diff)),
5972 def ExpandNames(self):
5973 self.needed_locks = {
5974 locking.LEVEL_NODE: self.op.node_name,
5977 def Exec(self, feedback_fn):
5978 """Computes the list of nodes and their attributes.
5981 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5982 result = self.rpc.call_storage_modify(self.op.node_name,
5983 self.op.storage_type, st_args,
5984 self.op.name, self.op.changes)
5985 result.Raise("Failed to modify storage unit '%s' on %s" %
5986 (self.op.name, self.op.node_name))
5989 class LUNodeAdd(LogicalUnit):
5990 """Logical unit for adding node to the cluster.
5994 HTYPE = constants.HTYPE_NODE
5995 _NFLAGS = ["master_capable", "vm_capable"]
5997 def CheckArguments(self):
5998 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5999 # validate/normalize the node name
6000 self.hostname = netutils.GetHostname(name=self.op.node_name,
6001 family=self.primary_ip_family)
6002 self.op.node_name = self.hostname.name
6004 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
6005 raise errors.OpPrereqError("Cannot readd the master node",
6008 if self.op.readd and self.op.group:
6009 raise errors.OpPrereqError("Cannot pass a node group when a node is"
6010 " being readded", errors.ECODE_INVAL)
6012 def BuildHooksEnv(self):
6015 This will run on all nodes before, and on all nodes + the new node after.
6019 "OP_TARGET": self.op.node_name,
6020 "NODE_NAME": self.op.node_name,
6021 "NODE_PIP": self.op.primary_ip,
6022 "NODE_SIP": self.op.secondary_ip,
6023 "MASTER_CAPABLE": str(self.op.master_capable),
6024 "VM_CAPABLE": str(self.op.vm_capable),
6027 def BuildHooksNodes(self):
6028 """Build hooks nodes.
6031 # Exclude added node
6032 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
6033 post_nodes = pre_nodes + [self.op.node_name, ]
6035 return (pre_nodes, post_nodes)
6037 def CheckPrereq(self):
6038 """Check prerequisites.
6041 - the new node is not already in the config
6043 - its parameters (single/dual homed) match the cluster
6045 Any errors are signaled by raising errors.OpPrereqError.
6049 hostname = self.hostname
6050 node = hostname.name
6051 primary_ip = self.op.primary_ip = hostname.ip
6052 if self.op.secondary_ip is None:
6053 if self.primary_ip_family == netutils.IP6Address.family:
6054 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
6055 " IPv4 address must be given as secondary",
6057 self.op.secondary_ip = primary_ip
6059 secondary_ip = self.op.secondary_ip
6060 if not netutils.IP4Address.IsValid(secondary_ip):
6061 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6062 " address" % secondary_ip, errors.ECODE_INVAL)
6064 node_list = cfg.GetNodeList()
6065 if not self.op.readd and node in node_list:
6066 raise errors.OpPrereqError("Node %s is already in the configuration" %
6067 node, errors.ECODE_EXISTS)
6068 elif self.op.readd and node not in node_list:
6069 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
6072 self.changed_primary_ip = False
6074 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
6075 if self.op.readd and node == existing_node_name:
6076 if existing_node.secondary_ip != secondary_ip:
6077 raise errors.OpPrereqError("Readded node doesn't have the same IP"
6078 " address configuration as before",
6080 if existing_node.primary_ip != primary_ip:
6081 self.changed_primary_ip = True
6085 if (existing_node.primary_ip == primary_ip or
6086 existing_node.secondary_ip == primary_ip or
6087 existing_node.primary_ip == secondary_ip or
6088 existing_node.secondary_ip == secondary_ip):
6089 raise errors.OpPrereqError("New node ip address(es) conflict with"
6090 " existing node %s" % existing_node.name,
6091 errors.ECODE_NOTUNIQUE)
6093 # After this 'if' block, None is no longer a valid value for the
6094 # _capable op attributes
6096 old_node = self.cfg.GetNodeInfo(node)
6097 assert old_node is not None, "Can't retrieve locked node %s" % node
6098 for attr in self._NFLAGS:
6099 if getattr(self.op, attr) is None:
6100 setattr(self.op, attr, getattr(old_node, attr))
6102 for attr in self._NFLAGS:
6103 if getattr(self.op, attr) is None:
6104 setattr(self.op, attr, True)
6106 if self.op.readd and not self.op.vm_capable:
6107 pri, sec = cfg.GetNodeInstances(node)
6109 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
6110 " flag set to false, but it already holds"
6111 " instances" % node,
6114 # check that the type of the node (single versus dual homed) is the
6115 # same as for the master
6116 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
6117 master_singlehomed = myself.secondary_ip == myself.primary_ip
6118 newbie_singlehomed = secondary_ip == primary_ip
6119 if master_singlehomed != newbie_singlehomed:
6120 if master_singlehomed:
6121 raise errors.OpPrereqError("The master has no secondary ip but the"
6122 " new node has one",
6125 raise errors.OpPrereqError("The master has a secondary ip but the"
6126 " new node doesn't have one",
6129 # checks reachability
6130 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
6131 raise errors.OpPrereqError("Node not reachable by ping",
6132 errors.ECODE_ENVIRON)
6134 if not newbie_singlehomed:
6135 # check reachability from my secondary ip to newbie's secondary ip
6136 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
6137 source=myself.secondary_ip):
6138 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6139 " based ping to node daemon port",
6140 errors.ECODE_ENVIRON)
6147 if self.op.master_capable:
6148 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
6150 self.master_candidate = False
6153 self.new_node = old_node
6155 node_group = cfg.LookupNodeGroup(self.op.group)
6156 self.new_node = objects.Node(name=node,
6157 primary_ip=primary_ip,
6158 secondary_ip=secondary_ip,
6159 master_candidate=self.master_candidate,
6160 offline=False, drained=False,
6161 group=node_group, ndparams={})
6163 if self.op.ndparams:
6164 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
6165 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6166 "node", "cluster or group")
6168 if self.op.hv_state:
6169 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
6171 if self.op.disk_state:
6172 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
6174 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
6175 # it a property on the base class.
6176 rpcrunner = rpc.DnsOnlyRunner()
6177 result = rpcrunner.call_version([node])[node]
6178 result.Raise("Can't get version information from node %s" % node)
6179 if constants.PROTOCOL_VERSION == result.payload:
6180 logging.info("Communication to node %s fine, sw version %s match",
6181 node, result.payload)
6183 raise errors.OpPrereqError("Version mismatch master version %s,"
6184 " node version %s" %
6185 (constants.PROTOCOL_VERSION, result.payload),
6186 errors.ECODE_ENVIRON)
6188 vg_name = cfg.GetVGName()
6189 if vg_name is not None:
6190 vparams = {constants.NV_PVLIST: [vg_name]}
6191 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
6192 cname = self.cfg.GetClusterName()
6193 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
6194 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
6196 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
6197 "; ".join(errmsgs), errors.ECODE_ENVIRON)
6199 def Exec(self, feedback_fn):
6200 """Adds the new node to the cluster.
6203 new_node = self.new_node
6204 node = new_node.name
6206 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
6209 # We are adding a new node, so we assume it is powered
6210 new_node.powered = True
6212 # for re-adds, reset the offline/drained/master-candidate flags;
6213 # we need to reset here, otherwise offline would prevent RPC calls
6214 # later in the procedure; this also means that if the re-add
6215 # fails, we are left with a non-offlined, broken node
6217 new_node.drained = new_node.offline = False # pylint: disable=W0201
6218 self.LogInfo("Readding a node, the offline/drained flags were reset")
6219 # if we demote the node, we do cleanup later in the procedure
6220 new_node.master_candidate = self.master_candidate
6221 if self.changed_primary_ip:
6222 new_node.primary_ip = self.op.primary_ip
6224 # copy the master/vm_capable flags
6225 for attr in self._NFLAGS:
6226 setattr(new_node, attr, getattr(self.op, attr))
6228 # notify the user about any possible mc promotion
6229 if new_node.master_candidate:
6230 self.LogInfo("Node will be a master candidate")
6232 if self.op.ndparams:
6233 new_node.ndparams = self.op.ndparams
6235 new_node.ndparams = {}
6237 if self.op.hv_state:
6238 new_node.hv_state_static = self.new_hv_state
6240 if self.op.disk_state:
6241 new_node.disk_state_static = self.new_disk_state
6243 # Add node to our /etc/hosts, and add key to known_hosts
6244 if self.cfg.GetClusterInfo().modify_etc_hosts:
6245 master_node = self.cfg.GetMasterNode()
6246 result = self.rpc.call_etc_hosts_modify(master_node,
6247 constants.ETC_HOSTS_ADD,
6250 result.Raise("Can't update hosts file with new host data")
6252 if new_node.secondary_ip != new_node.primary_ip:
6253 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6256 node_verify_list = [self.cfg.GetMasterNode()]
6257 node_verify_param = {
6258 constants.NV_NODELIST: ([node], {}),
6259 # TODO: do a node-net-test as well?
6262 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6263 self.cfg.GetClusterName())
6264 for verifier in node_verify_list:
6265 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6266 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6268 for failed in nl_payload:
6269 feedback_fn("ssh/hostname verification failed"
6270 " (checking from %s): %s" %
6271 (verifier, nl_payload[failed]))
6272 raise errors.OpExecError("ssh/hostname verification failed")
6275 _RedistributeAncillaryFiles(self)
6276 self.context.ReaddNode(new_node)
6277 # make sure we redistribute the config
6278 self.cfg.Update(new_node, feedback_fn)
6279 # and make sure the new node will not have old files around
6280 if not new_node.master_candidate:
6281 result = self.rpc.call_node_demote_from_mc(new_node.name)
6282 msg = result.fail_msg
6284 self.LogWarning("Node failed to demote itself from master"
6285 " candidate status: %s" % msg)
6287 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6288 additional_vm=self.op.vm_capable)
6289 self.context.AddNode(new_node, self.proc.GetECId())
6292 class LUNodeSetParams(LogicalUnit):
6293 """Modifies the parameters of a node.
6295 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6296 to the node role (as _ROLE_*)
6297 @cvar _R2F: a dictionary from node role to tuples of flags
6298 @cvar _FLAGS: a list of attribute names corresponding to the flags
6301 HPATH = "node-modify"
6302 HTYPE = constants.HTYPE_NODE
6304 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6306 (True, False, False): _ROLE_CANDIDATE,
6307 (False, True, False): _ROLE_DRAINED,
6308 (False, False, True): _ROLE_OFFLINE,
6309 (False, False, False): _ROLE_REGULAR,
6311 _R2F = dict((v, k) for k, v in _F2R.items())
6312 _FLAGS = ["master_candidate", "drained", "offline"]
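# Example: a node with (master_candidate=True, drained=False, offline=False)
# maps to _ROLE_CANDIDATE via _F2R; _R2F gives the reverse mapping used in
# Exec when applying a new role back to the flag attributes.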
6314 def CheckArguments(self):
6315 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6316 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6317 self.op.master_capable, self.op.vm_capable,
6318 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6320 if all_mods.count(None) == len(all_mods):
6321 raise errors.OpPrereqError("Please pass at least one modification",
6323 if all_mods.count(True) > 1:
6324 raise errors.OpPrereqError("Can't set the node into more than one"
6325 " state at the same time",
6328 # Boolean value that tells us whether we might be demoting from MC
6329 self.might_demote = (self.op.master_candidate is False or
6330 self.op.offline is True or
6331 self.op.drained is True or
6332 self.op.master_capable is False)
6334 if self.op.secondary_ip:
6335 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6336 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6337 " address" % self.op.secondary_ip,
6340 self.lock_all = self.op.auto_promote and self.might_demote
6341 self.lock_instances = self.op.secondary_ip is not None
6343 def _InstanceFilter(self, instance):
6344 """Filter for getting affected instances.
6347 return (instance.disk_template in constants.DTS_INT_MIRROR and
6348 self.op.node_name in instance.all_nodes)
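# Only instances using an internally mirrored disk template (i.e. DRBD) and
# having this node among their nodes are selected; these are the instances
# that would be impacted by changing the node's secondary IP.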
6350 def ExpandNames(self):
6352 self.needed_locks = {
6353 locking.LEVEL_NODE: locking.ALL_SET,
6355 # Block allocations when all nodes are locked
6356 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
6359 self.needed_locks = {
6360 locking.LEVEL_NODE: self.op.node_name,
6363 # Since modifying a node can have severe effects on currently running
6364 # operations, the resource lock is at least acquired in shared mode
6365 self.needed_locks[locking.LEVEL_NODE_RES] = \
6366 self.needed_locks[locking.LEVEL_NODE]
6368 # Acquire all locks in shared mode, except the node locks; the shared
6369 # locks are not used for anything but read-only access
6370 self.share_locks = _ShareAll()
6371 self.share_locks[locking.LEVEL_NODE] = 0
6372 self.share_locks[locking.LEVEL_NODE_RES] = 0
6373 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
6375 if self.lock_instances:
6376 self.needed_locks[locking.LEVEL_INSTANCE] = \
6377 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6379 def BuildHooksEnv(self):
6382 This runs on the master node.
6386 "OP_TARGET": self.op.node_name,
6387 "MASTER_CANDIDATE": str(self.op.master_candidate),
6388 "OFFLINE": str(self.op.offline),
6389 "DRAINED": str(self.op.drained),
6390 "MASTER_CAPABLE": str(self.op.master_capable),
6391 "VM_CAPABLE": str(self.op.vm_capable),
6394 def BuildHooksNodes(self):
6395 """Build hooks nodes.
6398 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6401 def CheckPrereq(self):
6402 """Check prerequisites.
6404 This checks the requested node changes against the node's current role and the cluster state.
6407 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6409 if self.lock_instances:
6410 affected_instances = \
6411 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6413 # Verify instance locks
6414 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6415 wanted_instances = frozenset(affected_instances.keys())
6416 if wanted_instances - owned_instances:
6417 raise errors.OpPrereqError("Instances affected by changing node %s's"
6418 " secondary IP address have changed since"
6419 " locks were acquired, wanted '%s', have"
6420 " '%s'; retry the operation" %
6422 utils.CommaJoin(wanted_instances),
6423 utils.CommaJoin(owned_instances)),
6426 affected_instances = None
6428 if (self.op.master_candidate is not None or
6429 self.op.drained is not None or
6430 self.op.offline is not None):
6431 # we can't change the master's node flags
6432 if self.op.node_name == self.cfg.GetMasterNode():
6433 raise errors.OpPrereqError("The master role can be changed"
6434 " only via master-failover",
6437 if self.op.master_candidate and not node.master_capable:
6438 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6439 " it a master candidate" % node.name,
6442 if self.op.vm_capable is False:
6443 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6445 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6446 " the vm_capable flag" % node.name,
6449 if node.master_candidate and self.might_demote and not self.lock_all:
6450 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6451 # check if after removing the current node, we're missing master candidates
6453 (mc_remaining, mc_should, _) = \
6454 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6455 if mc_remaining < mc_should:
6456 raise errors.OpPrereqError("Not enough master candidates, please"
6457 " pass auto promote option to allow"
6458 " promotion (--auto-promote or RAPI"
6459 " auto_promote=True)", errors.ECODE_STATE)
6461 self.old_flags = old_flags = (node.master_candidate,
6462 node.drained, node.offline)
6463 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6464 self.old_role = old_role = self._F2R[old_flags]
6466 # Check for ineffective changes
6467 for attr in self._FLAGS:
6468 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
6469 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6470 setattr(self.op, attr, None)
6472 # Past this point, any flag change to False means a transition
6473 # away from the respective state, as only real changes are kept
6475 # TODO: We might query the real power state if it supports OOB
6476 if _SupportsOob(self.cfg, node):
6477 if self.op.offline is False and not (node.powered or
6478 self.op.powered is True):
6479 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6480 " offline status can be reset") %
6481 self.op.node_name, errors.ECODE_STATE)
6482 elif self.op.powered is not None:
6483 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6484 " as it does not support out-of-band"
6485 " handling") % self.op.node_name,
6488 # If the node is being de-offlined, un-drained or made master-capable, we may promote it to master candidate
6489 if (self.op.drained is False or self.op.offline is False or
6490 (self.op.master_capable and not node.master_capable)):
6491 if _DecideSelfPromotion(self):
6492 self.op.master_candidate = True
6493 self.LogInfo("Auto-promoting node to master candidate")
6495 # If we're no longer master capable, we'll demote ourselves from MC
6496 if self.op.master_capable is False and node.master_candidate:
6497 self.LogInfo("Demoting from master candidate")
6498 self.op.master_candidate = False
6501 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6502 if self.op.master_candidate:
6503 new_role = self._ROLE_CANDIDATE
6504 elif self.op.drained:
6505 new_role = self._ROLE_DRAINED
6506 elif self.op.offline:
6507 new_role = self._ROLE_OFFLINE
6508 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6509 # False is still among the new flags, which means we're un-setting one of the current flags
6511 new_role = self._ROLE_REGULAR
6512 else: # no new flags, nothing, keep old role
6515 self.new_role = new_role
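# Example: passing offline=True selects _ROLE_OFFLINE; passing offline=False
# (an explicit un-set) falls through to _ROLE_REGULAR unless another flag is
# set; with no flag arguments at all the old role is kept.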
6517 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6518 # Trying to transition out of offline status
6519 result = self.rpc.call_version([node.name])[node.name]
6521 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6522 " to report its version: %s" %
6523 (node.name, result.fail_msg),
6526 self.LogWarning("Transitioning node from offline to online state"
6527 " without using re-add. Please make sure the node"
6530 # When changing the secondary ip, verify if this is a single-homed to
6531 # multi-homed transition or vice versa, and apply the relevant checks
6533 if self.op.secondary_ip:
6534 # Ok even without locking, because this can't be changed by any LU
6535 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6536 master_singlehomed = master.secondary_ip == master.primary_ip
6537 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
6538 if self.op.force and node.name == master.name:
6539 self.LogWarning("Transitioning from single-homed to multi-homed"
6540 " cluster; all nodes will require a secondary IP"
6543 raise errors.OpPrereqError("Changing the secondary ip on a"
6544 " single-homed cluster requires the"
6545 " --force option to be passed, and the"
6546 " target node to be the master",
6548 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
6549 if self.op.force and node.name == master.name:
6550 self.LogWarning("Transitioning from multi-homed to single-homed"
6551 " cluster; secondary IP addresses will have to be"
6554 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
6555 " same as the primary IP on a multi-homed"
6556 " cluster, unless the --force option is"
6557 " passed, and the target node is the"
6558 " master", errors.ECODE_INVAL)
6560 assert not (frozenset(affected_instances) -
6561 self.owned_locks(locking.LEVEL_INSTANCE))
6564 if affected_instances:
6565 msg = ("Cannot change secondary IP address: offline node has"
6566 " instances (%s) configured to use it" %
6567 utils.CommaJoin(affected_instances.keys()))
6568 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
6570 # On online nodes, check that no instances are running, and that
6571 # the node has the new ip and we can reach it.
6572 for instance in affected_instances.values():
6573 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6574 msg="cannot change secondary ip")
6576 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6577 if master.name != node.name:
6578 # check reachability from master secondary ip to new secondary ip
6579 if not netutils.TcpPing(self.op.secondary_ip,
6580 constants.DEFAULT_NODED_PORT,
6581 source=master.secondary_ip):
6582 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6583 " based ping to node daemon port",
6584 errors.ECODE_ENVIRON)
6586 if self.op.ndparams:
6587 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6588 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6589 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
6590 "node", "cluster or group")
6591 self.new_ndparams = new_ndparams
6593 if self.op.hv_state:
6594 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6595 self.node.hv_state_static)
6597 if self.op.disk_state:
6598 self.new_disk_state = \
6599 _MergeAndVerifyDiskState(self.op.disk_state,
6600 self.node.disk_state_static)
6602 def Exec(self, feedback_fn):
6607 old_role = self.old_role
6608 new_role = self.new_role
6612 if self.op.ndparams:
6613 node.ndparams = self.new_ndparams
6615 if self.op.powered is not None:
6616 node.powered = self.op.powered
6618 if self.op.hv_state:
6619 node.hv_state_static = self.new_hv_state
6621 if self.op.disk_state:
6622 node.disk_state_static = self.new_disk_state
6624 for attr in ["master_capable", "vm_capable"]:
6625 val = getattr(self.op, attr)
6627 setattr(node, attr, val)
6628 result.append((attr, str(val)))
6630 if new_role != old_role:
6631 # Tell the node to demote itself, if no longer MC and not offline
6632 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6633 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6635 self.LogWarning("Node failed to demote itself: %s", msg)
6637 new_flags = self._R2F[new_role]
6638 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6640 result.append((desc, str(nf)))
6641 (node.master_candidate, node.drained, node.offline) = new_flags
6643 # we locked all nodes, so we adjust the candidate pool before updating this node
6645 _AdjustCandidatePool(self, [node.name])
6647 if self.op.secondary_ip:
6648 node.secondary_ip = self.op.secondary_ip
6649 result.append(("secondary_ip", self.op.secondary_ip))
6651 # this will trigger configuration file update, if needed
6652 self.cfg.Update(node, feedback_fn)
6654 # this will trigger job queue propagation or cleanup if the master-candidate flag changed
6656 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6657 self.context.ReaddNode(node)
6662 class LUNodePowercycle(NoHooksLU):
6663 """Powercycles a node.
6668 def CheckArguments(self):
6669 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6670 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6671 raise errors.OpPrereqError("The node is the master and the force"
6672 " parameter was not set",
6675 def ExpandNames(self):
6676 """Locking for PowercycleNode.
6678 This is a last-resort option and shouldn't block on other
6679 jobs. Therefore, we grab no locks.
6682 self.needed_locks = {}
6684 def Exec(self, feedback_fn):
6688 result = self.rpc.call_node_powercycle(self.op.node_name,
6689 self.cfg.GetHypervisorType())
6690 result.Raise("Failed to schedule the reboot")
6691 return result.payload
6694 class LUClusterQuery(NoHooksLU):
6695 """Query cluster configuration.
6700 def ExpandNames(self):
6701 self.needed_locks = {}
6703 def Exec(self, feedback_fn):
6704 """Return cluster config.
6707 cluster = self.cfg.GetClusterInfo()
6710 # Filter just for enabled hypervisors
6711 for os_name, hv_dict in cluster.os_hvp.items():
6712 os_hvp[os_name] = {}
6713 for hv_name, hv_params in hv_dict.items():
6714 if hv_name in cluster.enabled_hypervisors:
6715 os_hvp[os_name][hv_name] = hv_params
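# os_hvp: OS name -> hypervisor name -> parameters, restricted to the
# cluster's enabled hypervisors.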
6717 # Convert ip_family to ip_version
6718 primary_ip_version = constants.IP4_VERSION
6719 if cluster.primary_ip_family == netutils.IP6Address.family:
6720 primary_ip_version = constants.IP6_VERSION
6723 "software_version": constants.RELEASE_VERSION,
6724 "protocol_version": constants.PROTOCOL_VERSION,
6725 "config_version": constants.CONFIG_VERSION,
6726 "os_api_version": max(constants.OS_API_VERSIONS),
6727 "export_version": constants.EXPORT_VERSION,
6728 "architecture": runtime.GetArchInfo(),
6729 "name": cluster.cluster_name,
6730 "master": cluster.master_node,
6731 "default_hypervisor": cluster.primary_hypervisor,
6732 "enabled_hypervisors": cluster.enabled_hypervisors,
6733 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6734 for hypervisor_name in cluster.enabled_hypervisors]),
6736 "beparams": cluster.beparams,
6737 "osparams": cluster.osparams,
6738 "ipolicy": cluster.ipolicy,
6739 "nicparams": cluster.nicparams,
6740 "ndparams": cluster.ndparams,
6741 "diskparams": cluster.diskparams,
6742 "candidate_pool_size": cluster.candidate_pool_size,
6743 "master_netdev": cluster.master_netdev,
6744 "master_netmask": cluster.master_netmask,
6745 "use_external_mip_script": cluster.use_external_mip_script,
6746 "volume_group_name": cluster.volume_group_name,
6747 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6748 "file_storage_dir": cluster.file_storage_dir,
6749 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6750 "maintain_node_health": cluster.maintain_node_health,
6751 "ctime": cluster.ctime,
6752 "mtime": cluster.mtime,
6753 "uuid": cluster.uuid,
6754 "tags": list(cluster.GetTags()),
6755 "uid_pool": cluster.uid_pool,
6756 "default_iallocator": cluster.default_iallocator,
6757 "reserved_lvs": cluster.reserved_lvs,
6758 "primary_ip_version": primary_ip_version,
6759 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6760 "hidden_os": cluster.hidden_os,
6761 "blacklisted_os": cluster.blacklisted_os,
6767 class LUClusterConfigQuery(NoHooksLU):
6768 """Return configuration values.
6773 def CheckArguments(self):
6774 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6776 def ExpandNames(self):
6777 self.cq.ExpandNames(self)
6779 def DeclareLocks(self, level):
6780 self.cq.DeclareLocks(self, level)
6782 def Exec(self, feedback_fn):
6783 result = self.cq.OldStyleQuery(self)
6785 assert len(result) == 1
6790 class _ClusterQuery(_QueryBase):
6791 FIELDS = query.CLUSTER_FIELDS
6793 #: Do not sort (there is only one item)
6796 def ExpandNames(self, lu):
6797 lu.needed_locks = {}
6799 # The following variables interact with _QueryBase._GetNames
6800 self.wanted = locking.ALL_SET
6801 self.do_locking = self.use_locking
6804 raise errors.OpPrereqError("Can not use locking for cluster queries",
6807 def DeclareLocks(self, lu, level):
6810 def _GetQueryData(self, lu):
6811 """Computes the list of nodes and their attributes.
6814 # Locking is not used
6815 assert not (compat.any(lu.glm.is_owned(level)
6816 for level in locking.LEVELS
6817 if level != locking.LEVEL_CLUSTER) or
6818 self.do_locking or self.use_locking)
6820 if query.CQ_CONFIG in self.requested_data:
6821 cluster = lu.cfg.GetClusterInfo()
6823 cluster = NotImplemented
6825 if query.CQ_QUEUE_DRAINED in self.requested_data:
6826 drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
6828 drain_flag = NotImplemented
6830 if query.CQ_WATCHER_PAUSE in self.requested_data:
6831 master_name = lu.cfg.GetMasterNode()
6833 result = lu.rpc.call_get_watcher_pause(master_name)
6834 result.Raise("Can't retrieve watcher pause from master node '%s'" %
6837 watcher_pause = result.payload
6839 watcher_pause = NotImplemented
6841 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6844 class LUInstanceActivateDisks(NoHooksLU):
6845 """Bring up an instance's disks.
6850 def ExpandNames(self):
6851 self._ExpandAndLockInstance()
6852 self.needed_locks[locking.LEVEL_NODE] = []
6853 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6855 def DeclareLocks(self, level):
6856 if level == locking.LEVEL_NODE:
6857 self._LockInstancesNodes()
6859 def CheckPrereq(self):
6860 """Check prerequisites.
6862 This checks that the instance is in the cluster.
6865 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6866 assert self.instance is not None, \
6867 "Cannot retrieve locked instance %s" % self.op.instance_name
6868 _CheckNodeOnline(self, self.instance.primary_node)
6870 def Exec(self, feedback_fn):
6871 """Activate the disks.
6874 disks_ok, disks_info = \
6875 _AssembleInstanceDisks(self, self.instance,
6876 ignore_size=self.op.ignore_size)
6878 raise errors.OpExecError("Cannot activate block devices")
6880 if self.op.wait_for_sync:
6881 if not _WaitForSync(self, self.instance):
6882 raise errors.OpExecError("Some disks of the instance are degraded!")
6887 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6888 ignore_size=False):
6889 """Prepare the block devices for an instance.
6891 This sets up the block devices on all nodes.
6893 @type lu: L{LogicalUnit}
6894 @param lu: the logical unit on whose behalf we execute
6895 @type instance: L{objects.Instance}
6896 @param instance: the instance for whose disks we assemble
6897 @type disks: list of L{objects.Disk} or None
6898 @param disks: which disks to assemble (or all, if None)
6899 @type ignore_secondaries: boolean
6900 @param ignore_secondaries: if true, errors on secondary nodes
6901 won't result in an error return from the function
6902 @type ignore_size: boolean
6903 @param ignore_size: if true, the current known size of the disk
6904 will not be used during the disk activation, useful for cases
6905 when the size is wrong
6906 @return: a tuple (disks_ok, device_info); disks_ok is False if the operation failed,
6907 and device_info is a list of (host, instance_visible_name, node_visible_name)
6908 tuples with the mapping from node devices to instance devices
6913 iname = instance.name
6914 disks = _ExpandCheckDisks(instance, disks)
6916 # With the two-pass mechanism we try to reduce the window of
6917 # opportunity for the race condition of switching DRBD to primary
6918 # before the handshake has occurred, but we do not eliminate it
6920 # The proper fix would be to wait (with some limits) until the
6921 # connection has been made and drbd transitions from WFConnection
6922 # into any other network-connected state (Connected, SyncTarget, SyncSource, ...)
6925 # 1st pass, assemble on all nodes in secondary mode
6926 for idx, inst_disk in enumerate(disks):
6927 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6929 node_disk = node_disk.Copy()
6930 node_disk.UnsetSize()
6931 lu.cfg.SetDiskID(node_disk, node)
6932 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6934 msg = result.fail_msg
6936 is_offline_secondary = (node in instance.secondary_nodes and
6938 lu.LogWarning("Could not prepare block device %s on node %s"
6939 " (is_primary=False, pass=1): %s",
6940 inst_disk.iv_name, node, msg)
6941 if not (ignore_secondaries or is_offline_secondary):
6944 # FIXME: race condition on drbd migration to primary
6946 # 2nd pass, do only the primary node
6947 for idx, inst_disk in enumerate(disks):
6950 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6951 if node != instance.primary_node:
6954 node_disk = node_disk.Copy()
6955 node_disk.UnsetSize()
6956 lu.cfg.SetDiskID(node_disk, node)
6957 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6959 msg = result.fail_msg
6961 lu.LogWarning("Could not prepare block device %s on node %s"
6962 " (is_primary=True, pass=2): %s",
6963 inst_disk.iv_name, node, msg)
6966 dev_path = result.payload
6968 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6970 # leave the disks configured for the primary node
6971 # this is a workaround that would be fixed better by
6972 # improving the logical/physical id handling
6974 lu.cfg.SetDiskID(disk, instance.primary_node)
6976 return disks_ok, device_info
6979 def _StartInstanceDisks(lu, instance, force):
6980 """Start the disks of an instance.
6983 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6984 ignore_secondaries=force)
6986 _ShutdownInstanceDisks(lu, instance)
6987 if force is not None and not force:
6989 hint=("If the message above refers to a secondary node,"
6990 " you can retry the operation using '--force'"))
6991 raise errors.OpExecError("Disk consistency error")
6994 class LUInstanceDeactivateDisks(NoHooksLU):
6995 """Shutdown an instance's disks.
7000 def ExpandNames(self):
7001 self._ExpandAndLockInstance()
7002 self.needed_locks[locking.LEVEL_NODE] = []
7003 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7005 def DeclareLocks(self, level):
7006 if level == locking.LEVEL_NODE:
7007 self._LockInstancesNodes()
7009 def CheckPrereq(self):
7010 """Check prerequisites.
7012 This checks that the instance is in the cluster.
7015 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7016 assert self.instance is not None, \
7017 "Cannot retrieve locked instance %s" % self.op.instance_name
7019 def Exec(self, feedback_fn):
7020 """Deactivate the disks
7023 instance = self.instance
7025 _ShutdownInstanceDisks(self, instance)
7027 _SafeShutdownInstanceDisks(self, instance)
7030 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
7031 """Shutdown block devices of an instance.
7033 This function checks if an instance is running, before calling
7034 _ShutdownInstanceDisks.
7037 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
7038 _ShutdownInstanceDisks(lu, instance, disks=disks)
7041 def _ExpandCheckDisks(instance, disks):
7042 """Return the instance disks selected by the disks list
7044 @type disks: list of L{objects.Disk} or None
7045 @param disks: selected disks
7046 @rtype: list of L{objects.Disk}
7047 @return: selected instance disks to act on
7051 return instance.disks
7053 if not set(disks).issubset(instance.disks):
7054 raise errors.ProgrammerError("Can only act on disks belonging to the"
7055 " target instance")
7059 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
7060 """Shutdown block devices of an instance.
7062 This does the shutdown on all nodes of the instance.
7064 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed.
7069 disks = _ExpandCheckDisks(instance, disks)
7072 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
7073 lu.cfg.SetDiskID(top_disk, node)
7074 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
7075 msg = result.fail_msg
7077 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
7078 disk.iv_name, node, msg)
7079 if ((node == instance.primary_node and not ignore_primary) or
7080 (node != instance.primary_node and not result.offline)):
7085 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
7086 """Checks if a node has enough free memory.
7088 This function checks if a given node has the needed amount of free
7089 memory. In case the node has less memory or we cannot get the
7090 information from the node, this function raises an OpPrereqError
7093 @type lu: C{LogicalUnit}
7094 @param lu: a logical unit from which we get configuration data
7096 @param node: the node to check
7097 @type reason: C{str}
7098 @param reason: string to use in the error message
7099 @type requested: C{int}
7100 @param requested: the amount of memory in MiB to check for
7101 @type hypervisor_name: C{str}
7102 @param hypervisor_name: the hypervisor to ask for memory stats
7104 @return: node current free memory
7105 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
7106 we cannot check the node
7109 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
7110 nodeinfo[node].Raise("Can't get data from node %s" % node,
7111 prereq=True, ecode=errors.ECODE_ENVIRON)
7112 (_, _, (hv_info, )) = nodeinfo[node].payload
7114 free_mem = hv_info.get("memory_free", None)
7115 if not isinstance(free_mem, int):
7116 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
7117 " was '%s'" % (node, free_mem),
7118 errors.ECODE_ENVIRON)
7119 if requested > free_mem:
7120 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
7121 " needed %s MiB, available %s MiB" %
7122 (node, reason, requested, free_mem),
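# Usage sketch (mirroring the call in LUInstanceStartup.CheckPrereq further
# below): check that the primary node can hold the instance's minimum memory
# before starting it:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)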
7127 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
7128 """Checks if nodes have enough free disk space in all the VGs.
7130 This function checks if all given nodes have the needed amount of
7131 free disk. In case any node has less disk or we cannot get the
7132 information from the node, this function raises an OpPrereqError
7135 @type lu: C{LogicalUnit}
7136 @param lu: a logical unit from which we get configuration data
7137 @type nodenames: C{list}
7138 @param nodenames: the list of node names to check
7139 @type req_sizes: C{dict}
7140 @param req_sizes: the hash of vg and corresponding amount of disk in
7142 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7143 or we cannot check the node
7146 for vg, req_size in req_sizes.items():
7147 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
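# Sketch of the expected req_sizes shape (the VG name is only illustrative):
#   req_sizes = {"xenvg": 10240}   # require 10 GiB free in VG "xenvg"
#   _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)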
7150 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
7151 """Checks if nodes have enough free disk space in the specified VG.
7153 This function checks if all given nodes have the needed amount of
7154 free disk. In case any node has less disk or we cannot get the
7155 information from the node, this function raises an OpPrereqError
7158 @type lu: C{LogicalUnit}
7159 @param lu: a logical unit from which we get configuration data
7160 @type nodenames: C{list}
7161 @param nodenames: the list of node names to check
7163 @param vg: the volume group to check
7164 @type requested: C{int}
7165 @param requested: the amount of disk in MiB to check for
7166 @raise errors.OpPrereqError: if the node doesn't have enough disk,
7167 or we cannot check the node
7170 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
7171 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
7172 for node in nodenames:
7173 info = nodeinfo[node]
7174 info.Raise("Cannot get current information from node %s" % node,
7175 prereq=True, ecode=errors.ECODE_ENVIRON)
7176 (_, (vg_info, ), _) = info.payload
7177 vg_free = vg_info.get("vg_free", None)
7178 if not isinstance(vg_free, int):
7179 raise errors.OpPrereqError("Can't compute free disk space on node"
7180 " %s for vg %s, result was '%s'" %
7181 (node, vg, vg_free), errors.ECODE_ENVIRON)
7182 if requested > vg_free:
7183 raise errors.OpPrereqError("Not enough disk space on target node %s"
7184 " vg %s: required %d MiB, available %d MiB" %
7185 (node, vg, requested, vg_free),
7189 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
7190 """Checks if nodes have enough physical CPUs
7192 This function checks if all given nodes have the needed number of
7193 physical CPUs. In case any node has less CPUs or we cannot get the
7194 information from the node, this function raises an OpPrereqError
7197 @type lu: C{LogicalUnit}
7198 @param lu: a logical unit from which we get configuration data
7199 @type nodenames: C{list}
7200 @param nodenames: the list of node names to check
7201 @type requested: C{int}
7202 @param requested: the minimum acceptable number of physical CPUs
7203 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
7204 or we cannot check the node
7207 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
7208 for node in nodenames:
7209 info = nodeinfo[node]
7210 info.Raise("Cannot get current information from node %s" % node,
7211 prereq=True, ecode=errors.ECODE_ENVIRON)
7212 (_, _, (hv_info, )) = info.payload
7213 num_cpus = hv_info.get("cpu_total", None)
7214 if not isinstance(num_cpus, int):
7215 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
7216 " on node %s, result was '%s'" %
7217 (node, num_cpus), errors.ECODE_ENVIRON)
7218 if requested > num_cpus:
7219 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
7220 "required" % (node, num_cpus, requested),
7224 class LUInstanceStartup(LogicalUnit):
7225 """Starts an instance.
7228 HPATH = "instance-start"
7229 HTYPE = constants.HTYPE_INSTANCE
7232 def CheckArguments(self):
7234 if self.op.beparams:
7235 # fill the beparams dict
7236 objects.UpgradeBeParams(self.op.beparams)
7237 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7239 def ExpandNames(self):
7240 self._ExpandAndLockInstance()
7241 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7243 def DeclareLocks(self, level):
7244 if level == locking.LEVEL_NODE_RES:
7245 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
7247 def BuildHooksEnv(self):
7250 This runs on master, primary and secondary nodes of the instance.
7254 "FORCE": self.op.force,
7257 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7261 def BuildHooksNodes(self):
7262 """Build hooks nodes.
7265 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7268 def CheckPrereq(self):
7269 """Check prerequisites.
7271 This checks that the instance is in the cluster.
7274 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7275 assert self.instance is not None, \
7276 "Cannot retrieve locked instance %s" % self.op.instance_name
7279 if self.op.hvparams:
7280 # check hypervisor parameter syntax (locally)
7281 cluster = self.cfg.GetClusterInfo()
7282 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7283 filled_hvp = cluster.FillHV(instance)
7284 filled_hvp.update(self.op.hvparams)
7285 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
7286 hv_type.CheckParameterSyntax(filled_hvp)
7287 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7289 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7291 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7293 if self.primary_offline and self.op.ignore_offline_nodes:
7294 self.LogWarning("Ignoring offline primary node")
7296 if self.op.hvparams or self.op.beparams:
7297 self.LogWarning("Overridden parameters are ignored")
7299 _CheckNodeOnline(self, instance.primary_node)
7301 bep = self.cfg.GetClusterInfo().FillBE(instance)
7302 bep.update(self.op.beparams)
7304 # check that the instance's bridges exist
7305 _CheckInstanceBridgesExist(self, instance)
7307 remote_info = self.rpc.call_instance_info(instance.primary_node,
7309 instance.hypervisor)
7310 remote_info.Raise("Error checking node %s" % instance.primary_node,
7311 prereq=True, ecode=errors.ECODE_ENVIRON)
7312 if not remote_info.payload: # not running already
7313 _CheckNodeFreeMemory(self, instance.primary_node,
7314 "starting instance %s" % instance.name,
7315 bep[constants.BE_MINMEM], instance.hypervisor)
7317 def Exec(self, feedback_fn):
7318 """Start the instance.
7321 instance = self.instance
7322 force = self.op.force
7324 if not self.op.no_remember:
7325 self.cfg.MarkInstanceUp(instance.name)
7327 if self.primary_offline:
7328 assert self.op.ignore_offline_nodes
7329 self.LogInfo("Primary node offline, marked instance as started")
7331 node_current = instance.primary_node
7333 _StartInstanceDisks(self, instance, force)
7336 self.rpc.call_instance_start(node_current,
7337 (instance, self.op.hvparams,
7339 self.op.startup_paused)
7340 msg = result.fail_msg
7342 _ShutdownInstanceDisks(self, instance)
7343 raise errors.OpExecError("Could not start instance: %s" % msg)
7346 class LUInstanceReboot(LogicalUnit):
7347 """Reboot an instance.
7350 HPATH = "instance-reboot"
7351 HTYPE = constants.HTYPE_INSTANCE
7354 def ExpandNames(self):
7355 self._ExpandAndLockInstance()
7357 def BuildHooksEnv(self):
7360 This runs on master, primary and secondary nodes of the instance.
7364 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7365 "REBOOT_TYPE": self.op.reboot_type,
7366 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7369 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7373 def BuildHooksNodes(self):
7374 """Build hooks nodes.
7377 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7380 def CheckPrereq(self):
7381 """Check prerequisites.
7383 This checks that the instance is in the cluster.
7386 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7387 assert self.instance is not None, \
7388 "Cannot retrieve locked instance %s" % self.op.instance_name
7389 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7390 _CheckNodeOnline(self, instance.primary_node)
7392 # check that the instance's bridges exist
7393 _CheckInstanceBridgesExist(self, instance)
7395 def Exec(self, feedback_fn):
7396 """Reboot the instance.
7399 instance = self.instance
7400 ignore_secondaries = self.op.ignore_secondaries
7401 reboot_type = self.op.reboot_type
7403 remote_info = self.rpc.call_instance_info(instance.primary_node,
7405 instance.hypervisor)
7406 remote_info.Raise("Error checking node %s" % instance.primary_node)
7407 instance_running = bool(remote_info.payload)
7409 node_current = instance.primary_node
7411 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7412 constants.INSTANCE_REBOOT_HARD]:
7413 for disk in instance.disks:
7414 self.cfg.SetDiskID(disk, node_current)
7415 result = self.rpc.call_instance_reboot(node_current, instance,
7417 self.op.shutdown_timeout)
7418 result.Raise("Could not reboot instance")
7420 if instance_running:
7421 result = self.rpc.call_instance_shutdown(node_current, instance,
7422 self.op.shutdown_timeout)
7423 result.Raise("Could not shutdown instance for full reboot")
7424 _ShutdownInstanceDisks(self, instance)
7426 self.LogInfo("Instance %s was already stopped, starting now",
7428 _StartInstanceDisks(self, instance, ignore_secondaries)
7429 result = self.rpc.call_instance_start(node_current,
7430 (instance, None, None), False)
7431 msg = result.fail_msg
7433 _ShutdownInstanceDisks(self, instance)
7434 raise errors.OpExecError("Could not start instance for"
7435 " full reboot: %s" % msg)
7437 self.cfg.MarkInstanceUp(instance.name)
7440 class LUInstanceShutdown(LogicalUnit):
7441 """Shutdown an instance.
7444 HPATH = "instance-stop"
7445 HTYPE = constants.HTYPE_INSTANCE
7448 def ExpandNames(self):
7449 self._ExpandAndLockInstance()
7451 def BuildHooksEnv(self):
7454 This runs on master, primary and secondary nodes of the instance.
7457 env = _BuildInstanceHookEnvByObject(self, self.instance)
7458 env["TIMEOUT"] = self.op.timeout
7461 def BuildHooksNodes(self):
7462 """Build hooks nodes.
7465 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7468 def CheckPrereq(self):
7469 """Check prerequisites.
7471 This checks that the instance is in the cluster.
7474 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7475 assert self.instance is not None, \
7476 "Cannot retrieve locked instance %s" % self.op.instance_name
7478 if not self.op.force:
7479 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7481 self.LogWarning("Ignoring offline instance check")
7483 self.primary_offline = \
7484 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7486 if self.primary_offline and self.op.ignore_offline_nodes:
7487 self.LogWarning("Ignoring offline primary node")
7489 _CheckNodeOnline(self, self.instance.primary_node)
7491 def Exec(self, feedback_fn):
7492 """Shutdown the instance.
7495 instance = self.instance
7496 node_current = instance.primary_node
7497 timeout = self.op.timeout
7499 # If the instance is offline we shouldn't mark it as down, as that
7500 # resets the offline flag.
7501 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
7502 self.cfg.MarkInstanceDown(instance.name)
7504 if self.primary_offline:
7505 assert self.op.ignore_offline_nodes
7506 self.LogInfo("Primary node offline, marked instance as stopped")
7508 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7509 msg = result.fail_msg
7511 self.LogWarning("Could not shutdown instance: %s", msg)
7513 _ShutdownInstanceDisks(self, instance)
7516 class LUInstanceReinstall(LogicalUnit):
7517 """Reinstall an instance.
7520 HPATH = "instance-reinstall"
7521 HTYPE = constants.HTYPE_INSTANCE
7524 def ExpandNames(self):
7525 self._ExpandAndLockInstance()
7527 def BuildHooksEnv(self):
7530 This runs on master, primary and secondary nodes of the instance.
7533 return _BuildInstanceHookEnvByObject(self, self.instance)
7535 def BuildHooksNodes(self):
7536 """Build hooks nodes.
7539 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7542 def CheckPrereq(self):
7543 """Check prerequisites.
7545 This checks that the instance is in the cluster and is not running.
7548 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7549 assert instance is not None, \
7550 "Cannot retrieve locked instance %s" % self.op.instance_name
7551 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7552 " offline, cannot reinstall")
7554 if instance.disk_template == constants.DT_DISKLESS:
7555 raise errors.OpPrereqError("Instance '%s' has no disks" %
7556 self.op.instance_name,
7558 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7560 if self.op.os_type is not None:
7562 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7563 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7564 instance_os = self.op.os_type
7566 instance_os = instance.os
7568 nodelist = list(instance.all_nodes)
7570 if self.op.osparams:
7571 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7572 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7573 self.os_inst = i_osdict # the new dict (without defaults)
7577 self.instance = instance
7579 def Exec(self, feedback_fn):
7580 """Reinstall the instance.
7583 inst = self.instance
7585 if self.op.os_type is not None:
7586 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7587 inst.os = self.op.os_type
7588 # Write to configuration
7589 self.cfg.Update(inst, feedback_fn)
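# The reinstall itself: bring up the instance's disks, re-run the OS
# creation scripts on the primary node (which overwrites the current OS
# installation), and deactivate the disks again afterwards.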
7591 _StartInstanceDisks(self, inst, None)
7593 feedback_fn("Running the instance OS create scripts...")
7594 # FIXME: pass debug option from opcode to backend
7595 result = self.rpc.call_instance_os_add(inst.primary_node,
7596 (inst, self.os_inst), True,
7597 self.op.debug_level)
7598 result.Raise("Could not install OS for instance %s on node %s" %
7599 (inst.name, inst.primary_node))
7601 _ShutdownInstanceDisks(self, inst)
7604 class LUInstanceRecreateDisks(LogicalUnit):
7605 """Recreate an instance's missing disks.
7608 HPATH = "instance-recreate-disks"
7609 HTYPE = constants.HTYPE_INSTANCE
7612 _MODIFYABLE = compat.UniqueFrozenset([
7613 constants.IDISK_SIZE,
7614 constants.IDISK_MODE,
7617 # New or changed disk parameters may have different semantics
7618 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7619 constants.IDISK_ADOPT,
7621 # TODO: Implement support changing VG while recreating
7623 constants.IDISK_METAVG,
7624 constants.IDISK_PROVIDER,
7627 def _RunAllocator(self):
7628 """Run the allocator based on input opcode.
7631 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7634 # The allocator should actually run in "relocate" mode, but current
7635 # allocators don't support relocating all the nodes of an instance at
7636 # the same time. As a workaround we use "allocate" mode, but this is
7637 # suboptimal for two reasons:
7638 # - The instance name passed to the allocator is present in the list of
7639 # existing instances, so there could be a conflict within the
7640 # internal structures of the allocator. This doesn't happen with the
7641 # current allocators, but it's a liability.
7642 # - The allocator counts the resources used by the instance twice: once
7643 # because the instance exists already, and once because it tries to
7644 # allocate a new instance.
7645 # The allocator could choose some of the nodes on which the instance is
7646 # running, but that's not a problem. If the instance nodes are broken,
7647 # they should already be marked as drained or offline, and hence
7648 # skipped by the allocator. If instance disks have been lost for other
7649 # reasons, then recreating the disks on the same nodes should be fine.
7650 disk_template = self.instance.disk_template
7651 spindle_use = be_full[constants.BE_SPINDLE_USE]
7652 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7653 disk_template=disk_template,
7654 tags=list(self.instance.GetTags()),
7655 os=self.instance.os,
7657 vcpus=be_full[constants.BE_VCPUS],
7658 memory=be_full[constants.BE_MAXMEM],
7659 spindle_use=spindle_use,
7660 disks=[{constants.IDISK_SIZE: d.size,
7661 constants.IDISK_MODE: d.mode}
7662 for d in self.instance.disks],
7663 hypervisor=self.instance.hypervisor,
7664 node_whitelist=None)
7665 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7667 ial.Run(self.op.iallocator)
7669 assert req.RequiredNodes() == len(self.instance.all_nodes)
7672 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7673 " %s" % (self.op.iallocator, ial.info),
7676 self.op.nodes = ial.result
7677 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7678 self.op.instance_name, self.op.iallocator,
7679 utils.CommaJoin(ial.result))
7681 def CheckArguments(self):
7682 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
7683 # Normalize and convert deprecated list of disk indices
7684 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
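# Illustrative note (values are examples only): the deprecated flat list
# of disk indices, e.g. disks=[2, 0], is normalized here to the pair
# form [(0, {}), (2, {})], so each entry is uniformly (index, params).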
7686 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7688 raise errors.OpPrereqError("Some disks have been specified more than"
7689 " once: %s" % utils.CommaJoin(duplicates),
7692 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7693 # when neither iallocator nor nodes are specified
7694 if self.op.iallocator or self.op.nodes:
7695 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7697 for (idx, params) in self.op.disks:
7698 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7699 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7701 raise errors.OpPrereqError("Parameters for disk %s try to change"
7702 " unmodifyable parameter(s): %s" %
7703 (idx, utils.CommaJoin(unsupported)),
7706 def ExpandNames(self):
7707 self._ExpandAndLockInstance()
7708 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7711 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7712 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7714 self.needed_locks[locking.LEVEL_NODE] = []
7715 if self.op.iallocator:
7716 # iallocator will select a new node in the same group
7717 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7718 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7720 self.needed_locks[locking.LEVEL_NODE_RES] = []
7722 def DeclareLocks(self, level):
7723 if level == locking.LEVEL_NODEGROUP:
7724 assert self.op.iallocator is not None
7725 assert not self.op.nodes
7726 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7727 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7728 # Lock the primary group used by the instance optimistically; this
7729 # requires going via the node before it's locked, requiring
7730 # verification later on
7731 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7732 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7734 elif level == locking.LEVEL_NODE:
7735 # If an allocator is used, then we lock all the nodes in the current
7736 # instance group, as we don't know yet which ones will be selected;
7737 # if we replace the nodes without using an allocator, locks are
7738 # already declared in ExpandNames; otherwise, we need to lock all the
7739 # instance nodes for disk re-creation
7740 if self.op.iallocator:
7741 assert not self.op.nodes
7742 assert not self.needed_locks[locking.LEVEL_NODE]
7743 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7745 # Lock member nodes of the group of the primary node
7746 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7747 self.needed_locks[locking.LEVEL_NODE].extend(
7748 self.cfg.GetNodeGroup(group_uuid).members)
7750 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7751 elif not self.op.nodes:
7752 self._LockInstancesNodes(primary_only=False)
7753 elif level == locking.LEVEL_NODE_RES:
7755 self.needed_locks[locking.LEVEL_NODE_RES] = \
7756 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7758 def BuildHooksEnv(self):
7761 This runs on master, primary and secondary nodes of the instance.
7764 return _BuildInstanceHookEnvByObject(self, self.instance)
7766 def BuildHooksNodes(self):
7767 """Build hooks nodes.
7770 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7773 def CheckPrereq(self):
7774 """Check prerequisites.
7776 This checks that the instance is in the cluster and is not running.
7779 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7780 assert instance is not None, \
7781 "Cannot retrieve locked instance %s" % self.op.instance_name
7783 if len(self.op.nodes) != len(instance.all_nodes):
7784 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7785 " %d replacement nodes were specified" %
7786 (instance.name, len(instance.all_nodes),
7787 len(self.op.nodes)),
7789 assert instance.disk_template != constants.DT_DRBD8 or \
7790 len(self.op.nodes) == 2
7791 assert instance.disk_template != constants.DT_PLAIN or \
7792 len(self.op.nodes) == 1
7793 primary_node = self.op.nodes[0]
7795 primary_node = instance.primary_node
7796 if not self.op.iallocator:
7797 _CheckNodeOnline(self, primary_node)
7799 if instance.disk_template == constants.DT_DISKLESS:
7800 raise errors.OpPrereqError("Instance '%s' has no disks" %
7801 self.op.instance_name, errors.ECODE_INVAL)
7803 # Verify if node group locks are still correct
7804 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7806 # Node group locks are acquired only for the primary node (and only
7807 # when the allocator is used)
7808 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7811 # if we replace nodes *and* the old primary is offline, we don't
7812 # check the instance state
7813 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7814 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7815 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7816 msg="cannot recreate disks")
7819 self.disks = dict(self.op.disks)
7821 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7823 maxidx = max(self.disks.keys())
7824 if maxidx >= len(instance.disks):
7825 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7828 if ((self.op.nodes or self.op.iallocator) and
7829 sorted(self.disks.keys()) != range(len(instance.disks))):
7830 raise errors.OpPrereqError("Can't recreate disks partially and"
7831 " change the nodes at the same time",
7834 self.instance = instance
7836 if self.op.iallocator:
7837 self._RunAllocator()
7838 # Release unneeded node and node resource locks
7839 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7840 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
7841 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
7843 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7845 def Exec(self, feedback_fn):
7846 """Recreate the disks.
7849 instance = self.instance
7851 assert (self.owned_locks(locking.LEVEL_NODE) ==
7852 self.owned_locks(locking.LEVEL_NODE_RES))
7855 mods = [] # keeps track of needed changes
7857 for idx, disk in enumerate(instance.disks):
7859 changes = self.disks[idx]
7861 # Disk should not be recreated
7865 # update secondaries for disks, if needed
7866 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7867 # need to update the nodes and minors
7868 assert len(self.op.nodes) == 2
7869 assert len(disk.logical_id) == 6 # otherwise disk internals
7871 (_, _, old_port, _, _, old_secret) = disk.logical_id
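# A DRBD8 logical_id is a 6-tuple of the form
#   (nodeA, nodeB, port, minorA, minorB, secret);
# only the node names and the minors are replaced below, while the TCP
# port and shared secret of the existing device are kept.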
7872 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7873 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7874 new_minors[0], new_minors[1], old_secret)
7875 assert len(disk.logical_id) == len(new_id)
7879 mods.append((idx, new_id, changes))
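# Each "mods" entry is a (disk_index, new_logical_id_or_None, changes)
# tuple; for example a size-only change on disk 0 would be recorded as
# (0, None, {constants.IDISK_SIZE: 2048}) (illustrative values only).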
7881 # now that we have passed all asserts above, we can apply the mods
7882 # in a single run (to avoid partial changes)
7883 for idx, new_id, changes in mods:
7884 disk = instance.disks[idx]
7885 if new_id is not None:
7886 assert disk.dev_type == constants.LD_DRBD8
7887 disk.logical_id = new_id
7889 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7890 mode=changes.get(constants.IDISK_MODE, None))
7892 # change primary node, if needed
7894 instance.primary_node = self.op.nodes[0]
7895 self.LogWarning("Changing the instance's nodes, you will have to"
7896 " remove any disks left on the older nodes manually")
7899 self.cfg.Update(instance, feedback_fn)
7901 # All touched nodes must be locked
7902 mylocks = self.owned_locks(locking.LEVEL_NODE)
7903 assert mylocks.issuperset(frozenset(instance.all_nodes))
7904 _CreateDisks(self, instance, to_skip=to_skip)
7907 class LUInstanceRename(LogicalUnit):
7908 """Rename an instance.
7911 HPATH = "instance-rename"
7912 HTYPE = constants.HTYPE_INSTANCE
7914 def CheckArguments(self):
7918 if self.op.ip_check and not self.op.name_check:
7919 # TODO: make the ip check more flexible and not depend on the name check
7920 raise errors.OpPrereqError("IP address check requires a name check",
7923 def BuildHooksEnv(self):
7926 This runs on master, primary and secondary nodes of the instance.
7929 env = _BuildInstanceHookEnvByObject(self, self.instance)
7930 env["INSTANCE_NEW_NAME"] = self.op.new_name
7933 def BuildHooksNodes(self):
7934 """Build hooks nodes.
7937 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7940 def CheckPrereq(self):
7941 """Check prerequisites.
7943 This checks that the instance is in the cluster and is not running.
7946 self.op.instance_name = _ExpandInstanceName(self.cfg,
7947 self.op.instance_name)
7948 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7949 assert instance is not None
7950 _CheckNodeOnline(self, instance.primary_node)
7951 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7952 msg="cannot rename")
7953 self.instance = instance
7955 new_name = self.op.new_name
7956 if self.op.name_check:
7957 hostname = _CheckHostnameSane(self, new_name)
7958 new_name = self.op.new_name = hostname.name
7959 if (self.op.ip_check and
7960 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7961 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7962 (hostname.ip, new_name),
7963 errors.ECODE_NOTUNIQUE)
7965 instance_list = self.cfg.GetInstanceList()
7966 if new_name in instance_list and new_name != instance.name:
7967 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7968 new_name, errors.ECODE_EXISTS)
7970 def Exec(self, feedback_fn):
7971 """Rename the instance.
7974 inst = self.instance
7975 old_name = inst.name
7977 rename_file_storage = False
7978 if (inst.disk_template in constants.DTS_FILEBASED and
7979 self.op.new_name != inst.name):
7980 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7981 rename_file_storage = True
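# For file-based disk templates the storage directory is derived from
# the instance name, so the directory has to be renamed on the primary
# node too; remember the old path now and issue the rename RPC after the
# configuration has been updated.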
7983 self.cfg.RenameInstance(inst.name, self.op.new_name)
7984 # Change the instance lock. This is definitely safe while we hold the BGL.
7985 # Otherwise the new lock would have to be added in acquired mode.
7987 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
7988 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7989 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7991 # re-read the instance from the configuration after rename
7992 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7994 if rename_file_storage:
7995 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7996 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7997 old_file_storage_dir,
7998 new_file_storage_dir)
7999 result.Raise("Could not rename on node %s directory '%s' to '%s'"
8000 " (but the instance has been renamed in Ganeti)" %
8001 (inst.primary_node, old_file_storage_dir,
8002 new_file_storage_dir))
8004 _StartInstanceDisks(self, inst, None)
8005 # update info on disks
8006 info = _GetInstanceInfoText(inst)
8007 for (idx, disk) in enumerate(inst.disks):
8008 for node in inst.all_nodes:
8009 self.cfg.SetDiskID(disk, node)
8010 result = self.rpc.call_blockdev_setinfo(node, disk, info)
8012 self.LogWarning("Error setting info on node %s for disk %s: %s",
8013 node, idx, result.fail_msg)
8015 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
8016 old_name, self.op.debug_level)
8017 msg = result.fail_msg
8019 msg = ("Could not run OS rename script for instance %s on node %s"
8020 " (but the instance has been renamed in Ganeti): %s" %
8021 (inst.name, inst.primary_node, msg))
8022 self.LogWarning(msg)
8024 _ShutdownInstanceDisks(self, inst)
8029 class LUInstanceRemove(LogicalUnit):
8030 """Remove an instance.
8033 HPATH = "instance-remove"
8034 HTYPE = constants.HTYPE_INSTANCE
8037 def ExpandNames(self):
8038 self._ExpandAndLockInstance()
8039 self.needed_locks[locking.LEVEL_NODE] = []
8040 self.needed_locks[locking.LEVEL_NODE_RES] = []
8041 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8043 def DeclareLocks(self, level):
8044 if level == locking.LEVEL_NODE:
8045 self._LockInstancesNodes()
8046 elif level == locking.LEVEL_NODE_RES:
8048 self.needed_locks[locking.LEVEL_NODE_RES] = \
8049 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8051 def BuildHooksEnv(self):
8054 This runs on master, primary and secondary nodes of the instance.
8057 env = _BuildInstanceHookEnvByObject(self, self.instance)
8058 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
8061 def BuildHooksNodes(self):
8062 """Build hooks nodes.
8065 nl = [self.cfg.GetMasterNode()]
8066 nl_post = list(self.instance.all_nodes) + nl
8067 return (nl, nl_post)
8069 def CheckPrereq(self):
8070 """Check prerequisites.
8072 This checks that the instance is in the cluster.
8075 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8076 assert self.instance is not None, \
8077 "Cannot retrieve locked instance %s" % self.op.instance_name
8079 def Exec(self, feedback_fn):
8080 """Remove the instance.
8083 instance = self.instance
8084 logging.info("Shutting down instance %s on node %s",
8085 instance.name, instance.primary_node)
8087 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
8088 self.op.shutdown_timeout)
8089 msg = result.fail_msg
8091 if self.op.ignore_failures:
8092 feedback_fn("Warning: can't shutdown instance: %s" % msg)
8094 raise errors.OpExecError("Could not shutdown instance %s on"
8096 (instance.name, instance.primary_node, msg))
8098 assert (self.owned_locks(locking.LEVEL_NODE) ==
8099 self.owned_locks(locking.LEVEL_NODE_RES))
8100 assert not (set(instance.all_nodes) -
8101 self.owned_locks(locking.LEVEL_NODE)), \
8102 "Not owning correct locks"
8104 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
8107 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
8108 """Utility function to remove an instance.
8111 logging.info("Removing block devices for instance %s", instance.name)
8113 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
8114 if not ignore_failures:
8115 raise errors.OpExecError("Can't remove instance's disks")
8116 feedback_fn("Warning: can't remove instance's disks")
8118 logging.info("Removing instance %s out of cluster config", instance.name)
8120 lu.cfg.RemoveInstance(instance.name)
8122 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
8123 "Instance lock removal conflict"
8125 # Remove lock for the instance
8126 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
8129 class LUInstanceQuery(NoHooksLU):
8130 """Logical unit for querying instances.
8133 # pylint: disable=W0142
8136 def CheckArguments(self):
8137 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
8138 self.op.output_fields, self.op.use_locking)
8140 def ExpandNames(self):
8141 self.iq.ExpandNames(self)
8143 def DeclareLocks(self, level):
8144 self.iq.DeclareLocks(self, level)
8146 def Exec(self, feedback_fn):
8147 return self.iq.OldStyleQuery(self)
8150 def _ExpandNamesForMigration(lu):
8151 """Expands names for use with L{TLMigrateInstance}.
8153 @type lu: L{LogicalUnit}
8156 if lu.op.target_node is not None:
8157 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
8159 lu.needed_locks[locking.LEVEL_NODE] = []
8160 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8162 lu.needed_locks[locking.LEVEL_NODE_RES] = []
8163 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8165 # The node allocation lock is actually only needed for externally replicated
8166 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
8167 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
8170 def _DeclareLocksForMigration(lu, level):
8171 """Declares locks for L{TLMigrateInstance}.
8173 @type lu: L{LogicalUnit}
8174 @param level: Lock level
8177 if level == locking.LEVEL_NODE_ALLOC:
8178 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
8180 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
8182 # Node locks are already declared here rather than at LEVEL_NODE as we need
8183 # the instance object anyway to declare the node allocation lock.
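# For externally mirrored templates the target is either given explicitly
# or picked by an iallocator, so without an explicit target all node
# locks (and the node allocation lock) must be acquired; for internally
# mirrored templates the node locks come from the instance's own nodes.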
8184 if instance.disk_template in constants.DTS_EXT_MIRROR:
8185 if lu.op.target_node is None:
8186 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8187 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
8189 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
8191 del lu.recalculate_locks[locking.LEVEL_NODE]
8193 lu._LockInstancesNodes() # pylint: disable=W0212
8195 elif level == locking.LEVEL_NODE:
8196 # Node locks are declared together with the node allocation lock
8197 assert (lu.needed_locks[locking.LEVEL_NODE] or
8198 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
8200 elif level == locking.LEVEL_NODE_RES:
8202 lu.needed_locks[locking.LEVEL_NODE_RES] = \
8203 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
8206 class LUInstanceFailover(LogicalUnit):
8207 """Failover an instance.
8210 HPATH = "instance-failover"
8211 HTYPE = constants.HTYPE_INSTANCE
8214 def CheckArguments(self):
8215 """Check the arguments.
8218 self.iallocator = getattr(self.op, "iallocator", None)
8219 self.target_node = getattr(self.op, "target_node", None)
8221 def ExpandNames(self):
8222 self._ExpandAndLockInstance()
8223 _ExpandNamesForMigration(self)
8226 TLMigrateInstance(self, self.op.instance_name, False, True, False,
8227 self.op.ignore_consistency, True,
8228 self.op.shutdown_timeout, self.op.ignore_ipolicy)
8230 self.tasklets = [self._migrater]
8232 def DeclareLocks(self, level):
8233 _DeclareLocksForMigration(self, level)
8235 def BuildHooksEnv(self):
8238 This runs on master, primary and secondary nodes of the instance.
8241 instance = self._migrater.instance
8242 source_node = instance.primary_node
8243 target_node = self.op.target_node
8245 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
8246 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8247 "OLD_PRIMARY": source_node,
8248 "NEW_PRIMARY": target_node,
8251 if instance.disk_template in constants.DTS_INT_MIRROR:
8252 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
8253 env["NEW_SECONDARY"] = source_node
8255 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
8257 env.update(_BuildInstanceHookEnvByObject(self, instance))
8261 def BuildHooksNodes(self):
8262 """Build hooks nodes.
8265 instance = self._migrater.instance
8266 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
8267 return (nl, nl + [instance.primary_node])
8270 class LUInstanceMigrate(LogicalUnit):
8271 """Migrate an instance.
8273 This is migration without shutting the instance down, as opposed to
8274 failover, which is done with a shutdown.
8277 HPATH = "instance-migrate"
8278 HTYPE = constants.HTYPE_INSTANCE
8281 def ExpandNames(self):
8282 self._ExpandAndLockInstance()
8283 _ExpandNamesForMigration(self)
8286 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
8287 False, self.op.allow_failover, False,
8288 self.op.allow_runtime_changes,
8289 constants.DEFAULT_SHUTDOWN_TIMEOUT,
8290 self.op.ignore_ipolicy)
8292 self.tasklets = [self._migrater]
8294 def DeclareLocks(self, level):
8295 _DeclareLocksForMigration(self, level)
8297 def BuildHooksEnv(self):
8300 This runs on master, primary and secondary nodes of the instance.
8303 instance = self._migrater.instance
8304 source_node = instance.primary_node
8305 target_node = self.op.target_node
8306 env = _BuildInstanceHookEnvByObject(self, instance)
8308 "MIGRATE_LIVE": self._migrater.live,
8309 "MIGRATE_CLEANUP": self.op.cleanup,
8310 "OLD_PRIMARY": source_node,
8311 "NEW_PRIMARY": target_node,
8312 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8315 if instance.disk_template in constants.DTS_INT_MIRROR:
8316 env["OLD_SECONDARY"] = target_node
8317 env["NEW_SECONDARY"] = source_node
8319 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
8323 def BuildHooksNodes(self):
8324 """Build hooks nodes.
8327 instance = self._migrater.instance
8328 snodes = list(instance.secondary_nodes)
8329 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
8333 class LUInstanceMove(LogicalUnit):
8334 """Move an instance by data-copying.
8337 HPATH = "instance-move"
8338 HTYPE = constants.HTYPE_INSTANCE
8341 def ExpandNames(self):
8342 self._ExpandAndLockInstance()
8343 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8344 self.op.target_node = target_node
8345 self.needed_locks[locking.LEVEL_NODE] = [target_node]
8346 self.needed_locks[locking.LEVEL_NODE_RES] = []
8347 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8349 def DeclareLocks(self, level):
8350 if level == locking.LEVEL_NODE:
8351 self._LockInstancesNodes(primary_only=True)
8352 elif level == locking.LEVEL_NODE_RES:
8354 self.needed_locks[locking.LEVEL_NODE_RES] = \
8355 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8357 def BuildHooksEnv(self):
8360 This runs on master, primary and secondary nodes of the instance.
8364 "TARGET_NODE": self.op.target_node,
8365 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
8367 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8370 def BuildHooksNodes(self):
8371 """Build hooks nodes.
8375 self.cfg.GetMasterNode(),
8376 self.instance.primary_node,
8377 self.op.target_node,
8381 def CheckPrereq(self):
8382 """Check prerequisites.
8384 This checks that the instance is in the cluster.
8387 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8388 assert self.instance is not None, \
8389 "Cannot retrieve locked instance %s" % self.op.instance_name
8391 if instance.disk_template not in constants.DTS_COPYABLE:
8392 raise errors.OpPrereqError("Disk template %s not suitable for copying" %
8393 instance.disk_template, errors.ECODE_STATE)
8395 node = self.cfg.GetNodeInfo(self.op.target_node)
8396 assert node is not None, \
8397 "Cannot retrieve locked node %s" % self.op.target_node
8399 self.target_node = target_node = node.name
8401 if target_node == instance.primary_node:
8402 raise errors.OpPrereqError("Instance %s is already on the node %s" %
8403 (instance.name, target_node),
8406 bep = self.cfg.GetClusterInfo().FillBE(instance)
8408 for idx, dsk in enumerate(instance.disks):
8409 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
8410 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
8411 " cannot copy" % idx, errors.ECODE_STATE)
8413 _CheckNodeOnline(self, target_node)
8414 _CheckNodeNotDrained(self, target_node)
8415 _CheckNodeVmCapable(self, target_node)
8416 cluster = self.cfg.GetClusterInfo()
8417 group_info = self.cfg.GetNodeGroup(node.group)
8418 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
8419 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
8420 ignore=self.op.ignore_ipolicy)
8422 if instance.admin_state == constants.ADMINST_UP:
8423 # check memory requirements on the secondary node
8424 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8425 instance.name, bep[constants.BE_MAXMEM],
8426 instance.hypervisor)
8428 self.LogInfo("Not checking memory on the secondary node as"
8429 " instance will not be started")
8431 # check bridge existence
8432 _CheckInstanceBridgesExist(self, instance, node=target_node)
8434 def Exec(self, feedback_fn):
8435 """Move an instance.
8437 The move is done by shutting it down on its present node, copying
8438 the data over (slow) and starting it on the new node.
8441 instance = self.instance
8443 source_node = instance.primary_node
8444 target_node = self.target_node
8446 self.LogInfo("Shutting down instance %s on source node %s",
8447 instance.name, source_node)
8449 assert (self.owned_locks(locking.LEVEL_NODE) ==
8450 self.owned_locks(locking.LEVEL_NODE_RES))
8452 result = self.rpc.call_instance_shutdown(source_node, instance,
8453 self.op.shutdown_timeout)
8454 msg = result.fail_msg
8456 if self.op.ignore_consistency:
8457 self.LogWarning("Could not shutdown instance %s on node %s."
8458 " Proceeding anyway. Please make sure node"
8459 " %s is down. Error details: %s",
8460 instance.name, source_node, source_node, msg)
8462 raise errors.OpExecError("Could not shutdown instance %s on"
8464 (instance.name, source_node, msg))
8466 # create the target disks
8468 _CreateDisks(self, instance, target_node=target_node)
8469 except errors.OpExecError:
8470 self.LogWarning("Device creation failed")
8471 self.cfg.ReleaseDRBDMinors(instance.name)
8474 cluster_name = self.cfg.GetClusterInfo().cluster_name
8477 # activate, get path, copy the data over
8478 for idx, disk in enumerate(instance.disks):
8479 self.LogInfo("Copying data for disk %d", idx)
8480 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8481 instance.name, True, idx)
8483 self.LogWarning("Can't assemble newly created disk %d: %s",
8484 idx, result.fail_msg)
8485 errs.append(result.fail_msg)
8487 dev_path = result.payload
8488 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8489 target_node, dev_path,
8492 self.LogWarning("Can't copy data over for disk %d: %s",
8493 idx, result.fail_msg)
8494 errs.append(result.fail_msg)
8498 self.LogWarning("Some disks failed to copy, aborting")
8500 _RemoveDisks(self, instance, target_node=target_node)
8502 self.cfg.ReleaseDRBDMinors(instance.name)
8503 raise errors.OpExecError("Errors during disk copy: %s" %
8506 instance.primary_node = target_node
8507 self.cfg.Update(instance, feedback_fn)
8509 self.LogInfo("Removing the disks on the original node")
8510 _RemoveDisks(self, instance, target_node=source_node)
8512 # Only start the instance if it's marked as up
8513 if instance.admin_state == constants.ADMINST_UP:
8514 self.LogInfo("Starting instance %s on node %s",
8515 instance.name, target_node)
8517 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8518 ignore_secondaries=True)
8520 _ShutdownInstanceDisks(self, instance)
8521 raise errors.OpExecError("Can't activate the instance's disks")
8523 result = self.rpc.call_instance_start(target_node,
8524 (instance, None, None), False)
8525 msg = result.fail_msg
8527 _ShutdownInstanceDisks(self, instance)
8528 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8529 (instance.name, target_node, msg))
8532 class LUNodeMigrate(LogicalUnit):
8533 """Migrate all instances from a node.
8536 HPATH = "node-migrate"
8537 HTYPE = constants.HTYPE_NODE
8540 def CheckArguments(self):
8543 def ExpandNames(self):
8544 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8546 self.share_locks = _ShareAll()
8547 self.needed_locks = {
8548 locking.LEVEL_NODE: [self.op.node_name],
8551 def BuildHooksEnv(self):
8554 This runs on the master, the primary and all the secondaries.
8558 "NODE_NAME": self.op.node_name,
8559 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8562 def BuildHooksNodes(self):
8563 """Build hooks nodes.
8566 nl = [self.cfg.GetMasterNode()]
8569 def CheckPrereq(self):
8572 def Exec(self, feedback_fn):
8573 # Prepare jobs for migration instances
8574 allow_runtime_changes = self.op.allow_runtime_changes
8576 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8579 iallocator=self.op.iallocator,
8580 target_node=self.op.target_node,
8581 allow_runtime_changes=allow_runtime_changes,
8582 ignore_ipolicy=self.op.ignore_ipolicy)]
8583 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
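# Build one single-opcode OpInstanceMigrate job per instance that has
# this node as its primary; the migrations then run as separate jobs
# rather than inside this LU.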
8585 # TODO: Run iallocator in this opcode and pass correct placement options to
8586 # OpInstanceMigrate. Since other jobs can modify the cluster between
8587 # running the iallocator and the actual migration, a good consistency model
8588 # will have to be found.
8590 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8591 frozenset([self.op.node_name]))
8593 return ResultWithJobs(jobs)
8596 class TLMigrateInstance(Tasklet):
8597 """Tasklet class for instance migration.
8600 @ivar live: whether the migration will be done live or non-live;
8601 this variable is initialized only after CheckPrereq has run
8602 @type cleanup: boolean
8603 @ivar cleanup: Whether we are cleaning up after a failed migration
8604 @type iallocator: string
8605 @ivar iallocator: The iallocator used to determine target_node
8606 @type target_node: string
8607 @ivar target_node: If given, the target_node to reallocate the instance to
8608 @type failover: boolean
8609 @ivar failover: Whether operation results in failover or migration
8610 @type fallback: boolean
8611 @ivar fallback: Whether fallback to failover is allowed if migration not
8613 @type ignore_consistency: boolean
8614 @ivar ignore_consistency: Whether we should ignore consistency between source
8616 @type shutdown_timeout: int
8617 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8618 @type ignore_ipolicy: bool
8619 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8624 _MIGRATION_POLL_INTERVAL = 1 # seconds
8625 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8627 def __init__(self, lu, instance_name, cleanup, failover, fallback,
8628 ignore_consistency, allow_runtime_changes, shutdown_timeout,
8630 """Initializes this class.
8633 Tasklet.__init__(self, lu)
8636 self.instance_name = instance_name
8637 self.cleanup = cleanup
8638 self.live = False # will be overridden later
8639 self.failover = failover
8640 self.fallback = fallback
8641 self.ignore_consistency = ignore_consistency
8642 self.shutdown_timeout = shutdown_timeout
8643 self.ignore_ipolicy = ignore_ipolicy
8644 self.allow_runtime_changes = allow_runtime_changes
8646 def CheckPrereq(self):
8647 """Check prerequisites.
8649 This checks that the instance is in the cluster.
8652 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8653 instance = self.cfg.GetInstanceInfo(instance_name)
8654 assert instance is not None
8655 self.instance = instance
8656 cluster = self.cfg.GetClusterInfo()
8658 if (not self.cleanup and
8659 not instance.admin_state == constants.ADMINST_UP and
8660 not self.failover and self.fallback):
8661 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8662 " switching to failover")
8663 self.failover = True
8665 if instance.disk_template not in constants.DTS_MIRRORED:
8670 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8671 " %s" % (instance.disk_template, text),
8674 if instance.disk_template in constants.DTS_EXT_MIRROR:
8675 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8677 if self.lu.op.iallocator:
8678 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8679 self._RunAllocator()
8681 # We set self.target_node as it is required by
8683 self.target_node = self.lu.op.target_node
8685 # Check that the target node is correct in terms of instance policy
8686 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8687 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8688 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8690 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8691 ignore=self.ignore_ipolicy)
8693 # self.target_node is already populated, either directly or by the
8695 target_node = self.target_node
8696 if self.target_node == instance.primary_node:
8697 raise errors.OpPrereqError("Cannot migrate instance %s"
8698 " to its primary (%s)" %
8699 (instance.name, instance.primary_node),
8702 if len(self.lu.tasklets) == 1:
8703 # It is safe to release locks only when we're the only tasklet
8705 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8706 keep=[instance.primary_node, self.target_node])
8707 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
8710 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
8712 secondary_nodes = instance.secondary_nodes
8713 if not secondary_nodes:
8714 raise errors.ConfigurationError("No secondary node but using"
8715 " %s disk template" %
8716 instance.disk_template)
8717 target_node = secondary_nodes[0]
8718 if self.lu.op.iallocator or (self.lu.op.target_node and
8719 self.lu.op.target_node != target_node):
8721 text = "failed over"
8724 raise errors.OpPrereqError("Instances with disk template %s cannot"
8725 " be %s to arbitrary nodes"
8726 " (neither an iallocator nor a target"
8727 " node can be passed)" %
8728 (instance.disk_template, text),
8730 nodeinfo = self.cfg.GetNodeInfo(target_node)
8731 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8732 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8734 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
8735 ignore=self.ignore_ipolicy)
8737 i_be = cluster.FillBE(instance)
8739 # check memory requirements on the secondary node
8740 if (not self.cleanup and
8741 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8742 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8743 "migrating instance %s" %
8745 i_be[constants.BE_MINMEM],
8746 instance.hypervisor)
8748 self.lu.LogInfo("Not checking memory on the secondary node as"
8749 " instance will not be started")
8751 # check if failover must be forced instead of migration
8752 if (not self.cleanup and not self.failover and
8753 i_be[constants.BE_ALWAYS_FAILOVER]):
8754 self.lu.LogInfo("Instance configured to always failover; fallback"
8756 self.failover = True
8758 # check bridge existence
8759 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8761 if not self.cleanup:
8762 _CheckNodeNotDrained(self.lu, target_node)
8763 if not self.failover:
8764 result = self.rpc.call_instance_migratable(instance.primary_node,
8766 if result.fail_msg and self.fallback:
8767 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8769 self.failover = True
8771 result.Raise("Can't migrate, please use failover",
8772 prereq=True, ecode=errors.ECODE_STATE)
8774 assert not (self.failover and self.cleanup)
8776 if not self.failover:
8777 if self.lu.op.live is not None and self.lu.op.mode is not None:
8778 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8779 " parameters are accepted",
8781 if self.lu.op.live is not None:
8783 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8785 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8786 # reset the 'live' parameter to None so that repeated
8787 # invocations of CheckPrereq do not raise an exception
8788 self.lu.op.live = None
8789 elif self.lu.op.mode is None:
8790 # read the default value from the hypervisor
8791 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8792 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8794 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8796 # Failover is never live
8799 if not (self.failover or self.cleanup):
8800 remote_info = self.rpc.call_instance_info(instance.primary_node,
8802 instance.hypervisor)
8803 remote_info.Raise("Error checking instance on node %s" %
8804 instance.primary_node)
8805 instance_running = bool(remote_info.payload)
8806 if instance_running:
8807 self.current_mem = int(remote_info.payload["memory"])
8809 def _RunAllocator(self):
8810 """Run the allocator based on input opcode.
8813 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
8815 # FIXME: add a self.ignore_ipolicy option
8816 req = iallocator.IAReqRelocate(name=self.instance_name,
8817 relocate_from=[self.instance.primary_node])
8818 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8820 ial.Run(self.lu.op.iallocator)
8823 raise errors.OpPrereqError("Can't compute nodes using"
8824 " iallocator '%s': %s" %
8825 (self.lu.op.iallocator, ial.info),
8827 self.target_node = ial.result[0]
8828 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8829 self.instance_name, self.lu.op.iallocator,
8830 utils.CommaJoin(ial.result))
8832 def _WaitUntilSync(self):
8833 """Poll with custom rpc for disk sync.
8835 This uses our own step-based rpc call.
8838 self.feedback_fn("* wait until resync is done")
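# Each node returns a (done, sync_percent) payload; keep polling until
# every node reports done, showing the minimum progress across nodes in
# the meantime.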
8842 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8844 (self.instance.disks,
8847 for node, nres in result.items():
8848 nres.Raise("Cannot resync disks on node %s" % node)
8849 node_done, node_percent = nres.payload
8850 all_done = all_done and node_done
8851 if node_percent is not None:
8852 min_percent = min(min_percent, node_percent)
8854 if min_percent < 100:
8855 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8858 def _EnsureSecondary(self, node):
8859 """Demote a node to secondary.
8862 self.feedback_fn("* switching node %s to secondary mode" % node)
8864 for dev in self.instance.disks:
8865 self.cfg.SetDiskID(dev, node)
8867 result = self.rpc.call_blockdev_close(node, self.instance.name,
8868 self.instance.disks)
8869 result.Raise("Cannot change disk to secondary on node %s" % node)
8871 def _GoStandalone(self):
8872 """Disconnect from the network.
8875 self.feedback_fn("* changing into standalone mode")
8876 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8877 self.instance.disks)
8878 for node, nres in result.items():
8879 nres.Raise("Cannot disconnect disks node %s" % node)
8881 def _GoReconnect(self, multimaster):
8882 """Reconnect to the network.
8888 msg = "single-master"
8889 self.feedback_fn("* changing disks into %s mode" % msg)
8890 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8891 (self.instance.disks, self.instance),
8892 self.instance.name, multimaster)
8893 for node, nres in result.items():
8894 nres.Raise("Cannot change disks config on node %s" % node)
8896 def _ExecCleanup(self):
8897 """Try to cleanup after a failed migration.
8899 The cleanup is done by:
8900 - check that the instance is running only on one node
8901 (and update the config if needed)
8902 - change disks on its secondary node to secondary
8903 - wait until disks are fully synchronized
8904 - disconnect from the network
8905 - change disks into single-master mode
8906 - wait again until disks are fully synchronized
8909 instance = self.instance
8910 target_node = self.target_node
8911 source_node = self.source_node
8913 # check running on only one node
8914 self.feedback_fn("* checking where the instance actually runs"
8915 " (if this hangs, the hypervisor might be in"
8917 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8918 for node, result in ins_l.items():
8919 result.Raise("Can't contact node %s" % node)
8921 runningon_source = instance.name in ins_l[source_node].payload
8922 runningon_target = instance.name in ins_l[target_node].payload
8924 if runningon_source and runningon_target:
8925 raise errors.OpExecError("Instance seems to be running on two nodes,"
8926 " or the hypervisor is confused; you will have"
8927 " to ensure manually that it runs only on one"
8928 " and restart this operation")
8930 if not (runningon_source or runningon_target):
8931 raise errors.OpExecError("Instance does not seem to be running at all;"
8932 " in this case it's safer to repair by"
8933 " running 'gnt-instance stop' to ensure disk"
8934 " shutdown, and then restarting it")
8936 if runningon_target:
8937 # the migration has actually succeeded, we need to update the config
8938 self.feedback_fn("* instance running on secondary node (%s),"
8939 " updating config" % target_node)
8940 instance.primary_node = target_node
8941 self.cfg.Update(instance, self.feedback_fn)
8942 demoted_node = source_node
8944 self.feedback_fn("* instance confirmed to be running on its"
8945 " primary node (%s)" % source_node)
8946 demoted_node = target_node
8948 if instance.disk_template in constants.DTS_INT_MIRROR:
8949 self._EnsureSecondary(demoted_node)
8951 self._WaitUntilSync()
8952 except errors.OpExecError:
8953 # we ignore here errors, since if the device is standalone, it
8954 # won't be able to sync
8956 self._GoStandalone()
8957 self._GoReconnect(False)
8958 self._WaitUntilSync()
8960 self.feedback_fn("* done")
8962 def _RevertDiskStatus(self):
8963 """Try to revert the disk status after a failed migration.
8966 target_node = self.target_node
8967 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8971 self._EnsureSecondary(target_node)
8972 self._GoStandalone()
8973 self._GoReconnect(False)
8974 self._WaitUntilSync()
8975 except errors.OpExecError, err:
8976 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8977 " please try to recover the instance manually;"
8978 " error '%s'" % str(err))
8980 def _AbortMigration(self):
8981 """Call the hypervisor code to abort a started migration.
8984 instance = self.instance
8985 target_node = self.target_node
8986 source_node = self.source_node
8987 migration_info = self.migration_info
8989 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8993 abort_msg = abort_result.fail_msg
8995 logging.error("Aborting migration failed on target node %s: %s",
8996 target_node, abort_msg)
8997 # Don't raise an exception here, as we still have to try to revert the
8998 # disk status, even if this step failed.
9000 abort_result = self.rpc.call_instance_finalize_migration_src(
9001 source_node, instance, False, self.live)
9002 abort_msg = abort_result.fail_msg
9004 logging.error("Aborting migration failed on source node %s: %s",
9005 source_node, abort_msg)
9007 def _ExecMigration(self):
9008 """Migrate an instance.
9010 The migrate is done by:
9011 - change the disks into dual-master mode
9012 - wait until disks are fully synchronized again
9013 - migrate the instance
9014 - change disks on the new secondary node (the old primary) to secondary
9015 - wait until disks are fully synchronized
9016 - change disks into single-master mode
9019 instance = self.instance
9020 target_node = self.target_node
9021 source_node = self.source_node
9023 # Check for hypervisor version mismatch and warn the user.
9024 nodeinfo = self.rpc.call_node_info([source_node, target_node],
9025 None, [self.instance.hypervisor], False)
9026 for ninfo in nodeinfo.values():
9027 ninfo.Raise("Unable to retrieve node information from node '%s'" %
9029 (_, _, (src_info, )) = nodeinfo[source_node].payload
9030 (_, _, (dst_info, )) = nodeinfo[target_node].payload
9032 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
9033 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
9034 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
9035 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
9036 if src_version != dst_version:
9037 self.feedback_fn("* warning: hypervisor version mismatch between"
9038 " source (%s) and target (%s) node" %
9039 (src_version, dst_version))
9041 self.feedback_fn("* checking disk consistency between source and target")
9042 for (idx, dev) in enumerate(instance.disks):
9043 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
9044 raise errors.OpExecError("Disk %s is degraded or not fully"
9045 " synchronized on target node,"
9046 " aborting migration" % idx)
9048 if self.current_mem > self.tgt_free_mem:
9049 if not self.allow_runtime_changes:
9050 raise errors.OpExecError("Memory ballooning not allowed and not enough"
9051 " free memory to fit instance %s on target"
9052 " node %s (have %dMB, need %dMB)" %
9053 (instance.name, target_node,
9054 self.tgt_free_mem, self.current_mem))
9055 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
9056 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
9059 rpcres.Raise("Cannot modify instance runtime memory")
9061 # First get the migration information from the remote node
9062 result = self.rpc.call_migration_info(source_node, instance)
9063 msg = result.fail_msg
9065 log_err = ("Failed fetching source migration information from %s: %s" %
9067 logging.error(log_err)
9068 raise errors.OpExecError(log_err)
9070 self.migration_info = migration_info = result.payload
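# For internally mirrored (non-external) disk templates the disks are put
# into dual-primary mode for the live migration: demote the target to
# secondary, disconnect the DRBD network, reconnect in multimaster mode
# and wait for the resync to finish before migrating.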
9072 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9073 # Then switch the disks to master/master mode
9074 self._EnsureSecondary(target_node)
9075 self._GoStandalone()
9076 self._GoReconnect(True)
9077 self._WaitUntilSync()
9079 self.feedback_fn("* preparing %s to accept the instance" % target_node)
9080 result = self.rpc.call_accept_instance(target_node,
9083 self.nodes_ip[target_node])
9085 msg = result.fail_msg
9087 logging.error("Instance pre-migration failed, trying to revert"
9088 " disk status: %s", msg)
9089 self.feedback_fn("Pre-migration failed, aborting")
9090 self._AbortMigration()
9091 self._RevertDiskStatus()
9092 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
9093 (instance.name, msg))
9095 self.feedback_fn("* migrating instance to %s" % target_node)
9096 result = self.rpc.call_instance_migrate(source_node, instance,
9097 self.nodes_ip[target_node],
9099 msg = result.fail_msg
9101 logging.error("Instance migration failed, trying to revert"
9102 " disk status: %s", msg)
9103 self.feedback_fn("Migration failed, aborting")
9104 self._AbortMigration()
9105 self._RevertDiskStatus()
9106 raise errors.OpExecError("Could not migrate instance %s: %s" %
9107 (instance.name, msg))
9109 self.feedback_fn("* starting memory transfer")
9110 last_feedback = time.time()
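# Poll the migration status on the source node every
# _MIGRATION_POLL_INTERVAL seconds: abort and revert the disks on
# failure, print a progress line (transferred_ram vs. total_ram) at most
# every _MIGRATION_FEEDBACK_INTERVAL seconds, and leave the loop once the
# hypervisor no longer reports the migration as active.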
9112 result = self.rpc.call_instance_get_migration_status(source_node,
9114 msg = result.fail_msg
9115 ms = result.payload # MigrationStatus instance
9116 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
9117 logging.error("Instance migration failed, trying to revert"
9118 " disk status: %s", msg)
9119 self.feedback_fn("Migration failed, aborting")
9120 self._AbortMigration()
9121 self._RevertDiskStatus()
9123 msg = "hypervisor returned failure"
9124 raise errors.OpExecError("Could not migrate instance %s: %s" %
9125 (instance.name, msg))
9127 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
9128 self.feedback_fn("* memory transfer complete")
9131 if (utils.TimeoutExpired(last_feedback,
9132 self._MIGRATION_FEEDBACK_INTERVAL) and
9133 ms.transferred_ram is not None):
9134 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
9135 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
9136 last_feedback = time.time()
9138 time.sleep(self._MIGRATION_POLL_INTERVAL)
9140 result = self.rpc.call_instance_finalize_migration_src(source_node,
9144 msg = result.fail_msg
9146 logging.error("Instance migration succeeded, but finalization failed"
9147 " on the source node: %s", msg)
9148 raise errors.OpExecError("Could not finalize instance migration: %s" %
9151 instance.primary_node = target_node
9153 # distribute new instance config to the other nodes
9154 self.cfg.Update(instance, self.feedback_fn)
9156 result = self.rpc.call_instance_finalize_migration_dst(target_node,
9160 msg = result.fail_msg
9162 logging.error("Instance migration succeeded, but finalization failed"
9163 " on the target node: %s", msg)
9164 raise errors.OpExecError("Could not finalize instance migration: %s" %
9167 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
9168 self._EnsureSecondary(source_node)
9169 self._WaitUntilSync()
9170 self._GoStandalone()
9171 self._GoReconnect(False)
9172 self._WaitUntilSync()
9174 # If the instance's disk template is `rbd' or `ext' and there was a
9175 # successful migration, unmap the device from the source node.
9176 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
9177 disks = _ExpandCheckDisks(instance, instance.disks)
9178 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
9180 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
9181 msg = result.fail_msg
9183 logging.error("Migration was successful, but couldn't unmap the"
9184 " block device %s on source node %s: %s",
9185 disk.iv_name, source_node, msg)
9186 logging.error("You need to unmap the device %s manually on %s",
9187 disk.iv_name, source_node)
9189 self.feedback_fn("* done")
9191 def _ExecFailover(self):
9192 """Failover an instance.
9194 The failover is done by shutting it down on its present node and
9195 starting it on the secondary.
9198 instance = self.instance
9199 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
9201 source_node = instance.primary_node
9202 target_node = self.target_node
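# Failover sequence: check disk consistency on the target (unless the
# primary is offline or consistency checks are ignored), shut the
# instance down on the source node, deactivate its disks, switch
# primary_node in the configuration, then reactivate the disks and start
# the instance on the target if it was marked as up.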
9204 if instance.admin_state == constants.ADMINST_UP:
9205 self.feedback_fn("* checking disk consistency between source and target")
9206 for (idx, dev) in enumerate(instance.disks):
9207 # for drbd, these are drbd over lvm
9208 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
9210 if primary_node.offline:
9211 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
9213 (primary_node.name, idx, target_node))
9214 elif not self.ignore_consistency:
9215 raise errors.OpExecError("Disk %s is degraded on target node,"
9216 " aborting failover" % idx)
9218 self.feedback_fn("* not checking disk consistency as instance is not"
9221 self.feedback_fn("* shutting down instance on source node")
9222 logging.info("Shutting down instance %s on node %s",
9223 instance.name, source_node)
9225 result = self.rpc.call_instance_shutdown(source_node, instance,
9226 self.shutdown_timeout)
9227 msg = result.fail_msg
9229 if self.ignore_consistency or primary_node.offline:
9230 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
9231 " proceeding anyway; please make sure node"
9232 " %s is down; error details: %s",
9233 instance.name, source_node, source_node, msg)
9235 raise errors.OpExecError("Could not shutdown instance %s on"
9237 (instance.name, source_node, msg))
9239 self.feedback_fn("* deactivating the instance's disks on source node")
9240 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
9241 raise errors.OpExecError("Can't shut down the instance's disks")
9243 instance.primary_node = target_node
9244 # distribute new instance config to the other nodes
9245 self.cfg.Update(instance, self.feedback_fn)
9247 # Only start the instance if it's marked as up
9248 if instance.admin_state == constants.ADMINST_UP:
9249 self.feedback_fn("* activating the instance's disks on target node %s" %
9251 logging.info("Starting instance %s on node %s",
9252 instance.name, target_node)
9254 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
9255 ignore_secondaries=True)
9257 _ShutdownInstanceDisks(self.lu, instance)
9258 raise errors.OpExecError("Can't activate the instance's disks")
9260 self.feedback_fn("* starting the instance on the target node %s" %
9262 result = self.rpc.call_instance_start(target_node, (instance, None, None), False)
9264 msg = result.fail_msg
9266 _ShutdownInstanceDisks(self.lu, instance)
9267 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
9268 (instance.name, target_node, msg))
9270 def Exec(self, feedback_fn):
9271 """Perform the migration.
9274 self.feedback_fn = feedback_fn
9275 self.source_node = self.instance.primary_node
9277 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
9278 if self.instance.disk_template in constants.DTS_INT_MIRROR:
9279 self.target_node = self.instance.secondary_nodes[0]
9280 # Otherwise self.target_node has been populated either
9281 # directly, or through an iallocator.
9283 self.all_nodes = [self.source_node, self.target_node]
9284 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
9285 in self.cfg.GetMultiNodeInfo(self.all_nodes))
9288 feedback_fn("Failover instance %s" % self.instance.name)
9289 self._ExecFailover()
9291 feedback_fn("Migrating instance %s" % self.instance.name)
9294 return self._ExecCleanup()
9296 return self._ExecMigration()
9299 def _CreateBlockDev(lu, node, instance, device, force_create, info,
9301 """Wrapper around L{_CreateBlockDevInner}.
9303 This method annotates the root device first.
9306 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
9307 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
9308 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
9309 force_open, excl_stor)
9312 def _CreateBlockDevInner(lu, node, instance, device, force_create,
9313 info, force_open, excl_stor):
9314 """Create a tree of block devices on a given node.
9316 If this device type has to be created on secondaries, create it and
9319 If not, just recurse to children keeping the same 'force' value.
9321 @attention: The device has to be annotated already.
9323 @param lu: the lu on whose behalf we execute
9324 @param node: the node on which to create the device
9325 @type instance: L{objects.Instance}
9326 @param instance: the instance which owns the device
9327 @type device: L{objects.Disk}
9328 @param device: the device to create
9329 @type force_create: boolean
9330 @param force_create: whether to force creation of this device; this
9331 will be changed to True whenever we find a device whose
9332 CreateOnSecondary() method returns True
9333 @param info: the extra 'metadata' we should attach to the device
9334 (this will be represented as a LVM tag)
9335 @type force_open: boolean
9336 @param force_open: this parameter will be passed to the
9337 L{backend.BlockdevCreate} function where it specifies
9338 whether we run on primary or not, and it affects both
9339 the child assembly and the device's own Open() execution
9340 @type excl_stor: boolean
9341 @param excl_stor: Whether exclusive_storage is active for the node
9344 if device.CreateOnSecondary():
9348 for child in device.children:
9349 _CreateBlockDevInner(lu, node, instance, child, force_create,
9350 info, force_open, excl_stor)
9352 if not force_create:
9355 _CreateSingleBlockDev(lu, node, instance, device, info, force_open, excl_stor)
9359 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open, excl_stor):
9361 """Create a single block device on a given node.
9363 This will not recurse over children of the device, so they must be
9366 @param lu: the lu on whose behalf we execute
9367 @param node: the node on which to create the device
9368 @type instance: L{objects.Instance}
9369 @param instance: the instance which owns the device
9370 @type device: L{objects.Disk}
9371 @param device: the device to create
9372 @param info: the extra 'metadata' we should attach to the device
9373 (this will be represented as a LVM tag)
9374 @type force_open: boolean
9375 @param force_open: this parameter will be passed to the
9376 L{backend.BlockdevCreate} function where it specifies
9377 whether we run on primary or not, and it affects both
9378 the child assembly and the device's own Open() execution
9379 @type excl_stor: boolean
9380 @param excl_stor: Whether exclusive_storage is active for the node
9383 lu.cfg.SetDiskID(device, node)
9384 result = lu.rpc.call_blockdev_create(node, device, device.size,
9385 instance.name, force_open, info, excl_stor)
9387 result.Raise("Can't create block device %s on"
9388 " node %s for instance %s" % (device, node, instance.name))
9389 if device.physical_id is None:
9390 device.physical_id = result.payload
9393 def _GenerateUniqueNames(lu, exts):
9394 """Generate a suitable LV name.
9396 This will generate a logical volume name for the given instance.
9401 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
9402 results.append("%s%s" % (new_id, val))
9406 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9407 iv_name, p_minor, s_minor):
9408 """Generate a drbd8 device complete with its children.
9411 assert len(vgnames) == len(names) == 2
9412 port = lu.cfg.AllocatePort()
9413 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9415 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9416 logical_id=(vgnames[0], names[0]),
9418 dev_meta = objects.Disk(dev_type=constants.LD_LV,
9419 size=constants.DRBD_META_SIZE,
9420 logical_id=(vgnames[1], names[1]),
9422 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
9423 logical_id=(primary, secondary, port, p_minor, s_minor, shared_secret),
9426 children=[dev_data, dev_meta],
9427 iv_name=iv_name, params={})
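# Note (descriptive): the DRBD8 device built above has exactly two LV children,
# the data LV of the requested size and the metadata LV of
# constants.DRBD_META_SIZE; the LV names come from the caller, which derives
# them as "<prefix>_data" and "<prefix>_meta" (see _GenerateDiskTemplate below).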
9431 _DISK_TEMPLATE_NAME_PREFIX = {
9432 constants.DT_PLAIN: "",
9433 constants.DT_RBD: ".rbd",
9434 constants.DT_EXT: ".ext",
9438 _DISK_TEMPLATE_DEVICE_TYPE = {
9439 constants.DT_PLAIN: constants.LD_LV,
9440 constants.DT_FILE: constants.LD_FILE,
9441 constants.DT_SHARED_FILE: constants.LD_FILE,
9442 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9443 constants.DT_RBD: constants.LD_RBD,
9444 constants.DT_EXT: constants.LD_EXT,
9448 def _GenerateDiskTemplate(
9449 lu, template_name, instance_name, primary_node, secondary_nodes,
9450 disk_info, file_storage_dir, file_driver, base_index,
9451 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9452 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9453 """Generate the entire disk layout for a given template type.
9456 vgname = lu.cfg.GetVGName()
9457 disk_count = len(disk_info)
9460 if template_name == constants.DT_DISKLESS:
9462 elif template_name == constants.DT_DRBD8:
9463 if len(secondary_nodes) != 1:
9464 raise errors.ProgrammerError("Wrong template configuration")
9465 remote_node = secondary_nodes[0]
9466 minors = lu.cfg.AllocateDRBDMinor(
9467 [primary_node, remote_node] * len(disk_info), instance_name)
9469 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name, full_disk_params)
9471 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9474 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9475 for i in range(disk_count)]):
9476 names.append(lv_prefix + "_data")
9477 names.append(lv_prefix + "_meta")
9478 for idx, disk in enumerate(disk_info):
9479 disk_index = idx + base_index
9480 data_vg = disk.get(constants.IDISK_VG, vgname)
9481 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9482 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9483 disk[constants.IDISK_SIZE], [data_vg, meta_vg],
9485 names[idx * 2:idx * 2 + 2],
9486 "disk/%d" % disk_index,
9487 minors[idx * 2], minors[idx * 2 + 1])
9488 disk_dev.mode = disk[constants.IDISK_MODE]
9489 disks.append(disk_dev)
9492 raise errors.ProgrammerError("Wrong template configuration")
9494 if template_name == constants.DT_FILE:
9496 elif template_name == constants.DT_SHARED_FILE:
9497 _req_shr_file_storage()
9499 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9500 if name_prefix is None:
9503 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9504 (name_prefix, base_index + i)
9505 for i in range(disk_count)])
9507 if template_name == constants.DT_PLAIN:
9509 def logical_id_fn(idx, _, disk):
9510 vg = disk.get(constants.IDISK_VG, vgname)
9511 return (vg, names[idx])
9513 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9515 lambda _, disk_index, disk: (file_driver,
9516 "%s/disk%d" % (file_storage_dir,
9518 elif template_name == constants.DT_BLOCK:
9520 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9521 disk[constants.IDISK_ADOPT])
9522 elif template_name == constants.DT_RBD:
9523 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9524 elif template_name == constants.DT_EXT:
9525 def logical_id_fn(idx, _, disk):
9526 provider = disk.get(constants.IDISK_PROVIDER, None)
9527 if provider is None:
9528 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9529 " not found", constants.DT_EXT,
9530 constants.IDISK_PROVIDER)
9531 return (provider, names[idx])
9533 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9535 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9537 for idx, disk in enumerate(disk_info):
9539 # Only for the Ext template add disk_info to params
9540 if template_name == constants.DT_EXT:
9541 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9543 if key not in constants.IDISK_PARAMS:
9544 params[key] = disk[key]
9545 disk_index = idx + base_index
9546 size = disk[constants.IDISK_SIZE]
9547 feedback_fn("* disk %s, size %s" %
9548 (disk_index, utils.FormatUnit(size, "h")))
9549 disks.append(objects.Disk(dev_type=dev_type, size=size,
9550 logical_id=logical_id_fn(idx, disk_index, disk),
9551 iv_name="disk/%d" % disk_index,
9552 mode=disk[constants.IDISK_MODE], params=params))
9558 def _GetInstanceInfoText(instance):
9559 """Compute that text that should be added to the disk's metadata.
9562 return "originstname+%s" % instance.name
9565 def _CalcEta(time_taken, written, total_size):
9566 """Calculates the ETA based on size written and total size.
9568 @param time_taken: The time taken so far
9569 @param written: amount written so far
9570 @param total_size: The total size of data to be written
9571 @return: The remaining time in seconds
9574 avg_time = time_taken / float(written)
9575 return (total_size - written) * avg_time
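# Worked example (illustrative values only): if 512 units were written in 30
# seconds out of 2048 in total, the average is 30/512 seconds per unit and the
# estimate is (2048 - 512) * 30 / 512 = 90.0 seconds remaining, i.e.
# _CalcEta(30.0, 512, 2048) == 90.0.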
9578 def _WipeDisks(lu, instance, disks=None):
9579 """Wipes instance disks.
9581 @type lu: L{LogicalUnit}
9582 @param lu: the logical unit on whose behalf we execute
9583 @type instance: L{objects.Instance}
9584 @param instance: the instance whose disks we should wipe
9585 @return: the success of the wipe
9588 node = instance.primary_node
9591 disks = [(idx, disk, 0)
9592 for (idx, disk) in enumerate(instance.disks)]
9594 for (_, device, _) in disks:
9595 lu.cfg.SetDiskID(device, node)
9597 logging.info("Pausing synchronization of disks of instance '%s'",
9599 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9600 (map(compat.snd, disks), instance), True)
9603 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
9605 for idx, success in enumerate(result.payload):
9607 logging.warn("Pausing synchronization of disk %s of instance '%s'"
9608 " failed", idx, instance.name)
9611 for (idx, device, offset) in disks:
9612 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9613 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
9615 wipe_chunk_size = int(min(constants.MAX_WIPE_CHUNK,
9616 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
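# Illustrative example (assuming constants.MIN_WIPE_CHUNK_PERCENT == 10 and
# constants.MAX_WIPE_CHUNK == 1024, both in MiB): a 20480 MiB disk is wiped in
# chunks of min(1024, 2048) == 1024 MiB, while a 4096 MiB disk would use
# chunks of min(1024, 409.6) -> 409 MiB.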
9620 start_time = time.time()
9625 info_text = (" (from %s to %s)" %
9626 (utils.FormatUnit(offset, "h"),
9627 utils.FormatUnit(size, "h")))
9629 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
9631 logging.info("Wiping disk %d for instance %s on node %s using"
9632 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9634 while offset < size:
9635 wipe_size = min(wipe_chunk_size, size - offset)
9637 logging.debug("Wiping disk %d, offset %s, chunk %s",
9638 idx, offset, wipe_size)
9640 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset, wipe_size)
9642 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9643 (idx, offset, wipe_size))
9647 if now - last_output >= 60:
9648 eta = _CalcEta(now - start_time, offset, size)
9649 lu.LogInfo(" - done: %.1f%% ETA: %s",
9650 offset / float(size) * 100, utils.FormatSeconds(eta))
9653 logging.info("Resuming synchronization of disks for instance '%s'",
9656 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9657 (map(compat.snd, disks), instance), False)
9662 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
9663 node, result.fail_msg)
9665 for idx, success in enumerate(result.payload):
9667 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
9668 " failed", idx, instance.name)
9671 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9672 """Create all disks for an instance.
9674 This abstracts away some work from AddInstance.
9676 @type lu: L{LogicalUnit}
9677 @param lu: the logical unit on whose behalf we execute
9678 @type instance: L{objects.Instance}
9679 @param instance: the instance whose disks we should create
9681 @param to_skip: list of indices to skip
9682 @type target_node: string
9683 @param target_node: if passed, overrides the target node for creation
9685 @return: the success of the creation
9688 info = _GetInstanceInfoText(instance)
9689 if target_node is None:
9690 pnode = instance.primary_node
9691 all_nodes = instance.all_nodes
9696 if instance.disk_template in constants.DTS_FILEBASED:
9697 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9698 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9700 result.Raise("Failed to create directory '%s' on"
9701 " node %s" % (file_storage_dir, pnode))
9704 # Note: this needs to be kept in sync with adding of disks in
9705 # LUInstanceSetParams
9706 for idx, device in enumerate(instance.disks):
9707 if to_skip and idx in to_skip:
9709 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9711 for node in all_nodes:
9712 f_create = node == pnode
9714 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9715 disks_created.append((node, device))
9716 except errors.OpExecError:
9717 logging.warning("Creating disk %s for instance '%s' failed",
9719 for (node, disk) in disks_created:
9720 lu.cfg.SetDiskID(disk, node)
9721 result = lu.rpc.call_blockdev_remove(node, disk)
9723 logging.warning("Failed to remove newly-created disk %s on node %s:"
9724 " %s", device, node, result.fail_msg)
9728 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9729 """Remove all disks for an instance.
9731 This abstracts away some work from `AddInstance()` and
9732 `RemoveInstance()`. Note that in case some of the devices couldn't
9733 be removed, the removal will continue with the other ones.
9735 @type lu: L{LogicalUnit}
9736 @param lu: the logical unit on whose behalf we execute
9737 @type instance: L{objects.Instance}
9738 @param instance: the instance whose disks we should remove
9739 @type target_node: string
9740 @param target_node: used to override the node on which to remove the disks
9742 @return: the success of the removal
9745 logging.info("Removing block devices for instance %s", instance.name)
9748 ports_to_release = set()
9749 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9750 for (idx, device) in enumerate(anno_disks):
9752 edata = [(target_node, device)]
9754 edata = device.ComputeNodeTree(instance.primary_node)
9755 for node, disk in edata:
9756 lu.cfg.SetDiskID(disk, node)
9757 result = lu.rpc.call_blockdev_remove(node, disk)
9759 lu.LogWarning("Could not remove disk %s on node %s,"
9760 " continuing anyway: %s", idx, node, result.fail_msg)
9761 if not (result.offline and node != instance.primary_node):
9764 # if this is a DRBD disk, return its port to the pool
9765 if device.dev_type in constants.LDS_DRBD:
9766 ports_to_release.add(device.logical_id[2])
9768 if all_result or ignore_failures:
9769 for port in ports_to_release:
9770 lu.cfg.AddTcpUdpPort(port)
9772 if instance.disk_template in constants.DTS_FILEBASED:
9773 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9777 tgt = instance.primary_node
9778 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9780 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9781 file_storage_dir, instance.primary_node, result.fail_msg)
9787 def _ComputeDiskSizePerVG(disk_template, disks):
9788 """Compute disk size requirements in the volume group
9791 def _compute(disks, payload):
9792 """Universal algorithm.
9797 vgs[disk[constants.IDISK_VG]] = \
9798 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9802 # Required free disk space as a function of disk and swap space
9804 constants.DT_DISKLESS: {},
9805 constants.DT_PLAIN: _compute(disks, 0),
9806 # 128 MB are added for drbd metadata for each disk
9807 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
9808 constants.DT_FILE: {},
9809 constants.DT_SHARED_FILE: {},
9812 if disk_template not in req_size_dict:
9813 raise errors.ProgrammerError("Disk template '%s' size requirement"
9814 " is unknown" % disk_template)
9816 return req_size_dict[disk_template]
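# Illustrative example: for a single 1024 MiB disk in volume group "xenvg" and
# the drbd8 template, the result is {"xenvg": 1024 + constants.DRBD_META_SIZE};
# for the plain template it would be {"xenvg": 1024}.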
9819 def _FilterVmNodes(lu, nodenames):
9820 """Filters out non-vm_capable nodes from a list.
9822 @type lu: L{LogicalUnit}
9823 @param lu: the logical unit for which we check
9824 @type nodenames: list
9825 @param nodenames: the list of nodes on which we should check
9827 @return: the list of vm-capable nodes
9830 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9831 return [name for name in nodenames if name not in vm_nodes]
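# Illustrative example: with nodenames == ["node1", "node2", "node3"] and only
# "node2" flagged as not vm_capable, this returns ["node1", "node3"].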
9834 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9835 """Hypervisor parameter validation.
9837 This function abstracts the hypervisor parameter validation to be
9838 used in both instance create and instance modify.
9840 @type lu: L{LogicalUnit}
9841 @param lu: the logical unit for which we check
9842 @type nodenames: list
9843 @param nodenames: the list of nodes on which we should check
9844 @type hvname: string
9845 @param hvname: the name of the hypervisor we should use
9846 @type hvparams: dict
9847 @param hvparams: the parameters which we need to check
9848 @raise errors.OpPrereqError: if the parameters are not valid
9851 nodenames = _FilterVmNodes(lu, nodenames)
9853 cluster = lu.cfg.GetClusterInfo()
9854 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9856 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9857 for node in nodenames:
9861 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9864 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9865 """OS parameters validation.
9867 @type lu: L{LogicalUnit}
9868 @param lu: the logical unit for which we check
9869 @type required: boolean
9870 @param required: whether the validation should fail if the OS is not
9872 @type nodenames: list
9873 @param nodenames: the list of nodes on which we should check
9874 @type osname: string
9875 @param osname: the name of the OS we should use
9876 @type osparams: dict
9877 @param osparams: the parameters which we need to check
9878 @raise errors.OpPrereqError: if the parameters are not valid
9881 nodenames = _FilterVmNodes(lu, nodenames)
9882 result = lu.rpc.call_os_validate(nodenames, required, osname,
9883 [constants.OS_VALIDATE_PARAMETERS],
9885 for node, nres in result.items():
9886 # we don't check for offline cases since this should be run only
9887 # against the master node and/or an instance's nodes
9888 nres.Raise("OS Parameters validation failed on node %s" % node)
9889 if not nres.payload:
9890 lu.LogInfo("OS %s not found on node %s, validation skipped",
9894 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
9895 """Wrapper around IAReqInstanceAlloc.
9897 @param op: The instance opcode
9898 @param disks: The computed disks
9899 @param nics: The computed nics
9900 @param beparams: The fully filled beparams
9901 @param node_whitelist: List of nodes which should appear as online to the
9902 allocator (unless the node is already marked offline)
9904 @returns: A filled L{iallocator.IAReqInstanceAlloc}
9907 spindle_use = beparams[constants.BE_SPINDLE_USE]
9908 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
9909 disk_template=op.disk_template,
9912 vcpus=beparams[constants.BE_VCPUS],
9913 memory=beparams[constants.BE_MAXMEM],
9914 spindle_use=spindle_use,
9916 nics=[n.ToDict() for n in nics],
9917 hypervisor=op.hypervisor,
9918 node_whitelist=node_whitelist)
9921 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
9922 """Computes the nics.
9924 @param op: The instance opcode
9925 @param cluster: Cluster configuration object
9926 @param default_ip: The default ip to assign
9927 @param cfg: An instance of the configuration object
9928 @param ec_id: Execution context ID
9930 @returns: The list of built-up NIC objects
9935 nic_mode_req = nic.get(constants.INIC_MODE, None)
9936 nic_mode = nic_mode_req
9937 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9938 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9940 net = nic.get(constants.INIC_NETWORK, None)
9941 link = nic.get(constants.NIC_LINK, None)
9942 ip = nic.get(constants.INIC_IP, None)
9944 if net is None or net.lower() == constants.VALUE_NONE:
9947 if nic_mode_req is not None or link is not None:
9948 raise errors.OpPrereqError("If network is given, no mode or link"
9949 " is allowed to be passed",
9952 # ip validity checks
9953 if ip is None or ip.lower() == constants.VALUE_NONE:
9955 elif ip.lower() == constants.VALUE_AUTO:
9956 if not op.name_check:
9957 raise errors.OpPrereqError("IP address set to auto but name checks"
9958 " have been skipped",
9962 # We defer pool operations until later, so that the iallocator has
9963 # filled in the instance's node(s)
9964 if ip.lower() == constants.NIC_IP_POOL:
9966 raise errors.OpPrereqError("if ip=pool, parameter network"
9967 " must be passed too",
9970 elif not netutils.IPAddress.IsValid(ip):
9971 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9976 # TODO: check the ip address for uniqueness
9977 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9978 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9981 # MAC address verification
9982 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9983 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9984 mac = utils.NormalizeAndValidateMac(mac)
9987 # TODO: We need to factor this out
9988 cfg.ReserveMAC(mac, ec_id)
9989 except errors.ReservationError:
9990 raise errors.OpPrereqError("MAC address %s already in use"
9991 " in cluster" % mac,
9992 errors.ECODE_NOTUNIQUE)
9994 # Build nic parameters
9997 nicparams[constants.NIC_MODE] = nic_mode
9999 nicparams[constants.NIC_LINK] = link
10001 check_params = cluster.SimpleFillNIC(nicparams)
10002 objects.NIC.CheckParameterSyntax(check_params)
10003 net_uuid = cfg.LookupNetwork(net)
10004 nics.append(objects.NIC(mac=mac, ip=nic_ip,
10005 network=net_uuid, nicparams=nicparams))
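# Illustrative example: a NIC spec such as
# {constants.INIC_NETWORK: "net1", constants.INIC_IP: constants.NIC_IP_POOL}
# (network name hypothetical) is accepted here, but the actual address is only
# reserved from the network's pool in CheckPrereq, once the primary node (and
# therefore its node group) is known.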
10010 def _ComputeDisks(op, default_vg):
10011 """Computes the instance disks.
10013 @param op: The instance opcode
10014 @param default_vg: The default_vg to assume
10016 @return: The computed disks
10020 for disk in op.disks:
10021 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
10022 if mode not in constants.DISK_ACCESS_SET:
10023 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
10024 mode, errors.ECODE_INVAL)
10025 size = disk.get(constants.IDISK_SIZE, None)
10027 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
10030 except (TypeError, ValueError):
10031 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
10032 errors.ECODE_INVAL)
10034 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10035 if ext_provider and op.disk_template != constants.DT_EXT:
10036 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10037 " disk template, not %s" %
10038 (constants.IDISK_PROVIDER, constants.DT_EXT,
10039 op.disk_template), errors.ECODE_INVAL)
10041 data_vg = disk.get(constants.IDISK_VG, default_vg)
10043 constants.IDISK_SIZE: size,
10044 constants.IDISK_MODE: mode,
10045 constants.IDISK_VG: data_vg,
10048 if constants.IDISK_METAVG in disk:
10049 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10050 if constants.IDISK_ADOPT in disk:
10051 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10053 # For extstorage, demand the `provider' option and add any
10054 # additional parameters (ext-params) to the dict
10055 if op.disk_template == constants.DT_EXT:
10057 new_disk[constants.IDISK_PROVIDER] = ext_provider
10059 if key not in constants.IDISK_PARAMS:
10060 new_disk[key] = disk[key]
10062 raise errors.OpPrereqError("Missing provider for template '%s'" %
10063 constants.DT_EXT, errors.ECODE_INVAL)
10065 disks.append(new_disk)
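# Illustrative example: a minimal spec like {constants.IDISK_SIZE: 1024} is
# normalized to {constants.IDISK_SIZE: 1024, constants.IDISK_MODE:
# constants.DISK_RDWR, constants.IDISK_VG: default_vg}, plus the metavg/adopt
# and extstorage keys when those were given.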
10070 def _ComputeFullBeParams(op, cluster):
10071 """Computes the full beparams.
10073 @param op: The instance opcode
10074 @param cluster: The cluster config object
10076 @return: The fully filled beparams
10079 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10080 for param, value in op.beparams.iteritems():
10081 if value == constants.VALUE_AUTO:
10082 op.beparams[param] = default_beparams[param]
10083 objects.UpgradeBeParams(op.beparams)
10084 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
10085 return cluster.SimpleFillBE(op.beparams)
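# Illustrative example: if op.beparams == {constants.BE_VCPUS:
# constants.VALUE_AUTO}, the vcpus value is first replaced by the cluster
# default and the remaining backend parameters are then filled in from the
# cluster-level defaults by SimpleFillBE.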
10088 def _CheckOpportunisticLocking(op):
10089 """Generate error if opportunistic locking is not possible.
10092 if op.opportunistic_locking and not op.iallocator:
10093 raise errors.OpPrereqError("Opportunistic locking is only available in"
10094 " combination with an instance allocator",
10095 errors.ECODE_INVAL)
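# Rationale (descriptive): opportunistic locking only acquires whichever node
# locks happen to be free, so node selection must be left to the instance
# allocator; with explicitly named nodes the required locks would have to be
# waited for anyway.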
10098 class LUInstanceCreate(LogicalUnit):
10099 """Create an instance.
10102 HPATH = "instance-add"
10103 HTYPE = constants.HTYPE_INSTANCE
10106 def CheckArguments(self):
10107 """Check arguments.
10110 # do not require name_check to ease forward/backward compatibility
10112 if self.op.no_install and self.op.start:
10113 self.LogInfo("No-installation mode selected, disabling startup")
10114 self.op.start = False
10115 # validate/normalize the instance name
10116 self.op.instance_name = \
10117 netutils.Hostname.GetNormalizedName(self.op.instance_name)
10119 if self.op.ip_check and not self.op.name_check:
10120 # TODO: make the ip check more flexible and not depend on the name check
10121 raise errors.OpPrereqError("Cannot do IP address check without a name"
10122 " check", errors.ECODE_INVAL)
10124 # check nics' parameter names
10125 for nic in self.op.nics:
10126 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
10128 # check disks. parameter names and consistent adopt/no-adopt strategy
10129 has_adopt = has_no_adopt = False
10130 for disk in self.op.disks:
10131 if self.op.disk_template != constants.DT_EXT:
10132 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
10133 if constants.IDISK_ADOPT in disk:
10136 has_no_adopt = True
10137 if has_adopt and has_no_adopt:
10138 raise errors.OpPrereqError("Either all disks are adopted or none is",
10139 errors.ECODE_INVAL)
10141 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
10142 raise errors.OpPrereqError("Disk adoption is not supported for the"
10143 " '%s' disk template" %
10144 self.op.disk_template,
10145 errors.ECODE_INVAL)
10146 if self.op.iallocator is not None:
10147 raise errors.OpPrereqError("Disk adoption not allowed with an"
10148 " iallocator script", errors.ECODE_INVAL)
10149 if self.op.mode == constants.INSTANCE_IMPORT:
10150 raise errors.OpPrereqError("Disk adoption not allowed for"
10151 " instance import", errors.ECODE_INVAL)
10153 if self.op.disk_template in constants.DTS_MUST_ADOPT:
10154 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
10155 " but no 'adopt' parameter given" %
10156 self.op.disk_template,
10157 errors.ECODE_INVAL)
10159 self.adopt_disks = has_adopt
10161 # instance name verification
10162 if self.op.name_check:
10163 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
10164 self.op.instance_name = self.hostname1.name
10165 # used in CheckPrereq for ip ping check
10166 self.check_ip = self.hostname1.ip
10168 self.check_ip = None
10170 # file storage checks
10171 if (self.op.file_driver and
10172 not self.op.file_driver in constants.FILE_DRIVER):
10173 raise errors.OpPrereqError("Invalid file driver name '%s'" %
10174 self.op.file_driver, errors.ECODE_INVAL)
10176 if self.op.disk_template == constants.DT_FILE:
10177 opcodes.RequireFileStorage()
10178 elif self.op.disk_template == constants.DT_SHARED_FILE:
10179 opcodes.RequireSharedFileStorage()
10181 ### Node/iallocator related checks
10182 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
10184 if self.op.pnode is not None:
10185 if self.op.disk_template in constants.DTS_INT_MIRROR:
10186 if self.op.snode is None:
10187 raise errors.OpPrereqError("The networked disk templates need"
10188 " a mirror node", errors.ECODE_INVAL)
10189 elif self.op.snode:
10190 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
10192 self.op.snode = None
10194 _CheckOpportunisticLocking(self.op)
10196 self._cds = _GetClusterDomainSecret()
10198 if self.op.mode == constants.INSTANCE_IMPORT:
10199 # On import force_variant must be True, because if we forced it at
10200 # initial install, our only chance when importing it back is that it
10202 self.op.force_variant = True
10204 if self.op.no_install:
10205 self.LogInfo("No-installation mode has no effect during import")
10207 elif self.op.mode == constants.INSTANCE_CREATE:
10208 if self.op.os_type is None:
10209 raise errors.OpPrereqError("No guest OS specified",
10210 errors.ECODE_INVAL)
10211 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
10212 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
10213 " installation" % self.op.os_type,
10214 errors.ECODE_STATE)
10215 if self.op.disk_template is None:
10216 raise errors.OpPrereqError("No disk template specified",
10217 errors.ECODE_INVAL)
10219 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10220 # Check handshake to ensure both clusters have the same domain secret
10221 src_handshake = self.op.source_handshake
10222 if not src_handshake:
10223 raise errors.OpPrereqError("Missing source handshake",
10224 errors.ECODE_INVAL)
10226 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds, src_handshake)
10229 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
10230 errors.ECODE_INVAL)
10232 # Load and check source CA
10233 self.source_x509_ca_pem = self.op.source_x509_ca
10234 if not self.source_x509_ca_pem:
10235 raise errors.OpPrereqError("Missing source X509 CA",
10236 errors.ECODE_INVAL)
10239 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem, self._cds)
10241 except OpenSSL.crypto.Error, err:
10242 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
10243 (err, ), errors.ECODE_INVAL)
10245 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10246 if errcode is not None:
10247 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
10248 errors.ECODE_INVAL)
10250 self.source_x509_ca = cert
10252 src_instance_name = self.op.source_instance_name
10253 if not src_instance_name:
10254 raise errors.OpPrereqError("Missing source instance name",
10255 errors.ECODE_INVAL)
10257 self.source_instance_name = \
10258 netutils.GetHostname(name=src_instance_name).name
10261 raise errors.OpPrereqError("Invalid instance creation mode %r" %
10262 self.op.mode, errors.ECODE_INVAL)
10264 def ExpandNames(self):
10265 """ExpandNames for CreateInstance.
10267 Figure out the right locks for instance creation.
10270 self.needed_locks = {}
10272 instance_name = self.op.instance_name
10273 # this is just a preventive check, but someone might still add this
10274 # instance in the meantime, and creation will fail at lock-add time
10275 if instance_name in self.cfg.GetInstanceList():
10276 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
10277 instance_name, errors.ECODE_EXISTS)
10279 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
10281 if self.op.iallocator:
10282 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
10283 # specifying a group on instance creation and then selecting nodes from
10285 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10286 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10288 if self.op.opportunistic_locking:
10289 self.opportunistic_locks[locking.LEVEL_NODE] = True
10290 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
10292 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
10293 nodelist = [self.op.pnode]
10294 if self.op.snode is not None:
10295 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
10296 nodelist.append(self.op.snode)
10297 self.needed_locks[locking.LEVEL_NODE] = nodelist
10299 # in case of import lock the source node too
10300 if self.op.mode == constants.INSTANCE_IMPORT:
10301 src_node = self.op.src_node
10302 src_path = self.op.src_path
10304 if src_path is None:
10305 self.op.src_path = src_path = self.op.instance_name
10307 if src_node is None:
10308 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10309 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10310 self.op.src_node = None
10311 if os.path.isabs(src_path):
10312 raise errors.OpPrereqError("Importing an instance from a path"
10313 " requires a source node option",
10314 errors.ECODE_INVAL)
10316 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
10317 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
10318 self.needed_locks[locking.LEVEL_NODE].append(src_node)
10319 if not os.path.isabs(src_path):
10320 self.op.src_path = src_path = \
10321 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10323 self.needed_locks[locking.LEVEL_NODE_RES] = \
10324 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
10326 def _RunAllocator(self):
10327 """Run the allocator based on input opcode.
10330 if self.op.opportunistic_locking:
10331 # Only consider nodes for which a lock is held
10332 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
10334 node_whitelist = None
10336 #TODO Export network to iallocator so that it chooses a pnode
10337 # in a nodegroup that has the desired network connected to
10338 req = _CreateInstanceAllocRequest(self.op, self.disks,
10339 self.nics, self.be_full,
10341 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10343 ial.Run(self.op.iallocator)
10345 if not ial.success:
10346 # When opportunistic locks are used only a temporary failure is generated
10347 if self.op.opportunistic_locking:
10348 ecode = errors.ECODE_TEMP_NORES
10350 ecode = errors.ECODE_NORES
10352 raise errors.OpPrereqError("Can't compute nodes using"
10353 " iallocator '%s': %s" %
10354 (self.op.iallocator, ial.info), ecode)
10357 self.op.pnode = ial.result[0]
10358 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
10359 self.op.instance_name, self.op.iallocator,
10360 utils.CommaJoin(ial.result))
10362 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
10364 if req.RequiredNodes() == 2:
10365 self.op.snode = ial.result[1]
10367 def BuildHooksEnv(self):
10368 """Build hooks env.
10370 This runs on master, primary and secondary nodes of the instance.
10374 "ADD_MODE": self.op.mode,
10376 if self.op.mode == constants.INSTANCE_IMPORT:
10377 env["SRC_NODE"] = self.op.src_node
10378 env["SRC_PATH"] = self.op.src_path
10379 env["SRC_IMAGES"] = self.src_images
10381 env.update(_BuildInstanceHookEnv(
10382 name=self.op.instance_name,
10383 primary_node=self.op.pnode,
10384 secondary_nodes=self.secondaries,
10385 status=self.op.start,
10386 os_type=self.op.os_type,
10387 minmem=self.be_full[constants.BE_MINMEM],
10388 maxmem=self.be_full[constants.BE_MAXMEM],
10389 vcpus=self.be_full[constants.BE_VCPUS],
10390 nics=_NICListToTuple(self, self.nics),
10391 disk_template=self.op.disk_template,
10392 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
10393 for d in self.disks],
10396 hypervisor_name=self.op.hypervisor,
10402 def BuildHooksNodes(self):
10403 """Build hooks nodes.
10406 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
10409 def _ReadExportInfo(self):
10410 """Reads the export information from disk.
10412 It will override the opcode source node and path with the actual
10413 information, if these two were not specified before.
10415 @return: the export information
10418 assert self.op.mode == constants.INSTANCE_IMPORT
10420 src_node = self.op.src_node
10421 src_path = self.op.src_path
10423 if src_node is None:
10424 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
10425 exp_list = self.rpc.call_export_list(locked_nodes)
10427 for node in exp_list:
10428 if exp_list[node].fail_msg:
10430 if src_path in exp_list[node].payload:
10432 self.op.src_node = src_node = node
10433 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR, src_path)
10437 raise errors.OpPrereqError("No export found for relative path %s" %
10438 src_path, errors.ECODE_INVAL)
10440 _CheckNodeOnline(self, src_node)
10441 result = self.rpc.call_export_info(src_node, src_path)
10442 result.Raise("No export or invalid export found in dir %s" % src_path)
10444 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
10445 if not export_info.has_section(constants.INISECT_EXP):
10446 raise errors.ProgrammerError("Corrupted export config",
10447 errors.ECODE_ENVIRON)
10449 ei_version = export_info.get(constants.INISECT_EXP, "version")
10450 if (int(ei_version) != constants.EXPORT_VERSION):
10451 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
10452 (ei_version, constants.EXPORT_VERSION),
10453 errors.ECODE_ENVIRON)
10456 def _ReadExportParams(self, einfo):
10457 """Use export parameters as defaults.
10459 If the opcode doesn't specify (i.e. override) some instance
10460 parameters, try to take them from the export information, if
10461 it declares them.
10464 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
10466 if self.op.disk_template is None:
10467 if einfo.has_option(constants.INISECT_INS, "disk_template"):
10468 self.op.disk_template = einfo.get(constants.INISECT_INS, "disk_template")
10470 if self.op.disk_template not in constants.DISK_TEMPLATES:
10471 raise errors.OpPrereqError("Disk template specified in configuration"
10472 " file is not one of the allowed values:"
10474 " ".join(constants.DISK_TEMPLATES),
10475 errors.ECODE_INVAL)
10477 raise errors.OpPrereqError("No disk template specified and the export"
10478 " is missing the disk_template information",
10479 errors.ECODE_INVAL)
10481 if not self.op.disks:
10483 # TODO: import the disk iv_name too
10484 for idx in range(constants.MAX_DISKS):
10485 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
10486 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
10487 disks.append({constants.IDISK_SIZE: disk_sz})
10488 self.op.disks = disks
10489 if not disks and self.op.disk_template != constants.DT_DISKLESS:
10490 raise errors.OpPrereqError("No disk info specified and the export"
10491 " is missing the disk information",
10492 errors.ECODE_INVAL)
10494 if not self.op.nics:
10496 for idx in range(constants.MAX_NICS):
10497 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
10499 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
10500 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
10505 self.op.nics = nics
10507 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
10508 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
10510 if (self.op.hypervisor is None and
10511 einfo.has_option(constants.INISECT_INS, "hypervisor")):
10512 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
10514 if einfo.has_section(constants.INISECT_HYP):
10515 # use the export parameters but do not override the ones
10516 # specified by the user
10517 for name, value in einfo.items(constants.INISECT_HYP):
10518 if name not in self.op.hvparams:
10519 self.op.hvparams[name] = value
10521 if einfo.has_section(constants.INISECT_BEP):
10522 # use the parameters, without overriding
10523 for name, value in einfo.items(constants.INISECT_BEP):
10524 if name not in self.op.beparams:
10525 self.op.beparams[name] = value
10526 # Compatibility for the old "memory" be param
10527 if name == constants.BE_MEMORY:
10528 if constants.BE_MAXMEM not in self.op.beparams:
10529 self.op.beparams[constants.BE_MAXMEM] = value
10530 if constants.BE_MINMEM not in self.op.beparams:
10531 self.op.beparams[constants.BE_MINMEM] = value
10533 # try to read the parameters old style, from the main section
10534 for name in constants.BES_PARAMETERS:
10535 if (name not in self.op.beparams and
10536 einfo.has_option(constants.INISECT_INS, name)):
10537 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
10539 if einfo.has_section(constants.INISECT_OSP):
10540 # use the parameters, without overriding
10541 for name, value in einfo.items(constants.INISECT_OSP):
10542 if name not in self.op.osparams:
10543 self.op.osparams[name] = value
10545 def _RevertToDefaults(self, cluster):
10546 """Revert the instance parameters to the default values.
10550 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
10551 for name in self.op.hvparams.keys():
10552 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
10553 del self.op.hvparams[name]
10555 be_defs = cluster.SimpleFillBE({})
10556 for name in self.op.beparams.keys():
10557 if name in be_defs and be_defs[name] == self.op.beparams[name]:
10558 del self.op.beparams[name]
10560 nic_defs = cluster.SimpleFillNIC({})
10561 for nic in self.op.nics:
10562 for name in constants.NICS_PARAMETERS:
10563 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
10566 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
10567 for name in self.op.osparams.keys():
10568 if name in os_defs and os_defs[name] == self.op.osparams[name]:
10569 del self.op.osparams[name]
10571 def _CalculateFileStorageDir(self):
10572 """Calculate final instance file storage dir.
10575 # file storage dir calculation/check
10576 self.instance_file_storage_dir = None
10577 if self.op.disk_template in constants.DTS_FILEBASED:
10578 # build the full file storage dir path
10581 if self.op.disk_template == constants.DT_SHARED_FILE:
10582 get_fsd_fn = self.cfg.GetSharedFileStorageDir
10584 get_fsd_fn = self.cfg.GetFileStorageDir
10586 cfg_storagedir = get_fsd_fn()
10587 if not cfg_storagedir:
10588 raise errors.OpPrereqError("Cluster file storage dir not defined",
10589 errors.ECODE_STATE)
10590 joinargs.append(cfg_storagedir)
10592 if self.op.file_storage_dir is not None:
10593 joinargs.append(self.op.file_storage_dir)
10595 joinargs.append(self.op.instance_name)
10597 # pylint: disable=W0142
10598 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
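# Illustrative example (paths are hypothetical): with a cluster file storage
# dir of "/srv/ganeti/file-storage", op.file_storage_dir == "mydir" and an
# instance named "inst1.example.com", the resulting directory is
# "/srv/ganeti/file-storage/mydir/inst1.example.com".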
10600 def CheckPrereq(self): # pylint: disable=R0914
10601 """Check prerequisites.
10604 self._CalculateFileStorageDir()
10606 if self.op.mode == constants.INSTANCE_IMPORT:
10607 export_info = self._ReadExportInfo()
10608 self._ReadExportParams(export_info)
10609 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
10611 self._old_instance_name = None
10613 if (not self.cfg.GetVGName() and
10614 self.op.disk_template not in constants.DTS_NOT_LVM):
10615 raise errors.OpPrereqError("Cluster does not support lvm-based"
10616 " instances", errors.ECODE_STATE)
10618 if (self.op.hypervisor is None or
10619 self.op.hypervisor == constants.VALUE_AUTO):
10620 self.op.hypervisor = self.cfg.GetHypervisorType()
10622 cluster = self.cfg.GetClusterInfo()
10623 enabled_hvs = cluster.enabled_hypervisors
10624 if self.op.hypervisor not in enabled_hvs:
10625 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10627 (self.op.hypervisor, ",".join(enabled_hvs)),
10628 errors.ECODE_STATE)
10630 # Check tag validity
10631 for tag in self.op.tags:
10632 objects.TaggableObject.ValidateTag(tag)
10634 # check hypervisor parameter syntax (locally)
10635 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10636 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, self.op.hvparams)
10638 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
10639 hv_type.CheckParameterSyntax(filled_hvp)
10640 self.hv_full = filled_hvp
10641 # check that we don't specify global parameters on an instance
10642 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
10643 "instance", "cluster")
10645 # fill and remember the beparams dict
10646 self.be_full = _ComputeFullBeParams(self.op, cluster)
10648 # build os parameters
10649 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10651 # now that hvp/bep are in final format, let's reset to defaults,
10653 if self.op.identify_defaults:
10654 self._RevertToDefaults(cluster)
10657 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
10658 self.proc.GetECId())
10660 # disk checks/pre-build
10661 default_vg = self.cfg.GetVGName()
10662 self.disks = _ComputeDisks(self.op, default_vg)
10664 if self.op.mode == constants.INSTANCE_IMPORT:
10666 for idx in range(len(self.disks)):
10667 option = "disk%d_dump" % idx
10668 if export_info.has_option(constants.INISECT_INS, option):
10669 # FIXME: are the old os-es, disk sizes, etc. useful?
10670 export_name = export_info.get(constants.INISECT_INS, option)
10671 image = utils.PathJoin(self.op.src_path, export_name)
10672 disk_images.append(image)
10674 disk_images.append(False)
10676 self.src_images = disk_images
10678 if self.op.instance_name == self._old_instance_name:
10679 for idx, nic in enumerate(self.nics):
10680 if nic.mac == constants.VALUE_AUTO:
10681 nic_mac_ini = "nic%d_mac" % idx
10682 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10684 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10686 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10687 if self.op.ip_check:
10688 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10689 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10690 (self.check_ip, self.op.instance_name),
10691 errors.ECODE_NOTUNIQUE)
10693 #### mac address generation
10694 # By generating here the mac address both the allocator and the hooks get
10695 # the real final mac address rather than the 'auto' or 'generate' value.
10696 # There is a race condition between the generation and the instance object
10697 # creation, which means that we know the mac is valid now, but we're not
10698 # sure it will be when we actually add the instance. If things go bad
10699 # adding the instance will abort because of a duplicate mac, and the
10700 # creation job will fail.
10701 for nic in self.nics:
10702 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10703 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10707 if self.op.iallocator is not None:
10708 self._RunAllocator()
10710 # Release all unneeded node locks
10711 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
10712 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
10713 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
10714 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
10716 assert (self.owned_locks(locking.LEVEL_NODE) ==
10717 self.owned_locks(locking.LEVEL_NODE_RES)), \
10718 "Node locks differ from node resource locks"
10720 #### node related checks
10722 # check primary node
10723 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10724 assert self.pnode is not None, \
10725 "Cannot retrieve locked node %s" % self.op.pnode
10727 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10728 pnode.name, errors.ECODE_STATE)
10730 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10731 pnode.name, errors.ECODE_STATE)
10732 if not pnode.vm_capable:
10733 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10734 " '%s'" % pnode.name, errors.ECODE_STATE)
10736 self.secondaries = []
10738 # Fill in any IPs from IP pools. This must happen here, because we need to
10739 # know the nic's primary node, as specified by the iallocator
10740 for idx, nic in enumerate(self.nics):
10741 net_uuid = nic.network
10742 if net_uuid is not None:
10743 nobj = self.cfg.GetNetwork(net_uuid)
10744 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
10745 if netparams is None:
10746 raise errors.OpPrereqError("No netparams found for network"
10747 " %s. Propably not connected to"
10748 " node's %s nodegroup" %
10749 (nobj.name, self.pnode.name),
10750 errors.ECODE_INVAL)
10751 self.LogInfo("NIC/%d inherits netparams %s" %
10752 (idx, netparams.values()))
10753 nic.nicparams = dict(netparams)
10754 if nic.ip is not None:
10755 if nic.ip.lower() == constants.NIC_IP_POOL:
10757 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
10758 except errors.ReservationError:
10759 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10760 " from the address pool" % idx,
10761 errors.ECODE_STATE)
10762 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
10765 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
10766 except errors.ReservationError:
10767 raise errors.OpPrereqError("IP address %s already in use"
10768 " or does not belong to network %s" %
10769 (nic.ip, nobj.name),
10770 errors.ECODE_NOTUNIQUE)
10772 # net is None, ip None or given
10773 elif self.op.conflicts_check:
10774 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10776 # mirror node verification
10777 if self.op.disk_template in constants.DTS_INT_MIRROR:
10778 if self.op.snode == pnode.name:
10779 raise errors.OpPrereqError("The secondary node cannot be the"
10780 " primary node", errors.ECODE_INVAL)
10781 _CheckNodeOnline(self, self.op.snode)
10782 _CheckNodeNotDrained(self, self.op.snode)
10783 _CheckNodeVmCapable(self, self.op.snode)
10784 self.secondaries.append(self.op.snode)
10786 snode = self.cfg.GetNodeInfo(self.op.snode)
10787 if pnode.group != snode.group:
10788 self.LogWarning("The primary and secondary nodes are in two"
10789 " different node groups; the disk parameters"
10790 " from the first disk's node group will be"
10793 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
10794 nodes = [pnode]
10795 if self.op.disk_template in constants.DTS_INT_MIRROR:
10796 nodes.append(snode)
10797 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
10798 if compat.any(map(has_es, nodes)):
10799 raise errors.OpPrereqError("Disk template %s not supported with"
10800 " exclusive storage" % self.op.disk_template,
10801 errors.ECODE_STATE)
10803 nodenames = [pnode.name] + self.secondaries
10805 if not self.adopt_disks:
10806 if self.op.disk_template == constants.DT_RBD:
10807 # _CheckRADOSFreeSpace() is just a placeholder.
10808 # Any function that checks prerequisites can be placed here.
10809 # Check if there is enough space on the RADOS cluster.
10810 _CheckRADOSFreeSpace()
10811 elif self.op.disk_template == constants.DT_EXT:
10812 # FIXME: Function that checks prereqs if needed
10815 # Check lv size requirements, if not adopting
10816 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10817 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10819 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10820 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10821 disk[constants.IDISK_ADOPT])
10822 for disk in self.disks])
10823 if len(all_lvs) != len(self.disks):
10824 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10825 errors.ECODE_INVAL)
10826 for lv_name in all_lvs:
10828 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
10829 # to ReserveLV uses the same syntax
10830 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10831 except errors.ReservationError:
10832 raise errors.OpPrereqError("LV named %s used by another instance" %
10833 lv_name, errors.ECODE_NOTUNIQUE)
10835 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10836 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10838 node_lvs = self.rpc.call_lv_list([pnode.name],
10839 vg_names.payload.keys())[pnode.name]
10840 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10841 node_lvs = node_lvs.payload
10843 delta = all_lvs.difference(node_lvs.keys())
10845 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10846 utils.CommaJoin(delta),
10847 errors.ECODE_INVAL)
10848 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10850 raise errors.OpPrereqError("Online logical volumes found, cannot"
10851 " adopt: %s" % utils.CommaJoin(online_lvs),
10852 errors.ECODE_STATE)
10853 # update the size of disk based on what is found
10854 for dsk in self.disks:
10855 dsk[constants.IDISK_SIZE] = \
10856 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10857 dsk[constants.IDISK_ADOPT])][0]))
10859 elif self.op.disk_template == constants.DT_BLOCK:
10860 # Normalize and de-duplicate device paths
10861 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10862 for disk in self.disks])
10863 if len(all_disks) != len(self.disks):
10864 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10865 errors.ECODE_INVAL)
10866 baddisks = [d for d in all_disks
10867 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10869 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10870 " cannot be adopted" %
10871 (utils.CommaJoin(baddisks),
10872 constants.ADOPTABLE_BLOCKDEV_ROOT),
10873 errors.ECODE_INVAL)
10875 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10876 list(all_disks))[pnode.name]
10877 node_disks.Raise("Cannot get block device information from node %s" %
10879 node_disks = node_disks.payload
10880 delta = all_disks.difference(node_disks.keys())
10882 raise errors.OpPrereqError("Missing block device(s): %s" %
10883 utils.CommaJoin(delta),
10884 errors.ECODE_INVAL)
10885 for dsk in self.disks:
10886 dsk[constants.IDISK_SIZE] = \
10887 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10889 # Verify instance specs
10890 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10892 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10893 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10894 constants.ISPEC_DISK_COUNT: len(self.disks),
10895 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10896 for disk in self.disks],
10897 constants.ISPEC_NIC_COUNT: len(self.nics),
10898 constants.ISPEC_SPINDLE_USE: spindle_use,
10901 group_info = self.cfg.GetNodeGroup(pnode.group)
10902 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
10903 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
10904 self.op.disk_template)
10905 if not self.op.ignore_ipolicy and res:
10906 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10907 (pnode.group, group_info.name, utils.CommaJoin(res)))
10908 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10910 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10912 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10913 # check OS parameters (remotely)
10914 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10916 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10918 #TODO: _CheckExtParams (remotely)
10919 # Check parameters for extstorage
10921 # memory check on primary node
10922 #TODO(dynmem): use MINMEM for checking
10924 _CheckNodeFreeMemory(self, self.pnode.name,
10925 "creating instance %s" % self.op.instance_name,
10926 self.be_full[constants.BE_MAXMEM],
10927 self.op.hypervisor)
10929 self.dry_run_result = list(nodenames)
10931 def Exec(self, feedback_fn):
10932 """Create and add the instance to the cluster.
10935 instance = self.op.instance_name
10936 pnode_name = self.pnode.name
10938 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10939 self.owned_locks(locking.LEVEL_NODE)), \
10940 "Node locks differ from node resource locks"
10941 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
10943 ht_kind = self.op.hypervisor
10944 if ht_kind in constants.HTS_REQ_PORT:
10945 network_port = self.cfg.AllocatePort()
10947 network_port = None
10949 # This is ugly but we got a chicken-egg problem here
10950 # We can only take the group disk parameters, as the instance
10951 # has no disks yet (we are generating them right here).
10952 node = self.cfg.GetNodeInfo(pnode_name)
10953 nodegroup = self.cfg.GetNodeGroup(node.group)
10954 disks = _GenerateDiskTemplate(self,
10955 self.op.disk_template,
10956 instance, pnode_name,
10959 self.instance_file_storage_dir,
10960 self.op.file_driver,
10963 self.cfg.GetGroupDiskParams(nodegroup))
10965 iobj = objects.Instance(name=instance, os=self.op.os_type,
10966 primary_node=pnode_name,
10967 nics=self.nics, disks=disks,
10968 disk_template=self.op.disk_template,
10969 admin_state=constants.ADMINST_DOWN,
10970 network_port=network_port,
10971 beparams=self.op.beparams,
10972 hvparams=self.op.hvparams,
10973 hypervisor=self.op.hypervisor,
10974 osparams=self.op.osparams,
10978 for tag in self.op.tags:
10981 if self.adopt_disks:
10982 if self.op.disk_template == constants.DT_PLAIN:
10983 # rename LVs to the newly-generated names; we need to construct
10984 # 'fake' LV disks with the old data, plus the new unique_id
10985 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10987 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10988 rename_to.append(t_dsk.logical_id)
10989 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10990 self.cfg.SetDiskID(t_dsk, pnode_name)
10991 result = self.rpc.call_blockdev_rename(pnode_name,
10992 zip(tmp_disks, rename_to))
10993 result.Raise("Failed to rename adoped LVs")
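# At this point adopted volumes already carry the cluster-generated LV names
# stored in 'disks'; only non-adopted templates need their disks created.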
10995 feedback_fn("* creating instance disks...")
10997 _CreateDisks(self, iobj)
10998 except errors.OpExecError:
10999 self.LogWarning("Device creation failed")
11000 self.cfg.ReleaseDRBDMinors(instance)
11003 feedback_fn("adding instance %s to cluster config" % instance)
11005 self.cfg.AddInstance(iobj, self.proc.GetECId())
11007 # Declare that we don't want to remove the instance lock anymore, as we've
11008 # added the instance to the config
11009 del self.remove_locks[locking.LEVEL_INSTANCE]
11011 if self.op.mode == constants.INSTANCE_IMPORT:
11012 # Release unused nodes
11013 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
11014 else:
11015 # Release all nodes
11016 _ReleaseLocks(self, locking.LEVEL_NODE)
11019 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
11020 feedback_fn("* wiping instance disks...")
11022 _WipeDisks(self, iobj)
11023 except errors.OpExecError, err:
11024 logging.exception("Wiping disks failed")
11025 self.LogWarning("Wiping instance disks failed (%s)", err)
11029 # Something is already wrong with the disks, don't do anything else
11031 elif self.op.wait_for_sync:
11032 disk_abort = not _WaitForSync(self, iobj)
11033 elif iobj.disk_template in constants.DTS_INT_MIRROR:
11034 # make sure the disks are not degraded (still sync-ing is ok)
11035 feedback_fn("* checking mirrors status")
11036 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
11037 else:
11038 disk_abort = False
11040 if disk_abort:
11041 _RemoveDisks(self, iobj)
11042 self.cfg.RemoveInstance(iobj.name)
11043 # Make sure the instance lock gets removed
11044 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
11045 raise errors.OpExecError("There are some degraded disks for"
11048 # Release all node resource locks
11049 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11051 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
11052 # we need to set the disks ID to the primary node, since the
11053 # preceding code might or might not have done it, depending on
11054 # disk template and other options
11055 for disk in iobj.disks:
11056 self.cfg.SetDiskID(disk, pnode_name)
11057 if self.op.mode == constants.INSTANCE_CREATE:
11058 if not self.op.no_install:
11059 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
11060 not self.op.wait_for_sync)
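# Temporarily pausing the DRBD sync keeps the initial resync from competing
# with OS-installation I/O; it is resumed right after the create scripts run.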
11062 feedback_fn("* pausing disk sync to install instance OS")
11063 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11066 for idx, success in enumerate(result.payload):
11068 logging.warn("pause-sync of instance %s for disk %d failed",
11071 feedback_fn("* running the instance OS create scripts...")
11072 # FIXME: pass debug option from opcode to backend
11073 os_add_result = \
11074 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
11075 self.op.debug_level)
11077 feedback_fn("* resuming disk sync")
11078 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
11081 for idx, success in enumerate(result.payload):
11083 logging.warn("resume-sync of instance %s for disk %d failed",
11086 os_add_result.Raise("Could not add os for instance %s"
11087 " on node %s" % (instance, pnode_name))
11090 if self.op.mode == constants.INSTANCE_IMPORT:
11091 feedback_fn("* running the instance OS import scripts...")
11093 transfers = []
11095 for idx, image in enumerate(self.src_images):
11099 # FIXME: pass debug option from opcode to backend
11100 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
11101 constants.IEIO_FILE, (image, ),
11102 constants.IEIO_SCRIPT,
11103 (iobj.disks[idx], idx),
11105 transfers.append(dt)
11107 import_result = \
11108 masterd.instance.TransferInstanceData(self, feedback_fn,
11109 self.op.src_node, pnode_name,
11110 self.pnode.secondary_ip,
11112 if not compat.all(import_result):
11113 self.LogWarning("Some disks for instance %s on node %s were not"
11114 " imported successfully" % (instance, pnode_name))
11116 rename_from = self._old_instance_name
11118 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
11119 feedback_fn("* preparing remote import...")
11120 # The source cluster will stop the instance before attempting to make
11121 # a connection. In some cases stopping an instance can take a long
11122 # time, hence the shutdown timeout is added to the connection
11124 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
11125 self.op.source_shutdown_timeout)
11126 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11128 assert iobj.primary_node == self.pnode.name
11129 disk_results = \
11130 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
11131 self.source_x509_ca,
11132 self._cds, timeouts)
11133 if not compat.all(disk_results):
11134 # TODO: Should the instance still be started, even if some disks
11135 # failed to import (valid for local imports, too)?
11136 self.LogWarning("Some disks for instance %s on node %s were not"
11137 " imported successfully" % (instance, pnode_name))
11139 rename_from = self.source_instance_name
11142 # also checked in the prereq part
11143 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
11146 # Run rename script on newly imported instance
11147 assert iobj.name == instance
11148 feedback_fn("Running rename script for %s" % instance)
11149 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
11150 rename_from,
11151 self.op.debug_level)
11152 if result.fail_msg:
11153 self.LogWarning("Failed to run rename script for %s on node"
11154 " %s: %s" % (instance, pnode_name, result.fail_msg))
11156 assert not self.owned_locks(locking.LEVEL_NODE_RES)
11158 if self.op.start:
11159 iobj.admin_state = constants.ADMINST_UP
11160 self.cfg.Update(iobj, feedback_fn)
11161 logging.info("Starting instance %s on node %s", instance, pnode_name)
11162 feedback_fn("* starting instance...")
11163 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
11165 result.Raise("Could not start instance")
11167 return list(iobj.all_nodes)
11170 class LUInstanceMultiAlloc(NoHooksLU):
11171 """Allocates multiple instances at the same time.
11176 def CheckArguments(self):
11177 """Check arguments.
11181 for inst in self.op.instances:
11182 if inst.iallocator is not None:
11183 raise errors.OpPrereqError("iallocator are not allowed to be set on"
11184 " instance objects", errors.ECODE_INVAL)
11185 nodes.append(bool(inst.pnode))
11186 if inst.disk_template in constants.DTS_INT_MIRROR:
11187 nodes.append(bool(inst.snode))
11189 has_nodes = compat.any(nodes)
11190 if compat.all(nodes) ^ has_nodes:
11191 raise errors.OpPrereqError("There are instance objects providing"
11192 " pnode/snode while others do not",
11193 errors.ECODE_INVAL)
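# In other words: either every instance specifies its nodes (pnode, plus
# snode for mirrored disk templates) or none of them does; a mix of the two
# is rejected here.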
11195 if self.op.iallocator is None:
11196 default_iallocator = self.cfg.GetDefaultIAllocator()
11197 if default_iallocator and has_nodes:
11198 self.op.iallocator = default_iallocator
11200 raise errors.OpPrereqError("No iallocator or nodes on the instances"
11201 " given and no cluster-wide default"
11202 " iallocator found; please specify either"
11203 " an iallocator or nodes on the instances"
11204 " or set a cluster-wide default iallocator",
11205 errors.ECODE_INVAL)
11207 _CheckOpportunisticLocking(self.op)
11209 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
11211 raise errors.OpPrereqError("There are duplicate instance names: %s" %
11212 utils.CommaJoin(dups), errors.ECODE_INVAL)
11214 def ExpandNames(self):
11215 """Calculate the locks.
11218 self.share_locks = _ShareAll()
11219 self.needed_locks = {
11220 # iallocator will select nodes and even if no iallocator is used,
11221 # collisions with LUInstanceCreate should be avoided
11222 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11225 if self.op.iallocator:
11226 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11227 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
11229 if self.op.opportunistic_locking:
11230 self.opportunistic_locks[locking.LEVEL_NODE] = True
11231 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
11232 else:
11233 nodeslist = []
11234 for inst in self.op.instances:
11235 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
11236 nodeslist.append(inst.pnode)
11237 if inst.snode is not None:
11238 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
11239 nodeslist.append(inst.snode)
11241 self.needed_locks[locking.LEVEL_NODE] = nodeslist
11242 # Lock resources of instance's primary and secondary nodes (copy to
11243 # prevent accidental modification)
11244 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
11246 def CheckPrereq(self):
11247 """Check prerequisite.
11250 cluster = self.cfg.GetClusterInfo()
11251 default_vg = self.cfg.GetVGName()
11252 ec_id = self.proc.GetECId()
11254 if self.op.opportunistic_locking:
11255 # Only consider nodes for which a lock is held
11256 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
11257 else:
11258 node_whitelist = None
11260 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
11261 _ComputeNics(op, cluster, None,
11263 _ComputeFullBeParams(op, cluster),
11265 for op in self.op.instances]
11267 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
11268 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11270 ial.Run(self.op.iallocator)
11272 if not ial.success:
11273 raise errors.OpPrereqError("Can't compute nodes using"
11274 " iallocator '%s': %s" %
11275 (self.op.iallocator, ial.info),
11276 errors.ECODE_NORES)
11278 self.ia_result = ial.result
11280 if self.op.dry_run:
11281 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
11282 constants.JOB_IDS_KEY: [],
11285 def _ConstructPartialResult(self):
11286 """Contructs the partial result.
11289 (allocatable, failed) = self.ia_result
11290 return {
11291 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
11292 map(compat.fst, allocatable),
11293 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
11296 def Exec(self, feedback_fn):
11297 """Executes the opcode.
11300 op2inst = dict((op.instance_name, op) for op in self.op.instances)
11301 (allocatable, failed) = self.ia_result
11303 jobs = []
11304 for (name, nodes) in allocatable:
11305 op = op2inst.pop(name)
11307 if len(nodes) > 1:
11308 (op.pnode, op.snode) = nodes
11309 else:
11310 (op.pnode,) = nodes
11312 jobs.append([op])
11314 missing = set(op2inst.keys()) - set(failed)
11315 assert not missing, \
11316 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
11318 return ResultWithJobs(jobs, **self._ConstructPartialResult())
11321 def _CheckRADOSFreeSpace():
11322 """Compute disk size requirements inside the RADOS cluster.
11325 # For the RADOS cluster we assume there is always enough space.
11329 class LUInstanceConsole(NoHooksLU):
11330 """Connect to an instance's console.
11332 This is somewhat special in that it returns the command line that
11333 you need to run on the master node in order to connect to the
11334 console.
11339 def ExpandNames(self):
11340 self.share_locks = _ShareAll()
11341 self._ExpandAndLockInstance()
11343 def CheckPrereq(self):
11344 """Check prerequisites.
11346 This checks that the instance is in the cluster.
11349 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11350 assert self.instance is not None, \
11351 "Cannot retrieve locked instance %s" % self.op.instance_name
11352 _CheckNodeOnline(self, self.instance.primary_node)
11354 def Exec(self, feedback_fn):
11355 """Connect to the console of an instance
11358 instance = self.instance
11359 node = instance.primary_node
11361 node_insts = self.rpc.call_instance_list([node],
11362 [instance.hypervisor])[node]
11363 node_insts.Raise("Can't get node information from %s" % node)
11365 if instance.name not in node_insts.payload:
11366 if instance.admin_state == constants.ADMINST_UP:
11367 state = constants.INSTST_ERRORDOWN
11368 elif instance.admin_state == constants.ADMINST_DOWN:
11369 state = constants.INSTST_ADMINDOWN
11371 state = constants.INSTST_ADMINOFFLINE
11372 raise errors.OpExecError("Instance %s is not running (state %s)" %
11373 (instance.name, state))
11375 logging.debug("Connecting to console of %s on %s", instance.name, node)
11377 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
11380 def _GetInstanceConsole(cluster, instance):
11381 """Returns console information for an instance.
11383 @type cluster: L{objects.Cluster}
11384 @type instance: L{objects.Instance}
11388 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
11389 # beparams and hvparams are passed separately, to avoid editing the
11390 # instance and then saving the defaults in the instance itself.
11391 hvparams = cluster.FillHV(instance)
11392 beparams = cluster.FillBE(instance)
11393 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
11395 assert console.instance == instance.name
11396 assert console.Validate()
11398 return console.ToDict()
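# The dictionary is a serialized objects.InstanceConsole; depending on the
# hypervisor it describes, for example, a command line to run or a host/port
# to connect to (the exact fields depend on the console kind).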
11401 class LUInstanceReplaceDisks(LogicalUnit):
11402 """Replace the disks of an instance.
11405 HPATH = "mirrors-replace"
11406 HTYPE = constants.HTYPE_INSTANCE
11409 def CheckArguments(self):
11410 """Check arguments.
11413 remote_node = self.op.remote_node
11414 ialloc = self.op.iallocator
11415 if self.op.mode == constants.REPLACE_DISK_CHG:
11416 if remote_node is None and ialloc is None:
11417 raise errors.OpPrereqError("When changing the secondary either an"
11418 " iallocator script must be used or the"
11419 " new node given", errors.ECODE_INVAL)
11421 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11423 elif remote_node is not None or ialloc is not None:
11424 # Not replacing the secondary
11425 raise errors.OpPrereqError("The iallocator and new node options can"
11426 " only be used when changing the"
11427 " secondary node", errors.ECODE_INVAL)
11429 def ExpandNames(self):
11430 self._ExpandAndLockInstance()
11432 assert locking.LEVEL_NODE not in self.needed_locks
11433 assert locking.LEVEL_NODE_RES not in self.needed_locks
11434 assert locking.LEVEL_NODEGROUP not in self.needed_locks
11436 assert self.op.iallocator is None or self.op.remote_node is None, \
11437 "Conflicting options"
11439 if self.op.remote_node is not None:
11440 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11442 # Warning: do not remove the locking of the new secondary here
11443 # unless DRBD8.AddChildren is changed to work in parallel;
11444 # currently it doesn't since parallel invocations of
11445 # FindUnusedMinor will conflict
11446 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
11447 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11449 self.needed_locks[locking.LEVEL_NODE] = []
11450 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11452 if self.op.iallocator is not None:
11453 # iallocator will select a new node in the same group
11454 self.needed_locks[locking.LEVEL_NODEGROUP] = []
11455 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
11457 self.needed_locks[locking.LEVEL_NODE_RES] = []
11459 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
11460 self.op.iallocator, self.op.remote_node,
11461 self.op.disks, self.op.early_release,
11462 self.op.ignore_ipolicy)
11464 self.tasklets = [self.replacer]
11466 def DeclareLocks(self, level):
11467 if level == locking.LEVEL_NODEGROUP:
11468 assert self.op.remote_node is None
11469 assert self.op.iallocator is not None
11470 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11472 self.share_locks[locking.LEVEL_NODEGROUP] = 1
11473 # Lock all groups used by instance optimistically; this requires going
11474 # via the node before it's locked, requiring verification later on
11475 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11476 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11478 elif level == locking.LEVEL_NODE:
11479 if self.op.iallocator is not None:
11480 assert self.op.remote_node is None
11481 assert not self.needed_locks[locking.LEVEL_NODE]
11482 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
11484 # Lock member nodes of all locked groups
11485 self.needed_locks[locking.LEVEL_NODE] = \
11487 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11488 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
11490 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11492 self._LockInstancesNodes()
11494 elif level == locking.LEVEL_NODE_RES:
11496 self.needed_locks[locking.LEVEL_NODE_RES] = \
11497 self.needed_locks[locking.LEVEL_NODE]
11499 def BuildHooksEnv(self):
11500 """Build hooks env.
11502 This runs on the master, the primary and all the secondaries.
11505 instance = self.replacer.instance
11507 "MODE": self.op.mode,
11508 "NEW_SECONDARY": self.op.remote_node,
11509 "OLD_SECONDARY": instance.secondary_nodes[0],
11511 env.update(_BuildInstanceHookEnvByObject(self, instance))
11514 def BuildHooksNodes(self):
11515 """Build hooks nodes.
11518 instance = self.replacer.instance
11520 self.cfg.GetMasterNode(),
11521 instance.primary_node,
11523 if self.op.remote_node is not None:
11524 nl.append(self.op.remote_node)
11527 def CheckPrereq(self):
11528 """Check prerequisites.
11531 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
11532 self.op.iallocator is None)
11534 # Verify if node group locks are still correct
11535 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11537 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
11539 return LogicalUnit.CheckPrereq(self)
11542 class TLReplaceDisks(Tasklet):
11543 """Replaces disks for an instance.
11545 Note: Locking is not within the scope of this class.
11548 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
11549 disks, early_release, ignore_ipolicy):
11550 """Initializes this class.
11553 Tasklet.__init__(self, lu)
11556 self.instance_name = instance_name
11558 self.iallocator_name = iallocator_name
11559 self.remote_node = remote_node
11561 self.early_release = early_release
11562 self.ignore_ipolicy = ignore_ipolicy
11565 self.instance = None
11566 self.new_node = None
11567 self.target_node = None
11568 self.other_node = None
11569 self.remote_node_info = None
11570 self.node_secondary_ip = None
11573 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
11574 """Compute a new secondary node using an IAllocator.
11577 req = iallocator.IAReqRelocate(name=instance_name,
11578 relocate_from=list(relocate_from))
11579 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
11581 ial.Run(iallocator_name)
11583 if not ial.success:
11584 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
11585 " %s" % (iallocator_name, ial.info),
11586 errors.ECODE_NORES)
11588 remote_node_name = ial.result[0]
11590 lu.LogInfo("Selected new secondary for instance '%s': %s",
11591 instance_name, remote_node_name)
11593 return remote_node_name
11595 def _FindFaultyDisks(self, node_name):
11596 """Wrapper for L{_FindFaultyInstanceDisks}.
11599 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
11602 def _CheckDisksActivated(self, instance):
11603 """Checks if the instance disks are activated.
11605 @param instance: The instance to check disks
11606 @return: True if they are activated, False otherwise
11609 nodes = instance.all_nodes
11611 for idx, dev in enumerate(instance.disks):
11613 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11614 self.cfg.SetDiskID(dev, node)
11616 result = _BlockdevFind(self, node, dev, instance)
11618 if result.offline:
11619 continue
11620 elif result.fail_msg or not result.payload:
11621 return False
11623 return True
11625 def CheckPrereq(self):
11626 """Check prerequisites.
11628 This checks that the instance is in the cluster.
11631 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
11632 assert instance is not None, \
11633 "Cannot retrieve locked instance %s" % self.instance_name
11635 if instance.disk_template != constants.DT_DRBD8:
11636 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
11637 " instances", errors.ECODE_INVAL)
11639 if len(instance.secondary_nodes) != 1:
11640 raise errors.OpPrereqError("The instance has a strange layout,"
11641 " expected one secondary but found %d" %
11642 len(instance.secondary_nodes),
11643 errors.ECODE_FAULT)
11645 instance = self.instance
11646 secondary_node = instance.secondary_nodes[0]
11648 if self.iallocator_name is None:
11649 remote_node = self.remote_node
11651 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11652 instance.name, instance.secondary_nodes)
11654 if remote_node is None:
11655 self.remote_node_info = None
11657 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11658 "Remote node '%s' is not locked" % remote_node
11660 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11661 assert self.remote_node_info is not None, \
11662 "Cannot retrieve locked node %s" % remote_node
11664 if remote_node == self.instance.primary_node:
11665 raise errors.OpPrereqError("The specified node is the primary node of"
11666 " the instance", errors.ECODE_INVAL)
11668 if remote_node == secondary_node:
11669 raise errors.OpPrereqError("The specified node is already the"
11670 " secondary node of the instance",
11671 errors.ECODE_INVAL)
11673 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11674 constants.REPLACE_DISK_CHG):
11675 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11676 errors.ECODE_INVAL)
11678 if self.mode == constants.REPLACE_DISK_AUTO:
11679 if not self._CheckDisksActivated(instance):
11680 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11681 " first" % self.instance_name,
11682 errors.ECODE_STATE)
11683 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11684 faulty_secondary = self._FindFaultyDisks(secondary_node)
11686 if faulty_primary and faulty_secondary:
11687 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11688 " one node and can not be repaired"
11689 " automatically" % self.instance_name,
11690 errors.ECODE_STATE)
11692 if faulty_primary:
11693 self.disks = faulty_primary
11694 self.target_node = instance.primary_node
11695 self.other_node = secondary_node
11696 check_nodes = [self.target_node, self.other_node]
11697 elif faulty_secondary:
11698 self.disks = faulty_secondary
11699 self.target_node = secondary_node
11700 self.other_node = instance.primary_node
11701 check_nodes = [self.target_node, self.other_node]
11707 # Non-automatic modes
11708 if self.mode == constants.REPLACE_DISK_PRI:
11709 self.target_node = instance.primary_node
11710 self.other_node = secondary_node
11711 check_nodes = [self.target_node, self.other_node]
11713 elif self.mode == constants.REPLACE_DISK_SEC:
11714 self.target_node = secondary_node
11715 self.other_node = instance.primary_node
11716 check_nodes = [self.target_node, self.other_node]
11718 elif self.mode == constants.REPLACE_DISK_CHG:
11719 self.new_node = remote_node
11720 self.other_node = instance.primary_node
11721 self.target_node = secondary_node
11722 check_nodes = [self.new_node, self.other_node]
11724 _CheckNodeNotDrained(self.lu, remote_node)
11725 _CheckNodeVmCapable(self.lu, remote_node)
11727 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11728 assert old_node_info is not None
11729 if old_node_info.offline and not self.early_release:
11730 # doesn't make sense to delay the release
11731 self.early_release = True
11732 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11733 " early-release mode", secondary_node)
11736 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11739 # If not specified all disks should be replaced
11740 if not self.disks:
11741 self.disks = range(len(self.instance.disks))
11743 # TODO: This is ugly, but right now we can't distinguish between internally
11744 # submitted opcodes and external ones. We should fix that.
11745 if self.remote_node_info:
11746 # We change the node, so let's verify it still meets the instance policy
11747 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11748 cluster = self.cfg.GetClusterInfo()
11749 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
11751 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11752 self.cfg, ignore=self.ignore_ipolicy)
11754 for node in check_nodes:
11755 _CheckNodeOnline(self.lu, node)
11757 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11760 if node_name is not None)
11762 # Release unneeded node and node resource locks
11763 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11764 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11765 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
11767 # Release any owned node group
11768 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11770 # Check whether disks are valid
11771 for disk_idx in self.disks:
11772 instance.FindDisk(disk_idx)
11774 # Get secondary node IP addresses
11775 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11776 in self.cfg.GetMultiNodeInfo(touched_nodes))
11778 def Exec(self, feedback_fn):
11779 """Execute disk replacement.
11781 This dispatches the disk replacement to the appropriate handler.
11785 # Verify owned locks before starting operation
11786 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11787 assert set(owned_nodes) == set(self.node_secondary_ip), \
11788 ("Incorrect node locks, owning %s, expected %s" %
11789 (owned_nodes, self.node_secondary_ip.keys()))
11790 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11791 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11792 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
11794 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11795 assert list(owned_instances) == [self.instance_name], \
11796 "Instance '%s' not locked" % self.instance_name
11798 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11799 "Should not own any node group lock at this point"
11802 feedback_fn("No disks need replacement for instance '%s'" %
11803 self.instance.name)
11806 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11807 (utils.CommaJoin(self.disks), self.instance.name))
11808 feedback_fn("Current primary node: %s" % self.instance.primary_node)
11809 feedback_fn("Current seconary node: %s" %
11810 utils.CommaJoin(self.instance.secondary_nodes))
11812 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11814 # Activate the instance disks if we're replacing them on a down instance
11815 if activate_disks:
11816 _StartInstanceDisks(self.lu, self.instance, True)
11819 # Should we replace the secondary node?
11820 if self.new_node is not None:
11821 fn = self._ExecDrbd8Secondary
11823 fn = self._ExecDrbd8DiskOnly
11825 result = fn(feedback_fn)
11827 # Deactivate the instance disks if we're replacing them on a
11828 # down instance
11829 if activate_disks:
11830 _SafeShutdownInstanceDisks(self.lu, self.instance)
11832 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11835 # Verify owned locks
11836 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11837 nodes = frozenset(self.node_secondary_ip)
11838 assert ((self.early_release and not owned_nodes) or
11839 (not self.early_release and not (set(owned_nodes) - nodes))), \
11840 ("Not owning the correct locks, early_release=%s, owned=%r,"
11841 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11845 def _CheckVolumeGroup(self, nodes):
11846 self.lu.LogInfo("Checking volume groups")
11848 vgname = self.cfg.GetVGName()
11850 # Make sure volume group exists on all involved nodes
11851 results = self.rpc.call_vg_list(nodes)
11853 raise errors.OpExecError("Can't list volume groups on the nodes")
11856 res = results[node]
11857 res.Raise("Error checking node %s" % node)
11858 if vgname not in res.payload:
11859 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11862 def _CheckDisksExistence(self, nodes):
11863 # Check disk existence
11864 for idx, dev in enumerate(self.instance.disks):
11865 if idx not in self.disks:
11869 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
11870 self.cfg.SetDiskID(dev, node)
11872 result = _BlockdevFind(self, node, dev, self.instance)
11874 msg = result.fail_msg
11875 if msg or not result.payload:
11877 msg = "disk not found"
11878 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11881 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11882 for idx, dev in enumerate(self.instance.disks):
11883 if idx not in self.disks:
11886 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11889 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11890 on_primary, ldisk=ldisk):
11891 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11892 " replace disks for instance %s" %
11893 (node_name, self.instance.name))
11895 def _CreateNewStorage(self, node_name):
11896 """Create new storage on the primary or secondary node.
11898 This is only used for same-node replaces, not for changing the
11899 secondary node, hence we don't want to modify the existing disk.
11904 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11905 for idx, dev in enumerate(disks):
11906 if idx not in self.disks:
11909 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
11911 self.cfg.SetDiskID(dev, node_name)
11913 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11914 names = _GenerateUniqueNames(self.lu, lv_names)
11916 (data_disk, meta_disk) = dev.children
11917 vg_data = data_disk.logical_id[0]
11918 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11919 logical_id=(vg_data, names[0]),
11920 params=data_disk.params)
11921 vg_meta = meta_disk.logical_id[0]
11922 lv_meta = objects.Disk(dev_type=constants.LD_LV,
11923 size=constants.DRBD_META_SIZE,
11924 logical_id=(vg_meta, names[1]),
11925 params=meta_disk.params)
11927 new_lvs = [lv_data, lv_meta]
11928 old_lvs = [child.Copy() for child in dev.children]
11929 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
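# iv_names maps each disk's iv_name to (drbd device, old LV children,
# freshly created LVs); the later steps use it to swap the children,
# verify the resulting DRBD devices and finally remove the old storage.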
11930 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
11932 # we pass force_create=True to force the LVM creation
11933 for new_lv in new_lvs:
11934 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11935 _GetInstanceInfoText(self.instance), False,
11940 def _CheckDevices(self, node_name, iv_names):
11941 for name, (dev, _, _) in iv_names.iteritems():
11942 self.cfg.SetDiskID(dev, node_name)
11944 result = _BlockdevFind(self, node_name, dev, self.instance)
11946 msg = result.fail_msg
11947 if msg or not result.payload:
11949 msg = "disk not found"
11950 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11953 if result.payload.is_degraded:
11954 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11956 def _RemoveOldStorage(self, node_name, iv_names):
11957 for name, (_, old_lvs, _) in iv_names.iteritems():
11958 self.lu.LogInfo("Remove logical volumes for %s", name)
11961 self.cfg.SetDiskID(lv, node_name)
11963 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11965 self.lu.LogWarning("Can't remove old LV: %s", msg,
11966 hint="remove unused LVs manually")
11968 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11969 """Replace a disk on the primary or secondary for DRBD 8.
11971 The algorithm for replace is quite complicated:
11973 1. for each disk to be replaced:
11975 1. create new LVs on the target node with unique names
11976 1. detach old LVs from the drbd device
11977 1. rename old LVs to name_replaced.<time_t>
11978 1. rename new LVs to old LVs
11979 1. attach the new LVs (with the old names now) to the drbd device
11981 1. wait for sync across all devices
11983 1. for each modified disk:
11985 1. remove old LVs (which have the name name_replaced.<time_t>)
11987 Failures are not very well handled.
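The numbered groups above map onto the LogStep calls in the body: device
existence check, peer consistency check, new storage allocation, DRBD
reconfiguration, sync wait and old-storage removal (the last two are
reordered when early_release is set).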
11992 # Step: check device activation
11993 self.lu.LogStep(1, steps_total, "Check device existence")
11994 self._CheckDisksExistence([self.other_node, self.target_node])
11995 self._CheckVolumeGroup([self.target_node, self.other_node])
11997 # Step: check other node consistency
11998 self.lu.LogStep(2, steps_total, "Check peer consistency")
11999 self._CheckDisksConsistency(self.other_node,
12000 self.other_node == self.instance.primary_node,
12003 # Step: create new storage
12004 self.lu.LogStep(3, steps_total, "Allocate new storage")
12005 iv_names = self._CreateNewStorage(self.target_node)
12007 # Step: for each lv, detach+rename*2+attach
12008 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12009 for dev, old_lvs, new_lvs in iv_names.itervalues():
12010 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
12012 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
12014 result.Raise("Can't detach drbd from local storage on node"
12015 " %s for device %s" % (self.target_node, dev.iv_name))
12017 #cfg.Update(instance)
12019 # ok, we created the new LVs, so now we know we have the needed
12020 # storage; as such, we proceed on the target node to rename
12021 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
12022 # using the assumption that logical_id == physical_id (which in
12023 # turn is the unique_id on that node)
12025 # FIXME(iustin): use a better name for the replaced LVs
12026 temp_suffix = int(time.time())
12027 ren_fn = lambda d, suff: (d.physical_id[0],
12028 d.physical_id[1] + "_replaced-%s" % suff)
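# For illustration (hypothetical names): an old LV ("xenvg", "disk0_data")
# would be renamed by ren_fn to ("xenvg", "disk0_data_replaced-<time_t>"),
# freeing its original name for the newly created LV.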
12030 # Build the rename list based on what LVs exist on the node
12031 rename_old_to_new = []
12032 for to_ren in old_lvs:
12033 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
12034 if not result.fail_msg and result.payload:
12036 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
12038 self.lu.LogInfo("Renaming the old LVs on the target node")
12039 result = self.rpc.call_blockdev_rename(self.target_node,
12041 result.Raise("Can't rename old LVs on node %s" % self.target_node)
12043 # Now we rename the new LVs to the old LVs
12044 self.lu.LogInfo("Renaming the new LVs on the target node")
12045 rename_new_to_old = [(new, old.physical_id)
12046 for old, new in zip(old_lvs, new_lvs)]
12047 result = self.rpc.call_blockdev_rename(self.target_node,
12049 result.Raise("Can't rename new LVs on node %s" % self.target_node)
12051 # Intermediate steps of in memory modifications
12052 for old, new in zip(old_lvs, new_lvs):
12053 new.logical_id = old.logical_id
12054 self.cfg.SetDiskID(new, self.target_node)
12056 # We need to modify old_lvs so that removal later removes the
12057 # right LVs, not the newly added ones; note that old_lvs is a
12059 for disk in old_lvs:
12060 disk.logical_id = ren_fn(disk, temp_suffix)
12061 self.cfg.SetDiskID(disk, self.target_node)
12063 # Now that the new lvs have the old name, we can add them to the device
12064 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
12065 result = self.rpc.call_blockdev_addchildren(self.target_node,
12066 (dev, self.instance), new_lvs)
12067 msg = result.fail_msg
12069 for new_lv in new_lvs:
12070 msg2 = self.rpc.call_blockdev_remove(self.target_node,
12073 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
12074 hint=("cleanup manually the unused logical"
12076 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
12078 cstep = itertools.count(5)
12080 if self.early_release:
12081 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12082 self._RemoveOldStorage(self.target_node, iv_names)
12083 # TODO: Check if releasing locks early still makes sense
12084 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12086 # Release all resource locks except those used by the instance
12087 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12088 keep=self.node_secondary_ip.keys())
12090 # Release all node locks while waiting for sync
12091 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12093 # TODO: Can the instance lock be downgraded here? Take the optional disk
12094 # shutdown in the caller into consideration.
12097 # This can fail as the old devices are degraded and _WaitForSync
12098 # returns a combined result over all disks, so we don't check its return value
12099 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12100 _WaitForSync(self.lu, self.instance)
12102 # Check all devices manually
12103 self._CheckDevices(self.instance.primary_node, iv_names)
12105 # Step: remove old storage
12106 if not self.early_release:
12107 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12108 self._RemoveOldStorage(self.target_node, iv_names)
12110 def _ExecDrbd8Secondary(self, feedback_fn):
12111 """Replace the secondary node for DRBD 8.
12113 The algorithm for replace is quite complicated:
12114 - for all disks of the instance:
12115 - create new LVs on the new node with same names
12116 - shutdown the drbd device on the old secondary
12117 - disconnect the drbd network on the primary
12118 - create the drbd device on the new secondary
12119 - network attach the drbd on the primary, using an artifice:
12120 the drbd code for Attach() will connect to the network if it
12121 finds a device which is connected to the good local disks but
12122 not network enabled
12123 - wait for sync across all devices
12124 - remove all disks from the old secondary
12126 Failures are not very well handled.
12131 pnode = self.instance.primary_node
12133 # Step: check device activation
12134 self.lu.LogStep(1, steps_total, "Check device existence")
12135 self._CheckDisksExistence([self.instance.primary_node])
12136 self._CheckVolumeGroup([self.instance.primary_node])
12138 # Step: check other node consistency
12139 self.lu.LogStep(2, steps_total, "Check peer consistency")
12140 self._CheckDisksConsistency(self.instance.primary_node, True, True)
12142 # Step: create new storage
12143 self.lu.LogStep(3, steps_total, "Allocate new storage")
12144 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
12145 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
12146 for idx, dev in enumerate(disks):
12147 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
12148 (self.new_node, idx))
12149 # we pass force_create=True to force LVM creation
12150 for new_lv in dev.children:
12151 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
12152 True, _GetInstanceInfoText(self.instance), False,
12155 # Step 4: drbd minors and drbd setup changes
12156 # after this, we must manually remove the drbd minors on both the
12157 # error and the success paths
12158 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
12159 minors = self.cfg.AllocateDRBDMinor([self.new_node
12160 for dev in self.instance.disks],
12161 self.instance.name)
12162 logging.debug("Allocated minors %r", minors)
12165 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
12166 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
12167 (self.new_node, idx))
12168 # create new devices on new_node; note that we create two IDs:
12169 # one without port, so the drbd will be activated without
12170 # networking information on the new node at this stage, and one
12171 # with network, for the latter activation in step 4
12172 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
12173 if self.instance.primary_node == o_node1:
12176 assert self.instance.primary_node == o_node2, "Three-node instance?"
12179 new_alone_id = (self.instance.primary_node, self.new_node, None,
12180 p_minor, new_minor, o_secret)
12181 new_net_id = (self.instance.primary_node, self.new_node, o_port,
12182 p_minor, new_minor, o_secret)
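# Both tuples follow the DRBD8 logical_id layout unpacked above
# (nodeA, nodeB, port, minorA, minorB, secret); new_alone_id carries no
# port so the device initially comes up standalone on the new node, while
# new_net_id keeps the original port for the later network attach.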
12184 iv_names[idx] = (dev, dev.children, new_net_id)
12185 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
12187 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
12188 logical_id=new_alone_id,
12189 children=dev.children,
12192 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
12195 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
12197 _GetInstanceInfoText(self.instance), False,
12199 except errors.GenericError:
12200 self.cfg.ReleaseDRBDMinors(self.instance.name)
12203 # We have new devices, shutdown the drbd on the old secondary
12204 for idx, dev in enumerate(self.instance.disks):
12205 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
12206 self.cfg.SetDiskID(dev, self.target_node)
12207 msg = self.rpc.call_blockdev_shutdown(self.target_node,
12208 (dev, self.instance)).fail_msg
12210 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
12211 "node: %s" % (idx, msg),
12212 hint=("Please cleanup this device manually as"
12213 " soon as possible"))
12215 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
12216 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
12217 self.instance.disks)[pnode]
12219 msg = result.fail_msg
12221 # detaches didn't succeed (unlikely)
12222 self.cfg.ReleaseDRBDMinors(self.instance.name)
12223 raise errors.OpExecError("Can't detach the disks from the network on"
12224 " old node: %s" % (msg,))
12226 # if we managed to detach at least one, we update all the disks of
12227 # the instance to point to the new secondary
12228 self.lu.LogInfo("Updating instance configuration")
12229 for dev, _, new_logical_id in iv_names.itervalues():
12230 dev.logical_id = new_logical_id
12231 self.cfg.SetDiskID(dev, self.instance.primary_node)
12233 self.cfg.Update(self.instance, feedback_fn)
12235 # Release all node locks (the configuration has been updated)
12236 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
12238 # and now perform the drbd attach
12239 self.lu.LogInfo("Attaching primary drbds to new secondary"
12240 " (standalone => connected)")
12241 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
12243 self.node_secondary_ip,
12244 (self.instance.disks, self.instance),
12245 self.instance.name,
12247 for to_node, to_result in result.items():
12248 msg = to_result.fail_msg
12250 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
12252 hint=("please do a gnt-instance info to see the"
12253 " status of disks"))
12255 cstep = itertools.count(5)
12257 if self.early_release:
12258 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12259 self._RemoveOldStorage(self.target_node, iv_names)
12260 # TODO: Check if releasing locks early still makes sense
12261 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
12263 # Release all resource locks except those used by the instance
12264 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
12265 keep=self.node_secondary_ip.keys())
12267 # TODO: Can the instance lock be downgraded here? Take the optional disk
12268 # shutdown in the caller into consideration.
12271 # This can fail as the old devices are degraded and _WaitForSync
12272 # returns a combined result over all disks, so we don't check its return value
12273 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
12274 _WaitForSync(self.lu, self.instance)
12276 # Check all devices manually
12277 self._CheckDevices(self.instance.primary_node, iv_names)
12279 # Step: remove old storage
12280 if not self.early_release:
12281 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
12282 self._RemoveOldStorage(self.target_node, iv_names)
12285 class LURepairNodeStorage(NoHooksLU):
12286 """Repairs the volume group on a node.
12291 def CheckArguments(self):
12292 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12294 storage_type = self.op.storage_type
12296 if (constants.SO_FIX_CONSISTENCY not in
12297 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
12298 raise errors.OpPrereqError("Storage units of type '%s' can not be"
12299 " repaired" % storage_type,
12300 errors.ECODE_INVAL)
12302 def ExpandNames(self):
12303 self.needed_locks = {
12304 locking.LEVEL_NODE: [self.op.node_name],
12307 def _CheckFaultyDisks(self, instance, node_name):
12308 """Ensure faulty disks abort the opcode or at least warn."""
12310 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
12312 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
12313 " node '%s'" % (instance.name, node_name),
12314 errors.ECODE_STATE)
12315 except errors.OpPrereqError, err:
12316 if self.op.ignore_consistency:
12317 self.LogWarning(str(err.args[0]))
12321 def CheckPrereq(self):
12322 """Check prerequisites.
12325 # Check whether any instance on this node has faulty disks
12326 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
12327 if inst.admin_state != constants.ADMINST_UP:
12329 check_nodes = set(inst.all_nodes)
12330 check_nodes.discard(self.op.node_name)
12331 for inst_node_name in check_nodes:
12332 self._CheckFaultyDisks(inst, inst_node_name)
12334 def Exec(self, feedback_fn):
12335 feedback_fn("Repairing storage unit '%s' on %s ..." %
12336 (self.op.name, self.op.node_name))
12338 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
12339 result = self.rpc.call_storage_execute(self.op.node_name,
12340 self.op.storage_type, st_args,
12342 constants.SO_FIX_CONSISTENCY)
12343 result.Raise("Failed to repair storage unit '%s' on %s" %
12344 (self.op.name, self.op.node_name))
12347 class LUNodeEvacuate(NoHooksLU):
12348 """Evacuates instances off a list of nodes.
12353 _MODE2IALLOCATOR = {
12354 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
12355 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
12356 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
12358 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
12359 assert (frozenset(_MODE2IALLOCATOR.values()) ==
12360 constants.IALLOCATOR_NEVAC_MODES)
12362 def CheckArguments(self):
12363 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
12365 def ExpandNames(self):
12366 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
12368 if self.op.remote_node is not None:
12369 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12370 assert self.op.remote_node
12372 if self.op.remote_node == self.op.node_name:
12373 raise errors.OpPrereqError("Can not use evacuated node as a new"
12374 " secondary node", errors.ECODE_INVAL)
12376 if self.op.mode != constants.NODE_EVAC_SEC:
12377 raise errors.OpPrereqError("Without the use of an iallocator only"
12378 " secondary instances can be evacuated",
12379 errors.ECODE_INVAL)
12382 self.share_locks = _ShareAll()
12383 self.needed_locks = {
12384 locking.LEVEL_INSTANCE: [],
12385 locking.LEVEL_NODEGROUP: [],
12386 locking.LEVEL_NODE: [],
12389 # Determine nodes (via group) optimistically, needs verification once locks
12390 # have been acquired
12391 self.lock_nodes = self._DetermineNodes()
12393 def _DetermineNodes(self):
12394 """Gets the list of nodes to operate on.
12397 if self.op.remote_node is None:
12398 # Iallocator will choose any node(s) in the same group
12399 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
12401 group_nodes = frozenset([self.op.remote_node])
12403 # Determine nodes to be locked
12404 return set([self.op.node_name]) | group_nodes
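# With an iallocator any node in the evacuated node's group(s) is a
# potential target, so all of them are locked; with an explicit remote
# node only that node (plus the node being evacuated) needs locking.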
12406 def _DetermineInstances(self):
12407 """Builds list of instances to operate on.
12410 assert self.op.mode in constants.NODE_EVAC_MODES
12412 if self.op.mode == constants.NODE_EVAC_PRI:
12413 # Primary instances only
12414 inst_fn = _GetNodePrimaryInstances
12415 assert self.op.remote_node is None, \
12416 "Evacuating primary instances requires iallocator"
12417 elif self.op.mode == constants.NODE_EVAC_SEC:
12418 # Secondary instances only
12419 inst_fn = _GetNodeSecondaryInstances
12422 assert self.op.mode == constants.NODE_EVAC_ALL
12423 inst_fn = _GetNodeInstances
12424 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
12426 raise errors.OpPrereqError("Due to an issue with the iallocator"
12427 " interface it is not possible to evacuate"
12428 " all instances at once; specify explicitly"
12429 " whether to evacuate primary or secondary"
12430 " instances",
12431 errors.ECODE_INVAL)
12433 return inst_fn(self.cfg, self.op.node_name)
12435 def DeclareLocks(self, level):
12436 if level == locking.LEVEL_INSTANCE:
12437 # Lock instances optimistically, needs verification once node and group
12438 # locks have been acquired
12439 self.needed_locks[locking.LEVEL_INSTANCE] = \
12440 set(i.name for i in self._DetermineInstances())
12442 elif level == locking.LEVEL_NODEGROUP:
12443 # Lock node groups for all potential target nodes optimistically, needs
12444 # verification once nodes have been acquired
12445 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12446 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
12448 elif level == locking.LEVEL_NODE:
12449 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
12451 def CheckPrereq(self):
12453 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12454 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
12455 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
12457 need_nodes = self._DetermineNodes()
12459 if not owned_nodes.issuperset(need_nodes):
12460 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
12461 " locks were acquired, current nodes are"
12462 " are '%s', used to be '%s'; retry the"
12464 (self.op.node_name,
12465 utils.CommaJoin(need_nodes),
12466 utils.CommaJoin(owned_nodes)),
12467 errors.ECODE_STATE)
12469 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
12470 if owned_groups != wanted_groups:
12471 raise errors.OpExecError("Node groups changed since locks were acquired,"
12472 " current groups are '%s', used to be '%s';"
12473 " retry the operation" %
12474 (utils.CommaJoin(wanted_groups),
12475 utils.CommaJoin(owned_groups)))
12477 # Determine affected instances
12478 self.instances = self._DetermineInstances()
12479 self.instance_names = [i.name for i in self.instances]
12481 if set(self.instance_names) != owned_instances:
12482 raise errors.OpExecError("Instances on node '%s' changed since locks"
12483 " were acquired, current instances are '%s',"
12484 " used to be '%s'; retry the operation" %
12485 (self.op.node_name,
12486 utils.CommaJoin(self.instance_names),
12487 utils.CommaJoin(owned_instances)))
12489 if self.instance_names:
12490 self.LogInfo("Evacuating instances from node '%s': %s",
12492 utils.CommaJoin(utils.NiceSort(self.instance_names)))
12494 self.LogInfo("No instances to evacuate from node '%s'",
12497 if self.op.remote_node is not None:
12498 for i in self.instances:
12499 if i.primary_node == self.op.remote_node:
12500 raise errors.OpPrereqError("Node %s is the primary node of"
12501 " instance %s, cannot use it as"
12503 (self.op.remote_node, i.name),
12504 errors.ECODE_INVAL)
12506 def Exec(self, feedback_fn):
12507 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
12509 if not self.instance_names:
12510 # No instances to evacuate
12513 elif self.op.iallocator is not None:
12514 # TODO: Implement relocation to other group
12515 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
12516 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
12517 instances=list(self.instance_names))
12518 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
12520 ial.Run(self.op.iallocator)
12522 if not ial.success:
12523 raise errors.OpPrereqError("Can't compute node evacuation using"
12524 " iallocator '%s': %s" %
12525 (self.op.iallocator, ial.info),
12526 errors.ECODE_NORES)
12528 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
12530 elif self.op.remote_node is not None:
12531 assert self.op.mode == constants.NODE_EVAC_SEC
12533 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
12534 remote_node=self.op.remote_node,
12536 mode=constants.REPLACE_DISK_CHG,
12537 early_release=self.op.early_release)]
12538 for instance_name in self.instance_names]
12541 raise errors.ProgrammerError("No iallocator or remote node")
12543 return ResultWithJobs(jobs)
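# The actual evacuation happens in the submitted jobs: either the opcodes
# produced by the iallocator or the per-instance OpInstanceReplaceDisks
# opcodes built above for the explicit remote-node case.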
12546 def _SetOpEarlyRelease(early_release, op):
12547 """Sets C{early_release} flag on opcodes if available.
12549 """
12550 try:
12551 op.early_release = early_release
12552 except AttributeError:
12553 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
12555 return op
12558 def _NodeEvacDest(use_nodes, group, nodes):
12559 """Returns group or nodes depending on caller's choice.
12563 return utils.CommaJoin(nodes)
12568 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
12569 """Unpacks the result of change-group and node-evacuate iallocator requests.
12571 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
12572 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
12574 @type lu: L{LogicalUnit}
12575 @param lu: Logical unit instance
12576 @type alloc_result: tuple/list
12577 @param alloc_result: Result from iallocator
12578 @type early_release: bool
12579 @param early_release: Whether to release locks early if possible
12580 @type use_nodes: bool
12581 @param use_nodes: Whether to display node names instead of groups
12584 (moved, failed, jobs) = alloc_result
12586 if failed:
12587 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
12588 for (name, reason) in failed)
12589 lu.LogWarning("Unable to evacuate instances %s", failreason)
12590 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
12593 lu.LogInfo("Instances to be moved: %s",
12594 utils.CommaJoin("%s (to %s)" %
12595 (name, _NodeEvacDest(use_nodes, group, nodes))
12596 for (name, group, nodes) in moved))
12598 return [map(compat.partial(_SetOpEarlyRelease, early_release),
12599 map(opcodes.OpCode.LoadOpCode, ops))
12600 for ops in jobs]
12603 def _DiskSizeInBytesToMebibytes(lu, size):
12604 """Converts a disk size in bytes to mebibytes.
12606 Warns and rounds up if the size isn't an even multiple of 1 MiB.
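For example, a size of 1073741825 bytes (1 GiB plus one byte) triggers a
warning that 1048575 bytes will not be wiped and yields 1025 MiB.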
12609 (mib, remainder) = divmod(size, 1024 * 1024)
12612 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
12613 " to not overwrite existing data (%s bytes will not be"
12614 " wiped)", (1024 * 1024) - remainder)
12620 class LUInstanceGrowDisk(LogicalUnit):
12621 """Grow a disk of an instance.
12624 HPATH = "disk-grow"
12625 HTYPE = constants.HTYPE_INSTANCE
12628 def ExpandNames(self):
12629 self._ExpandAndLockInstance()
12630 self.needed_locks[locking.LEVEL_NODE] = []
12631 self.needed_locks[locking.LEVEL_NODE_RES] = []
12632 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12633 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
12635 def DeclareLocks(self, level):
12636 if level == locking.LEVEL_NODE:
12637 self._LockInstancesNodes()
12638 elif level == locking.LEVEL_NODE_RES:
12640 self.needed_locks[locking.LEVEL_NODE_RES] = \
12641 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12643 def BuildHooksEnv(self):
12644 """Build hooks env.
12646 This runs on the master, the primary and all the secondaries.
12650 "DISK": self.op.disk,
12651 "AMOUNT": self.op.amount,
12652 "ABSOLUTE": self.op.absolute,
12654 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12657 def BuildHooksNodes(self):
12658 """Build hooks nodes.
12661 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12664 def CheckPrereq(self):
12665 """Check prerequisites.
12667 This checks that the instance is in the cluster.
12670 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12671 assert instance is not None, \
12672 "Cannot retrieve locked instance %s" % self.op.instance_name
12673 nodenames = list(instance.all_nodes)
12674 for node in nodenames:
12675 _CheckNodeOnline(self, node)
12677 self.instance = instance
12679 if instance.disk_template not in constants.DTS_GROWABLE:
12680 raise errors.OpPrereqError("Instance's disk layout does not support"
12681 " growing", errors.ECODE_INVAL)
12683 self.disk = instance.FindDisk(self.op.disk)
12685 if self.op.absolute:
12686 self.target = self.op.amount
12687 self.delta = self.target - self.disk.size
12689 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12690 "current disk size (%s)" %
12691 (utils.FormatUnit(self.target, "h"),
12692 utils.FormatUnit(self.disk.size, "h")),
12693 errors.ECODE_STATE)
12694 else:
12695 self.delta = self.op.amount
12696 self.target = self.disk.size + self.delta
12697 if self.delta < 0:
12698 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12699 utils.FormatUnit(self.delta, "h"),
12700 errors.ECODE_INVAL)
12702 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
12704 def _CheckDiskSpace(self, nodenames, req_vgspace):
12705 template = self.instance.disk_template
12706 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
12707 # TODO: check the free disk space for file-based disks when that feature is supported
12709 nodes = map(self.cfg.GetNodeInfo, nodenames)
12710 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
12711 nodes)
12712 if es_nodes:
12713 # With exclusive storage we need to do something smarter than just looking
12714 # at free space; for now, let's simply abort the operation.
12715 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
12716 " is enabled", errors.ECODE_STATE)
12717 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
12719 def Exec(self, feedback_fn):
12720 """Execute disk grow.
12723 instance = self.instance
12726 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12727 assert (self.owned_locks(locking.LEVEL_NODE) ==
12728 self.owned_locks(locking.LEVEL_NODE_RES))
12730 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
12732 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12733 if not disks_ok:
12734 raise errors.OpExecError("Cannot activate block device to grow")
12736 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12737 (self.op.disk, instance.name,
12738 utils.FormatUnit(self.delta, "h"),
12739 utils.FormatUnit(self.target, "h")))
12741 # First run all grow ops in dry-run mode
12742 for node in instance.all_nodes:
12743 self.cfg.SetDiskID(disk, node)
12744 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12746 result.Raise("Dry-run grow request failed on node %s" % node)
12748 if wipe_disks:
12749 # Get disk size from primary node for wiping
12750 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
12751 result.Raise("Failed to retrieve disk size from node '%s'" %
12752 instance.primary_node)
12754 (disk_size_in_bytes, ) = result.payload
12756 if disk_size_in_bytes is None:
12757 raise errors.OpExecError("Failed to retrieve disk size from primary"
12758 " node '%s'" % instance.primary_node)
12760 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
12762 assert old_disk_size >= disk.size, \
12763 ("Retrieved disk size too small (got %s, should be at least %s)" %
12764 (old_disk_size, disk.size))
12765 else:
12766 old_disk_size = None
12768 # We know that (as far as we can test) operations across different
12769 # nodes will succeed; time to run it for real on the backing storage
12770 for node in instance.all_nodes:
12771 self.cfg.SetDiskID(disk, node)
12772 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12774 result.Raise("Grow request failed on node %s" % node)
12776 # And now execute it for logical storage, on the primary node
12777 node = instance.primary_node
12778 self.cfg.SetDiskID(disk, node)
12779 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12781 result.Raise("Grow request failed on node %s" % node)
12783 disk.RecordGrow(self.delta)
12784 self.cfg.Update(instance, feedback_fn)
12786 # Changes have been recorded, release node lock
12787 _ReleaseLocks(self, locking.LEVEL_NODE)
12789 # Downgrade lock while waiting for sync
12790 self.glm.downgrade(locking.LEVEL_INSTANCE)
12792 assert wipe_disks ^ (old_disk_size is None)
12794 if wipe_disks:
12795 assert instance.disks[self.op.disk] == disk
12797 # Wipe newly added disk space
12798 _WipeDisks(self, instance,
12799 disks=[(self.op.disk, disk, old_disk_size)])
12801 if self.op.wait_for_sync:
12802 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12803 if disk_abort:
12804 self.LogWarning("Disk syncing has not returned a good status; check"
12805 " the instance")
12806 if instance.admin_state != constants.ADMINST_UP:
12807 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12808 elif instance.admin_state != constants.ADMINST_UP:
12809 self.LogWarning("Not shutting down the disk even if the instance is"
12810 " not supposed to be running because no wait for"
12811 " sync mode was requested")
12813 assert self.owned_locks(locking.LEVEL_NODE_RES)
12814 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
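# Summary of the grow sequence above (annotation, not original code): run the
# grow in dry-run mode on every node first, remember the current size from
# the primary node when prealloc_wipe_disks is set, perform the real grow on
# the backing storage of all nodes and then on the logical device of the
# primary node, record the new size in the configuration, wipe the newly
# added area if requested, and finally (optionally) wait for the disk to
# resync.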
12817 class LUInstanceQueryData(NoHooksLU):
12818 """Query runtime instance data.
12823 def ExpandNames(self):
12824 self.needed_locks = {}
12826 # Use locking if requested or when non-static information is wanted
12827 if not (self.op.static or self.op.use_locking):
12828 self.LogWarning("Non-static data requested, locks need to be acquired")
12829 self.op.use_locking = True
12831 if self.op.instances or not self.op.use_locking:
12832 # Expand instance names right here
12833 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12835 # Will use acquired locks
12836 self.wanted_names = None
12838 if self.op.use_locking:
12839 self.share_locks = _ShareAll()
12841 if self.wanted_names is None:
12842 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12844 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12846 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12847 self.needed_locks[locking.LEVEL_NODE] = []
12848 self.needed_locks[locking.LEVEL_NETWORK] = []
12849 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12851 def DeclareLocks(self, level):
12852 if self.op.use_locking:
12853 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12854 if level == locking.LEVEL_NODEGROUP:
12856 # Lock all groups used by instances optimistically; this requires going
12857 # via the node before it's locked, requiring verification later on
12858 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12859 frozenset(group_uuid
12860 for instance_name in owned_instances
12862 self.cfg.GetInstanceNodeGroups(instance_name))
12864 elif level == locking.LEVEL_NODE:
12865 self._LockInstancesNodes()
12867 elif level == locking.LEVEL_NETWORK:
12868 self.needed_locks[locking.LEVEL_NETWORK] = \
12870 for instance_name in owned_instances
12872 self.cfg.GetInstanceNetworks(instance_name))
12874 def CheckPrereq(self):
12875 """Check prerequisites.
12877 This only checks the optional instance list against the existing names.
12880 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12881 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12882 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12883 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
12885 if self.wanted_names is None:
12886 assert self.op.use_locking, "Locking was not used"
12887 self.wanted_names = owned_instances
12889 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12891 if self.op.use_locking:
12892 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12895 assert not (owned_instances or owned_groups or
12896 owned_nodes or owned_networks)
12898 self.wanted_instances = instances.values()
12900 def _ComputeBlockdevStatus(self, node, instance, dev):
12901 """Returns the status of a block device
12904 if self.op.static or not node:
12905 return None
12907 self.cfg.SetDiskID(dev, node)
12909 result = self.rpc.call_blockdev_find(node, dev)
12913 result.Raise("Can't compute disk status for %s" % instance.name)
12915 status = result.payload
12919 return (status.dev_path, status.major, status.minor,
12920 status.sync_percent, status.estimated_time,
12921 status.is_degraded, status.ldisk_status)
12923 def _ComputeDiskStatus(self, instance, snode, dev):
12924 """Compute block device status.
12927 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12929 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12931 def _ComputeDiskStatusInner(self, instance, snode, dev):
12932 """Compute block device status.
12934 @attention: The device has to be annotated already.
12937 if dev.dev_type in constants.LDS_DRBD:
12938 # we change the snode then (otherwise we use the one passed in)
12939 if dev.logical_id[0] == instance.primary_node:
12940 snode = dev.logical_id[1]
12942 snode = dev.logical_id[0]
12944 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12946 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12949 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12956 "iv_name": dev.iv_name,
12957 "dev_type": dev.dev_type,
12958 "logical_id": dev.logical_id,
12959 "physical_id": dev.physical_id,
12960 "pstatus": dev_pstatus,
12961 "sstatus": dev_sstatus,
12962 "children": dev_children,
12967 def Exec(self, feedback_fn):
12968 """Gather and return data"""
12971 cluster = self.cfg.GetClusterInfo()
12973 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12974 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12976 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12977 for node in nodes.values()))
12979 group2name_fn = lambda uuid: groups[uuid].name
12980 for instance in self.wanted_instances:
12981 pnode = nodes[instance.primary_node]
12983 if self.op.static or pnode.offline:
12984 remote_state = None
12986 self.LogWarning("Primary node %s is marked offline, returning static"
12987 " information only for instance %s" %
12988 (pnode.name, instance.name))
12990 remote_info = self.rpc.call_instance_info(instance.primary_node,
12992 instance.hypervisor)
12993 remote_info.Raise("Error checking node %s" % instance.primary_node)
12994 remote_info = remote_info.payload
12995 if remote_info and "state" in remote_info:
12996 remote_state = "up"
12997 else:
12998 if instance.admin_state == constants.ADMINST_UP:
12999 remote_state = "down"
13000 else:
13001 remote_state = instance.admin_state
13003 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
13006 snodes_group_uuids = [nodes[snode_name].group
13007 for snode_name in instance.secondary_nodes]
13009 result[instance.name] = {
13010 "name": instance.name,
13011 "config_state": instance.admin_state,
13012 "run_state": remote_state,
13013 "pnode": instance.primary_node,
13014 "pnode_group_uuid": pnode.group,
13015 "pnode_group_name": group2name_fn(pnode.group),
13016 "snodes": instance.secondary_nodes,
13017 "snodes_group_uuids": snodes_group_uuids,
13018 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
13020 # this happens to be the same format used for hooks
13021 "nics": _NICListToTuple(self, instance.nics),
13022 "disk_template": instance.disk_template,
13024 "hypervisor": instance.hypervisor,
13025 "network_port": instance.network_port,
13026 "hv_instance": instance.hvparams,
13027 "hv_actual": cluster.FillHV(instance, skip_globals=True),
13028 "be_instance": instance.beparams,
13029 "be_actual": cluster.FillBE(instance),
13030 "os_instance": instance.osparams,
13031 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
13032 "serial_no": instance.serial_no,
13033 "mtime": instance.mtime,
13034 "ctime": instance.ctime,
13035 "uuid": instance.uuid,
13041 def PrepareContainerMods(mods, private_fn):
13042 """Prepares a list of container modifications by adding a private data field.
13044 @type mods: list of tuples; (operation, index, parameters)
13045 @param mods: List of modifications
13046 @type private_fn: callable or None
13047 @param private_fn: Callable for constructing a private data field for a
13052 if private_fn is None:
13057 return [(op, idx, params, fn()) for (op, idx, params) in mods]
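# Illustrative usage (assumed values, not part of the original code):
#   mods = [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024})]
#   PrepareContainerMods(mods, None)
#   -> [(constants.DDM_ADD, -1, {constants.IDISK_SIZE: 1024}, None)]
# With a private_fn such as _InstNicModPrivate, the fourth element is a
# freshly constructed private data object instead of None.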
13060 #: Type description for changes as returned by L{ApplyContainerMods}'s callbacks
13062 _TApplyContModsCbChanges = \
13063 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
13064 ht.TNonEmptyString,
13069 def ApplyContainerMods(kind, container, chgdesc, mods,
13070 create_fn, modify_fn, remove_fn):
13071 """Applies descriptions in C{mods} to C{container}.
13074 @param kind: One-word item description
13075 @type container: list
13076 @param container: Container to modify
13077 @type chgdesc: None or list
13078 @param chgdesc: List of applied changes
13080 @param mods: Modifications as returned by L{PrepareContainerMods}
13081 @type create_fn: callable
13082 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
13083 receives absolute item index, parameters and private data object as added
13084 by L{PrepareContainerMods}, returns tuple containing new item and changes
13086 @type modify_fn: callable
13087 @param modify_fn: Callback for modifying an existing item
13088 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
13089 and private data object as added by L{PrepareContainerMods}, returns
13091 @type remove_fn: callable
13092 @param remove_fn: Callback on removing item; receives absolute item index,
13093 item and private data object as added by L{PrepareContainerMods}
13096 for (op, idx, params, private) in mods:
13099 absidx = len(container) - 1
13101 raise IndexError("Not accepting negative indices other than -1")
13102 elif idx > len(container):
13103 raise IndexError("Got %s index %s, but there are only %s" %
13104 (kind, idx, len(container)))
13110 if op == constants.DDM_ADD:
13111 # Calculate where item will be added
13113 addidx = len(container)
13117 if create_fn is None:
13120 (item, changes) = create_fn(addidx, params, private)
13123 container.append(item)
13126 assert idx <= len(container)
13127 # list.insert does so before the specified index
13128 container.insert(idx, item)
13130 # Retrieve existing item
13132 item = container[absidx]
13134 raise IndexError("Invalid %s index %s" % (kind, idx))
13136 if op == constants.DDM_REMOVE:
13139 if remove_fn is not None:
13140 remove_fn(absidx, item, private)
13142 changes = [("%s/%s" % (kind, absidx), "remove")]
13144 assert container[absidx] == item
13145 del container[absidx]
13146 elif op == constants.DDM_MODIFY:
13147 if modify_fn is not None:
13148 changes = modify_fn(absidx, item, params, private)
13150 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13152 assert _TApplyContModsCbChanges(changes)
13154 if not (chgdesc is None or changes is None):
13155 chgdesc.extend(changes)
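# Behaviour sketch for ApplyContainerMods (annotation only):
#   - DDM_ADD with idx == -1 appends the item returned by create_fn; any
#     other idx inserts before that position (idx may not exceed the
#     container length).
#   - DDM_REMOVE calls remove_fn, deletes the item and records a
#     ("<kind>/<index>", "remove") change entry.
#   - DDM_MODIFY hands the existing item to modify_fn and records whatever
#     changes that callback returns.
# All change entries are appended to "chgdesc" when one is supplied.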
13158 def _UpdateIvNames(base_index, disks):
13159 """Updates the C{iv_name} attribute of disks.
13161 @type disks: list of L{objects.Disk}
13164 for (idx, disk) in enumerate(disks):
13165 disk.iv_name = "disk/%s" % (base_index + idx, )
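# Example (illustration only): _UpdateIvNames(0, instance.disks) renames the
# devices to "disk/0", "disk/1", ... so that iv_name stays consistent with
# each disk's position after container modifications.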
13168 class _InstNicModPrivate:
13169 """Data structure for network interface modifications.
13171 Used by L{LUInstanceSetParams}.
13174 def __init__(self):
13179 class LUInstanceSetParams(LogicalUnit):
13180 """Modifies an instance's parameters.
13183 HPATH = "instance-modify"
13184 HTYPE = constants.HTYPE_INSTANCE
13188 def _UpgradeDiskNicMods(kind, mods, verify_fn):
13189 assert ht.TList(mods)
13190 assert not mods or len(mods[0]) in (2, 3)
13192 if mods and len(mods[0]) == 2:
13196 for op, params in mods:
13197 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
13198 result.append((op, -1, params))
13202 raise errors.OpPrereqError("Only one %s add or remove operation is"
13203 " supported at a time" % kind,
13204 errors.ECODE_INVAL)
13206 result.append((constants.DDM_MODIFY, op, params))
13208 assert verify_fn(result)
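# Illustrative example of the legacy-format upgrade above (assumed input):
#   [(constants.DDM_ADD, {"size": 1024}), (2, {"mode": "ro"})]
# is rewritten to the three-element form
#   [(constants.DDM_ADD, -1, {"size": 1024}),
#    (constants.DDM_MODIFY, 2, {"mode": "ro"})]
# i.e. a bare index in the first position is treated as a modify operation.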
13215 def _CheckMods(kind, mods, key_types, item_fn):
13216 """Ensures requested disk/NIC modifications are valid.
13219 for (op, _, params) in mods:
13220 assert ht.TDict(params)
13222 # If 'key_types' is an empty dict, we assume we have an
13223 # 'ext' template and thus do not ForceDictType
13225 utils.ForceDictType(params, key_types)
13227 if op == constants.DDM_REMOVE:
13229 raise errors.OpPrereqError("No settings should be passed when"
13230 " removing a %s" % kind,
13231 errors.ECODE_INVAL)
13232 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
13233 item_fn(op, params)
13235 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
13238 def _VerifyDiskModification(op, params):
13239 """Verifies a disk modification.
13242 if op == constants.DDM_ADD:
13243 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
13244 if mode not in constants.DISK_ACCESS_SET:
13245 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
13246 errors.ECODE_INVAL)
13248 size = params.get(constants.IDISK_SIZE, None)
13250 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
13251 constants.IDISK_SIZE, errors.ECODE_INVAL)
13255 except (TypeError, ValueError), err:
13256 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
13257 errors.ECODE_INVAL)
13259 params[constants.IDISK_SIZE] = size
13261 elif op == constants.DDM_MODIFY:
13262 if constants.IDISK_SIZE in params:
13263 raise errors.OpPrereqError("Disk size change not possible, use"
13264 " grow-disk", errors.ECODE_INVAL)
13265 if constants.IDISK_MODE not in params:
13266 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
13267 " modification supported, but missing",
13268 errors.ECODE_NOENT)
13269 if len(params) > 1:
13270 raise errors.OpPrereqError("Disk modification doesn't support"
13271 " additional arbitrary parameters",
13272 errors.ECODE_INVAL)
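# Examples of the disk checks above (illustration only):
#   add    -> {constants.IDISK_SIZE: 1024} is accepted; the mode defaults to
#             constants.DISK_RDWR, and a missing size raises OpPrereqError.
#   modify -> only the mode may be changed; passing constants.IDISK_SIZE is
#             rejected with a hint to use grow-disk instead.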
13275 def _VerifyNicModification(op, params):
13276 """Verifies a network interface modification.
13279 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
13280 ip = params.get(constants.INIC_IP, None)
13281 req_net = params.get(constants.INIC_NETWORK, None)
13282 link = params.get(constants.NIC_LINK, None)
13283 mode = params.get(constants.NIC_MODE, None)
13284 if req_net is not None:
13285 if req_net.lower() == constants.VALUE_NONE:
13286 params[constants.INIC_NETWORK] = None
13288 elif link is not None or mode is not None:
13289 raise errors.OpPrereqError("If a network is given,"
13290 " mode or link should not be set",
13291 errors.ECODE_INVAL)
13293 if op == constants.DDM_ADD:
13294 macaddr = params.get(constants.INIC_MAC, None)
13295 if macaddr is None:
13296 params[constants.INIC_MAC] = constants.VALUE_AUTO
13299 if ip.lower() == constants.VALUE_NONE:
13300 params[constants.INIC_IP] = None
13302 if ip.lower() == constants.NIC_IP_POOL:
13303 if op == constants.DDM_ADD and req_net is None:
13304 raise errors.OpPrereqError("If ip=pool, parameter network"
13305 " must be passed too",
13306 errors.ECODE_INVAL)
13308 if not netutils.IPAddress.IsValid(ip):
13309 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
13310 errors.ECODE_INVAL)
13312 if constants.INIC_MAC in params:
13313 macaddr = params[constants.INIC_MAC]
13314 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13315 macaddr = utils.NormalizeAndValidateMac(macaddr)
13317 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
13318 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
13319 " modifying an existing NIC",
13320 errors.ECODE_INVAL)
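# Examples of the NIC checks above (illustration only):
#   add    -> ip="pool" requires constants.INIC_NETWORK to be given as well;
#             a missing MAC defaults to constants.VALUE_AUTO.
#   modify -> the MAC may not be set to "auto"; ip="none" clears the address.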
13322 def CheckArguments(self):
13323 if not (self.op.nics or self.op.disks or self.op.disk_template or
13324 self.op.hvparams or self.op.beparams or self.op.os_name or
13325 self.op.offline is not None or self.op.runtime_mem):
13326 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
13328 if self.op.hvparams:
13329 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
13330 "hypervisor", "instance", "cluster")
13332 self.op.disks = self._UpgradeDiskNicMods(
13333 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
13334 self.op.nics = self._UpgradeDiskNicMods(
13335 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
13337 if self.op.disks and self.op.disk_template is not None:
13338 raise errors.OpPrereqError("Disk template conversion and other disk"
13339 " changes not supported at the same time",
13340 errors.ECODE_INVAL)
13342 if (self.op.disk_template and
13343 self.op.disk_template in constants.DTS_INT_MIRROR and
13344 self.op.remote_node is None):
13345 raise errors.OpPrereqError("Changing the disk template to a mirrored"
13346 " one requires specifying a secondary node",
13347 errors.ECODE_INVAL)
13349 # Check NIC modifications
13350 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
13351 self._VerifyNicModification)
13353 def ExpandNames(self):
13354 self._ExpandAndLockInstance()
13355 self.needed_locks[locking.LEVEL_NODEGROUP] = []
13356 # Can't even acquire node locks in shared mode as upcoming changes in
13357 # Ganeti 2.6 will start to modify the node object on disk conversion
13358 self.needed_locks[locking.LEVEL_NODE] = []
13359 self.needed_locks[locking.LEVEL_NODE_RES] = []
13360 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
13361 # Lock the node group in shared mode so the ipolicy can be looked up
13362 self.share_locks[locking.LEVEL_NODEGROUP] = 1
13364 def DeclareLocks(self, level):
13365 if level == locking.LEVEL_NODEGROUP:
13366 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13367 # Acquire locks for the instance's nodegroups optimistically. Needs
13368 # to be verified in CheckPrereq
13369 self.needed_locks[locking.LEVEL_NODEGROUP] = \
13370 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13371 elif level == locking.LEVEL_NODE:
13372 self._LockInstancesNodes()
13373 if self.op.disk_template and self.op.remote_node:
13374 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
13375 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
13376 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
13378 self.needed_locks[locking.LEVEL_NODE_RES] = \
13379 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
13381 def BuildHooksEnv(self):
13382 """Build hooks env.
13384 This runs on the master, primary and secondaries.
13388 if constants.BE_MINMEM in self.be_new:
13389 args["minmem"] = self.be_new[constants.BE_MINMEM]
13390 if constants.BE_MAXMEM in self.be_new:
13391 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
13392 if constants.BE_VCPUS in self.be_new:
13393 args["vcpus"] = self.be_new[constants.BE_VCPUS]
13394 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
13395 # information at all.
13397 if self._new_nics is not None:
13400 for nic in self._new_nics:
13401 n = copy.deepcopy(nic)
13402 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
13403 n.nicparams = nicparams
13404 nics.append(_NICToTuple(self, n))
13406 args["nics"] = nics
13408 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
13409 if self.op.disk_template:
13410 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
13411 if self.op.runtime_mem:
13412 env["RUNTIME_MEMORY"] = self.op.runtime_mem
13416 def BuildHooksNodes(self):
13417 """Build hooks nodes.
13420 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
13423 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
13424 old_params, cluster, pnode):
13426 update_params_dict = dict([(key, params[key])
13427 for key in constants.NICS_PARAMETERS
13430 req_link = update_params_dict.get(constants.NIC_LINK, None)
13431 req_mode = update_params_dict.get(constants.NIC_MODE, None)
13433 new_net_uuid = None
13434 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
13435 if new_net_uuid_or_name:
13436 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
13437 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
13440 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
13443 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
13445 raise errors.OpPrereqError("No netparams found for the network"
13446 " %s, probably not connected" %
13447 new_net_obj.name, errors.ECODE_INVAL)
13448 new_params = dict(netparams)
13450 new_params = _GetUpdatedParams(old_params, update_params_dict)
13452 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
13454 new_filled_params = cluster.SimpleFillNIC(new_params)
13455 objects.NIC.CheckParameterSyntax(new_filled_params)
13457 new_mode = new_filled_params[constants.NIC_MODE]
13458 if new_mode == constants.NIC_MODE_BRIDGED:
13459 bridge = new_filled_params[constants.NIC_LINK]
13460 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
13462 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
13464 self.warn.append(msg)
13466 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
13468 elif new_mode == constants.NIC_MODE_ROUTED:
13469 ip = params.get(constants.INIC_IP, old_ip)
13471 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
13472 " on a routed NIC", errors.ECODE_INVAL)
13474 elif new_mode == constants.NIC_MODE_OVS:
13475 # TODO: check OVS link
13476 self.LogInfo("OVS links are currently not checked for correctness")
13478 if constants.INIC_MAC in params:
13479 mac = params[constants.INIC_MAC]
13481 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
13482 errors.ECODE_INVAL)
13483 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
13484 # otherwise generate the MAC address
13485 params[constants.INIC_MAC] = \
13486 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13488 # or validate/reserve the current one
13490 self.cfg.ReserveMAC(mac, self.proc.GetECId())
13491 except errors.ReservationError:
13492 raise errors.OpPrereqError("MAC address '%s' already in use"
13493 " in cluster" % mac,
13494 errors.ECODE_NOTUNIQUE)
13495 elif new_net_uuid != old_net_uuid:
13497 def get_net_prefix(net_uuid):
13500 nobj = self.cfg.GetNetwork(net_uuid)
13501 mac_prefix = nobj.mac_prefix
13505 new_prefix = get_net_prefix(new_net_uuid)
13506 old_prefix = get_net_prefix(old_net_uuid)
13507 if old_prefix != new_prefix:
13508 params[constants.INIC_MAC] = \
13509 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
13511 # if there is a change in (ip, network) tuple
13512 new_ip = params.get(constants.INIC_IP, old_ip)
13513 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
13515 # if IP is pool then require a network and generate one IP
13516 if new_ip.lower() == constants.NIC_IP_POOL:
13519 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
13520 except errors.ReservationError:
13521 raise errors.OpPrereqError("Unable to get a free IP"
13522 " from the address pool",
13523 errors.ECODE_STATE)
13524 self.LogInfo("Chose IP %s from network %s",
13527 params[constants.INIC_IP] = new_ip
13529 raise errors.OpPrereqError("ip=pool, but no network found",
13530 errors.ECODE_INVAL)
13531 # Reserve the new IP in the new network, if any
13534 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
13535 self.LogInfo("Reserving IP %s in network %s",
13536 new_ip, new_net_obj.name)
13537 except errors.ReservationError:
13538 raise errors.OpPrereqError("IP %s not available in network %s" %
13539 (new_ip, new_net_obj.name),
13540 errors.ECODE_NOTUNIQUE)
13541 # new network is None so check if new IP is a conflicting IP
13542 elif self.op.conflicts_check:
13543 _CheckForConflictingIp(self, new_ip, pnode)
13545 # release old IP if old network is not None
13546 if old_ip and old_net_uuid:
13548 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
13549 except errors.AddressPoolError:
13550 logging.warning("Release IP %s not contained in network %s",
13551 old_ip, old_net_obj.name)
13553 # there are no changes in (ip, network) tuple and old network is not None
13554 elif (old_net_uuid is not None and
13555 (req_link is not None or req_mode is not None)):
13556 raise errors.OpPrereqError("Not allowed to change link or mode of"
13557 " a NIC that is connected to a network",
13558 errors.ECODE_INVAL)
13560 private.params = new_params
13561 private.filled = new_filled_params
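# Summary of the NIC preparation above (annotation, not original code):
# link/mode overrides are only honoured for NICs not attached to a network
# (networked NICs take their parameters from the group netparams); a MAC of
# "auto"/"generate" is (re)generated while an explicit MAC is reserved, and a
# network change regenerates the MAC when the two networks use different
# prefixes; ip="pool" draws a free address from the new network, any other
# new IP is reserved there (or checked for conflicts when no network is
# involved), and the old IP is released from its previous network.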
13563 def _PreCheckDiskTemplate(self, pnode_info):
13564 """CheckPrereq checks related to a new disk template."""
13565 # Arguments are passed to avoid configuration lookups
13566 instance = self.instance
13567 pnode = instance.primary_node
13568 cluster = self.cluster
13569 if instance.disk_template == self.op.disk_template:
13570 raise errors.OpPrereqError("Instance already has disk template %s" %
13571 instance.disk_template, errors.ECODE_INVAL)
13573 if (instance.disk_template,
13574 self.op.disk_template) not in self._DISK_CONVERSIONS:
13575 raise errors.OpPrereqError("Unsupported disk template conversion from"
13576 " %s to %s" % (instance.disk_template,
13577 self.op.disk_template),
13578 errors.ECODE_INVAL)
13579 _CheckInstanceState(self, instance, INSTANCE_DOWN,
13580 msg="cannot change disk template")
13581 if self.op.disk_template in constants.DTS_INT_MIRROR:
13582 if self.op.remote_node == pnode:
13583 raise errors.OpPrereqError("Given new secondary node %s is the same"
13584 " as the primary node of the instance" %
13585 self.op.remote_node, errors.ECODE_STATE)
13586 _CheckNodeOnline(self, self.op.remote_node)
13587 _CheckNodeNotDrained(self, self.op.remote_node)
13588 # FIXME: here we assume that the old instance type is DT_PLAIN
13589 assert instance.disk_template == constants.DT_PLAIN
13590 disks = [{constants.IDISK_SIZE: d.size,
13591 constants.IDISK_VG: d.logical_id[0]}
13592 for d in instance.disks]
13593 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
13594 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
13596 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
13597 snode_group = self.cfg.GetNodeGroup(snode_info.group)
13598 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13600 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
13601 ignore=self.op.ignore_ipolicy)
13602 if pnode_info.group != snode_info.group:
13603 self.LogWarning("The primary and secondary nodes are in two"
13604 " different node groups; the disk parameters"
13605 " from the first disk's node group will be"
13606 " used")
13608 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
13609 # Make sure none of the nodes require exclusive storage
13610 nodes = [pnode_info]
13611 if self.op.disk_template in constants.DTS_INT_MIRROR:
13613 nodes.append(snode_info)
13614 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
13615 if compat.any(map(has_es, nodes)):
13616 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
13617 " storage is enabled" % (instance.disk_template,
13618 self.op.disk_template))
13619 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
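# Note (annotation): only the conversions listed in _DISK_CONVERSIONS at the
# end of this class are accepted here, i.e. plain <-> drbd; the instance must
# be stopped, converting to a mirrored template requires a secondary node
# with enough free space in its volume group, and none of the involved nodes
# may use exclusive_storage.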
13621 def CheckPrereq(self):
13622 """Check prerequisites.
13624 This only checks the instance list against the existing names.
13627 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
13628 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13630 cluster = self.cluster = self.cfg.GetClusterInfo()
13631 assert self.instance is not None, \
13632 "Cannot retrieve locked instance %s" % self.op.instance_name
13634 pnode = instance.primary_node
13635 assert pnode in self.owned_locks(locking.LEVEL_NODE)
13636 nodelist = list(instance.all_nodes)
13637 pnode_info = self.cfg.GetNodeInfo(pnode)
13638 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
13640 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
13641 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
13642 group_info = self.cfg.GetNodeGroup(pnode_info.group)
13644 # dictionary with instance information after the modification
13647 # Check disk modifications. This is done here and not in CheckArguments
13648 # (as with NICs), because we need to know the instance's disk template
13649 if instance.disk_template == constants.DT_EXT:
13650 self._CheckMods("disk", self.op.disks, {},
13651 self._VerifyDiskModification)
13653 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
13654 self._VerifyDiskModification)
13656 # Prepare disk/NIC modifications
13657 self.diskmod = PrepareContainerMods(self.op.disks, None)
13658 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
13660 # Check the validity of the `provider' parameter
13661 if instance.disk_template == constants.DT_EXT:
13662 for mod in self.diskmod:
13663 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13664 if mod[0] == constants.DDM_ADD:
13665 if ext_provider is None:
13666 raise errors.OpPrereqError("Instance template is '%s' and parameter"
13667 " '%s' missing, during disk add" %
13669 constants.IDISK_PROVIDER),
13670 errors.ECODE_NOENT)
13671 elif mod[0] == constants.DDM_MODIFY:
13673 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
13675 constants.IDISK_PROVIDER,
13676 errors.ECODE_INVAL)
13678 for mod in self.diskmod:
13679 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
13680 if ext_provider is not None:
13681 raise errors.OpPrereqError("Parameter '%s' is only valid for"
13682 " instances of type '%s'" %
13683 (constants.IDISK_PROVIDER,
13685 errors.ECODE_INVAL)
13688 if self.op.os_name and not self.op.force:
13689 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
13690 self.op.force_variant)
13691 instance_os = self.op.os_name
13693 instance_os = instance.os
13695 assert not (self.op.disk_template and self.op.disks), \
13696 "Can't modify disk template and apply disk changes at the same time"
13698 if self.op.disk_template:
13699 self._PreCheckDiskTemplate(pnode_info)
13701 # hvparams processing
13702 if self.op.hvparams:
13703 hv_type = instance.hypervisor
13704 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
13705 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
13706 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
13709 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
13710 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
13711 self.hv_proposed = self.hv_new = hv_new # the new actual values
13712 self.hv_inst = i_hvdict # the new dict (without defaults)
13714 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
13716 self.hv_new = self.hv_inst = {}
13718 # beparams processing
13719 if self.op.beparams:
13720 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
13722 objects.UpgradeBeParams(i_bedict)
13723 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
13724 be_new = cluster.SimpleFillBE(i_bedict)
13725 self.be_proposed = self.be_new = be_new # the new actual values
13726 self.be_inst = i_bedict # the new dict (without defaults)
13728 self.be_new = self.be_inst = {}
13729 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
13730 be_old = cluster.FillBE(instance)
13732 # CPU param validation -- checking every time a parameter is
13733 # changed to cover all cases where either CPU mask or vcpus have
13734 # been changed
13735 if (constants.BE_VCPUS in self.be_proposed and
13736 constants.HV_CPU_MASK in self.hv_proposed):
13738 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
13739 # Verify mask is consistent with number of vCPUs. Can skip this
13740 # test if only 1 entry in the CPU mask, which means same mask
13741 # is applied to all vCPUs.
13742 if (len(cpu_list) > 1 and
13743 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
13744 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13746 (self.be_proposed[constants.BE_VCPUS],
13747 self.hv_proposed[constants.HV_CPU_MASK]),
13748 errors.ECODE_INVAL)
13750 # Only perform this test if a new CPU mask is given
13751 if constants.HV_CPU_MASK in self.hv_new:
13752 # Calculate the largest CPU number requested
13753 max_requested_cpu = max(map(max, cpu_list))
13754 # Check that all of the instance's nodes have enough physical CPUs to
13755 # satisfy the requested CPU mask
13756 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13757 max_requested_cpu + 1, instance.hypervisor)
13759 # osparams processing
13760 if self.op.osparams:
13761 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13762 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13763 self.os_inst = i_osdict # the new dict (without defaults)
13769 #TODO(dynmem): do the appropriate check involving MINMEM
13770 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13771 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13772 mem_check_list = [pnode]
13773 if be_new[constants.BE_AUTO_BALANCE]:
13774 # either we changed auto_balance to yes or it was from before
13775 mem_check_list.extend(instance.secondary_nodes)
13776 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13777 instance.hypervisor)
13778 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13779 [instance.hypervisor], False)
13780 pninfo = nodeinfo[pnode]
13781 msg = pninfo.fail_msg
13783 # Assume the primary node is unreachable and go ahead
13784 self.warn.append("Can't get info from primary node %s: %s" %
13787 (_, _, (pnhvinfo, )) = pninfo.payload
13788 if not isinstance(pnhvinfo.get("memory_free", None), int):
13789 self.warn.append("Node data from primary node %s doesn't contain"
13790 " free memory information" % pnode)
13791 elif instance_info.fail_msg:
13792 self.warn.append("Can't get instance runtime information: %s" %
13793 instance_info.fail_msg)
13795 if instance_info.payload:
13796 current_mem = int(instance_info.payload["memory"])
13798 # Assume instance not running
13799 # (there is a slight race condition here, but it's not very
13800 # probable, and we have no other way to check)
13801 # TODO: Describe race condition
13803 #TODO(dynmem): do the appropriate check involving MINMEM
13804 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13805 pnhvinfo["memory_free"])
13807 raise errors.OpPrereqError("This change will prevent the instance"
13808 " from starting, due to %d MB of memory"
13809 " missing on its primary node" %
13810 miss_mem, errors.ECODE_NORES)
13812 if be_new[constants.BE_AUTO_BALANCE]:
13813 for node, nres in nodeinfo.items():
13814 if node not in instance.secondary_nodes:
13816 nres.Raise("Can't get info from secondary node %s" % node,
13817 prereq=True, ecode=errors.ECODE_STATE)
13818 (_, _, (nhvinfo, )) = nres.payload
13819 if not isinstance(nhvinfo.get("memory_free", None), int):
13820 raise errors.OpPrereqError("Secondary node %s didn't return free"
13821 " memory information" % node,
13822 errors.ECODE_STATE)
13823 #TODO(dynmem): do the appropriate check involving MINMEM
13824 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13825 raise errors.OpPrereqError("This change will prevent the instance"
13826 " from failing over to its secondary node"
13827 " %s, due to insufficient memory" % node,
13828 errors.ECODE_STATE)
13830 if self.op.runtime_mem:
13831 remote_info = self.rpc.call_instance_info(instance.primary_node,
13833 instance.hypervisor)
13834 remote_info.Raise("Error checking node %s" % instance.primary_node)
13835 if not remote_info.payload: # not running already
13836 raise errors.OpPrereqError("Instance %s is not running" %
13837 instance.name, errors.ECODE_STATE)
13839 current_memory = remote_info.payload["memory"]
13840 if (not self.op.force and
13841 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13842 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13843 raise errors.OpPrereqError("Instance %s must have between %d"
13844 " and %d MB of memory unless --force is"
13845 " passed" %
13846 (instance.name,
13847 self.be_proposed[constants.BE_MINMEM],
13848 self.be_proposed[constants.BE_MAXMEM]),
13849 errors.ECODE_INVAL)
13851 delta = self.op.runtime_mem - current_memory
13853 _CheckNodeFreeMemory(self, instance.primary_node,
13854 "ballooning memory for instance %s" %
13855 instance.name, delta, instance.hypervisor)
13857 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13858 raise errors.OpPrereqError("Disk operations not supported for"
13859 " diskless instances", errors.ECODE_INVAL)
13861 def _PrepareNicCreate(_, params, private):
13862 self._PrepareNicModification(params, private, None, None,
13863 {}, cluster, pnode)
13864 return (None, None)
13866 def _PrepareNicMod(_, nic, params, private):
13867 self._PrepareNicModification(params, private, nic.ip, nic.network,
13868 nic.nicparams, cluster, pnode)
13871 def _PrepareNicRemove(_, params, __):
13873 net = params.network
13874 if net is not None and ip is not None:
13875 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13877 # Verify NIC changes (operating on copy)
13878 nics = instance.nics[:]
13879 ApplyContainerMods("NIC", nics, None, self.nicmod,
13880 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13881 if len(nics) > constants.MAX_NICS:
13882 raise errors.OpPrereqError("Instance has too many network interfaces"
13883 " (%d), cannot add more" % constants.MAX_NICS,
13884 errors.ECODE_STATE)
13886 # Verify disk changes (operating on a copy)
13887 disks = instance.disks[:]
13888 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13889 if len(disks) > constants.MAX_DISKS:
13890 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13891 " more" % constants.MAX_DISKS,
13892 errors.ECODE_STATE)
13893 disk_sizes = [disk.size for disk in instance.disks]
13894 disk_sizes.extend(params["size"] for (op, idx, params, private) in
13895 self.diskmod if op == constants.DDM_ADD)
13896 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
13897 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
13899 if self.op.offline is not None and self.op.offline:
13900 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
13901 msg="can't change to offline")
13903 # Pre-compute NIC changes (necessary to use result in hooks)
13904 self._nic_chgdesc = []
13906 # Operate on copies as this is still in prereq
13907 nics = [nic.Copy() for nic in instance.nics]
13908 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13909 self._CreateNewNic, self._ApplyNicMods, None)
13910 self._new_nics = nics
13911 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
13913 self._new_nics = None
13914 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
13916 if not self.op.ignore_ipolicy:
13917 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
13920 # Fill ispec with backend parameters
13921 ispec[constants.ISPEC_SPINDLE_USE] = \
13922 self.be_new.get(constants.BE_SPINDLE_USE, None)
13923 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
13926 # Copy ispec to verify parameters with min/max values separately
13927 if self.op.disk_template:
13928 new_disk_template = self.op.disk_template
13930 new_disk_template = instance.disk_template
13931 ispec_max = ispec.copy()
13932 ispec_max[constants.ISPEC_MEM_SIZE] = \
13933 self.be_new.get(constants.BE_MAXMEM, None)
13934 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
13936 ispec_min = ispec.copy()
13937 ispec_min[constants.ISPEC_MEM_SIZE] = \
13938 self.be_new.get(constants.BE_MINMEM, None)
13939 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
13942 if (res_max or res_min):
13943 # FIXME: Improve error message by including information about whether
13944 # the upper or lower limit of the parameter fails the ipolicy.
13945 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
13946 (group_info, group_info.name,
13947 utils.CommaJoin(set(res_max + res_min))))
13948 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
13950 def _ConvertPlainToDrbd(self, feedback_fn):
13951 """Converts an instance from plain to drbd.
13954 feedback_fn("Converting template to drbd")
13955 instance = self.instance
13956 pnode = instance.primary_node
13957 snode = self.op.remote_node
13959 assert instance.disk_template == constants.DT_PLAIN
13961 # create a fake disk info for _GenerateDiskTemplate
13962 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13963 constants.IDISK_VG: d.logical_id[0]}
13964 for d in instance.disks]
13965 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13966 instance.name, pnode, [snode],
13967 disk_info, None, None, 0, feedback_fn,
13969 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13971 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
13972 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
13973 info = _GetInstanceInfoText(instance)
13974 feedback_fn("Creating additional volumes...")
13975 # first, create the missing data and meta devices
13976 for disk in anno_disks:
13977 # unfortunately this is... not too nice
13978 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13979 info, True, p_excl_stor)
13980 for child in disk.children:
13981 _CreateSingleBlockDev(self, snode, instance, child, info, True,
13982 s_excl_stor)
13983 # at this stage, all new LVs have been created, we can rename the old ones
13985 feedback_fn("Renaming original volumes...")
13986 rename_list = [(o, n.children[0].logical_id)
13987 for (o, n) in zip(instance.disks, new_disks)]
13988 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13989 result.Raise("Failed to rename original LVs")
13991 feedback_fn("Initializing DRBD devices...")
13992 # all child devices are in place, we can now create the DRBD devices
13993 for disk in anno_disks:
13994 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
13995 f_create = node == pnode
13996 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
13999 # at this point, the instance has been modified
14000 instance.disk_template = constants.DT_DRBD8
14001 instance.disks = new_disks
14002 self.cfg.Update(instance, feedback_fn)
14004 # Release node locks while waiting for sync
14005 _ReleaseLocks(self, locking.LEVEL_NODE)
14007 # disks are created, waiting for sync
14008 disk_abort = not _WaitForSync(self, instance,
14009 oneshot=not self.op.wait_for_sync)
14011 raise errors.OpExecError("There are some degraded disks for"
14012 " this instance, please cleanup manually")
14014 # Node resource locks will be released by caller
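# Sequence of the plain -> drbd conversion above (annotation, not original
# code): generate the DRBD disk layout from the existing LVs, create the
# missing data/meta volumes on both nodes, rename the original LVs into the
# new data devices, create the DRBD devices on top, switch the instance to
# DT_DRBD8 in the configuration and wait for the initial resync.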
14016 def _ConvertDrbdToPlain(self, feedback_fn):
14017 """Converts an instance from drbd to plain.
14020 instance = self.instance
14022 assert len(instance.secondary_nodes) == 1
14023 assert instance.disk_template == constants.DT_DRBD8
14025 pnode = instance.primary_node
14026 snode = instance.secondary_nodes[0]
14027 feedback_fn("Converting template to plain")
14029 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
14030 new_disks = [d.children[0] for d in instance.disks]
14032 # copy over size and mode
14033 for parent, child in zip(old_disks, new_disks):
14034 child.size = parent.size
14035 child.mode = parent.mode
14037 # this is a DRBD disk, return its port to the pool
14038 # NOTE: this must be done right before the call to cfg.Update!
14039 for disk in old_disks:
14040 tcp_port = disk.logical_id[2]
14041 self.cfg.AddTcpUdpPort(tcp_port)
14043 # update instance structure
14044 instance.disks = new_disks
14045 instance.disk_template = constants.DT_PLAIN
14046 _UpdateIvNames(0, instance.disks)
14047 self.cfg.Update(instance, feedback_fn)
14049 # Release locks in case removing disks takes a while
14050 _ReleaseLocks(self, locking.LEVEL_NODE)
14052 feedback_fn("Removing volumes on the secondary node...")
14053 for disk in old_disks:
14054 self.cfg.SetDiskID(disk, snode)
14055 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
14057 self.LogWarning("Could not remove block device %s on node %s,"
14058 " continuing anyway: %s", disk.iv_name, snode, msg)
14060 feedback_fn("Removing unneeded volumes on the primary node...")
14061 for idx, disk in enumerate(old_disks):
14062 meta = disk.children[1]
14063 self.cfg.SetDiskID(meta, pnode)
14064 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
14066 self.LogWarning("Could not remove metadata for disk %d on node %s,"
14067 " continuing anyway: %s", idx, pnode, msg)
14069 def _CreateNewDisk(self, idx, params, _):
14070 """Creates a new disk.
14073 instance = self.instance
14076 if instance.disk_template in constants.DTS_FILEBASED:
14077 (file_driver, file_path) = instance.disks[0].logical_id
14078 file_path = os.path.dirname(file_path)
14080 file_driver = file_path = None
14083 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
14084 instance.primary_node, instance.secondary_nodes,
14085 [params], file_path, file_driver, idx,
14086 self.Log, self.diskparams)[0]
14088 info = _GetInstanceInfoText(instance)
14090 logging.info("Creating volume %s for instance %s",
14091 disk.iv_name, instance.name)
14092 # Note: this needs to be kept in sync with _CreateDisks
14094 for node in instance.all_nodes:
14095 f_create = (node == instance.primary_node)
14097 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
14098 except errors.OpExecError, err:
14099 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
14100 disk.iv_name, disk, node, err)
14103 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
14107 def _ModifyDisk(idx, disk, params, _):
14108 """Modifies a disk.
14111 disk.mode = params[constants.IDISK_MODE]
14114 ("disk.mode/%d" % idx, disk.mode),
14117 def _RemoveDisk(self, idx, root, _):
14121 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
14122 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
14123 self.cfg.SetDiskID(disk, node)
14124 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
14126 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
14127 " continuing anyway", idx, node, msg)
14129 # if this is a DRBD disk, return its port to the pool
14130 if root.dev_type in constants.LDS_DRBD:
14131 self.cfg.AddTcpUdpPort(root.logical_id[2])
14133 def _CreateNewNic(self, idx, params, private):
14134 """Creates data structure for a new network interface.
14137 mac = params[constants.INIC_MAC]
14138 ip = params.get(constants.INIC_IP, None)
14139 net = params.get(constants.INIC_NETWORK, None)
14140 net_uuid = self.cfg.LookupNetwork(net)
14141 #TODO: not private.filled?? can a nic have no nicparams??
14142 nicparams = private.filled
14143 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, nicparams=nicparams)
14147 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
14148 (mac, ip, private.filled[constants.NIC_MODE],
14149 private.filled[constants.NIC_LINK],
14153 def _ApplyNicMods(self, idx, nic, params, private):
14154 """Modifies a network interface.
14159 for key in [constants.INIC_MAC, constants.INIC_IP]:
14161 changes.append(("nic.%s/%d" % (key, idx), params[key]))
14162 setattr(nic, key, params[key])
14164 new_net = params.get(constants.INIC_NETWORK, nic.network)
14165 new_net_uuid = self.cfg.LookupNetwork(new_net)
14166 if new_net_uuid != nic.network:
14167 changes.append(("nic.network/%d" % idx, new_net))
14168 nic.network = new_net_uuid
14171 nic.nicparams = private.filled
14173 for (key, val) in nic.nicparams.items():
14174 changes.append(("nic.%s/%d" % (key, idx), val))
14178 def Exec(self, feedback_fn):
14179 """Modifies an instance.
14181 All parameters take effect only at the next restart of the instance.
14184 # Process here the warnings from CheckPrereq, as we don't have a
14185 # feedback_fn there.
14186 # TODO: Replace with self.LogWarning
14187 for warn in self.warn:
14188 feedback_fn("WARNING: %s" % warn)
14190 assert ((self.op.disk_template is None) ^
14191 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
14192 "Not owning any node resource locks"
14195 instance = self.instance
14198 if self.op.runtime_mem:
14199 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
14201 self.op.runtime_mem)
14202 rpcres.Raise("Cannot modify instance runtime memory")
14203 result.append(("runtime_memory", self.op.runtime_mem))
14205 # Apply disk changes
14206 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
14207 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
14208 _UpdateIvNames(0, instance.disks)
14210 if self.op.disk_template:
14212 check_nodes = set(instance.all_nodes)
14213 if self.op.remote_node:
14214 check_nodes.add(self.op.remote_node)
14215 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
14216 owned = self.owned_locks(level)
14217 assert not (check_nodes - owned), \
14218 ("Not owning the correct locks, owning %r, expected at least %r" %
14219 (owned, check_nodes))
14221 r_shut = _ShutdownInstanceDisks(self, instance)
14223 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
14224 " proceed with disk template conversion")
14225 mode = (instance.disk_template, self.op.disk_template)
14227 self._DISK_CONVERSIONS[mode](self, feedback_fn)
14229 self.cfg.ReleaseDRBDMinors(instance.name)
14231 result.append(("disk_template", self.op.disk_template))
14233 assert instance.disk_template == self.op.disk_template, \
14234 ("Expected disk template '%s', found '%s'" %
14235 (self.op.disk_template, instance.disk_template))
14237 # Release node and resource locks if there are any (they might already have
14238 # been released during disk conversion)
14239 _ReleaseLocks(self, locking.LEVEL_NODE)
14240 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
14242 # Apply NIC changes
14243 if self._new_nics is not None:
14244 instance.nics = self._new_nics
14245 result.extend(self._nic_chgdesc)
14248 if self.op.hvparams:
14249 instance.hvparams = self.hv_inst
14250 for key, val in self.op.hvparams.iteritems():
14251 result.append(("hv/%s" % key, val))
14254 if self.op.beparams:
14255 instance.beparams = self.be_inst
14256 for key, val in self.op.beparams.iteritems():
14257 result.append(("be/%s" % key, val))
14260 if self.op.os_name:
14261 instance.os = self.op.os_name
14264 if self.op.osparams:
14265 instance.osparams = self.os_inst
14266 for key, val in self.op.osparams.iteritems():
14267 result.append(("os/%s" % key, val))
14269 if self.op.offline is None:
14270 # Ignore
14271 pass
14272 elif self.op.offline:
14273 # Mark instance as offline
14274 self.cfg.MarkInstanceOffline(instance.name)
14275 result.append(("admin_state", constants.ADMINST_OFFLINE))
14277 # Mark instance as online, but stopped
14278 self.cfg.MarkInstanceDown(instance.name)
14279 result.append(("admin_state", constants.ADMINST_DOWN))
14281 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
14283 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
14284 self.owned_locks(locking.LEVEL_NODE)), \
14285 "All node locks should have been released by now"
14289 _DISK_CONVERSIONS = {
14290 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
14291 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
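# The mapping above drives the template conversion in Exec; a plain instance
# is typically converted with a command along the lines of
#   gnt-instance modify -t drbd -n <secondary-node> <instance>
# (shown for illustration only; see the gnt-instance documentation for the
# authoritative syntax).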
14295 class LUInstanceChangeGroup(LogicalUnit):
14296 HPATH = "instance-change-group"
14297 HTYPE = constants.HTYPE_INSTANCE
14300 def ExpandNames(self):
14301 self.share_locks = _ShareAll()
14303 self.needed_locks = {
14304 locking.LEVEL_NODEGROUP: [],
14305 locking.LEVEL_NODE: [],
14306 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14309 self._ExpandAndLockInstance()
14311 if self.op.target_groups:
14312 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14313 self.op.target_groups)
14315 self.req_target_uuids = None
14317 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14319 def DeclareLocks(self, level):
14320 if level == locking.LEVEL_NODEGROUP:
14321 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14323 if self.req_target_uuids:
14324 lock_groups = set(self.req_target_uuids)
14326 # Lock all groups used by instance optimistically; this requires going
14327 # via the node before it's locked, requiring verification later on
14328 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
14329 lock_groups.update(instance_groups)
14331 # No target groups, need to lock all of them
14332 lock_groups = locking.ALL_SET
14334 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14336 elif level == locking.LEVEL_NODE:
14337 if self.req_target_uuids:
14338 # Lock all nodes used by instances
14339 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14340 self._LockInstancesNodes()
14342 # Lock all nodes in all potential target groups
14343 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
14344 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
14345 member_nodes = [node_name
14346 for group in lock_groups
14347 for node_name in self.cfg.GetNodeGroup(group).members]
14348 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14350 # Lock all nodes as all groups are potential targets
14351 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14353 def CheckPrereq(self):
14354 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14355 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14356 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14358 assert (self.req_target_uuids is None or
14359 owned_groups.issuperset(self.req_target_uuids))
14360 assert owned_instances == set([self.op.instance_name])
14362 # Get instance information
14363 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
14365 # Check if node groups for locked instance are still correct
14366 assert owned_nodes.issuperset(self.instance.all_nodes), \
14367 ("Instance %s's nodes changed while we kept the lock" %
14368 self.op.instance_name)
14370 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
14373 if self.req_target_uuids:
14374 # User requested specific target groups
14375 self.target_uuids = frozenset(self.req_target_uuids)
14377 # All groups except those used by the instance are potential targets
14378 self.target_uuids = owned_groups - inst_groups
14380 conflicting_groups = self.target_uuids & inst_groups
14381 if conflicting_groups:
14382 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
14383 " used by the instance '%s'" %
14384 (utils.CommaJoin(conflicting_groups),
14385 self.op.instance_name),
14386 errors.ECODE_INVAL)
14388 if not self.target_uuids:
14389 raise errors.OpPrereqError("There are no possible target groups",
14390 errors.ECODE_INVAL)
14392 def BuildHooksEnv(self):
14393 """Build hooks env.
14396 assert self.target_uuids
14399 "TARGET_GROUPS": " ".join(self.target_uuids),
14402 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14406 def BuildHooksNodes(self):
14407 """Build hooks nodes.
14410 mn = self.cfg.GetMasterNode()
14411 return ([mn], [mn])
14413 def Exec(self, feedback_fn):
14414 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14416 assert instances == [self.op.instance_name], "Instance not locked"
14418 req = iallocator.IAReqGroupChange(instances=instances,
14419 target_groups=list(self.target_uuids))
14420 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
14422 ial.Run(self.op.iallocator)
14424 if not ial.success:
14425 raise errors.OpPrereqError("Can't compute solution for changing group of"
14426 " instance '%s' using iallocator '%s': %s" %
14427 (self.op.instance_name, self.op.iallocator,
14428 ial.info), errors.ECODE_NORES)
14430 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14432 self.LogInfo("Iallocator returned %s job(s) for changing group of"
14433 " instance '%s'", len(jobs), self.op.instance_name)
14435 return ResultWithJobs(jobs)
14438 class LUBackupQuery(NoHooksLU):
14439 """Query the exports list
14444 def CheckArguments(self):
14445 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
14446 ["node", "export"], self.op.use_locking)
14448 def ExpandNames(self):
14449 self.expq.ExpandNames(self)
14451 def DeclareLocks(self, level):
14452 self.expq.DeclareLocks(self, level)
14454 def Exec(self, feedback_fn):
14457 for (node, expname) in self.expq.OldStyleQuery(self):
14458 if expname is None:
14459 result[node] = False
14461 result.setdefault(node, []).append(expname)
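# The mapping built here is {node_name: False} for nodes whose export list
# could not be retrieved, and {node_name: [export_name, ...]} otherwise.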
14466 class _ExportQuery(_QueryBase):
14467 FIELDS = query.EXPORT_FIELDS
14469 #: The node name is not a unique key for this query
14470 SORT_FIELD = "node"
14472 def ExpandNames(self, lu):
14473 lu.needed_locks = {}
14475 # The following variables interact with _QueryBase._GetNames
14477 self.wanted = _GetWantedNodes(lu, self.names)
14479 self.wanted = locking.ALL_SET
14481 self.do_locking = self.use_locking
14483 if self.do_locking:
14484 lu.share_locks = _ShareAll()
14485 lu.needed_locks = {
14486 locking.LEVEL_NODE: self.wanted,
14490 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14492 def DeclareLocks(self, lu, level):
14495 def _GetQueryData(self, lu):
14496 """Computes the list of nodes and their attributes.
14499 # Locking is not used
14501 assert not (compat.any(lu.glm.is_owned(level)
14502 for level in locking.LEVELS
14503 if level != locking.LEVEL_CLUSTER) or
14504 self.do_locking or self.use_locking)
14506 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
14510 for (node, nres) in lu.rpc.call_export_list(nodes).items():
14512 result.append((node, None))
14514 result.extend((node, expname) for expname in nres.payload)
14519 class LUBackupPrepare(NoHooksLU):
14520 """Prepares an instance for an export and returns useful information.
14525 def ExpandNames(self):
14526 self._ExpandAndLockInstance()
14528 def CheckPrereq(self):
14529 """Check prerequisites.
14532 instance_name = self.op.instance_name
14534 self.instance = self.cfg.GetInstanceInfo(instance_name)
14535 assert self.instance is not None, \
14536 "Cannot retrieve locked instance %s" % self.op.instance_name
14537 _CheckNodeOnline(self, self.instance.primary_node)
14539 self._cds = _GetClusterDomainSecret()
14541 def Exec(self, feedback_fn):
14542 """Prepares an instance for an export.
14545 instance = self.instance
14547 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14548 salt = utils.GenerateSecret(8)
14550 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
14551 result = self.rpc.call_x509_cert_create(instance.primary_node,
14552 constants.RIE_CERT_VALIDITY)
14553 result.Raise("Can't create X509 key and certificate on %s" % result.node)
14555 (name, cert_pem) = result.payload
14557 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
14561 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
14562 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
14564 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
14570 class LUBackupExport(LogicalUnit):
14571 """Export an instance to an image in the cluster.
14574 HPATH = "instance-export"
14575 HTYPE = constants.HTYPE_INSTANCE
14578 def CheckArguments(self):
14579 """Check the arguments.
14582 self.x509_key_name = self.op.x509_key_name
14583 self.dest_x509_ca_pem = self.op.destination_x509_ca
14585 if self.op.mode == constants.EXPORT_MODE_REMOTE:
14586 if not self.x509_key_name:
14587 raise errors.OpPrereqError("Missing X509 key name for encryption",
14588 errors.ECODE_INVAL)
14590 if not self.dest_x509_ca_pem:
14591 raise errors.OpPrereqError("Missing destination X509 CA",
14592 errors.ECODE_INVAL)
14594 def ExpandNames(self):
14595 self._ExpandAndLockInstance()
14597 # Lock all nodes for local exports
14598 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14599 # FIXME: lock only instance primary and destination node
14601 # Sad but true, for now we have to lock all nodes, as we don't know where
14602 # the previous export might be, and in this LU we search for it and
14603 # remove it from its current node. In the future we could fix this by:
14604 # - making a tasklet to search (share-lock all), then create the
14605 # new one, then one to remove, after
14606 # - removing the removal operation altogether
14607 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14609 # Allocations should be stopped while this LU runs with node locks, but
14610 # it doesn't have to be exclusive
14611 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14612 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
14614 def DeclareLocks(self, level):
14615 """Last minute lock declaration."""
14616 # All nodes are locked anyway, so nothing to do here.
14618 def BuildHooksEnv(self):
14619 """Build hooks env.
14621 This will run on the master, primary node and target node.
14625 "EXPORT_MODE": self.op.mode,
14626 "EXPORT_NODE": self.op.target_node,
14627 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
14628 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
14629 # TODO: Generic function for boolean env variables
14630 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
14633 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
14637 def BuildHooksNodes(self):
14638 """Build hooks nodes.
14641 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
14643 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14644 nl.append(self.op.target_node)
14648 def CheckPrereq(self):
14649 """Check prerequisites.
14651 This checks that the instance and node names are valid.
14654 instance_name = self.op.instance_name
14656 self.instance = self.cfg.GetInstanceInfo(instance_name)
14657 assert self.instance is not None, \
14658 "Cannot retrieve locked instance %s" % self.op.instance_name
14659 _CheckNodeOnline(self, self.instance.primary_node)
14661 if (self.op.remove_instance and
14662 self.instance.admin_state == constants.ADMINST_UP and
14663 not self.op.shutdown):
14664 raise errors.OpPrereqError("Cannot remove instance without shutting it"
14665 " down first", errors.ECODE_STATE)
14667 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14668 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
14669 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
14670 assert self.dst_node is not None
14672 _CheckNodeOnline(self, self.dst_node.name)
14673 _CheckNodeNotDrained(self, self.dst_node.name)
14676 self.dest_disk_info = None
14677 self.dest_x509_ca = None
14679 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14680 self.dst_node = None
14682 if len(self.op.target_node) != len(self.instance.disks):
14683 raise errors.OpPrereqError(("Received destination information for %s"
14684 " disks, but instance %s has %s disks") %
14685 (len(self.op.target_node), instance_name,
14686 len(self.instance.disks)),
14687 errors.ECODE_INVAL)
14689 cds = _GetClusterDomainSecret()
14691 # Check X509 key name
14693 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
14694 except (TypeError, ValueError), err:
14695 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
14696 errors.ECODE_INVAL)
14698 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
14699 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
14700 errors.ECODE_INVAL)
14702 # Load and verify CA
14704 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
14705 except OpenSSL.crypto.Error, err:
14706 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
14707 (err, ), errors.ECODE_INVAL)
14709 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
14710 if errcode is not None:
14711 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
14712 (msg, ), errors.ECODE_INVAL)
14714 self.dest_x509_ca = cert
14716 # Verify target information
14718 for idx, disk_data in enumerate(self.op.target_node):
14720 (host, port, magic) = \
14721 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
14722 except errors.GenericError, err:
14723 raise errors.OpPrereqError("Target info for disk %s: %s" %
14724 (idx, err), errors.ECODE_INVAL)
14726 disk_info.append((host, port, magic))
14728 assert len(disk_info) == len(self.op.target_node)
14729 self.dest_disk_info = disk_info
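# dest_disk_info now holds one (host, port, magic) tuple per instance disk,
# in disk index order, each verified above against the cluster domain secret.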
14732 raise errors.ProgrammerError("Unhandled export mode %r" %
14735 # instance disk type verification
14736 # TODO: Implement export support for file-based disks
14737 for disk in self.instance.disks:
14738 if disk.dev_type == constants.LD_FILE:
14739 raise errors.OpPrereqError("Export not supported for instances with"
14740 " file-based disks", errors.ECODE_INVAL)
14742 def _CleanupExports(self, feedback_fn):
14743 """Removes exports of current instance from all other nodes.
14745 If an instance in a cluster with nodes A..D was exported to node C, its
14746 exports will be removed from the nodes A, B and D.
14749 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14751 nodelist = self.cfg.GetNodeList()
14752 nodelist.remove(self.dst_node.name)
14754 # on one-node clusters nodelist will be empty after the removal; if we
14755 # proceeded, the backup would be removed because OpBackupQuery
14756 # substitutes an empty list with the full cluster node list.
14757 iname = self.instance.name
14759 feedback_fn("Removing old exports for instance %s" % iname)
14760 exportlist = self.rpc.call_export_list(nodelist)
14761 for node in exportlist:
14762 if exportlist[node].fail_msg:
14764 if iname in exportlist[node].payload:
14765 msg = self.rpc.call_export_remove(node, iname).fail_msg
14767 self.LogWarning("Could not remove older export for instance %s"
14768 " on node %s: %s", iname, node, msg)
14770 def Exec(self, feedback_fn):
14771 """Export an instance to an image in the cluster.
14774 assert self.op.mode in constants.EXPORT_MODES
14776 instance = self.instance
14777 src_node = instance.primary_node
14779 if self.op.shutdown:
14780 # shutdown the instance, but not the disks
14781 feedback_fn("Shutting down instance %s" % instance.name)
14782 result = self.rpc.call_instance_shutdown(src_node, instance,
14783 self.op.shutdown_timeout)
14784 # TODO: Maybe ignore failures if ignore_remove_failures is set
14785 result.Raise("Could not shutdown instance %s on"
14786 " node %s" % (instance.name, src_node))
14788 # set the disks ID correctly since call_instance_start needs the
14789 # correct drbd minor to create the symlinks
14790 for disk in instance.disks:
14791 self.cfg.SetDiskID(disk, src_node)
14793 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14796 # Activate the instance disks if we're exporting a stopped instance
14797 feedback_fn("Activating disks for %s" % instance.name)
14798 _StartInstanceDisks(self, instance, None)
14801 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14804 helper.CreateSnapshots()
14806 if (self.op.shutdown and
14807 instance.admin_state == constants.ADMINST_UP and
14808 not self.op.remove_instance):
14809 assert not activate_disks
14810 feedback_fn("Starting instance %s" % instance.name)
14811 result = self.rpc.call_instance_start(src_node,
14812 (instance, None, None), False)
14813 msg = result.fail_msg
14815 feedback_fn("Failed to start instance: %s" % msg)
14816 _ShutdownInstanceDisks(self, instance)
14817 raise errors.OpExecError("Could not start instance: %s" % msg)
14819 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14820 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14821 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14822 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14823 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14825 (key_name, _, _) = self.x509_key_name
14828 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14831 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14832 key_name, dest_ca_pem,
14837 # Check for backwards compatibility
14838 assert len(dresults) == len(instance.disks)
14839 assert compat.all(isinstance(i, bool) for i in dresults), \
14840 "Not all results are boolean: %r" % dresults
14844 feedback_fn("Deactivating disks for %s" % instance.name)
14845 _ShutdownInstanceDisks(self, instance)
14847 if not (compat.all(dresults) and fin_resu):
14850 failures.append("export finalization")
14851 if not compat.all(dresults):
14852 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14854 failures.append("disk export: disk(s) %s" % fdsk)
14856 raise errors.OpExecError("Export failed, errors in %s" %
14857 utils.CommaJoin(failures))
14859 # At this point, the export was successful, we can cleanup/finish
14861 # Remove instance if requested
14862 if self.op.remove_instance:
14863 feedback_fn("Removing instance %s" % instance.name)
14864 _RemoveInstance(self, feedback_fn, instance,
14865 self.op.ignore_remove_failures)
14867 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14868 self._CleanupExports(feedback_fn)
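# fin_resu is the overall export/finalization status; dresults holds one
# boolean per instance disk indicating whether that disk was exported
# successfully.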
14870 return fin_resu, dresults
14873 class LUBackupRemove(NoHooksLU):
14874 """Remove exports related to the named instance.
14879 def ExpandNames(self):
14880 self.needed_locks = {
14881 # We need all nodes to be locked in order for RemoveExport to work, but
14882 # we don't need to lock the instance itself, as nothing will happen to it
14883 # (and we can remove exports also for a removed instance)
14884 locking.LEVEL_NODE: locking.ALL_SET,
14886 # Removing backups is quick, so blocking allocations is justified
14887 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
14890 # Allocations should be stopped while this LU runs with node locks, but it
14891 # doesn't have to be exclusive
14892 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
14894 def Exec(self, feedback_fn):
14895 """Remove any export.
14898 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14899 # If the instance was not found we'll try with the name that was passed in.
14900 # This will only work if it was an FQDN, though.
14902 if not instance_name:
14904 instance_name = self.op.instance_name
14906 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14907 exportlist = self.rpc.call_export_list(locked_nodes)
14909 for node in exportlist:
14910 msg = exportlist[node].fail_msg
14912 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14914 if instance_name in exportlist[node].payload:
14916 result = self.rpc.call_export_remove(node, instance_name)
14917 msg = result.fail_msg
14919 logging.error("Could not remove export for instance %s"
14920 " on node %s: %s", instance_name, node, msg)
14922 if fqdn_warn and not found:
14923 feedback_fn("Export not found. If trying to remove an export belonging"
14924 " to a deleted instance please use its Fully Qualified"
14928 class LUGroupAdd(LogicalUnit):
14929 """Logical unit for creating node groups.
14932 HPATH = "group-add"
14933 HTYPE = constants.HTYPE_GROUP
14936 def ExpandNames(self):
14937 # We need the new group's UUID here so that we can create and acquire the
14938 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14939 # that it should not check whether the UUID exists in the configuration.
14940 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14941 self.needed_locks = {}
14942 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14944 def CheckPrereq(self):
14945 """Check prerequisites.
14947 This checks that the given group name is not an existing node group
14952 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14953 except errors.OpPrereqError:
14956 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14957 " node group (UUID: %s)" %
14958 (self.op.group_name, existing_uuid),
14959 errors.ECODE_EXISTS)
14961 if self.op.ndparams:
14962 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14964 if self.op.hv_state:
14965 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14967 self.new_hv_state = None
14969 if self.op.disk_state:
14970 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14972 self.new_disk_state = None
14974 if self.op.diskparams:
14975 for templ in constants.DISK_TEMPLATES:
14976 if templ in self.op.diskparams:
14977 utils.ForceDictType(self.op.diskparams[templ],
14978 constants.DISK_DT_TYPES)
14979 self.new_diskparams = self.op.diskparams
14981 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14982 except errors.OpPrereqError, err:
14983 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14984 errors.ECODE_INVAL)
14986 self.new_diskparams = {}
14988 if self.op.ipolicy:
14989 cluster = self.cfg.GetClusterInfo()
14990 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14992 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14993 except errors.ConfigurationError, err:
14994 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14995 errors.ECODE_INVAL)
14997 def BuildHooksEnv(self):
14998 """Build hooks env.
15002 "GROUP_NAME": self.op.group_name,
15005 def BuildHooksNodes(self):
15006 """Build hooks nodes.
15009 mn = self.cfg.GetMasterNode()
15010 return ([mn], [mn])
15012 def Exec(self, feedback_fn):
15013 """Add the node group to the cluster.
15016 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
15017 uuid=self.group_uuid,
15018 alloc_policy=self.op.alloc_policy,
15019 ndparams=self.op.ndparams,
15020 diskparams=self.new_diskparams,
15021 ipolicy=self.op.ipolicy,
15022 hv_state_static=self.new_hv_state,
15023 disk_state_static=self.new_disk_state)
15025 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
15026 del self.remove_locks[locking.LEVEL_NODEGROUP]
15029 class LUGroupAssignNodes(NoHooksLU):
15030 """Logical unit for assigning nodes to groups.
15035 def ExpandNames(self):
15036 # These raise errors.OpPrereqError on their own:
15037 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15038 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15040 # We want to lock all the affected nodes and groups. We have readily
15041 # available the list of nodes, and the *destination* group. To gather the
15042 # list of "source" groups, we need to fetch node information later on.
15043 self.needed_locks = {
15044 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
15045 locking.LEVEL_NODE: self.op.nodes,
15048 def DeclareLocks(self, level):
15049 if level == locking.LEVEL_NODEGROUP:
15050 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
15052 # Try to get all affected nodes' groups without having the group or node
15053 # lock yet. Needs verification later in the code flow.
15054 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
15056 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
15058 def CheckPrereq(self):
15059 """Check prerequisites.
15062 assert self.needed_locks[locking.LEVEL_NODEGROUP]
15063 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
15064 frozenset(self.op.nodes))
15066 expected_locks = (set([self.group_uuid]) |
15067 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
15068 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
15069 if actual_locks != expected_locks:
15070 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
15071 " current groups are '%s', used to be '%s'" %
15072 (utils.CommaJoin(expected_locks),
15073 utils.CommaJoin(actual_locks)))
15075 self.node_data = self.cfg.GetAllNodesInfo()
15076 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15077 instance_data = self.cfg.GetAllInstancesInfo()
15079 if self.group is None:
15080 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15081 (self.op.group_name, self.group_uuid))
15083 (new_splits, previous_splits) = \
15084 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
15085 for node in self.op.nodes],
15086 self.node_data, instance_data)
15089 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
15091 if not self.op.force:
15092 raise errors.OpExecError("The following instances get split by this"
15093 " change and --force was not given: %s" %
15096 self.LogWarning("This operation will split the following instances: %s",
15099 if previous_splits:
15100 self.LogWarning("In addition, these already-split instances continue"
15101 " to be split across groups: %s",
15102 utils.CommaJoin(utils.NiceSort(previous_splits)))
15104 def Exec(self, feedback_fn):
15105 """Assign nodes to a new group.
15108 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
15110 self.cfg.AssignGroupNodes(mods)
15113 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
15114 """Check for split instances after a node assignment.
15116 This method considers a series of node assignments as an atomic operation,
15117 and returns information about split instances after applying the set of
15120 In particular, it returns information about newly split instances, and
15121 instances that were already split, and remain so after the change.
15123 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
15126 @type changes: list of (node_name, new_group_uuid) pairs.
15127 @param changes: list of node assignments to consider.
15128 @param node_data: a dict with data for all nodes
15129 @param instance_data: a dict with all instances to consider
15130 @rtype: a two-tuple
15131 @return: a list of instances that were previously okay and become split as a
15132 consequence of this change, and a list of instances that were previously
15133 split and this change does not fix.
15136 changed_nodes = dict((node, group) for node, group in changes
15137 if node_data[node].group != group)
15139 all_split_instances = set()
15140 previously_split_instances = set()
15142 def InstanceNodes(instance):
15143 return [instance.primary_node] + list(instance.secondary_nodes)
15145 for inst in instance_data.values():
15146 if inst.disk_template not in constants.DTS_INT_MIRROR:
15149 instance_nodes = InstanceNodes(inst)
15151 if len(set(node_data[node].group for node in instance_nodes)) > 1:
15152 previously_split_instances.add(inst.name)
15154 if len(set(changed_nodes.get(node, node_data[node].group)
15155 for node in instance_nodes)) > 1:
15156 all_split_instances.add(inst.name)
15158 return (list(all_split_instances - previously_split_instances),
15159 list(previously_split_instances & all_split_instances))
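# Example (hypothetical names): for a DRBD instance with primary "node1" and
# secondary "node2", both currently in group "A", the change [("node2", "B")]
# makes the instance newly split, so it is returned in the first list; an
# instance already spanning two groups that the change does not reunite is
# returned in the second list.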
15162 class _GroupQuery(_QueryBase):
15163 FIELDS = query.GROUP_FIELDS
15165 def ExpandNames(self, lu):
15166 lu.needed_locks = {}
15168 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
15169 self._cluster = lu.cfg.GetClusterInfo()
15170 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
15173 self.wanted = [name_to_uuid[name]
15174 for name in utils.NiceSort(name_to_uuid.keys())]
15176 # Accept names to be either names or UUIDs.
15179 all_uuid = frozenset(self._all_groups.keys())
15181 for name in self.names:
15182 if name in all_uuid:
15183 self.wanted.append(name)
15184 elif name in name_to_uuid:
15185 self.wanted.append(name_to_uuid[name])
15187 missing.append(name)
15190 raise errors.OpPrereqError("Some groups do not exist: %s" %
15191 utils.CommaJoin(missing),
15192 errors.ECODE_NOENT)
15194 def DeclareLocks(self, lu, level):
15197 def _GetQueryData(self, lu):
15198 """Computes the list of node groups and their attributes.
15201 do_nodes = query.GQ_NODE in self.requested_data
15202 do_instances = query.GQ_INST in self.requested_data
15204 group_to_nodes = None
15205 group_to_instances = None
15207 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
15208 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
15209 # latter GetAllInstancesInfo() is not enough, for we have to go through
15210 # instance->node. Hence, we will need to process nodes even if we only need
15211 # instance information.
15212 if do_nodes or do_instances:
15213 all_nodes = lu.cfg.GetAllNodesInfo()
15214 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
15217 for node in all_nodes.values():
15218 if node.group in group_to_nodes:
15219 group_to_nodes[node.group].append(node.name)
15220 node_to_group[node.name] = node.group
15223 all_instances = lu.cfg.GetAllInstancesInfo()
15224 group_to_instances = dict((uuid, []) for uuid in self.wanted)
15226 for instance in all_instances.values():
15227 node = instance.primary_node
15228 if node in node_to_group:
15229 group_to_instances[node_to_group[node]].append(instance.name)
15232 # Do not pass on node information if it was not requested.
15233 group_to_nodes = None
15235 return query.GroupQueryData(self._cluster,
15236 [self._all_groups[uuid]
15237 for uuid in self.wanted],
15238 group_to_nodes, group_to_instances,
15239 query.GQ_DISKPARAMS in self.requested_data)
15242 class LUGroupQuery(NoHooksLU):
15243 """Logical unit for querying node groups.
15248 def CheckArguments(self):
15249 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
15250 self.op.output_fields, False)
15252 def ExpandNames(self):
15253 self.gq.ExpandNames(self)
15255 def DeclareLocks(self, level):
15256 self.gq.DeclareLocks(self, level)
15258 def Exec(self, feedback_fn):
15259 return self.gq.OldStyleQuery(self)
15262 class LUGroupSetParams(LogicalUnit):
15263 """Modifies the parameters of a node group.
15266 HPATH = "group-modify"
15267 HTYPE = constants.HTYPE_GROUP
15270 def CheckArguments(self):
15273 self.op.diskparams,
15274 self.op.alloc_policy,
15276 self.op.disk_state,
15280 if all_changes.count(None) == len(all_changes):
15281 raise errors.OpPrereqError("Please pass at least one modification",
15282 errors.ECODE_INVAL)
15284 def ExpandNames(self):
15285 # This raises errors.OpPrereqError on its own:
15286 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15288 self.needed_locks = {
15289 locking.LEVEL_INSTANCE: [],
15290 locking.LEVEL_NODEGROUP: [self.group_uuid],
15293 self.share_locks[locking.LEVEL_INSTANCE] = 1
15295 def DeclareLocks(self, level):
15296 if level == locking.LEVEL_INSTANCE:
15297 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15299 # Lock instances optimistically, needs verification once group lock has been acquired
15301 self.needed_locks[locking.LEVEL_INSTANCE] = \
15302 self.cfg.GetNodeGroupInstances(self.group_uuid)
15305 def _UpdateAndVerifyDiskParams(old, new):
15306 """Updates and verifies disk parameters.
15309 new_params = _GetUpdatedParams(old, new)
15310 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
15311 return new_params
15313 def CheckPrereq(self):
15314 """Check prerequisites.
15317 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15319 # Check if locked instances are still correct
15320 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15322 self.group = self.cfg.GetNodeGroup(self.group_uuid)
15323 cluster = self.cfg.GetClusterInfo()
15325 if self.group is None:
15326 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15327 (self.op.group_name, self.group_uuid))
15329 if self.op.ndparams:
15330 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
15331 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
15332 self.new_ndparams = new_ndparams
15334 if self.op.diskparams:
15335 diskparams = self.group.diskparams
15336 uavdp = self._UpdateAndVerifyDiskParams
15337 # For each disk template, update and verify the values of its subdict
15338 new_diskparams = dict((dt,
15339 uavdp(diskparams.get(dt, {}),
15340 self.op.diskparams[dt]))
15341 for dt in constants.DISK_TEMPLATES
15342 if dt in self.op.diskparams)
15343 # Now that we have all subdicts of diskparams ready, let's merge the
15344 # original dict with all updated subdicts
15345 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
15347 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
15348 except errors.OpPrereqError, err:
15349 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
15350 errors.ECODE_INVAL)
15352 if self.op.hv_state:
15353 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
15354 self.group.hv_state_static)
15356 if self.op.disk_state:
15357 self.new_disk_state = \
15358 _MergeAndVerifyDiskState(self.op.disk_state,
15359 self.group.disk_state_static)
15361 if self.op.ipolicy:
15362 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
15366 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
15367 inst_filter = lambda inst: inst.name in owned_instances
15368 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
15369 gmi = ganeti.masterd.instance
15371 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
15373 new_ipolicy, instances, self.cfg)
15376 self.LogWarning("After the ipolicy change the following instances"
15377 " violate them: %s",
15378 utils.CommaJoin(violations))
15380 def BuildHooksEnv(self):
15381 """Build hooks env.
15385 "GROUP_NAME": self.op.group_name,
15386 "NEW_ALLOC_POLICY": self.op.alloc_policy,
15389 def BuildHooksNodes(self):
15390 """Build hooks nodes.
15393 mn = self.cfg.GetMasterNode()
15394 return ([mn], [mn])
15396 def Exec(self, feedback_fn):
15397 """Modifies the node group.
15402 if self.op.ndparams:
15403 self.group.ndparams = self.new_ndparams
15404 result.append(("ndparams", str(self.group.ndparams)))
15406 if self.op.diskparams:
15407 self.group.diskparams = self.new_diskparams
15408 result.append(("diskparams", str(self.group.diskparams)))
15410 if self.op.alloc_policy:
15411 self.group.alloc_policy = self.op.alloc_policy
15413 if self.op.hv_state:
15414 self.group.hv_state_static = self.new_hv_state
15416 if self.op.disk_state:
15417 self.group.disk_state_static = self.new_disk_state
15419 if self.op.ipolicy:
15420 self.group.ipolicy = self.new_ipolicy
15422 self.cfg.Update(self.group, feedback_fn)
15426 class LUGroupRemove(LogicalUnit):
15427 HPATH = "group-remove"
15428 HTYPE = constants.HTYPE_GROUP
15431 def ExpandNames(self):
15432 # This will raise errors.OpPrereqError on its own:
15433 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15434 self.needed_locks = {
15435 locking.LEVEL_NODEGROUP: [self.group_uuid],
15438 def CheckPrereq(self):
15439 """Check prerequisites.
15441 This checks that the given group name exists as a node group, that it is
15442 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
15446 # Verify that the group is empty.
15447 group_nodes = [node.name
15448 for node in self.cfg.GetAllNodesInfo().values()
15449 if node.group == self.group_uuid]
15452 raise errors.OpPrereqError("Group '%s' not empty, has the following"
15454 (self.op.group_name,
15455 utils.CommaJoin(utils.NiceSort(group_nodes))),
15456 errors.ECODE_STATE)
15458 # Verify the cluster would not be left group-less.
15459 if len(self.cfg.GetNodeGroupList()) == 1:
15460 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
15461 " removed" % self.op.group_name,
15462 errors.ECODE_STATE)
15464 def BuildHooksEnv(self):
15465 """Build hooks env.
15469 "GROUP_NAME": self.op.group_name,
15472 def BuildHooksNodes(self):
15473 """Build hooks nodes.
15476 mn = self.cfg.GetMasterNode()
15477 return ([mn], [mn])
15479 def Exec(self, feedback_fn):
15480 """Remove the node group.
15484 self.cfg.RemoveNodeGroup(self.group_uuid)
15485 except errors.ConfigurationError:
15486 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
15487 (self.op.group_name, self.group_uuid))
15489 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
15492 class LUGroupRename(LogicalUnit):
15493 HPATH = "group-rename"
15494 HTYPE = constants.HTYPE_GROUP
15497 def ExpandNames(self):
15498 # This raises errors.OpPrereqError on its own:
15499 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15501 self.needed_locks = {
15502 locking.LEVEL_NODEGROUP: [self.group_uuid],
15505 def CheckPrereq(self):
15506 """Check prerequisites.
15508 Ensures requested new name is not yet used.
15512 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
15513 except errors.OpPrereqError:
15516 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
15517 " node group (UUID: %s)" %
15518 (self.op.new_name, new_name_uuid),
15519 errors.ECODE_EXISTS)
15521 def BuildHooksEnv(self):
15522 """Build hooks env.
15526 "OLD_NAME": self.op.group_name,
15527 "NEW_NAME": self.op.new_name,
15530 def BuildHooksNodes(self):
15531 """Build hooks nodes.
15534 mn = self.cfg.GetMasterNode()
15536 all_nodes = self.cfg.GetAllNodesInfo()
15537 all_nodes.pop(mn, None)
15540 run_nodes.extend(node.name for node in all_nodes.values()
15541 if node.group == self.group_uuid)
15543 return (run_nodes, run_nodes)
15545 def Exec(self, feedback_fn):
15546 """Rename the node group.
15549 group = self.cfg.GetNodeGroup(self.group_uuid)
15552 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
15553 (self.op.group_name, self.group_uuid))
15555 group.name = self.op.new_name
15556 self.cfg.Update(group, feedback_fn)
15558 return self.op.new_name
15561 class LUGroupEvacuate(LogicalUnit):
15562 HPATH = "group-evacuate"
15563 HTYPE = constants.HTYPE_GROUP
15566 def ExpandNames(self):
15567 # This raises errors.OpPrereqError on its own:
15568 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
15570 if self.op.target_groups:
15571 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
15572 self.op.target_groups)
15574 self.req_target_uuids = []
15576 if self.group_uuid in self.req_target_uuids:
15577 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
15578 " as a target group (targets are %s)" %
15580 utils.CommaJoin(self.req_target_uuids)),
15581 errors.ECODE_INVAL)
15583 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
15585 self.share_locks = _ShareAll()
15586 self.needed_locks = {
15587 locking.LEVEL_INSTANCE: [],
15588 locking.LEVEL_NODEGROUP: [],
15589 locking.LEVEL_NODE: [],
15592 def DeclareLocks(self, level):
15593 if level == locking.LEVEL_INSTANCE:
15594 assert not self.needed_locks[locking.LEVEL_INSTANCE]
15596 # Lock instances optimistically, needs verification once node and group
15597 # locks have been acquired
15598 self.needed_locks[locking.LEVEL_INSTANCE] = \
15599 self.cfg.GetNodeGroupInstances(self.group_uuid)
15601 elif level == locking.LEVEL_NODEGROUP:
15602 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
15604 if self.req_target_uuids:
15605 lock_groups = set([self.group_uuid] + self.req_target_uuids)
15607 # Lock all groups used by instances optimistically; this requires going
15608 # via the node before it's locked, requiring verification later on
15609 lock_groups.update(group_uuid
15610 for instance_name in
15611 self.owned_locks(locking.LEVEL_INSTANCE)
15613 self.cfg.GetInstanceNodeGroups(instance_name))
15615 # No target groups, need to lock all of them
15616 lock_groups = locking.ALL_SET
15618 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
15620 elif level == locking.LEVEL_NODE:
15621 # This will only lock the nodes in the group to be evacuated which
15622 # contain actual instances
15623 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
15624 self._LockInstancesNodes()
15626 # Lock all nodes in group to be evacuated and target groups
15627 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15628 assert self.group_uuid in owned_groups
15629 member_nodes = [node_name
15630 for group in owned_groups
15631 for node_name in self.cfg.GetNodeGroup(group).members]
15632 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
15634 def CheckPrereq(self):
15635 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
15636 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
15637 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15639 assert owned_groups.issuperset(self.req_target_uuids)
15640 assert self.group_uuid in owned_groups
15642 # Check if locked instances are still correct
15643 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
15645 # Get instance information
15646 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
15648 # Check if node groups for locked instances are still correct
15649 _CheckInstancesNodeGroups(self.cfg, self.instances,
15650 owned_groups, owned_nodes, self.group_uuid)
15652 if self.req_target_uuids:
15653 # User requested specific target groups
15654 self.target_uuids = self.req_target_uuids
15656 # All groups except the one to be evacuated are potential targets
15657 self.target_uuids = [group_uuid for group_uuid in owned_groups
15658 if group_uuid != self.group_uuid]
15660 if not self.target_uuids:
15661 raise errors.OpPrereqError("There are no possible target groups",
15662 errors.ECODE_INVAL)
15664 def BuildHooksEnv(self):
15665 """Build hooks env.
15669 "GROUP_NAME": self.op.group_name,
15670 "TARGET_GROUPS": " ".join(self.target_uuids),
15673 def BuildHooksNodes(self):
15674 """Build hooks nodes.
15677 mn = self.cfg.GetMasterNode()
15679 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
15681 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
15683 return (run_nodes, run_nodes)
15685 def Exec(self, feedback_fn):
15686 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
15688 assert self.group_uuid not in self.target_uuids
15690 req = iallocator.IAReqGroupChange(instances=instances,
15691 target_groups=self.target_uuids)
15692 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
15694 ial.Run(self.op.iallocator)
15696 if not ial.success:
15697 raise errors.OpPrereqError("Can't compute group evacuation using"
15698 " iallocator '%s': %s" %
15699 (self.op.iallocator, ial.info),
15700 errors.ECODE_NORES)
15702 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
15704 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
15705 len(jobs), self.op.group_name)
15707 return ResultWithJobs(jobs)
15710 class TagsLU(NoHooksLU): # pylint: disable=W0223
15711 """Generic tags LU.
15713 This is an abstract class which is the parent of all the other tags LUs.
15716 def ExpandNames(self):
15717 self.group_uuid = None
15718 self.needed_locks = {}
15720 if self.op.kind == constants.TAG_NODE:
15721 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
15722 lock_level = locking.LEVEL_NODE
15723 lock_name = self.op.name
15724 elif self.op.kind == constants.TAG_INSTANCE:
15725 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
15726 lock_level = locking.LEVEL_INSTANCE
15727 lock_name = self.op.name
15728 elif self.op.kind == constants.TAG_NODEGROUP:
15729 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
15730 lock_level = locking.LEVEL_NODEGROUP
15731 lock_name = self.group_uuid
15732 elif self.op.kind == constants.TAG_NETWORK:
15733 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
15734 lock_level = locking.LEVEL_NETWORK
15735 lock_name = self.network_uuid
15740 if lock_level and getattr(self.op, "use_locking", True):
15741 self.needed_locks[lock_level] = lock_name
15743 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
15744 # not possible to acquire the BGL based on opcode parameters)
15746 def CheckPrereq(self):
15747 """Check prerequisites.
15750 if self.op.kind == constants.TAG_CLUSTER:
15751 self.target = self.cfg.GetClusterInfo()
15752 elif self.op.kind == constants.TAG_NODE:
15753 self.target = self.cfg.GetNodeInfo(self.op.name)
15754 elif self.op.kind == constants.TAG_INSTANCE:
15755 self.target = self.cfg.GetInstanceInfo(self.op.name)
15756 elif self.op.kind == constants.TAG_NODEGROUP:
15757 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15758 elif self.op.kind == constants.TAG_NETWORK:
15759 self.target = self.cfg.GetNetwork(self.network_uuid)
15761 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15762 str(self.op.kind), errors.ECODE_INVAL)
15765 class LUTagsGet(TagsLU):
15766 """Returns the tags of a given object.
15771 def ExpandNames(self):
15772 TagsLU.ExpandNames(self)
15774 # Share locks as this is only a read operation
15775 self.share_locks = _ShareAll()
15777 def Exec(self, feedback_fn):
15778 """Returns the tag list.
15781 return list(self.target.GetTags())
15784 class LUTagsSearch(NoHooksLU):
15785 """Searches the tags for a given pattern.
15790 def ExpandNames(self):
15791 self.needed_locks = {}
15793 def CheckPrereq(self):
15794 """Check prerequisites.
15796 This checks the pattern passed for validity by compiling it.
15800 self.re = re.compile(self.op.pattern)
15801 except re.error, err:
15802 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15803 (self.op.pattern, err), errors.ECODE_INVAL)
15805 def Exec(self, feedback_fn):
15806 """Returns the tag list.
15810 tgts = [("/cluster", cfg.GetClusterInfo())]
15811 ilist = cfg.GetAllInstancesInfo().values()
15812 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15813 nlist = cfg.GetAllNodesInfo().values()
15814 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15815 tgts.extend(("/nodegroup/%s" % n.name, n)
15816 for n in cfg.GetAllNodeGroupsInfo().values())
15818 for path, target in tgts:
15819 for tag in target.GetTags():
15820 if self.re.search(tag):
15821 results.append((path, tag))
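# Each result is a (path, tag) pair, e.g. ("/instances/inst1.example.com",
# "mytag") for a hypothetical instance named "inst1.example.com".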
15825 class LUTagsSet(TagsLU):
15826 """Sets a tag on a given object.
15831 def CheckPrereq(self):
15832 """Check prerequisites.
15834 This checks the type and length of the tag name and value.
15837 TagsLU.CheckPrereq(self)
15838 for tag in self.op.tags:
15839 objects.TaggableObject.ValidateTag(tag)
15841 def Exec(self, feedback_fn):
15846 for tag in self.op.tags:
15847 self.target.AddTag(tag)
15848 except errors.TagError, err:
15849 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15850 self.cfg.Update(self.target, feedback_fn)
15853 class LUTagsDel(TagsLU):
15854 """Delete a list of tags from a given object.
15859 def CheckPrereq(self):
15860 """Check prerequisites.
15862 This checks that we have the given tag.
15865 TagsLU.CheckPrereq(self)
15866 for tag in self.op.tags:
15867 objects.TaggableObject.ValidateTag(tag)
15868 del_tags = frozenset(self.op.tags)
15869 cur_tags = self.target.GetTags()
15871 diff_tags = del_tags - cur_tags
15873 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15874 raise errors.OpPrereqError("Tag(s) %s not found" %
15875 (utils.CommaJoin(diff_names), ),
15876 errors.ECODE_NOENT)
15878 def Exec(self, feedback_fn):
15879 """Remove the tag from the object.
15882 for tag in self.op.tags:
15883 self.target.RemoveTag(tag)
15884 self.cfg.Update(self.target, feedback_fn)
15887 class LUTestDelay(NoHooksLU):
15888 """Sleep for a specified amount of time.
15890 This LU sleeps on the master and/or nodes for a specified amount of
15896 def ExpandNames(self):
15897 """Expand names and set required locks.
15899 This expands the node list, if any.
15902 self.needed_locks = {}
15903 if self.op.on_nodes:
15904 # _GetWantedNodes can be used here, but is not always appropriate to use
15905 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15906 # more information.
15907 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15908 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15910 def _TestDelay(self):
15911 """Do the actual sleep.
15914 if self.op.on_master:
15915 if not utils.TestDelay(self.op.duration):
15916 raise errors.OpExecError("Error during master delay test")
15917 if self.op.on_nodes:
15918 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15919 for node, node_result in result.items():
15920 node_result.Raise("Failure during rpc call to node %s" % node)
15922 def Exec(self, feedback_fn):
15923 """Execute the test delay opcode, with the wanted repetitions.
15926 if self.op.repeat == 0:
15929 top_value = self.op.repeat - 1
15930 for i in range(self.op.repeat):
15931 self.LogInfo("Test delay iteration %d/%d", i, top_value)
15935 class LURestrictedCommand(NoHooksLU):
15936 """Logical unit for executing restricted commands.
15941 def ExpandNames(self):
15943 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
15945 self.needed_locks = {
15946 locking.LEVEL_NODE: self.op.nodes,
15948 self.share_locks = {
15949 locking.LEVEL_NODE: not self.op.use_locking,
15952 def CheckPrereq(self):
15953 """Check prerequisites.
15957 def Exec(self, feedback_fn):
15958 """Execute restricted command and return output.
15961 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
15963 # Check if correct locks are held
15964 assert set(self.op.nodes).issubset(owned_nodes)
15966 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
15970 for node_name in self.op.nodes:
15971 nres = rpcres[node_name]
15973 msg = ("Command '%s' on node '%s' failed: %s" %
15974 (self.op.command, node_name, nres.fail_msg))
15975 result.append((False, msg))
15977 result.append((True, nres.payload))
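# Each entry is (True, command output) on success or (False, error message)
# on failure, in the same order as self.op.nodes.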
15982 class LUTestJqueue(NoHooksLU):
15983 """Utility LU to test some aspects of the job queue.
15988 # Must be lower than default timeout for WaitForJobChange to see whether it
15989 # notices changed jobs
15990 _CLIENT_CONNECT_TIMEOUT = 20.0
15991 _CLIENT_CONFIRM_TIMEOUT = 60.0
15994 def _NotifyUsingSocket(cls, cb, errcls):
15995 """Opens a Unix socket and waits for another program to connect.
15998 @param cb: Callback to send socket name to client
15999 @type errcls: class
16000 @param errcls: Exception class to use for errors
16003 # Using a temporary directory as there's no easy way to create temporary
16004 # sockets without writing a custom loop around tempfile.mktemp and
16006 tmpdir = tempfile.mkdtemp()
16008 tmpsock = utils.PathJoin(tmpdir, "sock")
16010 logging.debug("Creating temporary socket at %s", tmpsock)
16011 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
16016 # Send details to client
16019 # Wait for client to connect before continuing
16020 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
16022 (conn, _) = sock.accept()
16023 except socket.error, err:
16024 raise errcls("Client didn't connect in time (%s)" % err)
16028 # Remove as soon as client is connected
16029 shutil.rmtree(tmpdir)
16031 # Wait for client to close
16034 # pylint: disable=E1101
16035 # Instance of '_socketobject' has no ... member
16036 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
16038 except socket.error, err:
16039 raise errcls("Client failed to confirm notification (%s)" % err)
16043 def _SendNotification(self, test, arg, sockname):
16044 """Sends a notification to the client.
16047 @param test: Test name
16048 @param arg: Test argument (depends on test)
16049 @type sockname: string
16050 @param sockname: Socket path
16053 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
16055 def _Notify(self, prereq, test, arg):
16056 """Notifies the client of a test.
16059 @param prereq: Whether this is a prereq-phase test
16061 @param test: Test name
16062 @param arg: Test argument (depends on test)
16066 errcls = errors.OpPrereqError
16068 errcls = errors.OpExecError
16070 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
16074 def CheckArguments(self):
16075 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
16076 self.expandnames_calls = 0
16078 def ExpandNames(self):
16079 checkargs_calls = getattr(self, "checkargs_calls", 0)
16080 if checkargs_calls < 1:
16081 raise errors.ProgrammerError("CheckArguments was not called")
16083 self.expandnames_calls += 1
16085 if self.op.notify_waitlock:
16086 self._Notify(True, constants.JQT_EXPANDNAMES, None)
16088 self.LogInfo("Expanding names")
16090 # Get lock on master node (just to get a lock, not for a particular reason)
16091 self.needed_locks = {
16092 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
16095 def Exec(self, feedback_fn):
16096 if self.expandnames_calls < 1:
16097 raise errors.ProgrammerError("ExpandNames was not called")
16099 if self.op.notify_exec:
16100 self._Notify(False, constants.JQT_EXEC, None)
16102 self.LogInfo("Executing")
16104 if self.op.log_messages:
16105 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
16106 for idx, msg in enumerate(self.op.log_messages):
16107 self.LogInfo("Sending log message %s", idx + 1)
16108 feedback_fn(constants.JQT_MSGPREFIX + msg)
16109 # Report how many test messages have been sent
16110 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
16113 raise errors.OpExecError("Opcode failure was requested")
16118 class LUTestAllocator(NoHooksLU):
16119 """Run allocator tests.
16121 This LU runs the allocator tests
16124 def CheckPrereq(self):
16125 """Check prerequisites.
16127 This checks the opcode parameters depending on the direction and mode of the test.
16130 if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
16131 constants.IALLOCATOR_MODE_MULTI_ALLOC):
16132 for attr in ["memory", "disks", "disk_template",
16133 "os", "tags", "nics", "vcpus"]:
16134 if not hasattr(self.op, attr):
16135 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
16136 attr, errors.ECODE_INVAL)
16137 iname = self.cfg.ExpandInstanceName(self.op.name)
16138 if iname is not None:
16139 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
16140 iname, errors.ECODE_EXISTS)
16141 if not isinstance(self.op.nics, list):
16142 raise errors.OpPrereqError("Invalid parameter 'nics'",
16143 errors.ECODE_INVAL)
16144 if not isinstance(self.op.disks, list):
16145 raise errors.OpPrereqError("Invalid parameter 'disks'",
16146 errors.ECODE_INVAL)
16147 for row in self.op.disks:
16148 if (not isinstance(row, dict) or
16149 constants.IDISK_SIZE not in row or
16150 not isinstance(row[constants.IDISK_SIZE], int) or
16151 constants.IDISK_MODE not in row or
16152 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
16153 raise errors.OpPrereqError("Invalid contents of the 'disks'"
16154 " parameter", errors.ECODE_INVAL)
16155 if self.op.hypervisor is None:
16156 self.op.hypervisor = self.cfg.GetHypervisorType()
16157 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16158 fname = _ExpandInstanceName(self.cfg, self.op.name)
16159 self.op.name = fname
16160 self.relocate_from = \
16161 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
16162 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
16163 constants.IALLOCATOR_MODE_NODE_EVAC):
16164 if not self.op.instances:
16165 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
16166 self.op.instances = _GetWantedInstances(self, self.op.instances)
16168 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
16169 self.op.mode, errors.ECODE_INVAL)
16171 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
16172 if self.op.iallocator is None:
16173 raise errors.OpPrereqError("Missing allocator name",
16174 errors.ECODE_INVAL)
16175 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
16176 raise errors.OpPrereqError("Wrong allocator test '%s'" %
16177 self.op.direction, errors.ECODE_INVAL)
16179 def Exec(self, feedback_fn):
16180 """Run the allocator test.
16183 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
16184 req = iallocator.IAReqInstanceAlloc(name=self.op.name,
16185 memory=self.op.memory,
16186 disks=self.op.disks,
16187 disk_template=self.op.disk_template,
16191 vcpus=self.op.vcpus,
16192 spindle_use=self.op.spindle_use,
16193 hypervisor=self.op.hypervisor,
16194 node_whitelist=None)
16195 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
16196 req = iallocator.IAReqRelocate(name=self.op.name,
16197 relocate_from=list(self.relocate_from))
16198 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
16199 req = iallocator.IAReqGroupChange(instances=self.op.instances,
16200 target_groups=self.op.target_groups)
16201 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
16202 req = iallocator.IAReqNodeEvac(instances=self.op.instances,
16203 evac_mode=self.op.evac_mode)
16204 elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
16205 disk_template = self.op.disk_template
16206 insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
16207 memory=self.op.memory,
16208 disks=self.op.disks,
16209 disk_template=disk_template,
16213 vcpus=self.op.vcpus,
16214 spindle_use=self.op.spindle_use,
16215 hypervisor=self.op.hypervisor)
16216 for idx in range(self.op.count)]
16217 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
16219 raise errors.ProgrammerError("Unhandled mode %s in"
16220 " LUTestAllocator.Exec", self.op.mode)
16222 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
16223 if self.op.direction == constants.IALLOCATOR_DIR_IN:
16224 result = ial.in_text
16226 ial.Run(self.op.iallocator, validate=False)
16227 result = ial.out_text
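# For the "in" direction only the generated allocator request text is
# returned; for "out" the allocator has actually been run and its response
# text is returned instead.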
class LUNetworkAdd(LogicalUnit):
  """Logical unit for creating networks.

  """
  HPATH = "network-add"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckArguments(self):
    if self.op.mac_prefix:
      self.op.mac_prefix = \
        utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

  def ExpandNames(self):
    self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())

    if self.op.conflicts_check:
      self.share_locks[locking.LEVEL_NODE] = 1
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
        }
    else:
      self.needed_locks = {}

    self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid

  def CheckPrereq(self):
    if self.op.network is None:
      raise errors.OpPrereqError("Network must be given",
                                 errors.ECODE_INVAL)

    try:
      existing_uuid = self.cfg.LookupNetwork(self.op.network_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired network name '%s' already exists as a"
                                 " network (UUID: %s)" %
                                 (self.op.network_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.op.network,
      "gateway": self.op.gateway,
      "network6": self.op.network6,
      "gateway6": self.op.gateway6,
      "mac_prefix": self.op.mac_prefix,
      "tags": self.op.tags,
      }
    return _BuildNetworkHookEnv(**args)  # pylint: disable=W0142

  def Exec(self, feedback_fn):
    """Add the ip pool to the cluster.

    """
    nobj = objects.Network(name=self.op.network_name,
                           network=self.op.network,
                           gateway=self.op.gateway,
                           network6=self.op.network6,
                           gateway6=self.op.gateway6,
                           mac_prefix=self.op.mac_prefix,
                           uuid=self.network_uuid)
    # Initialize the associated address pool
    try:
      pool = network.AddressPool.InitializeNetwork(nobj)
    except errors.AddressPoolError, err:
      raise errors.OpExecError("Cannot create IP address pool for network"
                               " '%s': %s" % (self.op.network_name, err))

    # Check if we need to reserve the nodes and the cluster master IP
    # These may not be allocated to any instances in routed mode, as
    # they wouldn't function anyway.
    if self.op.conflicts_check:
      for node in self.cfg.GetAllNodesInfo().values():
        for ip in [node.primary_ip, node.secondary_ip]:
          try:
            if pool.Contains(ip):
              pool.Reserve(ip)
              self.LogInfo("Reserved IP address of node '%s' (%s)",
                           node.name, ip)
          except errors.AddressPoolError, err:
            self.LogWarning("Cannot reserve IP address '%s' of node '%s': %s",
                            ip, node.name, err)

      master_ip = self.cfg.GetClusterInfo().master_ip
      try:
        if pool.Contains(master_ip):
          pool.Reserve(master_ip)
          self.LogInfo("Reserved cluster master IP address (%s)", master_ip)
      except errors.AddressPoolError, err:
        self.LogWarning("Cannot reserve cluster master IP address (%s): %s",
                        master_ip, err)

    # User-requested reservations are marked as external so they are never
    # handed out automatically
    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          raise errors.OpExecError("Cannot reserve IP address '%s': %s" %
                                   (ip, err))

    if self.op.tags:
      for tag in self.op.tags:
        nobj.AddTag(tag)

    self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NETWORK]
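
# Illustrative only: from the client side this LU is reached through the
# corresponding opcode (OpNetworkAdd in opcodes.py); the field names below
# mirror the self.op attributes used above, the values are made up.
#
#   opcodes.OpNetworkAdd(network_name="vlan100",
#                        network="192.0.2.0/24",
#                        gateway="192.0.2.1",
#                        add_reserved_ips=["192.0.2.10"],
#                        conflicts_check=True)
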
class LUNetworkRemove(LogicalUnit):
  HPATH = "network-remove"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.share_locks[locking.LEVEL_NODEGROUP] = 1
    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given network exists and that it is not connected
    to any node group.

    """
    # Verify that the network is not connected.
    node_groups = [group.name
                   for group in self.cfg.GetAllNodeGroupsInfo().values()
                   if self.network_uuid in group.networks]

    if node_groups:
      self.LogWarning("Network '%s' is connected to the following"
                      " node groups: %s" %
                      (self.op.network_name,
                       utils.CommaJoin(utils.NiceSort(node_groups))))
      raise errors.OpPrereqError("Network still connected", errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "NETWORK_NAME": self.op.network_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the network.

    """
    try:
      self.cfg.RemoveNetwork(self.network_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, self.network_uuid))


class LUNetworkSetParams(LogicalUnit):
  """Modifies the parameters of a network.

  """
  HPATH = "network-modify"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def CheckArguments(self):
    if (self.op.gateway and
        (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
      raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                                 " at once", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)

    self.needed_locks = {
      locking.LEVEL_NETWORK: [self.network_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.network = self.cfg.GetNetwork(self.network_uuid)
    self.gateway = self.network.gateway
    self.mac_prefix = self.network.mac_prefix
    self.network6 = self.network.network6
    self.gateway6 = self.network.gateway6
    self.tags = self.network.tags

    self.pool = network.AddressPool(self.network)

    if self.op.gateway:
      if self.op.gateway == constants.VALUE_NONE:
        self.gateway = None
      else:
        self.gateway = self.op.gateway
        if self.pool.IsReserved(self.gateway):
          raise errors.OpPrereqError("Gateway IP address '%s' is already"
                                     " reserved" % self.gateway,
                                     errors.ECODE_STATE)

    if self.op.mac_prefix:
      if self.op.mac_prefix == constants.VALUE_NONE:
        self.mac_prefix = None
      else:
        self.mac_prefix = \
          utils.NormalizeAndValidateThreeOctetMacPrefix(self.op.mac_prefix)

    if self.op.gateway6:
      if self.op.gateway6 == constants.VALUE_NONE:
        self.gateway6 = None
      else:
        self.gateway6 = self.op.gateway6

    if self.op.network6:
      if self.op.network6 == constants.VALUE_NONE:
        self.network6 = None
      else:
        self.network6 = self.op.network6

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    args = {
      "name": self.op.network_name,
      "subnet": self.network.network,
      "gateway": self.gateway,
      "network6": self.network6,
      "gateway6": self.gateway6,
      "mac_prefix": self.mac_prefix,
      "tags": self.tags,
      }
    return _BuildNetworkHookEnv(**args)  # pylint: disable=W0142

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the network.

    """
    #TODO: reserve/release via temporary reservation manager
    # extend cfg.ReserveIp/ReleaseIp with the external flag
    if self.op.gateway:
      if self.gateway == self.network.gateway:
        self.LogWarning("Gateway is already %s", self.gateway)
      else:
        if self.gateway:
          self.pool.Reserve(self.gateway, external=True)
        if self.network.gateway:
          self.pool.Release(self.network.gateway, external=True)
        self.network.gateway = self.gateway

    if self.op.add_reserved_ips:
      for ip in self.op.add_reserved_ips:
        try:
          if self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already reserved", ip)
          else:
            self.pool.Reserve(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot reserve IP address %s: %s", ip, err)

    if self.op.remove_reserved_ips:
      for ip in self.op.remove_reserved_ips:
        if ip == self.network.gateway:
          self.LogWarning("Cannot unreserve Gateway's IP")
          continue
        try:
          if not self.pool.IsReserved(ip):
            self.LogWarning("IP address %s is already unreserved", ip)
          else:
            self.pool.Release(ip, external=True)
        except errors.AddressPoolError, err:
          self.LogWarning("Cannot release IP address %s: %s", ip, err)

    if self.op.mac_prefix:
      self.network.mac_prefix = self.mac_prefix

    if self.op.network6:
      self.network.network6 = self.network6

    if self.op.gateway6:
      self.network.gateway6 = self.gateway6

    self.pool.Validate()

    self.cfg.Update(self.network, feedback_fn)
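
# Illustrative only: gateway changes and reserved-IP changes are mutually
# exclusive (see CheckArguments above), so a typical modification via the
# corresponding opcode touches one of them at a time, e.g.:
#
#   opcodes.OpNetworkSetParams(network_name="vlan100",
#                              add_reserved_ips=["192.0.2.34"])
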
class _NetworkQuery(_QueryBase):
  FIELDS = query.NETWORK_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    self.do_locking = self.use_locking

    all_networks = lu.cfg.GetAllNetworksInfo()
    name_to_uuid = dict((n.name, n.uuid) for n in all_networks.values())

    if self.names:
      missing = []
      self.wanted = []

      for name in self.names:
        if name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
                                   errors.ECODE_NOENT)
    else:
      self.wanted = locking.ALL_SET

    if self.do_locking:
      lu.needed_locks[locking.LEVEL_NETWORK] = self.wanted
      if query.NETQ_INST in self.requested_data:
        lu.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      if query.NETQ_GROUP in self.requested_data:
        lu.needed_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of networks and their attributes.

    """
    all_networks = lu.cfg.GetAllNetworksInfo()

    network_uuids = self._GetNames(lu, all_networks.keys(),
                                   locking.LEVEL_NETWORK)

    do_instances = query.NETQ_INST in self.requested_data
    do_groups = query.NETQ_GROUP in self.requested_data

    network_to_instances = None
    network_to_groups = None

    # For NETQ_GROUP, we need to map network->[groups]
    if do_groups:
      all_groups = lu.cfg.GetAllNodeGroupsInfo()
      network_to_groups = dict((uuid, []) for uuid in network_uuids)
      for _, group in all_groups.iteritems():
        for net_uuid in network_uuids:
          netparams = group.networks.get(net_uuid, None)
          if netparams:
            info = (group.name, netparams[constants.NIC_MODE],
                    netparams[constants.NIC_LINK])

            network_to_groups[net_uuid].append(info)

    if do_instances:
      all_instances = lu.cfg.GetAllInstancesInfo()
      network_to_instances = dict((uuid, []) for uuid in network_uuids)
      for instance in all_instances.values():
        for nic in instance.nics:
          if nic.network in network_uuids:
            network_to_instances[nic.network].append(instance.name)
            break

    if query.NETQ_STATS in self.requested_data:
      stats = \
        dict((uuid,
              self._GetStats(network.AddressPool(all_networks[uuid])))
             for uuid in network_uuids)
    else:
      stats = None

    return query.NetworkQueryData([all_networks[uuid]
                                   for uuid in network_uuids],
                                  network_to_groups,
                                  network_to_instances,
                                  stats)

  @staticmethod
  def _GetStats(pool):
    """Returns statistics for a network address pool.

    """
    return {
      "free_count": pool.GetFreeCount(),
      "reserved_count": pool.GetReservedCount(),
      "map": pool.GetMap(),
      "external_reservations":
        utils.CommaJoin(pool.GetExternalReservations()),
      }
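
# For reference, _GetStats returns a plain dict per network; an illustrative
# (made-up) result for a small pool with one externally reserved address:
#
#   {"free_count": 4, "reserved_count": 4, "map": "XX..X..X",
#    "external_reservations": "192.0.2.1"}
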
class LUNetworkQuery(NoHooksLU):
  """Logical unit for querying networks.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
                            self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNetworkConnect(LogicalUnit):
  """Connect a network to a nodegroup

  """
  HPATH = "network-connect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name
    self.network_mode = self.op.network_mode
    self.network_link = self.op.network_link

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.op.conflicts_check:
      self.needed_locks[locking.LEVEL_NETWORK] = [self.network_uuid]
      self.share_locks[locking.LEVEL_NETWORK] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      if self.op.conflicts_check:
        self.needed_locks[locking.LEVEL_INSTANCE] = \
          self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      "GROUP_NETWORK_MODE": self.network_mode,
      "GROUP_NETWORK_LINK": self.network_link,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    if self.op.conflicts_check:
      _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.netparams = {
      constants.NIC_MODE: self.network_mode,
      constants.NIC_LINK: self.network_link,
      }
    objects.NIC.CheckParameterSyntax(self.netparams)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    #if self.network_mode == constants.NIC_MODE_BRIDGED:
    #  _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
    self.connected = False
    if self.network_uuid in self.group.networks:
      self.LogWarning("Network '%s' is already mapped to group '%s'" %
                      (self.network_name, self.group.name))
      self.connected = True

    # check only if not already connected
    elif self.op.conflicts_check:
      pool = network.AddressPool(self.cfg.GetNetwork(self.network_uuid))

      _NetworkConflictCheck(self, lambda nic: pool.Contains(nic.ip),
                            "connect to", owned_instances)

  def Exec(self, feedback_fn):
    # Connect the network and update the group only if not already connected
    if not self.connected:
      self.group.networks[self.network_uuid] = self.netparams
      self.cfg.Update(self.group, feedback_fn)


def _NetworkConflictCheck(lu, check_fn, action, instances):
  """Checks for network interface conflicts with a network.

  @type lu: L{LogicalUnit}
  @type check_fn: callable receiving one parameter (L{objects.NIC}) and
    returning boolean
  @param check_fn: Function checking for conflict
  @type action: string
  @param action: Part of error message (see code)
  @raise errors.OpPrereqError: If conflicting IP addresses are found.

  """
  conflicts = []

  for (_, instance) in lu.cfg.GetMultiInstanceInfo(instances):
    instconflicts = [(idx, nic.ip)
                     for (idx, nic) in enumerate(instance.nics)
                     if check_fn(nic)]

    if instconflicts:
      conflicts.append((instance.name, instconflicts))

  if conflicts:
    lu.LogWarning("IP addresses from network '%s', which is about to %s"
                  " node group '%s', are in use: %s" %
                  (lu.network_name, action, lu.group.name,
                   utils.CommaJoin(("%s: %s" %
                                    (name, _FmtNetworkConflict(details)))
                                   for (name, details) in conflicts)))

    raise errors.OpPrereqError("Conflicting IP addresses found;"
                               " remove/modify the corresponding network"
                               " interfaces", errors.ECODE_STATE)


def _FmtNetworkConflict(details):
  """Utility for L{_NetworkConflictCheck}.

  """
  return utils.CommaJoin("nic%s/%s" % (idx, ipaddr)
                         for (idx, ipaddr) in details)
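
# For illustration, a details list of [(0, "198.51.100.10"), (2, "198.51.100.12")]
# is rendered by _FmtNetworkConflict as "nic0/198.51.100.10, nic2/198.51.100.12".
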
class LUNetworkDisconnect(LogicalUnit):
  """Disconnect a network from a nodegroup

  """
  HPATH = "network-disconnect"
  HTYPE = constants.HTYPE_NETWORK
  REQ_BGL = False

  def ExpandNames(self):
    self.network_name = self.op.network_name
    self.group_name = self.op.group_name

    self.network_uuid = self.cfg.LookupNetwork(self.network_name)
    self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  def BuildHooksEnv(self):
    ret = {
      "GROUP_NAME": self.group_name,
      }
    return ret

  def BuildHooksNodes(self):
    nodes = self.cfg.GetNodeGroup(self.group_uuid).members
    return (nodes, nodes)

  def CheckPrereq(self):
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    self.connected = True
    if self.network_uuid not in self.group.networks:
      self.LogWarning("Network '%s' is not mapped to group '%s'",
                      self.network_name, self.group.name)
      self.connected = False

    # This check is only needed while the network is still connected
    else:
      _NetworkConflictCheck(self, lambda nic: nic.network == self.network_uuid,
                            "disconnect from", owned_instances)

  def Exec(self, feedback_fn):
    # Disconnect the network and update the group only if network is connected
    if self.connected:
      del self.group.networks[self.network_uuid]
      self.cfg.Update(self.group, feedback_fn)


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NETWORK: _NetworkQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Every query resource reachable via an opcode must have an implementation
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
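
# For example, _GetQueryImplementation(constants.QR_NETWORK) returns the
# _NetworkQuery class defined above, while an unknown resource name raises
# OpPrereqError rather than leaking a KeyError.
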
def _CheckForConflictingIp(lu, ip, node):
  """Raise an error in case of a conflicting IP address.

  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name

  """
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conf_net is not None:
    raise errors.OpPrereqError(("Conflicting IP address found: '%s' != '%s'" %
                                (ip, conf_net)),
                               errors.ECODE_STATE)

  return (None, None)